<a href="https://colab.research.google.com/github/mohanseetha/machine-learning/blob/main/image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [5]:
# Preprocessing shared by the train and test splits: convert each image to a
# tensor, then normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [6]:
# CIFAR-10 train/test splits, stored under ./data and passed through the
# shared `transform` pipeline.
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Only the training split is shuffled. Evaluation does not benefit from a
# random order, and a fixed order keeps per-batch results reproducible.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

100%|██████████| 170M/170M [00:02<00:00, 76.4MB/s]


In [9]:
# Pull the first (image, label) pair from the training set and display the
# image tensor's shape as a quick sanity check of the preprocessing pipeline.
img, label = train_data[0]
img.shape

torch.Size([3, 32, 32])

In [12]:
# Label names exposed by the dataset; indexed later with the predicted class
# index to print a readable prediction.
class_names = train_data.classes
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [14]:
class NeuralNet(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Two conv -> ReLU -> max-pool stages are followed by three fully connected
    layers. The first linear layer expects 24 * 5 * 5 features, which is what
    a 32x32 input is reduced to (32 -> 28 -> 14 -> 10 -> 5 per spatial side).

    Args:
        num_classes: Number of output logits. Defaults to 10, so existing
            ``NeuralNet()`` calls and saved state dicts keep working.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes=10):
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        super().__init__()

        # Attribute names and creation order are kept stable so that
        # state_dict keys remain compatible with previously saved weights.
        self.conv1 = nn.Conv2d(3, 12, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(12, 24, 5)
        self.fc1 = nn.Linear(24 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of images.

        Args:
            x: Batch of images; the layer sizes above imply a
                (batch, 3, 32, 32) tensor.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized scores.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension for the linear layers.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No softmax here: the final layer's output is returned as-is.
        x = self.fc3(x)
        return x


In [15]:
# Model, objective and optimizer used by the training loop.
cnn = NeuralNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=cnn.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [22]:
num_epochs = 50

# NOTE: `cnn` and `optimizer` are created in an earlier cell, so re-running
# this cell continues training from the current weights instead of restarting.

# Explicitly select training mode so this cell behaves the same regardless of
# any earlier .eval() call on the model.
cnn.train()

for epoch in range(num_epochs):
    print(f"Training epoch {epoch + 1}/{num_epochs} ", end="")
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        outputs = cnn(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Loss: {running_loss / len(train_loader):.4f}")

print("Training Completed")

Training epoch 1/50 Loss: 0.4411
Training epoch 2/50 Loss: 0.4219
Training epoch 3/50 Loss: 0.4040
Training epoch 4/50 Loss: 0.3832
Training epoch 5/50 Loss: 0.3698
Training epoch 6/50 Loss: 0.3492
Training epoch 7/50 Loss: 0.3323
Training epoch 8/50 Loss: 0.3139
Training epoch 9/50 Loss: 0.3041
Training epoch 10/50 Loss: 0.2846
Training epoch 11/50 Loss: 0.2757
Training epoch 12/50 Loss: 0.2649
Training epoch 13/50 Loss: 0.2455
Training epoch 14/50 Loss: 0.2346
Training epoch 15/50 Loss: 0.2236
Training epoch 16/50 Loss: 0.2114
Training epoch 17/50 Loss: 0.1954
Training epoch 18/50 Loss: 0.1955
Training epoch 19/50 Loss: 0.1872
Training epoch 20/50 Loss: 0.1731
Training epoch 21/50 Loss: 0.1621
Training epoch 22/50 Loss: 0.1635
Training epoch 23/50 Loss: 0.1671
Training epoch 24/50 Loss: 0.1475
Training epoch 25/50 Loss: 0.1368
Training epoch 26/50 Loss: 0.1307
Training epoch 27/50 Loss: 0.1285
Training epoch 28/50 Loss: 0.1332
Training epoch 29/50 Loss: 0.1315
Training epoch 30/50 Lo

In [23]:
# Persist only the model's state_dict (its parameters) to 'cnn.pth'; the
# NeuralNet class itself must be available again when loading.
torch.save(cnn.state_dict(), 'cnn.pth')

In [24]:
# Build a fresh model instance and restore the weights saved in 'cnn.pth'.
# NOTE(review): torch.load deserializes the file contents, so only load
# checkpoints that come from a trusted source.
net = NeuralNet()
net.load_state_dict(torch.load('cnn.pth'))

<All keys matched successfully>

In [25]:
# Top-1 accuracy of the restored model over the whole test loader.
net.eval()

correct, total = 0, 0

with torch.no_grad():  # inference only, no gradient tracking needed
    for images, labels in test_loader:
        outputs = net(images)
        # Index of the largest logit along the class dimension.
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += torch.eq(predicted, labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")

Accuracy: 65.73%


In [26]:
# Preprocessing for arbitrary user images: resize to the 32x32 input size the
# network was built for, then apply the same tensor conversion and
# normalization used for the dataset.
new_transform = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

def load_image(img_path):
    """Load an image file and turn it into a single-item batch for the model.

    Args:
        img_path: Path to the image file, as accepted by ``PIL.Image.open``.

    Returns:
        The output of ``new_transform`` with an extra leading batch dimension.
    """
    # Close the file handle deterministically, and force three RGB channels:
    # formats such as WebP/PNG may carry an alpha channel or be
    # grayscale/palettized, which would not match the 3-channel normalization
    # and the network's first convolution.
    with Image.open(img_path) as source:
        rgb_image = source.convert('RGB')

    tensor = new_transform(rgb_image)
    # Add the batch dimension the network expects.
    return tensor.unsqueeze(0)

# Classify a few local image files with the restored model.
img_paths = ['img1.webp', 'img2.jpg', 'img3.webp']
images = list(map(load_image, img_paths))

net.eval()
with torch.no_grad():
    for img in images:
        outputs = net(img)
        # Each `img` is a batch of one, so `predicted` holds a single index.
        predicted = outputs.max(dim=1).indices
        print(f"Prediction: {class_names[predicted.item()]}")


Prediction: dog
Prediction: bird
Prediction: airplane
