In [None]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimm

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

This code converts images into tensors and normalizes the pixel values of all three RGB channels to the range [-1, 1].

In [None]:
 # Preprocessing applied to every CIFAR-10 image: convert to a tensor, then
 # compute (x - 0.5) / 0.5 independently on each of the three colour channels.
 transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
 ])

This code downloads the CIFAR-10 dataset, applies the transform to images, and then loads them into mini-batches using `DataLoader` for training (`batch_size=4`, shuffled) and testing (`batch_size=32`, shuffled).

In [None]:
# Both CIFAR-10 splits are downloaded into ./data (if not already present)
# and share the same `transform` preprocessing.
train_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch iterators: 4 samples per training batch, 32 per test batch.
# Both are shuffled and each uses 2 worker processes.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

100%|██████████| 170M/170M [00:10<00:00, 15.7MB/s]


In [None]:
# Take the first training example (an image/label pair) so it can be inspected.
image, label = train_data[0]

In [None]:
# Dimensions of a single preprocessed sample.
image.shape

torch.Size([3, 32, 32])

This code defines the human-readable names of the 10 CIFAR-10 categories, ordered so that each name's position matches the dataset's integer class label.

In [None]:
# Category names for display; the entry at index i is used as the name of
# predicted class i when printing predictions.
class_names = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

This code builds a CNN that extracts features through three conv-batchnorm-ReLU-pool layers, flattens them, and classifies into 10 classes using fully connected layers with dropout.

In [None]:
class NeuralNet(nn.Module):
    """Small CNN for 32x32 RGB images.

    Three conv -> batch-norm -> ReLU -> 2x2 max-pool stages (32, 64 and 128
    channels) are followed by three fully connected layers with dropout
    after the first two.

    Args:
        num_classes: Number of output scores per image. Defaults to 10,
            which reproduces the original fixed-size classifier head.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        # Convolution layers (3x3 kernels, padding=1 keeps the spatial size)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # Pooling layer, shared by all three stages; halves height and width
        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 256)  # 32x32 -> 4x4 after pooling 3 times
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 32, 32); fc1 is sized for exactly
                this spatial resolution.

        Returns:
            Tensor of shape (batch, num_classes) holding raw, un-normalized
            scores (no softmax is applied here).
        """
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in stages:
            x = self.pool(F.relu(bn(conv(x))))

        # Keep the batch dimension, flatten channels and spatial positions.
        x = torch.flatten(x, 1)
        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout2(F.relu(self.fc2(x)))
        return self.fc3(x)

This code initializes the CNN model, sets cross-entropy as the loss function, and configures SGD with learning rate 0.001 and momentum 0.9 as the optimizer.

In [None]:
# Freshly initialised network that the training loop will optimise.
net = NeuralNet()

# Cross-entropy between the network's raw class scores and the integer labels.
loss_function = nn.CrossEntropyLoss()

# SGD with momentum over all network parameters
# (`optimm` is this notebook's alias for torch.optim).
optimizer = optimm.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)

This code trains the network for 8 epochs by looping over training batches, computing loss, backpropagating, updating weights, and printing the average loss per epoch.

In [None]:
# Make sure dropout and batch norm are in training mode. A new module starts
# that way, but this cell may be re-run after net.eval() was called elsewhere.
net.train()

for epoch in range(8):
  print(f'Epoch: {epoch}')

  # Sum of per-batch losses for this epoch; averaged over batches below.
  running_loss = 0.0
  for inputs, labels in train_loader:
    # Gradients accumulate across backward() calls, so clear them per batch.
    optimizer.zero_grad()

    outputs = net(inputs)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  # NOTE(review): ':4f' means minimum width 4 with the default 6 decimals;
  # '.4f' may have been intended. Left unchanged to match recorded output.
  print(f'Loss: {running_loss / len(train_loader):4f}')

Epoch: 0
Loss: 1.671723
Epoch: 1
Loss: 1.327918
Epoch: 2
Loss: 1.152655
Epoch: 3
Loss: 1.026143
Epoch: 4
Loss: 0.921154
Epoch: 5
Loss: 0.841139
Epoch: 6
Loss: 0.778164
Epoch: 7
Loss: 0.724224


In [None]:
# Persist the model's state_dict (not the module object itself) to disk.
torch.save(net.state_dict(), 'trained_net.pth')

In [None]:
# Rebuild the architecture from scratch, then overwrite its weights with the
# checkpoint written to 'trained_net.pth'.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you
# trust; consider weights_only=True if the installed PyTorch supports it.
net = NeuralNet()
net.load_state_dict(torch.load('trained_net.pth'))

<All keys matched successfully>

This code evaluates the trained model on the test set by comparing predictions with true labels and computes the overall classification accuracy.

In [None]:
correct = 0
total = 0

# Evaluation mode: dropout is disabled and batch norm uses running statistics.
net.eval()

# Gradients are not needed for scoring, so skip building the autograd graph.
with torch.no_grad():
  for images, labels in test_loader:
    outputs = net(images)
    # Index of the highest score per row is the predicted class. argmax
    # avoids the legacy `.data` access and does not overwrite `_`, which
    # IPython uses for the last cell output.
    predicted = outputs.argmax(dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

# Percentage of test images whose predicted class equals the true label.
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}')

Accuracy: 75.62


This code loads and preprocesses custom images, passes them through the trained model in evaluation mode, and prints the predicted class labels.

In [None]:
# Preprocessing for user-supplied images: resize to the 32x32 resolution the
# network expects, convert to a tensor, then normalize each channel with
# mean 0.5 and std 0.5.
new_transform = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)


def load_image(image_path):
    """Read an image file and return it as a one-image batch for the network.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The result of ``new_transform`` on the RGB image with a leading
        batch dimension added.
    """
    # The context manager closes the underlying file once pixels are read.
    with Image.open(image_path) as source:
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise not match the 3-channel Normalize and first conv layer.
        rgb_image = source.convert('RGB')

    tensor = new_transform(rgb_image)
    return tensor.unsqueeze(0)  # add batch dimension

# Files to classify. The list name must match the one read by the
# comprehension below; the original defined `image_path` but read
# `image_paths`, which raises NameError on a fresh kernel.
image_paths = ['example1.jpg', 'example2.jpg', 'example3.jpg']
images = [load_image(path) for path in image_paths]

# Inference only: evaluation-mode layers and no gradient tracking.
net.eval()
with torch.no_grad():
    for image in images:
        output = net(image)
        # Each batch holds a single image, so argmax yields a one-element
        # tensor that .item() converts to a plain int index.
        predicted = output.argmax(dim=1)
        print(f'Prediction: {class_names[predicted.item()]}')

Prediction: plane
Prediction: dog
Prediction: horse
