In [26]:
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split, Subset
import torch.nn as nn
import torch.nn.functional as func

In [27]:
# Converts the image from [H,W,C] to a PyTorch tensor [C,H,W] --> what the model will expect
base_transform = transforms.ToTensor()
full_train = CIFAR10(root='./data', train=True, download=True)
full_test = CIFAR10(root='./data', train=False, download=True)
train_subset = Subset(full_train, range(50000))
test_subset = Subset(full_test, range(10000))

# For Normalisation, I used the values based on this repo : https://github.com/kuangliu/pytorch-cifar/issues/19
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010])
])

# No data augmentation in test set as it will show the the model works on real world, unseen data
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010])
])

train_subset.dataset.transform = train_transform
test_subset.dataset.transform = test_transform
train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=128, shuffle=False)

In [29]:
# Now, to design the CNN model, I noted the following :
# Start with fewer channels to learn basic edges and then increases to capture more complex features
# Two convolutional kernels of 3 x 3 should capture more precise patterns than one large kernel
# Perceptron dropout towards the end to prevent overfitting

class Trial1(nn.Module):
    def __init__(self):
        super(Trial1, self).__init__()

        self.block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.classifier(x)
        return x
nn = Trial1()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nn = nn.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn.parameters(), lr=0.01)

num_epochs = 10
for epoch in range(num_epochs):
    nn.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = nn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

nn.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = nn(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")



Epoch [1/10], Loss: 2.3381, Accuracy: 18.20%
Epoch [2/10], Loss: 2.0280, Accuracy: 22.58%
Epoch [3/10], Loss: 2.0150, Accuracy: 23.45%
Epoch [4/10], Loss: 2.0059, Accuracy: 23.79%
Epoch [5/10], Loss: 2.0023, Accuracy: 24.21%
Epoch [6/10], Loss: 2.0176, Accuracy: 23.64%
Epoch [7/10], Loss: 2.0094, Accuracy: 23.75%
Epoch [8/10], Loss: 2.0222, Accuracy: 23.42%
Epoch [9/10], Loss: 2.0428, Accuracy: 22.90%
Epoch [10/10], Loss: 2.0738, Accuracy: 21.74%
Test Accuracy: 21.73%


In [30]:
class Trial2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 10, 3)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(10, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv(x)))
        x = x.view(x.size(0), -1)
        return self.fc(x)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nn = Trial2().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn.parameters(), lr=0.001)

for epoch in range(10):
    nn.train()
    total = 0
    correct = 0
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = nn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f"Epoch {epoch+1}: loss = {running_loss:.4f}, acc = {100 * correct / total:.2f}%")

nn.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = nn(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Epoch 1: loss = 851.3347, acc = 18.78%
Epoch 2: loss = 804.6560, acc = 23.07%
Epoch 3: loss = 792.7060, acc = 24.68%
Epoch 4: loss = 785.9583, acc = 25.54%
Epoch 5: loss = 778.8800, acc = 26.05%
Epoch 6: loss = 772.7614, acc = 26.66%
Epoch 7: loss = 766.6993, acc = 27.23%
Epoch 8: loss = 760.3260, acc = 27.87%
Epoch 9: loss = 755.9728, acc = 28.25%
Epoch 10: loss = 751.1162, acc = 28.60%
Test Accuracy: 30.79%


In [33]:
# Implemented based on the repo : https://github.com/fares-ds/Cifar-10_Image_Classification_Using_CNNs/blob/master/Cifar_10_image_classification_using_cnn.ipynb
# The original code was written using the Tensorflow library, I attempted to understand the architecture and then replicate it in Pytorch

class Trial3(nn.Module):
    def __init__(self):
        super(Trial3, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.dropout = nn.Dropout(0.25)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Trial3().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/100], Loss: 1.8567, Accuracy: 31.04%
Epoch [2/100], Loss: 1.6153, Accuracy: 40.80%
Epoch [3/100], Loss: 1.5015, Accuracy: 45.27%
Epoch [4/100], Loss: 1.4235, Accuracy: 48.57%
Epoch [5/100], Loss: 1.3711, Accuracy: 51.08%
Epoch [6/100], Loss: 1.3172, Accuracy: 53.13%
Epoch [7/100], Loss: 1.2766, Accuracy: 54.45%
Epoch [8/100], Loss: 1.2394, Accuracy: 55.97%
Epoch [9/100], Loss: 1.2114, Accuracy: 57.17%
Epoch [10/100], Loss: 1.1824, Accuracy: 58.34%
Epoch [11/100], Loss: 1.1618, Accuracy: 58.79%
Epoch [12/100], Loss: 1.1428, Accuracy: 59.91%
Epoch [13/100], Loss: 1.1178, Accuracy: 60.54%
Epoch [14/100], Loss: 1.0921, Accuracy: 61.39%
Epoch [15/100], Loss: 1.0781, Accuracy: 62.16%
Epoch [16/100], Loss: 1.0667, Accuracy: 62.60%
Epoch [17/100], Loss: 1.0547, Accuracy: 63.29%
Epoch [18/100], Loss: 1.0347, Accuracy: 64.02%
Epoch [19/100], Loss: 1.0357, Accuracy: 63.79%
Epoch [20/100], Loss: 1.0164, Accuracy: 64.84%
Epoch [21/100], Loss: 1.0089, Accuracy: 64.69%
Epoch [22/100], Loss: 