In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [3]:
# conda install pytorch torchvision torchaudio cpuonly -c pytorch

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# lets define the transformation to be applied to CIFAR10, here we are just converting data in tensor format and perform 
#Normalisation to the dataset but we can also use data augmentation in this step.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std =(0.5, 0.5, 0.5))])

# 3 values of mean to 3 channels in the image

In [6]:
# Load the CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=50, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 72840057.63it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [7]:
trainset[0][0].shape

torch.Size([3, 32, 32])

In [8]:
# from prettytable import PrettyTable
def count_parameters(model):
    table = {"Modules": "Parameters"}
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table[name]=param
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [9]:
# Define the first CNN architecture
class CNNModel1(nn.Module):
    def __init__(self):
        super(CNNModel1, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 12, 3, padding=1)
        self.fc1 = nn.Linear(12 * 8 * 8, 10)
        self.fc2 = nn.Linear(10, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 12 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
count_parameters(CNNModel1())

{'Modules': 'Parameters', 'conv1.weight': 432, 'conv1.bias': 16, 'conv2.weight': 1728, 'conv2.bias': 12, 'fc1.weight': 7680, 'fc1.bias': 10, 'fc2.weight': 100, 'fc2.bias': 10}
Total Trainable Params: 9988


9988

In [10]:
# Define the second CNN architecture
class CNNModel2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 8, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8* 8*8, 12)
        self.fc2 = nn.Linear(12, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 8* 8*8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
count_parameters(CNNModel2())

{'Modules': 'Parameters', 'conv1.weight': 864, 'conv1.bias': 32, 'conv2.weight': 2304, 'conv2.bias': 8, 'fc1.weight': 6144, 'fc1.bias': 12, 'fc2.weight': 120, 'fc2.bias': 10}
Total Trainable Params: 9494


9494

In [11]:
# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10):
    model.to(device)
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(trainloader, 0):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Evaluate on the test set
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100.0 * correct / total

        # Save the model with the best accuracy on the test set
        if accuracy > best_acc:
            best_acc = accuracy
            torch.save(model.state_dict(), 'best_model.pth')

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss:.3f}, Test Accuracy: {accuracy:.2f}%")

    print("Training complete!")
    print(f"Best Test Accuracy: {best_acc:.2f}%")

In [12]:
model1 = CNNModel1()
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
train_model(model1, criterion, optimizer1, num_epochs=10)

Epoch 1/10, Loss: 1743.468, Test Accuracy: 45.32%
Epoch 2/10, Loss: 1430.194, Test Accuracy: 51.03%
Epoch 3/10, Loss: 1308.377, Test Accuracy: 54.29%
Epoch 4/10, Loss: 1241.921, Test Accuracy: 55.63%
Epoch 5/10, Loss: 1189.189, Test Accuracy: 57.30%
Epoch 6/10, Loss: 1149.491, Test Accuracy: 58.27%
Epoch 7/10, Loss: 1122.554, Test Accuracy: 59.73%
Epoch 8/10, Loss: 1099.631, Test Accuracy: 60.21%
Epoch 9/10, Loss: 1083.133, Test Accuracy: 59.77%
Epoch 10/10, Loss: 1064.673, Test Accuracy: 60.51%
Training complete!
Best Test Accuracy: 60.51%


In [13]:
model2 = CNNModel2()

criterion = nn.CrossEntropyLoss()
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
train_model(model2, criterion, optimizer2, num_epochs=10)

Epoch 1/10, Loss: 1666.123, Test Accuracy: 46.79%
Epoch 2/10, Loss: 1366.922, Test Accuracy: 52.24%
Epoch 3/10, Loss: 1261.156, Test Accuracy: 55.52%
Epoch 4/10, Loss: 1196.541, Test Accuracy: 57.69%
Epoch 5/10, Loss: 1151.331, Test Accuracy: 58.30%
Epoch 6/10, Loss: 1115.074, Test Accuracy: 60.23%
Epoch 7/10, Loss: 1088.834, Test Accuracy: 59.61%
Epoch 8/10, Loss: 1066.278, Test Accuracy: 61.26%
Epoch 9/10, Loss: 1048.827, Test Accuracy: 61.20%
Epoch 10/10, Loss: 1034.287, Test Accuracy: 61.86%
Training complete!
Best Test Accuracy: 61.86%


In [14]:
# Define the second CNN architecture
class CNNModel3(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 8, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8* 8*8, 10)
        self.fc2 = nn.Linear(10, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 8* 8*8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
count_parameters(CNNModel3())


{'Modules': 'Parameters', 'conv1.weight': 864, 'conv1.bias': 32, 'conv2.weight': 2304, 'conv2.bias': 8, 'fc1.weight': 5120, 'fc1.bias': 10, 'fc2.weight': 100, 'fc2.bias': 10}
Total Trainable Params: 8448


8448

In [15]:
model3 = CNNModel3()

criterion = nn.CrossEntropyLoss()
optimizer3 = optim.Adam(model3.parameters(), lr=0.001)
train_model(model3, criterion, optimizer3, num_epochs=10)

Epoch 1/10, Loss: 1677.667, Test Accuracy: 47.94%
Epoch 2/10, Loss: 1366.206, Test Accuracy: 52.28%
Epoch 3/10, Loss: 1288.538, Test Accuracy: 54.64%
Epoch 4/10, Loss: 1234.133, Test Accuracy: 56.23%
Epoch 5/10, Loss: 1190.492, Test Accuracy: 56.37%
Epoch 6/10, Loss: 1149.077, Test Accuracy: 57.99%
Epoch 7/10, Loss: 1117.839, Test Accuracy: 59.12%
Epoch 8/10, Loss: 1089.676, Test Accuracy: 60.12%
Epoch 9/10, Loss: 1066.912, Test Accuracy: 58.55%
Epoch 10/10, Loss: 1044.630, Test Accuracy: 61.28%
Training complete!
Best Test Accuracy: 61.28%


In [16]:
class CNNModel4(nn.Module):
    def __init__(self):
        super(CNNModel4, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 15, 3, padding=1)
        self.conv3 = nn.Conv2d(15, 7, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(7* 8*8, 8)
        self.fc2 = nn.Linear(8, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = (F.relu(self.conv3(x)))
        x = x.view(-1, 7* 8*8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
  
count_parameters(CNNModel4())

{'Modules': 'Parameters', 'conv1.weight': 864, 'conv1.bias': 32, 'conv2.weight': 4320, 'conv2.bias': 15, 'conv3.weight': 945, 'conv3.bias': 7, 'fc1.weight': 3584, 'fc1.bias': 8, 'fc2.weight': 80, 'fc2.bias': 10}
Total Trainable Params: 9865


9865

In [17]:
model4 = CNNModel4()

criterion = nn.CrossEntropyLoss()
optimizer4 = optim.Adam(model4.parameters(), lr=0.001)
train_model(model4, criterion, optimizer4, num_epochs=10)

Epoch 1/10, Loss: 1692.939, Test Accuracy: 47.26%
Epoch 2/10, Loss: 1359.810, Test Accuracy: 53.13%
Epoch 3/10, Loss: 1242.680, Test Accuracy: 54.84%
Epoch 4/10, Loss: 1170.087, Test Accuracy: 59.63%
Epoch 5/10, Loss: 1109.252, Test Accuracy: 61.64%
Epoch 6/10, Loss: 1059.987, Test Accuracy: 63.41%
Epoch 7/10, Loss: 1024.383, Test Accuracy: 63.05%
Epoch 8/10, Loss: 997.458, Test Accuracy: 64.63%
Epoch 9/10, Loss: 978.053, Test Accuracy: 64.87%
Epoch 10/10, Loss: 954.848, Test Accuracy: 64.92%
Training complete!
Best Test Accuracy: 64.92%


In [18]:
class CNNModel5(nn.Module):
    def __init__(self):
        super(CNNModel5, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 8, 3, padding=1)
        self.conv3 = nn.Conv2d(8, 4, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(4*8*8, 8)
        self.fc2 = nn.Linear(8, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = (F.relu(self.conv3(x)))
        x = x.view(-1, 4* 8*8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
  
count_parameters(CNNModel5())

{'Modules': 'Parameters', 'conv1.weight': 1728, 'conv1.bias': 64, 'conv2.weight': 4608, 'conv2.bias': 8, 'conv3.weight': 288, 'conv3.bias': 4, 'fc1.weight': 2048, 'fc1.bias': 8, 'fc2.weight': 80, 'fc2.bias': 10}
Total Trainable Params: 8846


8846

In [19]:
model5 = CNNModel5()

criterion = nn.CrossEntropyLoss()
optimizer5 = optim.Adam(model5.parameters(), lr=0.001)
train_model(model5, criterion, optimizer5, num_epochs=10)

Epoch 1/10, Loss: 1913.670, Test Accuracy: 35.20%
Epoch 2/10, Loss: 1631.697, Test Accuracy: 40.97%
Epoch 3/10, Loss: 1545.082, Test Accuracy: 43.28%
Epoch 4/10, Loss: 1482.709, Test Accuracy: 44.78%
Epoch 5/10, Loss: 1424.162, Test Accuracy: 47.46%
Epoch 6/10, Loss: 1372.698, Test Accuracy: 49.10%
Epoch 7/10, Loss: 1334.302, Test Accuracy: 50.93%
Epoch 8/10, Loss: 1303.888, Test Accuracy: 50.67%
Epoch 9/10, Loss: 1276.991, Test Accuracy: 50.81%
Epoch 10/10, Loss: 1255.244, Test Accuracy: 51.92%
Training complete!
Best Test Accuracy: 51.92%


In [20]:
class CNNModel6(nn.Module):
    def __init__(self):
        super(CNNModel6, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 8, 3, padding=1)
        self.conv3 = nn.Conv2d(8, 4, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(4*8*8, 12)
        self.fc2 = nn.Linear(12, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = (F.relu(self.conv3(x)))
        x = x.view(-1, 4* 8*8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
  
count_parameters(CNNModel6())

{'Modules': 'Parameters', 'conv1.weight': 1728, 'conv1.bias': 64, 'conv2.weight': 4608, 'conv2.bias': 8, 'conv3.weight': 288, 'conv3.bias': 4, 'fc1.weight': 3072, 'fc1.bias': 12, 'fc2.weight': 120, 'fc2.bias': 10}
Total Trainable Params: 9914


9914

In [22]:
model6 = CNNModel6()

criterion = nn.CrossEntropyLoss()
optimizer6 = optim.Adam(model6.parameters(), lr=0.001)
train_model(model6, criterion, optimizer6, num_epochs=10)

Epoch 1/10, Loss: 1721.178, Test Accuracy: 46.43%
Epoch 2/10, Loss: 1410.973, Test Accuracy: 50.87%
Epoch 3/10, Loss: 1324.119, Test Accuracy: 53.26%
Epoch 4/10, Loss: 1269.351, Test Accuracy: 54.95%
Epoch 5/10, Loss: 1226.404, Test Accuracy: 57.24%
Epoch 6/10, Loss: 1199.050, Test Accuracy: 57.55%
Epoch 7/10, Loss: 1172.926, Test Accuracy: 58.02%
Epoch 8/10, Loss: 1147.696, Test Accuracy: 58.36%
Epoch 9/10, Loss: 1129.711, Test Accuracy: 59.58%
Epoch 10/10, Loss: 1107.247, Test Accuracy: 59.76%
Training complete!
Best Test Accuracy: 59.76%


In [29]:
class CNNModel7(nn.Module):
    def __init__(self):
        super(CNNModel7, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 16, 3, padding=1)
        self.conv3 = nn.Conv2d(16, 8, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8*4*4, 16)
        self.fc2 = nn.Linear(16, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 8* 4*4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
count_parameters(CNNModel7())

{'Modules': 'Parameters', 'conv1.weight': 864, 'conv1.bias': 32, 'conv2.weight': 4608, 'conv2.bias': 16, 'conv3.weight': 1152, 'conv3.bias': 8, 'fc1.weight': 2048, 'fc1.bias': 16, 'fc2.weight': 160, 'fc2.bias': 10}
Total Trainable Params: 8914


8914

In [30]:
model7 = CNNModel7()

criterion = nn.CrossEntropyLoss()
optimizer7 = optim.Adam(model7.parameters(), lr=0.001)
train_model(model7, criterion, optimizer7, num_epochs=10)

Epoch 1/10, Loss: 1731.568, Test Accuracy: 46.63%
Epoch 2/10, Loss: 1401.284, Test Accuracy: 51.87%
Epoch 3/10, Loss: 1297.304, Test Accuracy: 55.90%
Epoch 4/10, Loss: 1214.287, Test Accuracy: 57.99%
Epoch 5/10, Loss: 1152.906, Test Accuracy: 59.80%
Epoch 6/10, Loss: 1107.679, Test Accuracy: 61.57%
Epoch 7/10, Loss: 1077.494, Test Accuracy: 62.17%
Epoch 8/10, Loss: 1045.756, Test Accuracy: 62.39%
Epoch 9/10, Loss: 1024.427, Test Accuracy: 63.82%
Epoch 10/10, Loss: 1005.008, Test Accuracy: 64.96%
Training complete!
Best Test Accuracy: 64.96%


In [32]:
import pandas as pd

model=['model1','model2','model3','model4','model5','model6','model7']
params=[9988,9494,8448,9865,8846,9914,8914]
accuracy=[60.51,61.68,61.28,64.92,51.32,59.76,64.96]

df=pd.DataFrame({'Model':model,'Params':params,'accuracy':accuracy})
df

Unnamed: 0,Model,Params,accuracy
0,model1,9988,60.51
1,model2,9494,61.68
2,model3,8448,61.28
3,model4,9865,64.92
4,model5,8846,51.32
5,model6,9914,59.76
6,model7,8914,64.96


In [33]:
# model7 gave most accuarate result. it has been observed that no. of neurons per layer should of power 2 and increasing no. of layers kept lower number of parameters with greater accuracy