
###Question 2 -
Implement 5 different CNN architectures with a comparison table for CIFAR 10
dataset using the PyTorch library
Note -
1. The model parameters for each architecture should not be more than 10000 parameters
2. Code comments should be given for proper code understanding



In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchsummary import summary

In [8]:
# normalize the data by specifying a mean and standard deviation for each of the three channels.
# This will convert the data from [0,1] to [-1,1] # Normalization of data should help speed up conversion and
# reduce the chance of vanishing gradients with certain activation functions.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data/', train=True,download=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=4,shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,download=True,transform=transform)
testloader = torch.utils.data.DataLoader(testset,batch_size=4,shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 15326502.94it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data/
Files already downloaded and verified


In [9]:
import torch.nn as nn
import torch.nn.functional as F
### define CNN Architecture1
class Net1(nn.Module):
  # CNN architecture 1

    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(3, 5, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(5, 5, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.fc1 = nn.Linear(5 * 8 * 8, 20)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(20, 10)
    def forward(self, x):
        x = self.pool(self.relu1(self.conv1(x)))
        x = self.pool(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x





In [10]:
### define CNN Architecture2
class Net2(nn.Module):
  # CNN architecture 1

    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 5, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.fc1 = nn.Linear(5 * 8* 8, 20)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(20, 10)
    def forward(self, x):
        x = self.pool(self.relu1(self.conv1(x)))
        x = self.pool(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x



In [65]:
### define CNN Architecture3
class Net3(nn.Module):
  def __init__(self):
        super(Net3, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6,4, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.fc1 = nn.Linear(4*6*6, 20)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(20, 10)
  def forward(self, x):
        x = self.pool(self.relu1(self.conv1(x)))
        x = self.pool(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
### define CNN Architecture3
class Net4(nn.Module):
  def __init__(self):
        super(Net4, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8* 6 * 6, 64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(64, 10)

  def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
 ### define CNN Architecture5
class Net5(nn.Module):
     def __init__(self):
        super(Net5, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 5, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 16, 3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 8 * 8, 64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(32, 10)
     def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x



##### Initialize the networks

In [11]:
net1=Net1()

In [12]:
net2=Net2()

In [66]:
net3=Net3()

In [None]:
net4=Net4()

In [None]:
net5=Net5()

#### Stochastic gradient descent (SGD) with momentum to help get us over local minima and saddle points in the loss function space.
#### Define the loss function and optimizer

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(net1.parameters(), lr=0.001, momentum=0.9)
optimizer2 = optim.SGD(net2.parameters(), lr=0.001, momentum=0.9)
optimizer3 = optim.SGD(net3.parameters(), lr=0.001, momentum=0.9)
optimizer4 = optim.SGD(net4.parameters(), lr=0.001, momentum=0.9)
optimizer5 = optim.SGD(net5.parameters(), lr=0.001, momentum=0.9)

NameError: ignored

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(net1.parameters(), lr=0.001, momentum=0.9)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer2= optim.SGD(net2.parameters(), lr=0.001, momentum=0.9)

In [67]:
criterion = nn.CrossEntropyLoss()
optimizer3= optim.SGD(net3.parameters(), lr=0.001, momentum=0.9)

In [15]:

def get_variable_name(var):
    variables = globals()
    
    for name, value in variables.items():
        if value is var:
            return name

# Train the networks
def train(net, optimizer, trainloader):
    var = get_variable_name(net)
    print(f"Training started for {str.upper(var)}\n")
    net.train()
    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 200 == 199:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
    print('\n')
    print('Finished Training.')

In [16]:
### Test the networks
def test(net, testloader):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy



In [None]:
#Train and test the networks
summary(net1, (3, 32, 32))
train(net1, optimizer1, trainloader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 5, 32, 32]             140
              ReLU-2            [-1, 5, 32, 32]               0
         MaxPool2d-3            [-1, 5, 16, 16]               0
            Conv2d-4            [-1, 5, 16, 16]             230
              ReLU-5            [-1, 5, 16, 16]               0
         MaxPool2d-6              [-1, 5, 8, 8]               0
            Linear-7                   [-1, 20]           6,420
              ReLU-8                   [-1, 20]               0
            Linear-9                   [-1, 10]             210
Total params: 7,000
Trainable params: 7,000
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.11
Params size (MB): 0.03
Estimated Total Size (MB): 0.15
-----------------------------------------------

In [22]:
summary(net2, (3, 32, 32))
train(net2, optimizer2, trainloader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 32, 32]             168
              ReLU-2            [-1, 6, 32, 32]               0
         MaxPool2d-3            [-1, 6, 16, 16]               0
            Conv2d-4            [-1, 5, 16, 16]             275
              ReLU-5            [-1, 5, 16, 16]               0
         MaxPool2d-6              [-1, 5, 8, 8]               0
            Linear-7                   [-1, 20]           6,420
              ReLU-8                   [-1, 20]               0
            Linear-9                   [-1, 10]             210
Total params: 7,073
Trainable params: 7,073
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.13
Params size (MB): 0.03
Estimated Total Size (MB): 0.17
-----------------------------------------------

In [68]:
summary(net3, (3, 32, 32))
train(net3, optimizer3, trainloader)

RuntimeError: ignored

In [None]:
summary(net4, (3, 32, 32))
train(net4, optimizer4, trainloader)

In [None]:
summary(net5, (3, 32, 32))
train(net5, optimizer5, trainloader)

In [17]:
accuracy1 = test(net1, testloader)

print("Accuracy 1:", accuracy1)

Accuracy 1: 10.0


In [18]:
accuracy2 = test(net2, testloader)
print("Accuracy 2:", accuracy2)


Accuracy 2: 10.12


In [None]:
accuracy1 = test(net1, testloader)
accuracy2 = test(net2, testloader)
accuracy3 = test(net3, testloader)
accuracy4 = test(net4, testloader)
accuracy5 = test(net5, testloader)

print("Accuracy 1:", accuracy1)
print("Accuracy 2:", accuracy2)
print("Accuracy 3:", accuracy3)
print("Accuracy 4:", accuracy4)
print("Accuracy 5:", accuracy5)