**1. IMPORTING LIBRARIES**

In [1]:
import torch 
import torchvision
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from torchvision.datasets import CIFAR10
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


**2. LOADING & USING PYTORCH TRANSFORMS ON CIFAR10**

In [2]:
# Per-channel normalisation shared by both pipelines: (x - 0.5) / 0.5, which
# maps ToTensor's [0, 1] output range to [-1, 1].
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Training pipeline: random horizontal flip as augmentation, then tensor
# conversion and normalisation. The name `transform` is kept for this pipeline.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: no random augmentation, so the reported test accuracy
# is deterministic for a given set of weights.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# CIFAR10 train/test splits, each with its own pipeline
train_ds = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_ds = CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Only the training loader is shuffled; accuracy does not depend on the order
# in which test batches are visited.
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


**3. CODING THE LENET-5 AND ALEXNET ARCHITECTURES**

In [3]:
class LeNet(nn.Module):
    """LeNet-5 style CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    Args:
        img_channels: number of channels of the input images.
        num_classes: number of output logits.

    ``forward`` expects inputs of shape ``(N, img_channels, 32, 32)``: two
    unpadded 5x5 convolutions and two 2x2 poolings reduce 32x32 to 5x5, which
    is what the ``16 * 5 * 5`` input size of ``fc1`` assumes.
    """

    def __init__(self, img_channels=3, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(img_channels, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both conv stages (no parameters)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 400),
        # this can never fold samples into each other: a wrongly sized input
        # fails loudly in fc1 instead of silently changing the batch size.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class AlexNet(nn.Module):
    """AlexNet-inspired CNN built from 3x3 convolutions.

    Five convolutions (max-pooling after the second and after the fifth) feed
    three linear layers. The 9216 input features of ``fc1`` equal
    256 channels x 6 x 6, which is the feature-map size produced by a
    32x32 input.

    Args:
        img_channels: number of channels of the input images.
        num_classes: number of output logits.
    """

    def __init__(self, img_channels=3, num_classes=10):
        super().__init__()
        # Only the first convolution is unpadded (32 -> 30); the others keep size.
        self.conv1 = nn.Conv2d(img_channels, 96, kernel_size=3, stride=1, padding=0)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)  # reused after conv2 and conv5
        self.conv2 = nn.Conv2d(96, 256, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(9216, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``."""
        # Stage 1: conv1 -> conv2 -> pool
        features = F.relu(self.conv2(F.relu(self.conv1(x))))
        features = self.pool(features)
        # Stage 2: conv3 -> conv4 -> conv5 -> pool
        for conv in (self.conv3, self.conv4, self.conv5):
            features = F.relu(conv(features))
        features = self.pool(features)
        # Classifier head on the per-sample flattened feature maps
        flat = features.reshape(features.shape[0], -1)
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.fc3(hidden)

**4. SETTING UP GPU AND CRITERION**

In [4]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Classification loss applied to the raw logits returned by the models.
criterion = nn.CrossEntropyLoss()

**5. DEFINING THE TRAIN AND TEST FUNCTION**

In [5]:
def train_test(model, train_loader, test_loader, optimizer, n_epochs, criterion=None):
    """Train ``model`` and measure its test accuracy after every epoch.

    Args:
        model: network to optimise; its weights are updated in place.
        train_loader: iterable yielding ``(data, targets)`` training batches.
        test_loader: iterable yielding ``(data, targets)`` evaluation batches.
        optimizer: optimizer already constructed over ``model``'s parameters.
        n_epochs: number of passes over ``train_loader``.
        criterion: callable ``criterion(output, targets)`` returning the loss
            tensor. Defaults to ``nn.CrossEntropyLoss()``.

    Returns:
        Tuple ``(model, train_l, test_acc)`` where ``train_l`` holds the mean
        batch loss of each epoch and ``test_acc`` the test accuracy in percent
        after each epoch. Both are lists of plain Python floats (``nan`` for an
        epoch whose loader yielded no batches), so they can be plotted or
        serialised without moving tensors off the GPU.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    # Send batches to wherever the model's weights live, so a model left on the
    # CPU still trains on a CUDA-capable machine. Parameter-free models fall
    # back to the "GPU if available" rule.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_l = []
    test_acc = []
    for epoch in range(1, n_epochs+1):
        # Re-enter training mode every epoch: the evaluation pass below switches
        # the model to eval mode, which would otherwise persist for all later
        # epochs (disabling dropout / freezing batch-norm statistics).
        model.train()
        running_loss = 0.0
        n_batches = 0
        for data, targets in train_loader:
            data = data.to(device=device)
            targets = targets.to(device=device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1

        # mean loss over the batches seen this epoch
        train_loss = running_loss / n_batches if n_batches else float('nan')
        train_l.append(train_loss)

        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, targets in test_loader:
                data = data.to(device=device)
                targets = targets.to(device=device)
                scores = model(data)
                predictions = torch.argmax(scores, dim=-1)
                # .item() keeps the running count a Python int (no device tensors)
                correct += (predictions == targets).sum().item()
                total += predictions.size(0)
        t_a = 100.0 * correct / total if total else float('nan')
        test_acc.append(t_a)
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss), f"  Test Accuracy: {t_a:.3f}")

    return model, train_l, test_acc

**5(A). TRAINING LENET WITH SGD**

In [6]:
# Fresh LeNet trained with plain SGD (no momentum), lr = 1e-3, 50 epochs.
lenet = LeNet().to(device)
sgd_lenet = optim.SGD(lenet.parameters(), lr=1e-3)
# train_test returns the model it was given, so lenet_sgd is the same object as lenet.
lenet_sgd, loss_sgd, acc_sgd = train_test(
    model=lenet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=sgd_lenet,
    n_epochs=50,
)

Epoch: 1 	Training Loss: 2.303624   Test Accuracy: 9.990
Epoch: 2 	Training Loss: 2.302592   Test Accuracy: 9.770
Epoch: 3 	Training Loss: 2.301587   Test Accuracy: 9.890
Epoch: 4 	Training Loss: 2.300380   Test Accuracy: 14.040
Epoch: 5 	Training Loss: 2.298598   Test Accuracy: 18.560
Epoch: 6 	Training Loss: 2.295333   Test Accuracy: 15.830
Epoch: 7 	Training Loss: 2.287461   Test Accuracy: 13.420
Epoch: 8 	Training Loss: 2.266315   Test Accuracy: 12.320
Epoch: 9 	Training Loss: 2.229234   Test Accuracy: 17.420
Epoch: 10 	Training Loss: 2.186216   Test Accuracy: 22.220
Epoch: 11 	Training Loss: 2.141644   Test Accuracy: 25.500
Epoch: 12 	Training Loss: 2.092237   Test Accuracy: 26.470
Epoch: 13 	Training Loss: 2.030339   Test Accuracy: 28.070
Epoch: 14 	Training Loss: 1.973105   Test Accuracy: 29.440
Epoch: 15 	Training Loss: 1.929547   Test Accuracy: 31.220
Epoch: 16 	Training Loss: 1.889619   Test Accuracy: 33.130
Epoch: 17 	Training Loss: 1.847090   Test Accuracy: 34.410
Epoch: 18

**5(B). TRAINING LENET WITH SGD+MOMENTUM**

In [7]:
# Fresh LeNet trained with SGD + momentum 0.9, lr = 1e-3, 50 epochs.
lenet = LeNet().to(device)
sgdm_lenet = optim.SGD(lenet.parameters(), lr=1e-3, momentum=0.9)
# train_test returns the model it was given, so lenet_sgdm is the same object as lenet.
lenet_sgdm, loss_sgdm, acc_sgdm = train_test(
    model=lenet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=sgdm_lenet,
    n_epochs=50,
)

Epoch: 1 	Training Loss: 2.278836   Test Accuracy: 24.400
Epoch: 2 	Training Loss: 1.850428   Test Accuracy: 38.980
Epoch: 3 	Training Loss: 1.589276   Test Accuracy: 44.020
Epoch: 4 	Training Loss: 1.475928   Test Accuracy: 47.370
Epoch: 5 	Training Loss: 1.396388   Test Accuracy: 49.550
Epoch: 6 	Training Loss: 1.331649   Test Accuracy: 53.010
Epoch: 7 	Training Loss: 1.277900   Test Accuracy: 55.610
Epoch: 8 	Training Loss: 1.225606   Test Accuracy: 58.060
Epoch: 9 	Training Loss: 1.183693   Test Accuracy: 57.440
Epoch: 10 	Training Loss: 1.149505   Test Accuracy: 59.600
Epoch: 11 	Training Loss: 1.118117   Test Accuracy: 60.490
Epoch: 12 	Training Loss: 1.090882   Test Accuracy: 60.510
Epoch: 13 	Training Loss: 1.066007   Test Accuracy: 62.720
Epoch: 14 	Training Loss: 1.039286   Test Accuracy: 61.490
Epoch: 15 	Training Loss: 1.013967   Test Accuracy: 63.040
Epoch: 16 	Training Loss: 0.998496   Test Accuracy: 62.630
Epoch: 17 	Training Loss: 0.976476   Test Accuracy: 63.180
Epoch:

**5(C). TRAINING LENET WITH ADAGRAD**

In [8]:
# Fresh LeNet trained with Adagrad, lr = 1e-3, 50 epochs.
lenet = LeNet().to(device)
ag_lenet = optim.Adagrad(lenet.parameters(), lr=1e-3)
# train_test returns the model it was given, so lenet_ag is the same object as lenet.
lenet_ag, loss_ag, acc_ag = train_test(
    model=lenet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=ag_lenet,
    n_epochs=50,
)

Epoch: 1 	Training Loss: 1.934985   Test Accuracy: 33.050
Epoch: 2 	Training Loss: 1.807557   Test Accuracy: 35.500
Epoch: 3 	Training Loss: 1.760149   Test Accuracy: 36.970
Epoch: 4 	Training Loss: 1.728753   Test Accuracy: 37.820
Epoch: 5 	Training Loss: 1.706745   Test Accuracy: 38.670
Epoch: 6 	Training Loss: 1.688164   Test Accuracy: 39.160
Epoch: 7 	Training Loss: 1.672609   Test Accuracy: 39.690
Epoch: 8 	Training Loss: 1.658776   Test Accuracy: 40.320
Epoch: 9 	Training Loss: 1.647318   Test Accuracy: 40.500
Epoch: 10 	Training Loss: 1.636636   Test Accuracy: 41.250
Epoch: 11 	Training Loss: 1.626778   Test Accuracy: 41.580
Epoch: 12 	Training Loss: 1.617146   Test Accuracy: 41.650
Epoch: 13 	Training Loss: 1.609347   Test Accuracy: 41.970
Epoch: 14 	Training Loss: 1.602517   Test Accuracy: 42.200
Epoch: 15 	Training Loss: 1.595558   Test Accuracy: 42.440
Epoch: 16 	Training Loss: 1.588992   Test Accuracy: 42.860
Epoch: 17 	Training Loss: 1.583499   Test Accuracy: 43.240
Epoch:

**5(D). TRAINING LENET WITH RMSPROP**

In [9]:
# Fresh LeNet trained with RMSprop, lr = 1e-3, 50 epochs.
lenet = LeNet().to(device)
rms_lenet = optim.RMSprop(lenet.parameters(), lr=1e-3)
# train_test returns the model it was given, so lenet_rms is the same object as lenet.
lenet_rms, loss_rms, acc_rms = train_test(
    model=lenet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=rms_lenet,
    n_epochs=50,
)

Epoch: 1 	Training Loss: 1.569512   Test Accuracy: 50.670
Epoch: 2 	Training Loss: 1.297219   Test Accuracy: 56.230
Epoch: 3 	Training Loss: 1.182148   Test Accuracy: 57.400
Epoch: 4 	Training Loss: 1.112889   Test Accuracy: 57.120
Epoch: 5 	Training Loss: 1.052540   Test Accuracy: 60.780
Epoch: 6 	Training Loss: 1.008444   Test Accuracy: 62.470
Epoch: 7 	Training Loss: 0.970378   Test Accuracy: 63.160
Epoch: 8 	Training Loss: 0.938523   Test Accuracy: 63.980
Epoch: 9 	Training Loss: 0.910906   Test Accuracy: 65.590
Epoch: 10 	Training Loss: 0.885886   Test Accuracy: 65.680
Epoch: 11 	Training Loss: 0.861961   Test Accuracy: 65.070
Epoch: 12 	Training Loss: 0.845622   Test Accuracy: 65.290
Epoch: 13 	Training Loss: 0.827001   Test Accuracy: 65.210
Epoch: 14 	Training Loss: 0.809636   Test Accuracy: 66.180
Epoch: 15 	Training Loss: 0.795246   Test Accuracy: 66.370
Epoch: 16 	Training Loss: 0.783995   Test Accuracy: 66.320
Epoch: 17 	Training Loss: 0.769749   Test Accuracy: 66.230
Epoch:

**5(E). TRAINING LENET WITH ADAM**

In [10]:
# Fresh LeNet trained with Adam, lr = 1e-3, 50 epochs.
lenet = LeNet().to(device)
adam_lenet = optim.Adam(lenet.parameters(), lr=1e-3)
# train_test returns the model it was given, so lenet_adam is the same object as lenet.
lenet_adam, loss_adam, acc_adam = train_test(
    model=lenet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=adam_lenet,
    n_epochs=50,
)

Epoch: 1 	Training Loss: 1.610249   Test Accuracy: 49.330
Epoch: 2 	Training Loss: 1.305762   Test Accuracy: 54.540
Epoch: 3 	Training Loss: 1.198563   Test Accuracy: 57.480
Epoch: 4 	Training Loss: 1.127415   Test Accuracy: 60.190
Epoch: 5 	Training Loss: 1.082566   Test Accuracy: 59.740
Epoch: 6 	Training Loss: 1.037528   Test Accuracy: 62.270
Epoch: 7 	Training Loss: 1.007054   Test Accuracy: 62.140
Epoch: 8 	Training Loss: 0.975199   Test Accuracy: 62.880
Epoch: 9 	Training Loss: 0.951073   Test Accuracy: 63.690
Epoch: 10 	Training Loss: 0.926880   Test Accuracy: 65.230
Epoch: 11 	Training Loss: 0.902628   Test Accuracy: 65.380
Epoch: 12 	Training Loss: 0.891919   Test Accuracy: 65.420
Epoch: 13 	Training Loss: 0.874810   Test Accuracy: 65.420
Epoch: 14 	Training Loss: 0.856405   Test Accuracy: 64.700
Epoch: 15 	Training Loss: 0.841723   Test Accuracy: 65.950
Epoch: 16 	Training Loss: 0.829818   Test Accuracy: 66.500
Epoch: 17 	Training Loss: 0.820007   Test Accuracy: 66.990
Epoch:

**6(A). TRAINING ALEXNET WITH SGD**

In [None]:
# Fresh AlexNet trained with plain SGD (no momentum), lr = 1e-3, 50 epochs.
alexnet = AlexNet().to(device)
sgd_alexnet = optim.SGD(alexnet.parameters(), lr=1e-3)
# a_loss_sgd / a_acc_sgd are the per-epoch histories used by the plotting cells.
alexnet_sgd, a_loss_sgd, a_acc_sgd = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=sgd_alexnet,
    n_epochs=50,
)

**6(B). TRAINING ALEXNET WITH SGD+MOMENTUM**

In [None]:
# Fresh AlexNet trained with SGD + momentum 0.9, lr = 1e-3, 50 epochs.
alexnet = AlexNet().to(device)
sgdm_alexnet = optim.SGD(alexnet.parameters(), lr=1e-3, momentum=0.9)
# a_loss_sgdm / a_acc_sgdm are the per-epoch histories used by the plotting cells.
alexnet_sgdm, a_loss_sgdm, a_acc_sgdm = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=sgdm_alexnet,
    n_epochs=50,
)

**6(C). TRAINING ALEXNET WITH ADAGRAD**

In [None]:
# Fresh AlexNet trained with Adagrad, lr = 1e-3, 50 epochs.
alexnet = AlexNet().to(device)
ag_alexnet = optim.Adagrad(alexnet.parameters(), lr=1e-3)
# a_loss_ag / a_acc_ag are the per-epoch histories used by the plotting cells.
alexnet_ag, a_loss_ag, a_acc_ag = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=ag_alexnet,
    n_epochs=50,
)

**6(D). TRAINING ALEXNET WITH RMSPROP**

In [None]:
# Fresh AlexNet trained with RMSprop, lr = 1e-3, 50 epochs.
alexnet = AlexNet().to(device)
rms_alexnet = optim.RMSprop(alexnet.parameters(), lr=1e-3)
# a_loss_rms / a_acc_rms are the per-epoch histories used by the plotting cells.
alexnet_rms, a_loss_rms, a_acc_rms = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=rms_alexnet,
    n_epochs=50,
)

**6(E). TRAINING ALEXNET WITH ADAM**

In [None]:
# Fresh AlexNet trained with Adam, lr = 1e-3, 50 epochs.
alexnet = AlexNet().to(device)
adam_alexnet = optim.Adam(alexnet.parameters(), lr=1e-3)
# a_loss_adam / a_acc_adam are the per-epoch histories used by the plotting cells.
alexnet_adam, a_loss_adam, a_acc_adam = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=adam_alexnet,
    n_epochs=50,
)

In [None]:
# Adam again on a fresh AlexNet, this time with lr = 1e-5, 50 epochs.
# NOTE: this rebinds adam_alexnet, alexnet_adam, a_loss_adam and a_acc_adam, so
# once this cell has run those names hold the lr = 1e-5 results (the other
# AlexNet runs in this notebook use lr = 1e-3).
alexnet = AlexNet().to(device)
adam_alexnet = optim.Adam(alexnet.parameters(), lr=1e-5)
alexnet_adam, a_loss_adam, a_acc_adam = train_test(
    model=alexnet,
    train_loader=train_dl,
    test_loader=test_dl,
    optimizer=adam_alexnet,
    n_epochs=50,
)

**PLOTTING THE ALEXNET RESULTS**

In [None]:
# Per-epoch training loss of every AlexNet run, one curve per optimizer.
fig, ax = plt.subplots()
for label, history in (
    ("sgd", a_loss_sgd),
    ("sgdm", a_loss_sgdm),
    ("adagrad", a_loss_ag),
    ("rmsprop", a_loss_rms),
    ("adam", a_loss_adam),
):
    # x axis uses 1-based epoch numbers, matching the training log output
    ax.plot(range(1, len(history) + 1), history, label=label)
ax.set_xlabel("epoch")
ax.set_ylabel("mean training loss")
ax.set_title("AlexNet train loss")
ax.legend(loc="upper right")
plt.show()

In [None]:
# Per-epoch test accuracy of every AlexNet run, one curve per optimizer.
fig, ax = plt.subplots()
for label, history in (
    ("sgd", a_acc_sgd),
    ("sgdm", a_acc_sgdm),
    ("adagrad", a_acc_ag),
    ("rmsprop", a_acc_rms),
    ("adam", a_acc_adam),
):
    # float() accepts plain numbers as well as 0-dim tensors on any device;
    # matplotlib cannot convert CUDA tensors to NumPy by itself.
    values = [float(v) for v in history]
    # x axis uses 1-based epoch numbers, matching the training log output
    ax.plot(range(1, len(values) + 1), values, label=label)
ax.set_xlabel("epoch")
ax.set_ylabel("test accuracy (%)")
ax.set_title("AlexNet test accuracy")
# accuracy curves rise over time, so the lower right corner is the free one
ax.legend(loc="lower right")
plt.show()

**7. FURTHER EXPERIMENTS**