**1. IMPORTANT LIBRARIES**

In [None]:
import torch 
import torchvision
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from torchvision.datasets import CIFAR10
import torch.optim as optim

**2. LOADING & USING PYTORCH TRANSFORMS ON CIFAR10**

In [None]:
#augmentations and transforms used on the TRAIN set only
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),   # random augmentation: training data only
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

#deterministic preprocessing for the TEST set: same tensor conversion and
#normalization as above but no random flip, so evaluating the same model twice
#yields the same accuracy
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

#building the train and test sets with their respective transforms
train_ds = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_ds = CIFAR10(root='./data', train=False, download=True, transform=test_transform)

#creating the train and test loaders from their respective sets
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
#the test metrics are sums over all samples, so the visiting order is irrelevant
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False)

**3. CODING THE LENET-5 AND ALEXNET ARCHITECTURES**

In [None]:
class LeNet(nn.Module):
    """LeNet-5 style convolutional classifier.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed three
    fully connected layers. ``fc1`` expects ``16 * 5 * 5`` flattened features,
    which is what a 32x32 input produces (32 -> 28 -> 14 -> 10 -> 5).

    Args:
        img_channels: number of channels in the input images.
        num_classes: number of output logits.
    """

    def __init__(self, img_channels=3, num_classes=10):
        # Plain nn.Module: the data flow lives in forward(), so the container
        # semantics of nn.Sequential (len/indexing/iteration) would be misleading.
        super().__init__()
        self.conv1 = nn.Conv2d(img_channels, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both convolution stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 400),
        # a wrong spatial size now fails in fc1 instead of silently being
        # folded into a different batch size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class AlexNet(nn.Module):
    """AlexNet-style CNN with 3x3 kernels throughout.

    Five convolutions interleaved with two 3x3 / stride-2 max-pooling stages,
    followed by three fully connected layers. ``fc1`` takes 9216 flattened
    features (256 channels * 6 * 6), which is what a 32x32 input yields
    (32 -> 30 -> 14 -> 6).

    Args:
        img_channels: number of channels in the input images.
        num_classes: number of output logits.
    """

    def __init__(self, img_channels=3, num_classes=10):
        super().__init__()
        # convolutional feature extractor
        self.conv1 = nn.Conv2d(img_channels, 96, kernel_size=3, stride=1, padding=0)
        self.pool = nn.MaxPool2d(3, stride=2)  # reused after conv2 and after conv5
        self.conv2 = nn.Conv2d(96, 256, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        # fully connected classifier head
        self.fc1 = nn.Linear(9216, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        # first stage: conv1 -> conv2 -> pool
        features = F.relu(self.conv2(F.relu(self.conv1(x))))
        features = self.pool(features)
        # second stage: conv3 -> conv4 -> conv5 -> pool
        for conv in (self.conv3, self.conv4, self.conv5):
            features = F.relu(conv(features))
        features = self.pool(features)
        # keep the batch dimension, flatten the rest
        flat = features.reshape(features.shape[0], -1)
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.fc3(hidden)

**4. SETTING UP GPU AND CRITERION**

In [None]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Cross-entropy loss on raw logits; train_test reads this module-level name.
criterion = nn.CrossEntropyLoss()

**5. DEFINING THE TRAIN AND TEST FUNCTION**

In [None]:
def train_test(model, train_loader, test_loader, optimizer, n_epochs, loss_fn=None):
    """Train ``model`` for ``n_epochs`` and evaluate it on the test data after every epoch.

    Args:
        model: network to optimize; it is updated in place.
        train_loader: iterable of ``(data, targets)`` training batches.
        test_loader: iterable of ``(data, targets)`` evaluation batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        n_epochs: number of passes over ``train_loader``.
        loss_fn: loss called as ``loss_fn(output, targets)``. When omitted, the
            module-level ``criterion`` is used.

    Returns:
        ``(model, train_l, test_acc)`` where ``train_l`` holds the mean batch
        loss of every epoch and ``test_acc`` the test accuracy in percent of
        every epoch, both as plain Python floats (``nan`` for an empty loader).
        The model is left in eval mode.
    """
    if loss_fn is None:
        loss_fn = criterion  # notebook-level default

    # Send the batches to wherever the model lives; only guess from CUDA
    # availability when the model has no parameters to ask.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_l = []
    test_acc = []
    for epoch in range(1, n_epochs+1):
        # The evaluation pass below switches to eval mode, so training mode has
        # to be re-enabled at the start of every epoch, not just once.
        model.train()
        running_loss = 0.0
        n_batches = 0
        for data, targets in train_loader:
            data = data.to(device=device)
            targets = targets.to(device=device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1

        # calculate average losses
        train_loss = running_loss / n_batches if n_batches else float('nan')
        train_l.append(train_loss)

        model.eval()
        correct = 0
        test_samples = 0
        with torch.no_grad():
            for data, targets in test_loader:
                data = data.to(device=device)
                targets = targets.to(device=device)
                ## Forward Pass
                scores = model(data)
                predictions = torch.argmax(scores, dim=-1)
                correct += (predictions == targets).sum()
                test_samples += predictions.size(0)
        # int(...) turns the (possibly on-GPU) counter into a host number so the
        # history contains floats that can be plotted or serialized directly
        t_a = 100.0 * int(correct) / test_samples if test_samples else float('nan')
        test_acc.append(t_a)
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss), f"  Test Accuracy: {t_a:.3f}")

    return model, train_l, test_acc

**5(A). TRAINING LENET WITH SGD**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
lenet = LeNet().to(device)
sgd_lenet = optim.SGD(lenet.parameters(), lr=1e-3) #SGD
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
lenet_sgd, loss_sgd, acc_sgd = train_test(lenet, train_dl, test_dl, sgd_lenet, n_epochs=50)

**5(B). TRAINING LENET WITH SGD+MOMENTUM**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
lenet = LeNet().to(device)
sgdm_lenet = optim.SGD(lenet.parameters(), lr=1e-3, momentum=0.9) #SGD with momentum
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
lenet_sgdm, loss_sgdm, acc_sgdm = train_test(lenet, train_dl, test_dl, sgdm_lenet, n_epochs=50)

**5(C). TRAINING LENET WITH ADAGRAD**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
lenet = LeNet().to(device)
ag_lenet = optim.Adagrad(lenet.parameters(), lr=1e-3) #Adagrad
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
lenet_ag, loss_ag, acc_ag = train_test(lenet, train_dl, test_dl, ag_lenet, n_epochs=50)

**5(D). TRAINING LENET WITH RMSPROP**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
lenet = LeNet().to(device)
rms_lenet = optim.RMSprop(lenet.parameters(), lr=1e-3) #RMSprop
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
lenet_rms, loss_rms, acc_rms = train_test(lenet, train_dl, test_dl, rms_lenet, n_epochs=50)

**5(E). TRAINING LENET WITH ADAM**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
lenet = LeNet().to(device)
adam_lenet = optim.Adam(lenet.parameters(), lr=1e-3) #Adam
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
lenet_adam, loss_adam, acc_adam = train_test(lenet, train_dl, test_dl, adam_lenet, n_epochs=50)

**6(A). TRAINING ALEXNET WITH SGD**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
sgd_alexnet = optim.SGD(alexnet.parameters(), lr=1e-3) #SGD
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_sgd, a_loss_sgd, a_acc_sgd = train_test(alexnet, train_dl, test_dl, sgd_alexnet, n_epochs=50)

**6(B). TRAINING ALEXNET WITH SGD+MOMENTUM**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
sgdm_alexnet = optim.SGD(alexnet.parameters(), lr=1e-3, momentum=0.9) #SGD+momentum
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_sgdm, a_loss_sgdm, a_acc_sgdm = train_test(alexnet, train_dl, test_dl, sgdm_alexnet, n_epochs=50)

**6(C). TRAINING ALEXNET WITH ADAGRAD**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
ag_alexnet = optim.Adagrad(alexnet.parameters(), lr=1e-3) #adagrad
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_ag, a_loss_ag, a_acc_ag = train_test(alexnet, train_dl, test_dl, ag_alexnet, n_epochs=50)

**6(D). TRAINING ALEXNET WITH RMSPROP**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
# NOTE: this run uses lr=1e-5, not the 1e-3 used by the SGD/Adagrad AlexNet runs
rms_alexnet = optim.RMSprop(alexnet.parameters(), lr=1e-5) #rmsprop
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_rms, a_loss_rms, a_acc_rms = train_test(alexnet, train_dl, test_dl, rms_alexnet, n_epochs=50)

**6(E). TRAINING ALEXNET WITH ADAM**

In [None]:
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
adam_alexnet = optim.Adam(alexnet.parameters(), lr=1e-3) #Adam
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_adam, a_loss_adam, a_acc_adam = train_test(alexnet, train_dl, test_dl, adam_alexnet, n_epochs=50)

In [None]:
# Second AlexNet + Adam run, this time with a smaller learning rate (1e-5).
# Seed torch's RNG so this run starts from repeatable initial weights (and any
# other randomness drawn from torch's global generator is repeatable too).
torch.manual_seed(0)
alexnet = AlexNet().to(device)
adam_alexnet_1 = optim.Adam(alexnet.parameters(), lr=1e-5) #Adam
# returns the trained model plus the per-epoch training-loss and test-accuracy histories
alexnet_adam_1, a_loss_adam_1, a_acc_adam_1 = train_test(alexnet, train_dl, test_dl, adam_alexnet_1, n_epochs=50)