#### AI61002_Spr2023
#### Tutorial 1: Training LeNet for MNIST Classification

In [1]:
# Import libraries
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import transforms,datasets
import torch.optim as optim
from torch.autograd import Variable
import torchvision

import matplotlib.pyplot as plt

#### Load data

In [None]:
# Preprocessing applied to every sample: resize (shorter side -> 32 px),
# then convert the image to a tensor.
apply_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
BatchSize = 256  # change according to system specs

# Arguments shared by the train and test splits (data lives under ./MNIST).
mnist_kwargs = dict(root='./MNIST', download=True, transform=apply_transform)

# Training split; batches are drawn in shuffled order.
trainset = datasets.MNIST(train=True, **mnist_kwargs)
trainLoader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BatchSize,
    shuffle=True,
    num_workers=1,
)

# Test split; order is kept fixed (no shuffling).
testset = datasets.MNIST(train=False, **mnist_kwargs)
testLoader = torch.utils.data.DataLoader(
    testset,
    batch_size=BatchSize,
    shuffle=False,
    num_workers=1,
)
                                        

In [None]:
# Report how many samples each split contains
print(f'No. of samples in train set: {len(trainLoader.dataset)}')
print(f'No. of samples in test set: {len(testLoader.dataset)}')


In [None]:
# Take the first training sample; each dataset item is an (image, label) pair
image, label2 = trainset[0]
# Trailing expression so the notebook displays the image shape and its label
image.shape, label2

In [None]:
def show_img(img, label):
    """Print a sample's label and draw its image in grayscale.

    Args:
        img: Image tensor; all size-1 dimensions are squeezed away before
            it is handed to ``plt.imshow``.
        label: Value printed after the ``'Label: '`` prefix.
    """
    print('Label: ', label)
    pixels = torch.squeeze(img)
    plt.imshow(pixels, cmap='gray')


# Preview the first training sample
first_img, first_label = trainset[0]
show_img(first_img, first_label)

#### Define model architecture

In [None]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier with 10 output classes.

    Two conv -> ReLU -> max-pool stages are followed by three fully
    connected layers. For a 1x32x32 input the second pooling stage yields
    16 feature maps of size 5x5, i.e. the 400 features expected by ``fc1``.
    ``forward`` returns log-probabilities (log-softmax over dim 1).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected classifier head
        self.fc1 = nn.Linear(in_features=400, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        # Stateless layers shared across the forward pass
        self.relu = nn.ReLU()
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities of shape (rows, 10) for ``x``."""
        features = self.pool1(self.relu(self.conv1(x)))
        features = self.pool2(self.relu(self.conv2(features)))
        # Flatten to rows of 400 features for the linear layers
        flat = features.view(-1, 400)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.logSoftmax(self.fc3(hidden))

In [None]:
# Instantiate the model and print its layer-by-layer summary
net= LeNet()
print(net)

In [None]:
# Same architecture as LeNet, but forward() prints the tensor shape after every layer
class LeNet1(nn.Module):
    """LeNet variant whose forward pass prints the tensor shape at each step.

    The layers are the same as in ``LeNet``; only ``forward`` differs, in
    that it reports the shape of the input and of every intermediate result.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected classifier head
        self.fc1 = nn.Linear(in_features=400, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        # Stateless layers shared across the forward pass
        self.relu = nn.ReLU()
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the network on ``x``, printing the shape after every step.

        Returns the log-softmax (over dim 1) of the final linear layer.
        """
        print("Shape of input:", x.shape)
        # (message printed after the step, callable applied to the tensor)
        steps = (
            ("Shape after 1st Conv:", self.conv1),
            ("Shape after 1st ReLu:", self.relu),
            ("Shape after 1st MaxPool:", self.pool1),
            ("Shape after 2nd Conv:", self.conv2),
            ("Shape after 2nd Relu:", self.relu),
            ("Shape after 2nd MaxPool:", self.pool2),
            # Flatten to rows of 400 features for the linear layers
            ("Shape before 1st FC:", lambda t: t.view(-1, 400)),
            ("Shape after 1st FC:", self.fc1),
            ("Shape after 3rd ReLu:", self.relu),
            ("Shape after 2nd FC:", self.fc2),
            ("Shape after 4th ReLu:", self.relu),
            ("Shape after 3rd FC:", self.fc3),
        )
        for message, step in steps:
            x = step(x)
            print(message, x.shape)
        return self.logSoftmax(x)

In [None]:
# Instantiate the shape-printing network and push one sample through it
net1= LeNet1()
#print(net1)
# Give the sample explicit batch and channel dimensions: (1, 1, 32, 32)
image = image.reshape(1,1,32,32)# To maintain the input shape of the network
# Trailing expression: prints every intermediate shape and displays the output
net1(image)
#torch.argmax(net1(image),dim=1)

In [None]:
# Select the compute device: CUDA when available, otherwise the CPU
use_gpu = torch.cuda.is_available()
device = "cuda" if use_gpu else "cpu"
print('GPU is available!' if use_gpu else 'GPU is not available!')

# Move the model parameters to the selected device
net = net.to(device)

In [None]:
# Training setup.
# net already returns log-probabilities (LogSoftmax); CrossEntropyLoss applies
# log-softmax again, which leaves log-probabilities unchanged, so the value is
# the usual negative log-likelihood.
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.Adam(net.parameters(), lr=learning_rate) # ADAM
num_epochs = 20

num_batches = len(trainLoader)              # batches per epoch
num_train_samples = len(trainLoader.dataset)  # replaces the hard-coded 60000

train_loss = []  # mean batch loss per epoch
train_acc = []   # fraction of correctly classified training samples per epoch
for epoch in range(num_epochs):

    running_loss = 0.0  # sum of per-batch losses (plain float, no graph attached)
    running_corr = 0    # number of correct predictions so far

    # Gradients are accumulated over the whole epoch, so clear them once here
    # (not per batch).
    optimizer.zero_grad()

    for inputs, labels in trainLoader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Data feed-forward through the network
        outputs1 = net(inputs)
        # Finding the loss
        loss = criterion(outputs1, labels)

        # Back-propagate this batch's share of the epoch-mean loss right away.
        # Summed over all batches this is the same gradient as calling
        # backward() once on mean(batch losses), but each batch's autograd
        # graph is released immediately instead of being kept alive for the
        # entire epoch.
        (loss / num_batches).backward()

        # Accumulating the loss for each batch (as a float, detached from autograd)
        running_loss += loss.item()
        # Predicted class is the one with maximum probability
        preds1 = torch.argmax(outputs1, dim=1)
        # Accumulate number of correct predictions
        running_corr += torch.sum(preds1 == labels).item()

    # One parameter update per epoch, using the gradient of the mean batch loss.
    # NOTE(review): this keeps the original one-step-per-epoch schedule; for
    # mini-batch training move zero_grad()/step() inside the batch loop and
    # call loss.backward() without the 1/num_batches scaling.
    optimizer.step()

    epoch_loss = running_loss / num_batches       # Mean batch loss for one epoch
    epoch_acc = running_corr / num_train_samples  # Training accuracy in [0, 1]

    train_loss.append(epoch_loss) #Saving the loss over epochs for plotting the graph
    train_acc.append(epoch_acc) #Saving the accuracy over epochs for plotting the graph

    print('Epoch {:.0f}/{:.0f} : Training loss: {:.4f} | Training Accuracy: {:.4f}'.format(epoch+1,num_epochs,epoch_loss,epoch_acc*100))

In [None]:
# Predicted class index for the sample image: argmax over the class dimension
torch.argmax(net(image.to(device)),dim=1)

In [None]:
# Plot the training loss (left panel) and training accuracy (right panel) per epoch
epoch_axis = range(num_epochs)
fig = plt.figure(figsize=[15,5])

# Left panel: loss curve
plt.subplot(121)
plt.plot(epoch_axis, train_loss, 'r-', label='Loss/error')
plt.legend(loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Training')

# Right panel: accuracy curve
plt.subplot(122)
plt.plot(epoch_axis, train_acc, 'g-', label='Accuracy')
plt.legend(loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Training')

#### Evaluation of trained model

In [None]:
# Evaluate the trained model on the test split.
correct_pred = 0  # number of correctly classified test samples
net.eval()  # inference mode for any mode-dependent layers
# No gradients are needed for evaluation; disabling autograd avoids building
# (and holding on to) a computation graph for every test batch.
with torch.no_grad():
    for inputs, labels in testLoader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Feedforward test data batch through model
        output = net(inputs)
        # Predicted class is the one with maximum probability
        preds1 = torch.argmax(output, dim=1)
        correct_pred += torch.sum(preds1 == labels).item()

# Divide by the actual test-set size rather than a hard-coded 10000
test_accuracy = correct_pred / len(testLoader.dataset)
print('Testing accuracy = ',test_accuracy*100)