In [104]:
# importing the libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision
import os

In [112]:
# implementing LeNet architecture
class LeNet(nn.Module):
  """LeNet-5 style convolutional classifier producing 10 class logits.

  The layer sizes require single-channel 32x32 inputs: the three unpadded 5x5
  convolutions and two 2x2 average pools shrink the spatial size
  32 -> 28 -> 14 -> 10 -> 5 -> 1, so exactly 120 features reach the linear head.
  """

  def __init__(self):
    super(LeNet,self).__init__()  # run nn.Module's initialisation so parameters get registered
    self.relu=F.relu
    self.pool=nn.AvgPool2d(kernel_size=(2,2))
    # padding=0 means no padding, so every 5x5 convolution trims 4 pixels per spatial dimension
    self.conv1=nn.Conv2d(in_channels=1,out_channels=6,kernel_size=(5,5),stride=1,padding=0)
    self.conv2=nn.Conv2d(in_channels=6,out_channels=16,kernel_size=(5,5),stride=1,padding=0)
    self.conv3=nn.Conv2d(in_channels=16,out_channels=120,kernel_size=(5,5),stride=1,padding=0)
    self.linear1=nn.Linear(120,84)
    self.linear2=nn.Linear(84,10)


  def forward(self,x):
    """Map a batch of images (batchsize X 1 X 32 X 32) to raw class scores (batchsize X 10)."""
    x=self.relu(self.conv1(x))
    x=self.pool(x)
    x=self.relu(self.conv2(x))
    x=self.pool(x)
    x=self.relu(self.conv3(x))
    # reshaping  batchsize X 120 X 1 X 1 ====> batchsize X 120
    x=x.reshape(x.shape[0],-1)
    x=self.relu(self.linear1(x))
    x=self.linear2(x)
    return x

  def evaluate(self,loader):
    """Return the fraction of samples in `loader` that are classified correctly.

    Args:
      loader: iterable yielding (inputs, labels) batches, e.g. a DataLoader.

    Returns:
      Accuracy as a float in [0, 1]; 0.0 when `loader` yields no samples.

    The model is put in evaluation mode while scoring and its previous
    train/eval mode is restored afterwards, so calling this between training
    epochs does not silently leave the model in eval mode.
    """
    was_training=self.training
    # score on whatever device the model lives on instead of assuming the CPU
    device=next(self.parameters()).device
    num_samples=0
    num_correct=0
    self.eval()
    try:
      # we don't want to compute gradients while evaluating
      with torch.no_grad():
        for x,y_true in loader:
          x=x.to(device)
          y_true=y_true.to(device)
          # predicted label = index of the largest logit
          y_pred=self(x).argmax(dim=1)
          num_correct+=(y_pred==y_true).sum().item()
          num_samples+=y_pred.size(0)
    finally:
      # restore the mode the caller had, even if the loader raised
      self.train(was_training)
    if num_samples==0:
      return 0.0
    return num_correct/num_samples

In [113]:
# Preprocessing applied to every image before it reaches the network:
#   1. zero-pad 2 pixels on every side (28x28 MNIST digits become the 32x32 input LeNet expects)
#   2. convert the image to a tensor
#   3. normalise with mean 0.1307 and std 0.3081 (presumably the MNIST pixel statistics)
transformations=transforms.Compose([
    transforms.Pad(2,fill=0,padding_mode='constant'),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,),std=(0.3081,)),
])

In [114]:
# MNIST training split (downloaded into ./data when missing), served as shuffled mini-batches of 64
trainset=datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transformations,
)
trainloader=DataLoader(trainset,batch_size=64,shuffle=True)

In [115]:
# loading the test data (downloaded into ./data when missing)
testset=torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transformations)
# accuracy does not depend on sample order, so the evaluation loader is left unshuffled
# to keep evaluation deterministic
testloader=torch.utils.data.DataLoader(testset,batch_size=64,shuffle=False)

In [116]:
# network to be trained
model=LeNet()
# cross-entropy loss computed on the raw logits returned by the model
loss_func=nn.CrossEntropyLoss()
# Adam optimizer over all model parameters
optimizer=optim.Adam(model.parameters(),lr=1e-3)

In [118]:
# training the model: one pass over trainloader per epoch, then report test accuracy
num_epochs=5
for epoch in range(num_epochs):

  # the per-epoch evaluation below may leave the model in eval mode, so explicitly
  # switch back to training mode before any parameter updates
  model.train()

  for data,target in trainloader:

    # each batch starts from fresh gradients so zeroing out the gradient for each batch
    optimizer.zero_grad()
    # forward prop
    outputs=model(data)
    # calculating the loss
    loss=loss_func(outputs,target)
    # backprop
    loss.backward()
    # updating the parameters
    optimizer.step()

  print(f"Epoch {epoch+1}\t Accuracy:{model.evaluate(testloader)}")


Epoch 1	 Accuracy:0.974
Epoch 2	 Accuracy:0.9837
Epoch 3	 Accuracy:0.9833
Epoch 4	 Accuracy:0.9878
Epoch 5	 Accuracy:0.9882
