<a href="https://colab.research.google.com/github/ro-okie/LeNet-5/blob/main/LeNet-5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#importing relevant libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

#defining relevant variables
batch_size = 64
learning_rate = 0.001
num_classes = 10
num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device.type)

cuda


In [4]:
#  downloading the dataset, and applying transformation
#  initially MNIST dataset is of grayscale images and of size (28x28)
train_dataset = torchvision.datasets.MNIST(root='./data',train = True ,transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]), download = True)
test_dataset = torchvision.datasets.MNIST(root='./data',train = False, transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]), download = True)

In [5]:
# defining train_loader and test_loader
from torch.utils.data import DataLoader
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size,shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = True)

In [16]:
# defining our Convolutional Neural Net architecture
class LeNet5(nn.Module):
  def __init__(self, num_classes):
    super().__init__()
    self.conv1 = nn.Sequential(
        nn.Conv2d(1, 6, kernel_size = 5),
        nn.BatchNorm2d(6), # using BatchNorm2d(args: input_channels)
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )
    self.conv2 = nn.Sequential(
        nn.Conv2d(6, 16, kernel_size = 5),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )

    self.fc1 = nn.Linear(400,120)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(120,84)
    self.relu2 = nn.ReLU()
    self.fc3 = nn.Linear(84,num_classes)

  def forward(self, x):
    out = self.conv1(x)
    out = self.conv2(out)
    out = out.reshape(out.size(0),-1)
    out = self.fc1(out)
    out = self.relu1(out)
    out = self.fc2(out)
    out = self.relu2(out)
    out = self.fc3(out)
    return out




In [17]:
# defining the hyperparameters

model = LeNet5(num_classes)

if(device == 'cuda'):
    model.cuda()

#defining our loss function
loss_fn = nn.CrossEntropyLoss()

#defining our optimizer
optimizer = torch.optim.Adam(model.parameters(), lr= learning_rate)

# will use this when calculating accuracy
total_step = len(train_loader)

In [18]:
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    
    if(device=='cuda'):
      images.to(device)
      labels.to(device)

    # forward pass

    output = model(images)
    loss = loss_fn(output, labels)

    # backward pass and optimizer
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()


    if (i+1) % 400 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        		           .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

  

Epoch [1/10], Step [400/938], Loss: 0.0219
Epoch [1/10], Step [800/938], Loss: 0.0266
Epoch [2/10], Step [400/938], Loss: 0.0209
Epoch [2/10], Step [800/938], Loss: 0.0177
Epoch [3/10], Step [400/938], Loss: 0.0138
Epoch [3/10], Step [800/938], Loss: 0.0243
Epoch [4/10], Step [400/938], Loss: 0.0741
Epoch [4/10], Step [800/938], Loss: 0.0305
Epoch [5/10], Step [400/938], Loss: 0.0077
Epoch [5/10], Step [800/938], Loss: 0.0067
Epoch [6/10], Step [400/938], Loss: 0.0046
Epoch [6/10], Step [800/938], Loss: 0.0011
Epoch [7/10], Step [400/938], Loss: 0.0221
Epoch [7/10], Step [800/938], Loss: 0.0594
Epoch [8/10], Step [400/938], Loss: 0.0034
Epoch [8/10], Step [800/938], Loss: 0.0296
Epoch [9/10], Step [400/938], Loss: 0.1219
Epoch [9/10], Step [800/938], Loss: 0.0007
Epoch [10/10], Step [400/938], Loss: 0.0002
Epoch [10/10], Step [800/938], Loss: 0.0045


In [20]:
# testing our model on test_set
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:

    if(device=='cuda'):
      images.to(device)
      labels.to(device)
    
    output = model(images)

    _, predictedLabel = torch.max(output.data, 1) # (args: data, dimension)
    
    total += labels.size(0)
    correct += (predictedLabel == labels).sum().item()

  print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
	 



Accuracy of the network on the 10000 test images: 99.02 %


In [21]:
 # Accuracy of the network on the 10000 test images: 99.02 % :- which quite a good result with initial choice of hyperparameters 