<a href="https://colab.research.google.com/github/mahausmani/deep_learning/blob/main/digit-recognition/cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten
from torch.optim import Adam
from sklearn.metrics import classification_report

import numpy as np
import matplotlib.pyplot as plt
import os

# Report the runtime and choose the compute device used by every later cell.
print('Using PyTorch version:', torch.__version__)
if not torch.cuda.is_available():
    # CPU fallback keeps the notebook runnable without an accelerator.
    print('No GPU found, using CPU instead.')
    device = torch.device('cpu')
else:
    print('Using GPU, device name:', torch.cuda.get_device_name(0))
    device = torch.device('cuda')

Using PyTorch version: 2.2.1+cu121
Using GPU, device name: Tesla T4


In [None]:
# Clone the companion repository into the current working directory (creates ./deep_learning).
# NOTE(review): no cell visible in this notebook reads from the clone — confirm it is still needed.
!git clone https://github.com/mahausmani/deep_learning.git

Cloning into 'deep_learning'...
remote: Enumerating objects: 110, done.[K
remote: Counting objects: 100% (110/110), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 110 (delta 41), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (110/110), 15.84 MiB | 10.54 MiB/s, done.
Resolving deltas: 100% (41/41), done.


# Constants

In [None]:
# --- Optimisation settings ---
epochs = 50         # number of full passes over the training set
lr = 1e-3           # learning rate passed to the optimizer
batch_size = 32     # samples per mini-batch for the data loaders

# --- Data settings ---
data_dir = "/content/data"   # root folder where the MNIST files are downloaded
val_split = 0.3              # NOTE(review): not referenced by any cell visible here — confirm intent

# Data Loading


In [None]:
# MNIST train / test splits; ToTensor() converts each image to a tensor.
train_dataset = datasets.MNIST(root=data_dir, train=True, transform=ToTensor(), download=True)
test_dataset = datasets.MNIST(root=data_dir, train=False, transform=ToTensor(), download=True)

# Training batches are shuffled; the test loader keeps the dataset order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sample counts of the training and test splits, one per line.
print(len(train_dataset), len(test_dataset), sep="\n")

60000
10000


# Model


In [None]:
# Inspect only the first mini-batch to confirm tensor shapes, then stop.
for images, digit_labels in train_dataloader:
    image_shape = images.shape
    print(image_shape, digit_labels.shape)
    print(f"Batch Size --> {image_shape[0]}")
    print(f"Input Size --> [{image_shape[2]} x {image_shape[-1]}]")
    break


torch.Size([32, 1, 28, 28]) torch.Size([32])
Batch Size --> 32
Input Size --> [28 x 28]


In [None]:
class CNNModel(nn.Module):
    """Small convolutional classifier that outputs per-class log-probabilities.

    Layout: two (Conv 5x5 -> ReLU -> MaxPool 2x2) stages, then
    FC(800 -> 500) -> ReLU -> FC(500 -> num_classes) -> LogSoftmax.

    The first fully connected layer expects 800 flattened features, i.e.
    50 channels x 4 x 4, which is what a 28x28 input produces
    (28 -> 24 -> 12 -> 8 -> 4). Other spatial sizes will not match ``fc3``.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output classes (size of the last layer).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # 1. CONV --> RELU --> MAXPOOL
        self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 20, kernel_size = (5,5))
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size = (2,2), stride = (2,2))

        # 2. CONV --> RELU --> MAXPOOL
        self.conv2 = nn.Conv2d(in_channels = 20, out_channels = 50, kernel_size = (5,5))
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=(2,2), stride = (2,2))

        # 3. FC --> ReLU  (800 = 50 channels * 4 * 4 for 28x28 inputs)
        self.fc3 = nn.Linear(in_features = 800, out_features=500)
        self.relu3 = nn.ReLU()

        # 4. FC --> LogSoftmax
        # Size the output layer from num_classes instead of a hard-coded 10,
        # so the constructor argument is actually honoured.
        self.fc4 = nn.Linear(in_features = 500, out_features = num_classes)
        self.softmax = nn.LogSoftmax(dim = 1)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: Image batch shaped (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding log-probabilities,
            suitable for ``nn.NLLLoss``.
        """
        # layer1
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        # layer2
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        # layer3: keep the batch dimension, flatten everything else
        x = flatten(x,1)
        x = self.fc3(x)
        x = self.relu3(x)

        # layer4
        x = self.fc4(x)
        output = self.softmax(x)
        return output


In [None]:
# Build the network on the selected device, then create the loss and optimizer.
model = CNNModel(in_channels=1, num_classes=10).to(device)
lossFN = nn.NLLLoss()  # negative log-likelihood on the model's LogSoftmax output
opt = Adam(params=model.parameters(), lr=lr)

In [None]:
# Show the layer-by-layer summary of the instantiated network.
print(model)

CNNModel(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (fc3): Linear(in_features=800, out_features=500, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=500, out_features=10, bias=True)
  (softmax): LogSoftmax(dim=1)
)


# Train

In [None]:
# Train for `epochs` passes over the training set, reporting every 5th epoch.
for i in range(epochs):
    # Training mode only needs to be set once per epoch, not once per batch.
    model.train()
    training_loss = 0.0
    correct_predictions = 0
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        pred = model(x)
        loss = lossFN(pred, y)

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Count hits on the tensors directly; .item() yields a plain Python int.
        correct_predictions += (pred.argmax(dim=1) == y).sum().item()
        # Accumulate a Python float rather than the loss tensor, so the running
        # total does not keep autograd history alive across the whole epoch.
        training_loss += loss.item()
    if i%5==0:
        # Loss shown is the sum of the per-batch losses over the epoch.
        print(f"Epoch --> {i} Loss --> {training_loss}", end = " ")
        print(f"Correct Pedictions --> {correct_predictions}")
        print(f"Training Accuracy --> {correct_predictions/len(train_dataset)}")


Epoch --> 0 Loss --> 13.192895889282227 Correct Pedictions --> 59859
Training Accuracy --> 0.99765
Epoch --> 5 Loss --> 11.442106246948242 Correct Pedictions --> 59894
Training Accuracy --> 0.9982333333333333
Epoch --> 10 Loss --> 9.761368751525879 Correct Pedictions --> 59913
Training Accuracy --> 0.99855
Epoch --> 15 Loss --> 9.599959373474121 Correct Pedictions --> 59934
Training Accuracy --> 0.9989
Epoch --> 20 Loss --> 9.211004257202148 Correct Pedictions --> 59932
Training Accuracy --> 0.9988666666666667
Epoch --> 25 Loss --> 4.684169292449951 Correct Pedictions --> 59962
Training Accuracy --> 0.9993666666666666
Epoch --> 30 Loss --> 8.321145057678223 Correct Pedictions --> 59947
Training Accuracy --> 0.9991166666666667
Epoch --> 35 Loss --> 3.5121378898620605 Correct Pedictions --> 59977
Training Accuracy --> 0.9996166666666667
Epoch --> 40 Loss --> 8.410188674926758 Correct Pedictions --> 59949
Training Accuracy --> 0.99915
Epoch --> 45 Loss --> 8.81149959564209 Correct Pedicti

# Predict

In [None]:
# Evaluate the trained model on the test set and print summary metrics.
correct_predictions = 0
testing_loss = 0.0
preds = []     # predicted class index for every test sample
targets = []   # true class index, collected from the same batches as `preds`

# Evaluation mode only needs to be set once, before iterating.
model.eval()
with torch.no_grad():
    for x, y in test_dataloader:
        x = x.to(device)
        y = y.to(device)

        pred = model(x)
        loss = lossFN(pred, y)

        predicted = pred.argmax(dim=1)
        correct_predictions += (predicted == y).sum().item()
        # Plain float accumulator instead of a device tensor.
        testing_loss += loss.item()

        # Store labels together with predictions so the report below does not
        # depend on the loader iterating in dataset order.
        preds.extend(predicted.cpu().tolist())
        targets.extend(y.cpu().tolist())

# Loss shown is the sum of the per-batch losses over the test set.
print(f"Loss --> {testing_loss}", end = " ")
print(f"Correct Pedictions --> {correct_predictions}")
print(f"Testing Accuracy --> {correct_predictions/len(test_dataset)}")
print(classification_report(np.array(targets),
    np.array(preds), target_names=test_dataset.classes))

Loss --> 40.65696716308594 Correct Pedictions --> 9931
Testing Accuracy --> 0.9931
              precision    recall  f1-score   support

    0 - zero       0.99      1.00      1.00       980
     1 - one       0.99      1.00      1.00      1135
     2 - two       0.99      0.99      0.99      1032
   3 - three       0.99      0.99      0.99      1010
    4 - four       0.99      0.99      0.99       982
    5 - five       0.99      0.99      0.99       892
     6 - six       1.00      0.99      0.99       958
   7 - seven       1.00      0.99      0.99      1028
   8 - eight       0.99      1.00      0.99       974
    9 - nine       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

