<a href="https://colab.research.google.com/github/rohitk523/pytorch-deep-learning/blob/main/CNN_MNST(Handwritten_digit_classifier).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [103]:
%matplotlib inline

In [104]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [105]:
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

In [106]:
dataiter = iter(trainloader)
image, label = next(dataiter)
label, image.shape

(tensor([1, 7, 4, 6, 6, 6, 5, 5, 7, 9, 4, 6, 7, 7, 9, 7, 7, 9, 3, 7, 5, 6, 1, 6,
         2, 4, 8, 1, 6, 8, 3, 9, 8, 2, 2, 8, 5, 0, 7, 5, 6, 7, 1, 8, 7, 5, 8, 1,
         0, 4, 2, 4, 1, 5, 1, 7, 2, 2, 5, 7, 7, 4, 4, 6]),
 torch.Size([64, 1, 28, 28]))

In [107]:
# we have CNN architecture for handwritten digit classifier, we need all other things

# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 32, 3, 1)
#         self.conv2 = nn.Conv2d(32, 64, 3, 1)
#         self.dropout1 = nn.Dropout(0.25)
#         self.dropout2 = nn.Dropout(0.5)
#         self.fc1 = nn.Linear(9216, 128)
#         self.fc2 = nn.Linear(128, 10)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = F.relu(x)
#         x = self.conv2(x)
#         x = F.relu(x)
#         x = F.max_pool2d(x, 2)
#         x = self.dropout1(x)
#         x = torch.flatten(x, 1)
#         x = self.fc1(x)
#         x = F.relu(x)
#         x = self.dropout2(x)
#         x = self.fc2(x)
#         output = F.log_softmax(x, dim=1)
#         return output

# This is LeNet architecture

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # C1: Convolutional layer (input channels = 1, output channels = 6, kernel size = 5)
        self.conv1 = nn.Conv2d(1, 6, 5)

        # C3: Convolutional layer (input channels = 6, output channels = 16, kernel size = 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # C5: Fully connected convolutional layer (flattened previous conv output)
        self.fc1 = nn.Linear(16*4*4, 120)  # 16*4*4 is the flattened output from conv2

        # F6: Fully connected layer
        self.fc2 = nn.Linear(120, 84)

        # Output layer: 10 classes for MNIST digits
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # C1: Convolution + ReLU + Average Pooling
        x = F.relu(self.conv1(x))  # Input: 28x28x1 → Output: 24x24x6
        x = F.avg_pool2d(x, 2)     # Output: 12x12x6

        # C3: Convolution + ReLU + Average Pooling
        x = F.relu(self.conv2(x))  # Output: 8x8x16
        x = F.avg_pool2d(x, 2)     # Output: 4x4x16

        # Flatten the output from conv layers (prepare for fully connected layers)
        x = torch.flatten(x, 1)    # Output: 16*4*4

        # C5: Fully connected layer + ReLU
        x = F.relu(self.fc1(x))    # Output: 120

        # F6: Fully connected layer + ReLU
        x = F.relu(self.fc2(x))    # Output: 84

        # Output layer: Log Softmax for classification
        x = F.log_softmax(self.fc3(x), dim=1)  # Output: 10

        return x



net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [108]:
criteria = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [109]:
# training

for epoch in range(3):
  running_loss = 0
  for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criteria(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if i % 100 == 99:    # print every 100 mini-batches
        print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}')
        running_loss = 0.0

print('Finished Training')

[1, 100] loss: 13.153
[1, 200] loss: 4.936
[1, 300] loss: 4.201
[1, 400] loss: 3.322
[1, 500] loss: 2.698
[1, 600] loss: 2.397
[1, 700] loss: 1.989
[1, 800] loss: 1.895
[1, 900] loss: 1.747
[2, 100] loss: 1.448
[2, 200] loss: 1.358
[2, 300] loss: 1.309
[2, 400] loss: 1.213
[2, 500] loss: 1.154
[2, 600] loss: 1.194
[2, 700] loss: 1.050
[2, 800] loss: 1.110
[2, 900] loss: 1.005
[3, 100] loss: 0.875
[3, 200] loss: 0.851
[3, 300] loss: 0.798
[3, 400] loss: 0.921
[3, 500] loss: 0.876
[3, 600] loss: 0.918
[3, 700] loss: 0.767
[3, 800] loss: 0.776
[3, 900] loss: 0.775
Finished Training


In [110]:
PATH = './mnist_net.pth'
torch.save(net.state_dict(), PATH)

In [111]:
net = Net()
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [112]:
dataiter = iter(testloader)
images, labels = next(dataiter)

In [113]:
labels, images.shape

(tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
         4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
         4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3]),
 torch.Size([64, 1, 28, 28]))

In [114]:
outputs = net(images)

In [115]:
outputs.shape

torch.Size([64, 10])

In [116]:
torch.max(outputs, 1)

torch.return_types.max(
values=tensor([-8.1589e-04, -1.9550e-05, -7.6515e-04, -1.4305e-05, -2.5198e-04,
        -3.9457e-05, -5.0459e-03, -6.6647e-02, -5.5961e-01, -8.0458e-04,
        -1.8596e-05, -1.7594e-04, -5.3189e-04, -3.3259e-05, -1.9669e-05,
        -7.9277e-03, -1.1602e-03, -3.2352e-03, -2.6042e-02, -4.3272e-05,
        -1.6668e-02, -1.1288e-04, -1.3708e-04, -2.2802e-04, -1.4909e-03,
        -6.4373e-06, -5.6648e-03, -1.1921e-06, -3.3378e-05, -2.4597e-03,
        -5.7219e-05, -3.1825e-03, -4.8875e-05, -6.4402e-03, -6.2994e-04,
        -1.1028e-03, -9.6033e-02, -1.2898e-04, -9.6111e-02, -1.6295e-04,
        -1.1712e-02, -9.4842e-02, -3.3497e-05, -2.6715e-03, -2.9768e-03,
        -2.4101e-02, -1.7811e-03, -9.2476e-04, -3.6955e-06, -6.0318e-05,
        -5.1473e-04, -1.8239e-05, -3.9617e-04, -3.1206e-02, -1.1325e-05,
        -9.3337e-05, -2.3842e-07, -4.2489e-04, -1.3756e-04, -4.2911e-03,
        -1.6195e-02, -1.2407e-02, -3.0939e-01, -4.7964e-03],
       grad_fn=<MaxBackward0>),


In [117]:
_, predicted = torch.max(outputs, 1)

In [118]:
_, predicted

(tensor([-8.1589e-04, -1.9550e-05, -7.6515e-04, -1.4305e-05, -2.5198e-04,
         -3.9457e-05, -5.0459e-03, -6.6647e-02, -5.5961e-01, -8.0458e-04,
         -1.8596e-05, -1.7594e-04, -5.3189e-04, -3.3259e-05, -1.9669e-05,
         -7.9277e-03, -1.1602e-03, -3.2352e-03, -2.6042e-02, -4.3272e-05,
         -1.6668e-02, -1.1288e-04, -1.3708e-04, -2.2802e-04, -1.4909e-03,
         -6.4373e-06, -5.6648e-03, -1.1921e-06, -3.3378e-05, -2.4597e-03,
         -5.7219e-05, -3.1825e-03, -4.8875e-05, -6.4402e-03, -6.2994e-04,
         -1.1028e-03, -9.6033e-02, -1.2898e-04, -9.6111e-02, -1.6295e-04,
         -1.1712e-02, -9.4842e-02, -3.3497e-05, -2.6715e-03, -2.9768e-03,
         -2.4101e-02, -1.7811e-03, -9.2476e-04, -3.6955e-06, -6.0318e-05,
         -5.1473e-04, -1.8239e-05, -3.9617e-04, -3.1206e-02, -1.1325e-05,
         -9.3337e-05, -2.3842e-07, -4.2489e-04, -1.3756e-04, -4.2911e-03,
         -1.6195e-02, -1.2407e-02, -3.0939e-01, -4.7964e-03],
        grad_fn=<MaxBackward0>),
 tensor([7, 2, 1,

In [119]:
correct = 0
total = 0
with torch.no_grad():
  for data in testloader:
    images, labels = data
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

# Accuracy of the CNN network on the 10000 test images: 97 % Diff: epoch only 2
# Accuracy of the LeNet network on the 10000 test images: 98 % epoch only 2

Accuracy of the network on the 10000 test images: 98 %
