# CNN for MNIST handwritten digit classification using PyTorch

In [1]:
import sys
# Display the Python interpreter version string as this cell's output
sys.version

'3.7.6 (default, Dec 30 2019, 19:38:28) \n[Clang 11.0.0 (clang-1100.0.33.16)]'

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv
import torchvision.datasets as ds

from tqdm import tqdm

# Print the installed torch and torchvision version strings
print("torch version:", torch.__version__)
print("torchvision version:", tv.__version__)

torch version: 1.5.0
torchvision version: 0.6.0


## Download MNIST dataset

In [10]:
# Mean and standard deviation of the MNIST pixel intensities after ToTensor()
# (the commonly published values). The previous Normalize((0,), (1,)) computed
# (x - 0) / 1 and therefore left the images unchanged.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Convert each image to a float tensor, then standardize it
transform = tv.transforms.Compose([
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(MNIST_MEAN, MNIST_STD)
])

# Download (if not already present under ./data) and wrap the training split
train_dataset = ds.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True
)

# Same preprocessing for the held-out test split
test_dataset = ds.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True
)

## Create Data Loaders

In [11]:
# Mini-batch size shared by both loaders
BS = 64

# Training batches are drawn in a freshly shuffled order
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BS, shuffle=True)

# Test batches keep the dataset order
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BS, shuffle=False)

## Create Model

In [5]:
class Net(nn.Module):
  """Small three-stage CNN producing 10 class logits per input image.

  Each stage applies a 3x3 convolution (padding=1), batch normalization,
  ReLU, 2x2 max-pooling and channel-wise dropout (p=0.05). The classifier
  expects 32 * 3 * 3 = 288 features per sample, i.e. a 3x3 map with 32
  channels after the third pooling (as produced by 1x28x28 inputs).
  """

  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
    self.bn1 = nn.BatchNorm2d(8)
    self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
    self.bn2 = nn.BatchNorm2d(16)
    self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
    self.bn3 = nn.BatchNorm2d(32)
    self.classifier = nn.Linear(32 * 3 * 3, 10)
    self.pool = nn.MaxPool2d(2)

  def forward(self, x):
    """Return the raw class scores (logits) for a batch of images.

    Args:
      x: image batch with one channel; the spatial size must reduce to 3x3
        after three 2x2 poolings (e.g. 28x28).

    Returns:
      Tensor with one row of 10 logits per input sample.
    """
    # F.dropout2d defaults to training=True, so the module's mode has to be
    # passed explicitly; otherwise channels keep being dropped after
    # model.eval() and inference becomes random.
    x = F.dropout2d(self.pool(F.relu(self.bn1(self.conv1(x)))), p=0.05, training=self.training)
    x = F.dropout2d(self.pool(F.relu(self.bn2(self.conv2(x)))), p=0.05, training=self.training)
    x = F.dropout2d(self.pool(F.relu(self.bn3(self.conv3(x)))), p=0.05, training=self.training)
    # Flatten per sample (keeps the batch dimension fixed) so that a
    # wrong-sized input fails in the linear layer instead of being silently
    # reshaped into a different batch size.
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    return x

model = Net()

## Check for GPU availability

In [6]:
# Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Move the model's parameters and buffers to the chosen device
model.to(device)

cpu


Net(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (classifier): Linear(in_features=288, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [7]:
from torchsummary import summary

# Per-layer table of output shapes and parameter counts for a 1x28x28 input
summary(model, (1, 28, 28), batch_size=BS)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [32, 8, 28, 28]              80
       BatchNorm2d-2            [32, 8, 28, 28]              16
         MaxPool2d-3            [32, 8, 14, 14]               0
            Conv2d-4           [32, 16, 14, 14]           1,168
       BatchNorm2d-5           [32, 16, 14, 14]              32
         MaxPool2d-6             [32, 16, 7, 7]               0
            Conv2d-7             [32, 32, 7, 7]           4,640
       BatchNorm2d-8             [32, 32, 7, 7]              64
         MaxPool2d-9             [32, 32, 3, 3]               0
           Linear-10                   [32, 10]           2,890
Total params: 8,890
Trainable params: 8,890
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.10
Forward/backward pass size (MB): 6.01
Params size (MB): 0.03
Estimated Total

## Define a loss function and an optimizer

In [8]:
# Cross-entropy loss on the network's logits
criterion = nn.CrossEntropyLoss()

# Stochastic gradient descent with momentum over all model parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

## Train the network

In [16]:
from time import time

EPOCHS = 2      # number of full passes over the training set
LOG_FREQ = 100  # report the mean loss every LOG_FREQ batches

# Put the model into training mode before any weight update
model.train()

for epoch in range(EPOCHS):

    print(f'Epoch {epoch + 1}')
    start_time = time()
    running_loss = 0

    # One optimization step per mini-batch, wrapped in a progress bar
    for i, (inputs, labels) in enumerate(tqdm(train_loader)):

        # Drop the gradients left over from the previous step
        optimizer.zero_grad()

        # Move the batch to the selected device
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass and loss for this batch
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backpropagate, then update the parameters
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Only report at the end of each LOG_FREQ-batch window
        if (i+1) % LOG_FREQ != 0:
            continue
        print(f'[{epoch+1}, {i+1}] loss: {(running_loss/LOG_FREQ):.3f}')
        running_loss = 0

    end_time = time()
    print(f'{end_time - start_time:.2f}s for epoch')

  0%|          | 2/938 [00:00<00:48, 19.41it/s]

Epoch 1


 11%|█         | 104/938 [00:04<00:32, 25.87it/s]

[1, 100] loss: 0.077


 22%|██▏       | 203/938 [00:07<00:28, 25.86it/s]

[1, 200] loss: 0.072


 32%|███▏      | 302/938 [00:11<00:25, 24.54it/s]

[1, 300] loss: 0.067


 43%|████▎     | 404/938 [00:15<00:21, 24.66it/s]

[1, 400] loss: 0.065


 54%|█████▎    | 503/938 [00:19<00:17, 25.24it/s]

[1, 500] loss: 0.072


 64%|██████▍   | 605/938 [00:24<00:13, 24.54it/s]

[1, 600] loss: 0.065


 75%|███████▌  | 704/938 [00:28<00:09, 23.53it/s]

[1, 700] loss: 0.074


 86%|████████▌ | 803/938 [00:32<00:05, 23.97it/s]

[1, 800] loss: 0.068


 96%|█████████▌| 902/938 [00:36<00:01, 24.49it/s]

[1, 900] loss: 0.069


100%|██████████| 938/938 [00:37<00:00, 24.69it/s]
  0%|          | 3/938 [00:00<00:38, 24.46it/s]

37.99s for epoch
Epoch 2


 11%|█         | 105/938 [00:04<00:34, 24.46it/s]

[2, 100] loss: 0.067


 22%|██▏       | 204/938 [00:08<00:33, 21.98it/s]

[2, 200] loss: 0.065


 32%|███▏      | 303/938 [00:12<00:26, 23.84it/s]

[2, 300] loss: 0.070


 43%|████▎     | 405/938 [00:16<00:21, 24.70it/s]

[2, 400] loss: 0.066


 54%|█████▎    | 504/938 [00:20<00:17, 24.57it/s]

[2, 500] loss: 0.061


 64%|██████▍   | 603/938 [00:24<00:13, 24.92it/s]

[2, 600] loss: 0.057


 75%|███████▍  | 702/938 [00:28<00:09, 24.71it/s]

[2, 700] loss: 0.065


 86%|████████▌ | 804/938 [00:33<00:05, 24.93it/s]

[2, 800] loss: 0.067


 96%|█████████▋| 903/938 [00:37<00:01, 24.89it/s]

[2, 900] loss: 0.065


100%|██████████| 938/938 [00:38<00:00, 24.39it/s]

38.46s for epoch





## Check performance on test data

In [None]:

correct = 0
total = 0

# Evaluation mode: the BatchNorm layers use their running statistics
model.eval()

# No gradients are needed to measure accuracy; no_grad() avoids building the
# autograd graph for every test batch.
with torch.no_grad():
    for inputs, labels in test_loader:

        # Send to GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest logit along dimension 1
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test accuracy: {100 * correct / total}')