<a href="https://colab.research.google.com/github/rfdavid/simple-mnist-nn/blob/master/MNIST_Convolutional_NN_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h2>Convolutional Neural Network with PyTorch</h2>

This is a simple implementation that recognizes handwritten digits from the MNIST dataset using a convolutional neural network in the PyTorch framework.

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision
from collections import OrderedDict
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

'''
   PyTorch Neural Network Module
   Input: 
    - OrderedDict of sequence for the convolutional layers
    - OrderedDict of sequence for the linear layers
'''
class Model(nn.Module):
    """Convolutional stack followed by a fully connected stack.

    Args:
        seq_convolutional: OrderedDict of named modules, wrapped in an
            ``nn.Sequential`` and applied first.
        seq_linear: OrderedDict of named modules, wrapped in an
            ``nn.Sequential`` and applied to the flattened output of the
            convolutional stack.
    """

    def __init__(self, seq_convolutional, seq_linear):
        super().__init__()
        self.convolutional_layers = nn.Sequential(seq_convolutional)
        self.linear_layers = nn.Sequential(seq_linear)

    def forward(self, x):
        """Run ``x`` through both stacks and return the linear stack's output.

        Everything after the first dimension is flattened between the two
        stacks, so the linear stack receives a 2-D tensor whose first
        dimension is ``x.size(0)``.
        """
        x = self.convolutional_layers(x)
        # reshape() gives the same result as view() on contiguous tensors but
        # also accepts non-contiguous ones (e.g. channels_last activations),
        # where view() raises a RuntimeError.
        x = x.reshape(x.size(0), -1)
        return self.linear_layers(x)

'''
   The generic Neural Network class for MNIST dataset
   methods:
    - load_data()
    - run()
    - train()
    - test()
'''
class NeuralNetwork():
    """Small training/evaluation harness around a model, loss and optimizer.

    Attributes are plain and may be assigned after construction:
        model: the ``nn.Module`` to train and evaluate.
        loss_fn: callable ``loss_fn(pred, y)`` returning the loss tensor.
        optimizer: optimizer updating ``model``'s parameters.
        debug: when true, ``train()`` and ``test()`` print their accuracy.
        train_dataloader / test_dataloader: set by ``load_data()`` (or
            assigned directly) before calling ``train()`` / ``test()``.
    """

    def __init__(self, model = None, loss_fn = None, optimizer = None, debug = True):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.debug = debug
        self.train_dataloader = None
        self.test_dataloader = None

    def _model_device(self):
        """Return the device of the model's first parameter, or None if it has none."""
        first_param = next(iter(self.model.parameters()), None)
        return None if first_param is None else first_param.device

    def _batches(self, dataloader):
        """Yield ``(X, y)`` batches moved onto the model's device.

        Without this, a model placed on the GPU would receive CPU batches
        and fail with a device-mismatch error.
        """
        device = self._model_device()
        for X, y in dataloader:
            if device is not None:
                X, y = X.to(device), y.to(device)
            yield X, y

    @staticmethod
    def _require_loader(dataloader, name):
        """Raise a clear error when a dataloader has not been set yet."""
        if dataloader is None:
            raise RuntimeError(
                f"{name} is not set; call load_data() or assign it first"
            )

    def train(self):
        """Run one optimisation pass over the training data.

        Returns:
            Fraction of training samples classified correctly during the
            pass (0.0 when the dataset is empty).
        """
        self._require_loader(self.train_dataloader, "train_dataloader")
        size = len(self.train_dataloader.dataset)
        correct = 0
        # Make sure mode-dependent layers (dropout, batch norm) are training.
        self.model.train()
        for X, y in self._batches(self.train_dataloader):
            pred = self.model(X)
            loss = self.loss_fn(pred, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            correct += (pred.argmax(1) == y).sum().item()
        accuracy = correct / size if size else 0.0
        if self.debug:
            print(f"Training Accuracy: {(100*accuracy):>0.1f}%")

        return accuracy

    def test(self):
        """Evaluate the model on the test data without tracking gradients.

        Returns:
            Fraction of test samples classified correctly (0.0 when the
            dataset is empty).
        """
        self._require_loader(self.test_dataloader, "test_dataloader")
        size = len(self.test_dataloader.dataset)
        correct = 0
        # Switch mode-dependent layers (dropout, batch norm) to inference.
        self.model.eval()
        with torch.no_grad():
            for X, y in self._batches(self.test_dataloader):
                pred = self.model(X)
                correct += (pred.argmax(1) == y).sum().item()
        accuracy = correct / size if size else 0.0
        if self.debug:
            print(f"Test Accuracy: {(100*accuracy):>0.1f}%")

        return accuracy

    def load_data(self, batch_size = 64):
        """Download MNIST into ``data/`` and build both dataloaders.

        Args:
            batch_size: number of samples per batch for both loaders.
        """
        training_data = datasets.MNIST(
            root='data',
            train=True,
            download=True,
            transform=ToTensor()
        )

        test_data = datasets.MNIST(
            root='data',
            train=False,
            download=True,
            transform=ToTensor()
        )

        # Only the training set is shuffled.
        self.train_dataloader = DataLoader(training_data, batch_size = batch_size, shuffle = True)
        self.test_dataloader = DataLoader(test_data, batch_size = batch_size)

    def run(self, epochs):
        """Train then test once per epoch, printing an epoch header each time."""
        for t in range(epochs):
            print(f"\nEpoch {t+1}")
            print("-------")
            self.train()
            self.test()

In [None]:
''' 
  This is a helper function to calculate the output size to use in our
  architecture by calculating the following equation:

  (n + 2*p - f)/s + 1

  n x n image (28x28)
  f x f filter (kernel size)
  p padding
  s stride
'''
def calc_output(output_size, kernel_size, padding, stride):
    """Return the output width of a square convolution.

    Computes ``floor((n + 2*p - f) / s) + 1`` where ``n`` is the input width
    (``output_size``), ``f`` the kernel size, ``p`` the padding and ``s`` the
    stride. Floor division is used so that inputs the stride does not divide
    evenly give a whole number of positions rather than a fraction.
    """
    return (output_size + 2 * padding - kernel_size) // stride + 1

def calc_structure(output_size, structure):
    """Return the grid width after a series of conv + max-pool stages.

    Args:
        output_size: width of the square input (e.g. 28 for a 28x28 image).
        structure: iterable of stages, each
            ``[kernel_size, padding, stride, maxpool_kernel_size]``.

    Each stage applies ``calc_output`` for the convolution and then divides
    by the max-pool kernel size, rounding down so that a width the pool does
    not divide evenly yields a whole number.
    """
    for stage in structure:
        kernel_size, padding, stride, pool_size = stage[:4]
        output_size = calc_output(output_size, kernel_size, padding, stride)
        output_size = output_size // pool_size  # maxpool
    return output_size

'''
   28x28 image
   [3, 1, 1, 2]   3x3 kernel, padding 1, stride 1, followed by maxpool kernel size = 2
   [3, 1, 1, 2]   3x3 kernel, padding 1, stride 1, followed by maxpool kernel size = 2
   Result: 7 (7x7 grid)
'''
# Grid width for a 28x28 input after two identical stages, each given as
# [kernel size, padding, stride, maxpool kernel size].
print(
    calc_structure(
        output_size=28,
        structure=[
            [3, 1, 1, 2],
            [3, 1, 1, 2],
        ],
    )
)


7.0


In [None]:
'''
   Create two sequences: convolutional followed by linear
'''
# Convolutional stack: two conv -> ReLU -> max-pool stages.
# Width per step for a 28x28 input:
#   conv1:    (28 + 2*1 - 3)/1 + 1 = 28
#   maxpool1: 28 / 2 = 14
#   conv2:    (14 + 2*1 - 3)/1 + 1 = 14
#   maxpool2: 14 / 2 = 7
sequence_convolutional = OrderedDict(
    # a single input channel (grayscale)
    conv1=nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=1),
    relu1=nn.ReLU(),
    maxpool1=nn.MaxPool2d(kernel_size=2),
    conv2=nn.Conv2d(6, 12, kernel_size=3, stride=1, padding=1),
    relu2=nn.ReLU(),
    maxpool2=nn.MaxPool2d(kernel_size=2),
)

'''
   The input size for the linear sequence is the output channel size from
   the convolutional sequence times n x n calculated size
'''
# Linear stack fed by the flattened convolutional output:
# 12 output channels x 7 x 7 grid -> 64 hidden units -> 10 outputs.
# The 7x7 grid is what the calc_structure helper reports for this architecture.
sequence_linear = OrderedDict(
    linear1=nn.Linear(in_features=12 * 7 * 7, out_features=64),
    relu3=nn.ReLU(),
    linear2=nn.Linear(in_features=64, out_features=10),
)

# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Assemble the harness: model on the chosen device, SGD optimizer and
# cross-entropy loss, then fetch MNIST and train/test for three epochs.
network = NeuralNetwork(model=Model(sequence_convolutional, sequence_linear).to(device))
network.optimizer = torch.optim.SGD(network.model.parameters(), lr=0.3)
network.loss_fn = nn.CrossEntropyLoss()
network.load_data()
network.run(epochs=3)


Epoch 1
-------
Training Accuracy: 90.2%
Test Accuracy: 97.2%

Epoch 2
-------
Training Accuracy: 97.5%
Test Accuracy: 96.1%

Epoch 3
-------
Training Accuracy: 98.2%
Test Accuracy: 98.4%
