# Multilayer Perceptron for MNIST digits
Trains a multilayer perceptron (MLP) neural network to classify handwritten digits from the MNIST dataset.

## First - import required libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sklearn
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
import time
torch.manual_seed(101)

<torch._C.Generator at 0x7f88f81358f0>

In [None]:
# Transform handed to both MNIST datasets (via `transform=`) so that every
# image they return is a tensor.
Transform = transforms.ToTensor()

## Load training and test data

In [None]:
# Training split of MNIST, stored under ./DATA (downloaded if not present).
train = datasets.MNIST(
    root='./DATA',
    train=True,
    download=True,
    transform=Transform,
)
train

In [None]:
# Held-out split of MNIST (train=False), stored under the same ./DATA root.
test = datasets.MNIST(
    root='./DATA',
    train=False,
    download=True,
    transform=Transform,
)
test

## Display the first training image

In [None]:
# Indexing the dataset yields an (image, label) pair; take the first one.
first_sample = train[0]
image, label = first_sample
print('Shape:', image.shape, '\nLabel:', label)

In [None]:
# Reshape the sample to a 28x28 grid and render it in grayscale.
pixels = image.reshape(28, 28)
plt.imshow(pixels, cmap="gray")

## Create the data loaders and define the Perceptron

In [None]:
# Batch iterators over the two splits: training batches of 100 are shuffled,
# test batches of 500 keep the dataset order.
train_loader = DataLoader(
    dataset=train,
    batch_size=100,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test,
    batch_size=500,
    shuffle=False,
)

In [None]:
class MultilayerPerceptron(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        input_size: Number of features in each (already flattened) sample.
        output_size: Number of output classes.
        layers: Widths of the first and second hidden layer, in that order.
            Any indexable sequence works; only entries 0 and 1 are read.
    """

    # The default for `layers` is a tuple rather than a list: a list default
    # is a single object shared by every call and can be mutated by accident.
    def __init__(self, input_size=784, output_size=10, layers=(120, 84)):
        super().__init__()
        self.d1 = nn.Linear(input_size, layers[0])
        self.d2 = nn.Linear(layers[0], layers[1])
        self.d3 = nn.Linear(layers[1], output_size)

    def forward(self, X):
        """Return log-probabilities over the output classes.

        Args:
            X: Input tensor whose last dimension equals ``input_size``. The
                softmax is taken over dim 1, so a 2-D (batch, features)
                tensor is expected.

        Returns:
            Tensor of log-softmax values normalised over dim 1.
        """
        X = F.relu(self.d1(X))
        X = F.relu(self.d2(X))
        X = self.d3(X)
        # Output is already log-normalised, so it pairs directly with a
        # negative-log-likelihood loss.
        return F.log_softmax(X, dim=1)

In [None]:
# Build the network with its default sizes and print its layer summary.
model = MultilayerPerceptron()
print(model)

In [None]:
# The model's forward() already ends in log_softmax, so the matching loss is
# the negative log-likelihood. CrossEntropyLoss would apply log_softmax a
# second time; that leaves the value unchanged but is redundant work.
criterion = nn.NLLLoss()
# Adam over all model parameters with a learning rate of 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Pull a single batch from the loader to inspect its shape.
images, labels = next(iter(train_loader))
print('Initial Batch shape:', images.size())
# Flatten every sample to one vector; the batch size is read from the tensor
# instead of being hard-coded, so this still works if batch_size changes.
print('Batch shape after flattening',images.view(images.size(0),-1).size())

## Train the model for the specified number of epochs (full passes over the training data)

In [None]:
epochs = 10
# Histories with exactly one entry per epoch, appended when the epoch ends.
train_losses = []   # float: loss of the last training batch of the epoch
test_losses = []    # float: loss of the last test batch of the epoch
train_correct = []  # number of correct training predictions in the epoch
test_correct = []   # number of correct test predictions in the epoch

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    # Run the training batches (b counts batches starting at 1)
    for b, (X_train, y_train) in enumerate(train_loader, start=1):

        # Apply the model to the flattened images. The batch size is taken
        # from the tensor itself so a shorter final batch does not break view().
        y_pred = model(X_train.view(X_train.size(0), -1))
        loss = criterion(y_pred, y_train)

        # Calculate the number of correct predictions
        predicted = torch.max(y_pred.detach(), 1)[1]  # index of the highest score per sample
        batch_corr = (predicted == y_train).sum()
        trn_corr += batch_corr

        # Update parameters
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # backpropagate
        optimizer.step()       # apply the parameter update

        # Print interim results
        # NOTE: the progress counter assumes batch_size=100 and 60000 samples.
        if b%600 == 0:
            print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] Train loss: {loss.item():10.8f}')

    # Update train loss & accuracy for the epoch.
    # This runs once per epoch, after the batch loop. Appending inside the
    # batch loop would record one entry per batch, and because `+=` on a
    # tensor is in-place, all entries of an epoch would alias one tensor.
    # .item() stores a plain float, so the autograd graph is not kept alive
    # and the history can be plotted directly.
    train_losses.append(loss.item())
    train_correct.append(trn_corr)

    # Run the testing batches
    with torch.no_grad():
        for X_test, y_test in test_loader:

            # Apply the model
            y_val = model(X_test.view(X_test.size(0), -1))

            # Tally the number of correct predictions
            predicted = torch.max(y_val, 1)[1]
            tst_corr += (predicted == y_test).sum()

        # Update test loss & accuracy for the epoch (loss of the last test batch)
        loss = criterion(y_val, y_test)
        test_losses.append(loss.item())
        test_correct.append(tst_corr)



In [None]:
# Accuracy from the most recently recorded correct-count, as a percentage of
# the number of samples in the test loader's dataset (not a fixed 10000).
final_correct = test_correct[-1].item()
print(f'Test accuracy: {final_correct*100/len(test_loader.dataset):.3f}%')

In [None]:
# Plot the recorded loss and accuracy histories in the top and bottom slots of
# a 3-row grid.
# float() turns every history entry (tensor or number) into a plain Python
# float, so matplotlib never has to convert tensors that still require grad.
plt.subplot(3, 1, 1)
plt.plot([float(v) for v in train_losses], label='training loss')
plt.plot([float(v) for v in test_losses], label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend()  # legend() only affects the current axes, so each panel needs its own

plt.subplot(3, 1, 3)
# Correct-counts are converted to percentages of the respective dataset size.
n_train = len(train_loader.dataset)
n_test = len(test_loader.dataset)
plt.plot([100*float(t)/n_train for t in train_correct], label='training accuracy')
plt.plot([100*float(t)/n_test for t in test_correct], label='validation accuracy')
plt.title('Accuracy at the end of each epoch')

plt.legend()

## *Optional* - use trained model on previously unseen data