In [None]:
# Based on https://medium.com/@athul929/hand-written-digit-classifier-in-pytorch-42a53e92b63e
# which itself appears to be based on
# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/feedforward_neural_network/main.py

In [None]:
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
# NOTE(review): none of the cells visible here move the model or the batches
# onto `device`, so as written everything runs on the CPU - confirm intent.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Hyperparameters

# Training schedule: passes over the training set, and samples per batch.
num_epochs = 5
batch_size = 100

# Optimizer step size.
learning_rate = 1e-3

# Network dimensions.
# One input per pixel of a flattened 28x28 image.
input_size = 28 * 28
# Width of the hidden layer; an arbitrary choice.
hidden_size = 500
# One output per digit class, 0 through 9.
output_size = 10

In [None]:
# MNIST (handwritten digits: 60000 training images, 10000 test images) is one
# of the standard datasets shipped with torchvision.
# http://yann.lecun.com/exdb/mnist/

mnist_root = './data'

# Only the training split asks for a download; the test split is then read
# from the same root directory. Each image is converted to a tensor on access.
train_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

# The loaders hand out the datasets in batches of `batch_size`. Training
# batches are reshuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Model definition.
# The constructor only creates the layers; how they connect to one another
# is expressed dynamically, by the order in which forward() applies them.
class NeuralNet(nn.Module):
    """Feed-forward network with a single hidden layer.

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced per input vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply layer1, then ReLU, then layer2, and return the result."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)

In [None]:
# Build the network from the hyperparameters chosen above.
model = NeuralNet(input_size=input_size,
                  hidden_size=hidden_size,
                  output_size=output_size)

# Cross-entropy loss, applied to the model's outputs in the training loop.
# https://www.cse.unsw.edu.au/~billw/cs9444/crossentropy.html
lossFunction = nn.CrossEntropyLoss()

# Adam optimizer, given the model's parameters to update.
# https://machinelearningmastery.com/adam-optimization-algorithm-for-deep-learning/
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
def show(img):
    """Plot one image, reshaping it to 28x28 first.

    The reshape lets callers pass the image already flattened.
    """
    square = img.reshape((28, 28))
    plt.imshow(square)
    plt.show()

def show_grid(images):
    """Plot a batch of images tiled into one grid, 10 images per row.

    Only the first channel of the assembled grid is drawn.
    """
    # make_grid defaults to 8 per row; 10 suits the 100-image batches better.
    tiled = torchvision.utils.make_grid(images, nrow=10)
    first_channel = tiled[0]
    plt.imshow(first_channel)
    plt.show()

In [None]:
# Train the network.
total_step = len(train_loader)
log_template = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'

for epoch in range(num_epochs):
    # Steps are counted from 1 so they can be reported directly.
    for step, (images, labels) in enumerate(train_loader, start=1):
        if step == 1:
            # Print one example of a label batch, on the very first step only.
            if epoch == 0:
                print(labels)
            # Plot the first batch of every epoch.
            show_grid(images)

        # The network takes flat vectors, so flatten each 28x28 image.
        flat = images.reshape(-1, 28 * 28)
        out = model(flat)
        loss = lossFunction(out, labels)

        # Clear gradients left from the previous step, then backpropagate
        # and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress every 100 steps.
        if step % 100 == 0:
            print(log_template.format(epoch + 1, num_epochs, step, total_step, loss.item()))

In [None]:
# Evaluate the trained network on the test set.

# Gradients are not needed here, so tracking is disabled for the whole pass.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        # Flatten a copy for the model; `images` keeps its original shape so
        # the misclassified samples can be plotted below.
        flat = images.reshape(-1, 28 * 28)
        out = model(flat)

        # The prediction for each sample is the index of its largest output.
        predicted = torch.max(out, 1)[1]

        # Mask of the samples in this batch the network got wrong.
        errors = predicted != labels
        num_wrong = errors.sum().item()

        # Update accuracy stats.
        batch_count = labels.size(0)
        total += batch_count
        correct += batch_count - num_wrong

        # Visualize the errors, if this batch has any.
        if num_wrong > 0:
            show_grid(images[errors])
            print("Predicted: ", predicted[errors])
            print("Actual: ", labels[errors])

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))