# Handwritten digit identification

In [200]:
#Imports
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

## Download datasets

In [201]:
#Training split - ToTensor converts each image into a tensor; download the files if needed
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
#Test split - same transform, read from the same root directory
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

## Explore training data

In [202]:
#Check how big the training data is (number of items in the dataset)
len(train_dataset)

60000

In [203]:
#Check how big the test data is (number of items in the dataset)
len(test_dataset)

10000

In [204]:
#Inspect the first training item to see what type each dataset entry is
type(train_dataset[0])

tuple

In [205]:
#It's a tuple, so unpack its 2 items: the image tensor and its label
img_0, label_0 = train_dataset[0]

In [206]:
#Get the size of the image tensor
img_0.size()

torch.Size([1, 28, 28])

In [207]:
#View the image (actually its tensor representation, one number per pixel)
img_0


(0 ,.,.) = 

Columns 0 to 8 
   0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.1176
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.1922  0.9333
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0706  0.8588
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.3137
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.

In [208]:
#View the label that was unpacked together with the image above
label_0

5

We can see above that our training dataset consists of 60,000 tuples.
Each tuple contains the tensor representation of an image of a handwritten digit and a label telling us which digit (0-9) it shows.

The test data set has the same structure but is smaller at 10,000 tuples.

## Set some model parameters

In [209]:
#Batch size - the number of observations to use before updating the parameters
batch_size = 100
#Epochs - the number of complete passes through the data (needs to be an int)
num_epochs = 7
#Batches per epoch - rounded up with integer arithmetic, because a data loader
#with default settings still yields a final, smaller batch when the dataset
#size is not an exact multiple of batch_size
batches_per_epoch = (len(train_dataset) + batch_size - 1) // batch_size
#Number of iterations - the number of batches processed in total
n_iters = num_epochs * batches_per_epoch
n_iters

4200

## Make datasets iterable

In [210]:
#Wrap each dataset in a torch data loader so it can be iterated over in batches
#Training batches are drawn in shuffled order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
#Test batches keep the dataset order
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

## Create model class

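We create a feed-forward neural network with two hidden linear layers, each followed by a ReLU non-linearity, and a final linear readout layer.
We don't add a final softmax layer because our loss class applies it to the model's raw outputs for us.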

The init section is where we build the model layer by layer.
The forward method is where we define how the data moves through the layers.

In [211]:
class FeedForwardNeuralNetModel(nn.Module):
    """Feed-forward network: three linear layers with a ReLU after the first two.

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the model.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in each of the two hidden layers.
        output_dim: Number of values produced for each input row.
    """

    def __init__(self, input_size, hidden_size, output_dim):
        super(FeedForwardNeuralNetModel, self).__init__()
        #Layer sizes come from the constructor arguments, so the class no longer
        #depends on notebook-level globals being defined before it is instantiated.
        #Linear function
        #NOTE: the attribute is spelled "fcl" (letter l, not digit 1); the name is
        #kept so existing references to it and its parameter names still match.
        self.fcl = nn.Linear(input_size, hidden_size)
        #Non linearity
        self.relu1 = nn.ReLU()
        #Linear function
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        #Non linearity 2
        self.relu2 = nn.ReLU()
        #Linear function (readout)
        self.fc3 = nn.Linear(hidden_size, output_dim)

    def forward(self, x):
        """Pass x through linear -> ReLU -> linear -> ReLU -> linear and return the result.

        Args:
            x: Tensor whose last dimension equals the input_size given to __init__.

        Returns:
            Tensor whose last dimension equals output_dim.
        """
        #Linear function
        out = self.fcl(x)
        #Non linear function
        out = self.relu1(out)
        #Linear function
        out = self.fc2(out)
        #Non linear function
        out = self.relu2(out)
        #Linear function (readout)
        out = self.fc3(out)
        #Return final values
        return out
        

## Instantiate model class

In [212]:
#Sizes needed to build the network
#Input: one value per pixel of a 28x28 image
input_dim = 28 * 28
#Hidden neurons per hidden layer (can be adjusted)
hidden_dim = 350
#Outputs: one per digit 0-9
output_dim = 10

#Build the model from the sizes above
model = FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)

#Use the GPU for increased performance when cuda is available
if torch.cuda.is_available():
    model = model.cuda()
    

## Instantiate loss class

In [213]:
#This is a multi class problem so we're using cross entropy (also known as log loss)
#The loss is given the raw model outputs together with the true labels
criterion = nn.CrossEntropyLoss()

## Instantiate optimiser

In [214]:
#Step size used for every parameter update (this can be adjusted)
learning_rate = 0.1

#Stochastic gradient descent over all of the model's parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
)

## Model parameters in depth

In [215]:
#Have a look at the model parameters
print(model.parameters())

#The generator itself is not useful to print, so walk through it instead.
#Parameters come out in layer order as (weights, bias) pairs - the bias is
#essentially the intercept:
#  first linear layer, second linear layer, third linear layer
for layer_parameter in model.parameters():
    print(layer_parameter.size())

<generator object Module.parameters at 0x7f7e9b447570>
torch.Size([350, 784])
torch.Size([350])
torch.Size([350, 350])
torch.Size([350])
torch.Size([10, 350])
torch.Size([10])


## Train and test model

In [216]:
#Counts parameter updates across all epochs
#(named "iteration" so the built-in iter() is not shadowed)
iteration = 0

#Pick the device once: the GPU if cuda is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#Cycle through all of the epochs
for epoch in range(num_epochs):

    #Iterate through the batches in the train loader
    for images, labels in train_loader:

        #Flatten each image to a vector of 28*28 values and move the batch to the device
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)

        #Clear gradients w.r.t parameters
        optimizer.zero_grad()

        #Forward pass to get outputs/logits
        outputs = model(images)

        #Calculate loss: cross entropy
        loss = criterion(outputs, labels)

        #Get gradients w.r.t parameters
        loss.backward()

        #Update parameters
        optimizer.step()

        iteration += 1

        #Every 200 updates, measure accuracy on the whole test set
        if iteration % 200 == 0:
            correct = 0
            total = 0

            #No gradients are needed for evaluation, so don't track them
            with torch.no_grad():
                #Separate names so the training batch above is not overwritten
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28*28).to(device)

                    #Forward pass only
                    test_outputs = model(test_images)

                    #Get predictions using the index of the maximum value
                    _, predicted = torch.max(test_outputs, 1)

                    #Total number of labels
                    total += test_labels.size(0)

                    #Total correct predictions - compared on the CPU, where the
                    #test labels live, and converted to a plain Python int
                    correct += (predicted.cpu() == test_labels).sum().item()

            accuracy = 100 * correct / total

            #Print loss of the latest training batch and the test accuracy
            #loss.item() extracts the Python number from the 0-dim loss tensor
            print('Epoch: {}. Iteration: {}. Loss: {}. Accuracy {}'.format(epoch + 1, iteration, loss.item(), accuracy))
        

Epoch: 1. Iteration: 200. Loss: 0.4716443717479706. Accuracy 88.83
Epoch: 1. Iteration: 400. Loss: 0.3200089633464813. Accuracy 91.06
Epoch: 1. Iteration: 600. Loss: 0.3572040796279907. Accuracy 92.41
Epoch: 2. Iteration: 800. Loss: 0.21023090183734894. Accuracy 93.33
Epoch: 2. Iteration: 1000. Loss: 0.1356976181268692. Accuracy 93.84
Epoch: 2. Iteration: 1200. Loss: 0.24825680255889893. Accuracy 94.44
Epoch: 3. Iteration: 1400. Loss: 0.12240671366453171. Accuracy 95.16
Epoch: 3. Iteration: 1600. Loss: 0.16904540359973907. Accuracy 95.63
Epoch: 3. Iteration: 1800. Loss: 0.19754821062088013. Accuracy 95.89
Epoch: 4. Iteration: 2000. Loss: 0.23999981582164764. Accuracy 96.17
Epoch: 4. Iteration: 2200. Loss: 0.07208829373121262. Accuracy 96.39
Epoch: 4. Iteration: 2400. Loss: 0.06589969247579575. Accuracy 96.61
Epoch: 5. Iteration: 2600. Loss: 0.060507114976644516. Accuracy 96.67
Epoch: 5. Iteration: 2800. Loss: 0.1321420520544052. Accuracy 96.9
Epoch: 5. Iteration: 3000. Loss: 0.07334148

The model above gets us an accuracy of 97.56% on the test data