In [1]:
import requests, time
from urllib.error import URLError, HTTPError
import fastai
from fastai.vision.all import *

# Classifying Digits

The goal here is to build a model that classifies digits, and to improve the model parameters using gradient descent.

## Setup

First, load the data into tensors for training with a PyTorch model (assisted by fastai)

In [2]:
##. Fetch the raw MNIST sample data (3's and 7's only)
path = untar_data(URLs.MNIST_SAMPLE)

##. Sorted listings of the training image files for each digit
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

##. Read every training image into its own tensor
three_tensors = [tensor(Image.open(img_file)) for img_file in threes]
seven_tensors = [tensor(Image.open(img_file)) for img_file in sevens]

##. Stack each list into one tensor and rescale the entries to floats between 0 and 1
stacked_threes = torch.stack(three_tensors).float().div(255)
stacked_sevens = torch.stack(seven_tensors).float().div(255)

##. Average image per digit (mean over the stacking dimension), kept as a baseline
mean3 = stacked_threes.mean(dim=0)
mean7 = stacked_sevens.mean(dim=0)

##. One arbitrary example of each digit, for illustration
a_3 = stacked_threes[21]
a_7 = stacked_sevens[33]

##. Validation images get the same treatment: load, stack, rescale to 0..1
valid_3_tens = torch.stack(
    [tensor(Image.open(img_file)) for img_file in (path/'valid'/'3').ls()]
).float().div(255)

valid_7_tens = torch.stack(
    [tensor(Image.open(img_file)) for img_file in (path/'valid'/'7').ls()]
).float().div(255)

## Preparing the training data for the model

In [3]:
##. Join the threes and sevens into one tensor, then flatten every image
##. into a row of 28*28 values (via .view) so the result is rank-2
train_x = torch.cat((stacked_threes, stacked_sevens), dim=0).view(-1, 28*28)

##. Targets in the same order as train_x -- 1: three, 0: seven.
##. Indexing with None adds a trailing axis, turning it into a column vector
train_y = tensor([1]*len(threes) + [0]*len(sevens))[:, None]

##. Pair each row with its target so the list can serve as a PyTorch `Dataset`
dset = list(zip(train_x, train_y))
##. Unpack the first (input, target) pair into x and y
x, y = dset[0]

## Preparing the validation data for the model

In [4]:
##. Join the validation threes and sevens into one tensor, then flatten every
##. image into a row of 28*28 values (via .view) so the result is rank-2
valid_x = torch.cat((valid_3_tens, valid_7_tens), dim=0).view(-1, 28*28)

##. Targets in the same order as valid_x -- 1: three, 0: seven.
##. Indexing with None adds a trailing axis, turning it into a column vector
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens))[:, None]

##. Pair each row with its target so the list can serve as a PyTorch `Dataset`
valid_dset = list(zip(valid_x, valid_y))

## Initializing the model
The model needs an initial set of weights and a bias.

In [5]:
def init_params(size, std=1.0):
    """Return a random tensor of shape `size` with gradient tracking enabled.

    Values are drawn with `torch.randn` and multiplied by `std`.
    """
    ##. Draw the random starting values and apply the requested scale
    values = torch.randn(size) * std
    ##. Turn on gradient tracking in place so .backward() can reach this tensor
    return values.requires_grad_()

##. Random starting parameters: one weight per pixel (a 28*28 x 1 column) and a single bias
weights, bias = init_params((28*28, 1)), init_params(1)

##. Linear model: predictions for a batch are xb times the weights, plus the bias.
##. Reads the module-level `weights` and `bias` at call time.
def linear1(xb):
    return torch.matmul(xb, weights) + bias

### Defining a loss function and an activation function

#### First attempt loss function
As a first attempt, we can define a loss function that measures the distance between the predictions and the targets: for each training example it takes `1 - prediction` when the target is 1 (a three) and the prediction itself when the target is 0 (a seven), so that a perfect prediction scores 0 and a completely wrong one scores 1, then takes the average of those scores.

In [6]:
def mnist_loss(predictions, targets):
    ##. Per-example distance from the target:
    ##. target 1 -> 1 - prediction, any other target -> the prediction itself
    is_positive = targets == 1
    per_example = torch.where(is_positive, 1 - predictions, predictions)
    ##. The loss is the average distance over the batch
    return per_example.mean()

This is great, but it's super simple and doesn't ensure that our predictions will lie between 0 and 1, which we definitely want since we are doing a binary classification.

#### A second attempt... adding an activation function

We can add the classic sigmoid activation function to our loss function to ensure values are in the range 0 to 1.

In [7]:
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise to a tensor.

    Delegates to `torch.sigmoid` instead of spelling the formula out by hand.
    The hand-written form overflows `exp(-x)` to inf for strongly negative
    float32 inputs (around x < -88); the forward value still comes out as 0,
    but the backward pass then multiplies 0 by inf and produces NaN gradients.
    The built-in returns the same values with finite gradients everywhere.
    """
    return torch.sigmoid(x)

def mnist_loss(predictions, targets):
    ##. Pass the raw model outputs through a sigmoid before scoring them
    squashed = torch.sigmoid(predictions)
    ##. Per-example distance from the target:
    ##. target 1 -> 1 - value, any other target -> the value itself
    per_example = torch.where(targets == 1, 1 - squashed, squashed)
    ##. The loss is the average distance over the batch
    return per_example.mean()

Building, training, updating...

In [8]:
##. Wrap the training Dataset in a DataLoader with a batch size of 256
dl = DataLoader(dset, batch_size=256)
##. Pull the first batch of training inputs and targets
xb, yb = first(dl)

##. Same batch size for the validation Dataset
valid_dl = DataLoader(valid_dset, batch_size=256)

##. Use the first ten training rows as a small test batch
batch = train_x[0:10]

##. Run the linear model on the test batch and display its raw outputs
preds = linear1(batch)
preds

tensor([[ 3.4007],
        [-5.7881],
        [ 9.2380],
        [12.7564],
        [ 2.6253],
        [12.2671],
        [-7.5896],
        [12.7323],
        [ 7.1000],
        [14.6207]], grad_fn=<AddBackward0>)

In [9]:
##. Compute the gradient of the loss for one batch
def calc_grad(xb, yb, model):
    ##. Forward pass through the model, score the outputs against the targets
    ##. with mnist_loss, then backpropagate from that loss. Nothing is returned;
    ##. the result is left in the .grad of the tensors the model used.
    mnist_loss(model(xb), yb).backward()

##. Run one epoch of training: a gradient step on the weights and bias per batch
def train_epoch(model, lr, params):
    ##. Walk through every batch of the module-level training DataLoader `dl`
    for inputs, targets in dl:
        ##. Fill in .grad on each parameter for this batch
        calc_grad(inputs, targets, model)
        for param in params:
            ##. Step against the gradient, scaled by the learning rate; the update
            ##. goes through .data so it is not itself recorded by autograd
            param.data.sub_(param.grad * lr)
            ##. Clear the gradient so the next batch starts from zero
            param.grad.zero_()

##. Accuracy on one batch: the fraction of outputs on the correct side of 0.5
def batch_accuracy(xb, yb):
    ##. Despite its name, `xb` is passed model outputs by the callers in this
    ##. notebook; a sigmoid is applied to it before thresholding
    probs = torch.sigmoid(xb)
    ##. A prediction counts as correct when (prob > 0.5) matches the target
    is_correct = probs.gt(0.5).eq(yb)
    return is_correct.float().mean()

def validate_epoch(model):
    ##. Score the model on each batch of the module-level validation DataLoader
    batch_scores = []
    for inputs, targets in valid_dl:
        batch_scores.append(batch_accuracy(model(inputs), targets))
    ##. Average the per-batch accuracies and report a plain float rounded to 4 places
    mean_score = torch.stack(batch_scores).mean()
    return round(mean_score.item(), 4)

In [10]:
##. Accuracy of linear1 with its current weights and bias on the ten-row test
##. batch, scored against the first ten training targets
batch_accuracy(linear1(batch), train_y[:10])

tensor(0.8000)

In [11]:
##. Validation accuracy of linear1 with its current weights and bias
validate_epoch(linear1)

0.3755

In [20]:
##. Start again from freshly drawn random weights and bias
weights = init_params((28*28, 1))
bias = init_params(1)
params = (weights, bias)

##. Learning rate used to scale each gradient step
lr = 1

##. Train for 300 epochs, reporting the validation accuracy after every 30th
for i in range(1, 301):
    train_epoch(linear1, lr, params)
    if not i % 30:
        print(f'Accuracy after {i} epochs: {validate_epoch(linear1)}')

Accuracy after 30 epochs: 0.9789
Accuracy after 60 epochs: 0.9828
Accuracy after 90 epochs: 0.9838
Accuracy after 120 epochs: 0.9838
Accuracy after 150 epochs: 0.9848
Accuracy after 180 epochs: 0.9857
Accuracy after 210 epochs: 0.9862
Accuracy after 240 epochs: 0.9858
Accuracy after 270 epochs: 0.9858
Accuracy after 300 epochs: 0.9858


Neat... but I don't need to do this from scratch every time.