In [1]:
#|default_exp 03.01_app

In [2]:
#|export
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl, numpy as np
from pathlib import Path
from torch import tensor
from fastcore.test import test_close
torch.manual_seed(42)
import tsensor
# Get rid of findfont: Font family 'Arial' not found.
# when running a remote notebook on Jupyter Server on Ubuntu Linux server
import logging
logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)

from lolviz import *


# Show images in grayscale by default and keep tensor/array printouts compact.
mpl.rcParams['image.cmap'] = 'gray'
torch.set_printoptions(precision=2, linewidth=125, sci_mode=False)
np.set_printoptions(precision=2, linewidth=125)

# The gzipped pickle archive is looked up relative to the current working directory.
path_data = Path('data')
path_gz = path_data/'mnist.pkl.gz'
# NOTE(review): pickle.load can execute arbitrary code; only open archives from a trusted source.
# The archive unpacks into (train, valid, <third split>); the third split is discarded here.
with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
# Convert the four loaded objects into torch tensors.
x_train, y_train, x_valid, y_valid = map(tensor, [x_train, y_train, x_valid, y_valid])

## Foundations Version

### Basic Architecture

- Assign the number of items in our training set to `n` (the number of training examples)
- Assign the number of pixels in each item to `m`
- Assign the number of possible digit values (classes) to `c`

In [3]:
x_train, x_train.shape

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 torch.Size([50000, 784]))

In [4]:
x_train[1, :].shape

torch.Size([784])

In [5]:
x_train[1,:].reshape(28,28).shape

torch.Size([28, 28])

In [6]:
#|export
# n = rows of x_train (training examples), m = columns of x_train (pixels per example).
n,m = x_train.shape
# Number of classes: largest label + 1 (assumes labels start at 0 — TODO confirm).
# Note that this is a 0-dim tensor, not a Python int.
c = y_train.max()+1

- Assign to `nh` the number of hidden units (50) in our single hidden layer; each unit's output is passed through an activation function, in this case a *relu*

In [7]:
#|export
nh = 50

- Create the weight matrices for multiplication. 
    - The first set of weights, `w1`, maps the `m` pixels of each input to the `nh` hidden units
    - The weights will also need a bias, `b1`, that will begin as zeros 
    - The second set of weights, `w2`, maps the hidden units to the output, which will be just one value per input: the guess of what number it is
    - The second bias, `b2`, will be initialized as zeros and match the single output column of `w2`

In [8]:
n,m,c,nh

(50000, 784, tensor(10), 50)

In [9]:
#|export
# First layer: m inputs -> nh hidden units. Weights are drawn from a standard normal; the bias starts at zero.
w1 = torch.randn(m,nh)
b1 = torch.zeros(nh)
# Second layer: nh hidden units -> a single output value per example.
w2 = torch.randn(nh,1)
b2 = torch.zeros(1)

In [10]:
w1.shape, b1.shape, w2.shape, b2.shape

(torch.Size([784, 50]), torch.Size([50]), torch.Size([50, 1]), torch.Size([1]))

- Create a function `lin` to perform matrix multiplication and add the bias 

In [11]:
#|export
def lin(x, w, b):
    "Affine layer: matrix-multiply `x` by the weights `w`, then add the bias `b`."
    projected = x @ w
    return projected + b

In [12]:
x_valid.shape

torch.Size([10000, 784])

In [13]:
t = lin(x_valid, w1, b1)
t.shape

torch.Size([10000, 50])

In [14]:
t

tensor([[ -0.09,  11.87, -11.39,  ...,   5.48,   2.14,  15.30],
        [  5.38,  10.21, -14.49,  ...,   0.88,   0.08,  20.23],
        [  3.31,   0.12,   3.10,  ...,  16.89,  -6.05,  24.74],
        ...,
        [  4.01,  10.35, -11.25,  ...,   0.23,  -5.30,  18.28],
        [ 10.62,  -4.27,  10.72,  ...,  -2.87,  -2.87,  18.23],
        [  2.84,  -0.22,   1.43,  ...,  -3.91,   5.75,   2.12]])

- Create a `relu` function to flatten values at 0

In [15]:
#|export
def relu(x):
    "Rectified linear unit: return `x` with every value below zero raised to zero."
    floor = 0.
    return x.clamp_min(floor)

In [16]:
relu(x_train@w1 + b1)

tensor([[12.53, 10.99,  0.00,  ...,  6.05,  0.00, 11.34],
        [28.33,  6.83,  4.17,  ...,  0.00,  0.00, 17.52],
        [ 5.67,  0.00,  0.55,  ...,  0.00,  4.13,  4.68],
        ...,
        [ 1.14,  2.96,  0.00,  ...,  1.26,  4.31, 19.45],
        [ 0.00,  4.90,  0.00,  ...,  2.80,  0.00, 12.29],
        [ 8.51, 14.52,  0.00,  ..., 14.50,  1.10, 18.45]])

In [17]:
t = relu(t)
t

tensor([[ 0.00, 11.87,  0.00,  ...,  5.48,  2.14, 15.30],
        [ 5.38, 10.21,  0.00,  ...,  0.88,  0.08, 20.23],
        [ 3.31,  0.12,  3.10,  ..., 16.89,  0.00, 24.74],
        ...,
        [ 4.01, 10.35,  0.00,  ...,  0.23,  0.00, 18.28],
        [10.62,  0.00, 10.72,  ...,  0.00,  0.00, 18.23],
        [ 2.84,  0.00,  1.43,  ...,  0.00,  5.75,  2.12]])

- Create a `model` function to take a mini-batch `xb`
    - Create the first linear layer
    - Put first linear layer through a relu
    - Pass the relu-ed first linear layer through the second linear layer and return the value

In [18]:
#|export
def model(xb):
    "Forward pass: first linear layer, then relu, then second linear layer."
    hidden = relu(lin(xb, w1, b1))
    return lin(hidden, w2, b2)

In [19]:
lin(relu(x_train@w1 + b1), w2, b2),lin(relu(x_train@w1 + b1), w2, b2).shape

(tensor([[-30.97],
         [-99.38],
         [  8.72],
         ...,
         [-52.12],
         [-46.25],
         [ -4.35]]),
 torch.Size([50000, 1]))

- Pass the validation set through the model to check if working

In [20]:
#|export
# Smoke test: push the whole validation set through the still-untrained model.
res = model(x_valid)
res

tensor([[  25.75],
        [ -13.06],
        [-114.79],
        ...,
        [ -67.44],
        [ -74.48],
        [ -60.19]])

### Loss Function MSE

- Subtract `y_valid` from `res` and think about how broadcasting rules apply here

In [21]:
res.shape, y_valid.shape

(torch.Size([10000, 1]), torch.Size([10000]))

- Subtract the validation targets from the model results naively, and note that the shapes broadcast to an unintended 10000×10000 result

In [22]:
(res-y_valid).shape

torch.Size([10000, 10000])

- adjust to add a unit axis to correct the output from broadcasting

In [23]:
(res-y_valid[:,None]).shape, (res-y_valid[:,None])

(torch.Size([10000, 1]),
 tensor([[  22.75],
         [ -21.06],
         [-120.79],
         ...,
         [ -72.44],
         [ -80.48],
         [ -68.19]]))

- remove a unit axis to correct the output from broadcasting

In [28]:
(res[:,0] - y_valid).shape

torch.Size([10000])

In [30]:
# Cast the label tensors to float so they can serve as targets for the MSE loss below.
y_train, y_valid = y_train.float(), y_valid.float()

create predictions

In [32]:
preds = model(x_train)
preds.shape

torch.Size([50000, 1])

create a mean squared error function

In [33]:
def mse(output, target):
    "Mean squared error between column 0 of `output` and `target`."
    residual = output[:,0] - target
    return residual.pow(2).mean()

In [34]:
mse(preds, y_train)

tensor(4308.76)

## Gradients and Backward Pass

In [37]:
from sympy import symbols, diff
x,y = symbols('x y')
diff(x**2, x)

2*x

In [38]:
diff(3*x**2 + 9, x)

6*x

In [None]:
import nbdev

In [None]:
nbdev.export.nb_export('03.01_backpropagation_rebuild.ipynb', '03.01_app')