In [43]:
#|default_exp 03.01_app

In [1]:
#|export
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl, numpy as np
from pathlib import Path
from torch import tensor
from fastcore.test import test_close
torch.manual_seed(42)
import tsensor
# Get rid of findfont: Font family 'Arial' not found.
# when running a remote notebook on Jupyter Server on Ubuntu Linux server
import logging
logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)

from lolviz import *


mpl.rcParams['image.cmap'] = 'gray'
torch.set_printoptions(precision=2, linewidth=125, sci_mode=False)
np.set_printoptions(precision=2, linewidth=125)

path_data = Path('data')
path_gz = path_data/'mnist.pkl.gz'
with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
x_train, y_train, x_valid, y_valid = map(tensor, [x_train, y_train, x_valid, y_valid])

## Foundations Version

### Basic Architecture

- Assign the number of items in our training set to `n` - where `n` is the number of training examples
- Assign the number of pixels in each item to `m` - where `m` is the number of pixels
- Assign possible values of digits to `c` - where `c` is the possible values of digits

In [4]:
x_train, x_train.shape

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 torch.Size([50000, 784]))

In [12]:
x_train[1, :].shape

torch.Size([784])

In [14]:
x_train[1,:].reshape(28,28).shape

torch.Size([28, 28])

In [15]:
#|export
n,m = x_train.shape
c = y_train.max()+1

- Assign to `nh` an integer for our 50 hidden layers, aka our 50 activation functinons, in this case *relus*

In [21]:
#|export
nh = 50

- Create the weight matrices for multiplication. 
    - The first set of weights, `w1` will be for our training set
    - The weights will also need a bias, `b1` that will begin as zeros 
    - The second set of weights, `w2` will be for our output matrix, which will just  be one output, which is the guess of what number it is
    - The second bias `b2` be initialized as zeros and match `w2`

In [22]:
n,m,c,nh

(50000, 784, tensor(10), 50)

In [29]:
#|export
w1 = torch.randn(m,c)
b1 = torch.zeros(nh)
w2 = torch.randn(nh,1)
b2 = torch.zeros(1)

In [30]:
w1.shape, b1.shape, w2.shape, b2.shape

(torch.Size([784, 10]), torch.Size([50]), torch.Size([50, 1]), torch.Size([1]))

In [34]:
b2.shape

torch.Size([1])

- Create a function `lin` to perform matrix multiplication and add the bias 

In [None]:
b2 = torch.zeros(n,c)

- Create a `relu` function to flatten values at 0

- Create a `model` function to take a mini-batch `xs`
    - Create the first linear layer
    - Put first linear layer through a relu
    - Pass the relu-ed first linear layer through the second linear layer and return the value

- Pass the validation set through the model to check if working

### Loss Function MSE

- Subtract `y_valid` from `res` and think about how broadcasting rules apply here

In [44]:
import nbdev

In [45]:
nbdev.export.nb_export('03.01_backpropagation_rebuild.ipynb', '03.01_app')