<a href="https://colab.research.google.com/github/mzohaibnasir/NeuralNotes/blob/main/02_deepDiveIntoBasics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Forward & Backward passes

In [None]:
from pathlib import Path
import pickle , gzip,math, os, time, shutil,matplotlib.pyplot as plt, matplotlib as mpl, numpy as np

import torch
from torch import tensor
from fastcore.test import test_close


In [None]:
# Pickled MNIST archive, pinned to a specific commit of the upstream repository.
MNIST_URL='https://github.com/mnielsen/neural-networks-and-deep-learning/blob/d15df08a69ed33ae16a2fff874f83b57a956172c/data/mnist.pkl.gz?raw=true'

# Local cache: ./data/mnist.pkl.gz (the directory is created on first run).
path_data = Path('data')
path_data.mkdir(exist_ok=True)
path_gz = path_data.joinpath('mnist.pkl.gz')
path_gz

PosixPath('data/mnist.pkl.gz')

In [None]:
from urllib.request import urlretrieve

# Download only when the archive is not cached yet, so re-running the notebook is cheap.
if not path_gz.exists():
  urlretrieve(MNIST_URL, path_gz)

In [None]:
# Reproducibility: fix torch's RNG so the random weight initialisation below is repeatable.
torch.manual_seed(42)

# Display defaults: grayscale images, and compact two-decimal printing for tensors and arrays.
mpl.rcParams['image.cmap'] = 'gray'
np.set_printoptions(precision=2, linewidth=125)
torch.set_printoptions(precision=2, linewidth=125, sci_mode=False)


In [None]:
!ls -l data

total 16656
-rw-r--r-- 1 root root 17051982 Jan  8 13:21 mnist.pkl.gz


In [None]:
# NOTE(security): pickle.load can execute arbitrary code, so only load archives from a trusted source.
with gzip.open(path_gz, mode='rb') as archive:  # binary mode: the payload is a pickle, not text
  # The archive holds three (inputs, labels) pairs; the third one is not used here.
  (x_train, y_train), (x_valid, y_valid), _ = pickle.load(archive, encoding='latin-1')
# Convert the loaded arrays to torch tensors.
x_train, y_train, x_valid, y_valid = (tensor(split) for split in (x_train, y_train, x_valid, y_valid))

In [None]:
# Read the shape straight from the tensor; converting to a NumPy array first would copy
# the whole training set just to inspect it. tuple(...) keeps the plain-tuple display.
tuple(x_train.shape)

(50000, 784)

In [None]:
# Display the archive path again (same value as defined in the setup cell).
path_gz

PosixPath('data/mnist.pkl.gz')

In [None]:
n, m = x_train.size()    # n: number of training samples, m: pixels per image
c = 1 + y_train.max()    # number of possible outputs (labels run from 0 to max)
n, m, c

(50000, 784, tensor(10))

In [None]:
nh = 50  # number of hidden activations, i.e. how many rectified lines get added up

In [None]:
# Layer 1 maps the m input pixels to nh hidden activations; layer 2 maps those to a single output.
# Weights are drawn from a standard normal distribution, biases start at zero.
w1, b1 = torch.randn(m, nh), torch.zeros(nh)
w2, b2 = torch.randn(nh, 1), torch.zeros(1)

tuple(p.shape for p in (w1, b1, w2, b2))

(torch.Size([784, 50]), torch.Size([50]), torch.Size([50, 1]), torch.Size([1]))

In [None]:
def lin(x, w, b, verbose=True):
  """Affine (fully connected) layer: return `x @ w + b`.

  Args:
    x: input tensor; its last dimension must match the first dimension of `w`.
    w: weight matrix.
    b: bias, broadcast-added to the result of `x @ w`.
    verbose: when True (the default, which keeps the original behaviour) print the
      shapes of `x`, `w` and `b` before computing. Pass False to silence the trace,
      e.g. when the layer is called repeatedly inside a loop.

  Returns:
    The tensor `x @ w + b`.
  """
  if verbose:
    print(f" x:{x.shape} \n w:{w.shape} \n b:{b.shape}")
  return x@w + b

In [None]:
# First layer applied to the validation set: (10000, 784) @ (784, 50) -> (10000, 50).
t = lin(x_valid, w1, b1)
t.shape

 x:torch.Size([10000, 784]) 
 w:torch.Size([784, 50]) 
 b:torch.Size([50])


torch.Size([10000, 50])

In [None]:
# Raw first-layer outputs, before the ReLU: note that they include negative values.
t

tensor([[ -0.09,  11.87, -11.39,  ...,   5.48,   2.14,  15.30],
        [  5.38,  10.21, -14.49,  ...,   0.88,   0.08,  20.23],
        [  3.31,   0.12,   3.10,  ...,  16.89,  -6.05,  24.74],
        ...,
        [  4.01,  10.35, -11.25,  ...,   0.23,  -5.30,  18.28],
        [ 10.62,  -4.27,  10.72,  ...,  -2.87,  -2.87,  18.23],
        [  2.84,  -0.22,   1.43,  ...,  -3.91,   5.75,   2.12]])

In [None]:
def relu(x):
  """Rectified linear unit: values below 0 become 0, everything else is unchanged.

  Returns a new tensor; `x` itself is not modified.
  """
  return x.clamp(min=0.)


# Apply the ReLU to the first-layer outputs; re-running this cell is harmless because
# clamping already non-negative values at 0 changes nothing.
t = relu(t)
t

tensor([[ 0.00, 11.87,  0.00,  ...,  5.48,  2.14, 15.30],
        [ 5.38, 10.21,  0.00,  ...,  0.88,  0.08, 20.23],
        [ 3.31,  0.12,  3.10,  ..., 16.89,  0.00, 24.74],
        ...,
        [ 4.01, 10.35,  0.00,  ...,  0.23,  0.00, 18.28],
        [10.62,  0.00, 10.72,  ...,  0.00,  0.00, 18.23],
        [ 2.84,  0.00,  1.43,  ...,  0.00,  5.75,  2.12]])

In [None]:
def model(xb):
  """Forward pass: linear layer -> ReLU -> linear layer.

  Uses the notebook-level parameters `w1`, `b1`, `w2`, `b2`.
  """
  hidden = relu(lin(xb, w1, b1))
  return lin(hidden, w2, b2)

model(x_valid).shape

 x:torch.Size([10000, 784]) 
 w:torch.Size([784, 50]) 
 b:torch.Size([50])
 x:torch.Size([10000, 50]) 
 w:torch.Size([50, 1]) 
 b:torch.Size([1])


torch.Size([10000, 1])

In [None]:
# Keep the validation-set predictions; the cells below use them to work out the loss shapes.
res  = model(x_valid)
res.shape

 x:torch.Size([10000, 784]) 
 w:torch.Size([784, 50]) 
 b:torch.Size([50])
 x:torch.Size([10000, 50]) 
 w:torch.Size([50, 1]) 
 b:torch.Size([1])


torch.Size([10000, 1])

## Loss: MSE
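Of course, MSE is not a suitable loss for this problem (the targets are digit classes, not continuous values), but it is simple, so we use it here to keep the forward and backward passes easy to follow.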

In [None]:
# `res` is (10000, 1) but `y_valid` is (10000,). Broadcasting lines shapes up from the
# trailing dimension, so y_valid is treated as (1, 10000): the size-1 column of res is
# stretched across it and the size-1 row of y_valid is repeated for every prediction.
res.shape, y_valid.shape

(torch.Size([10000, 1]), torch.Size([10000]))

In [None]:
# Pitfall: (10000, 1) - (10000,) broadcasts to a full (10000, 10000) matrix holding every
# prediction minus every target, not the element-wise difference we want.
(res - y_valid).shape

torch.Size([10000, 10000])

In [None]:
# Fix: give y_valid a trailing axis so it is (10000, 1) and lines up with res element-wise.
diff_2d = res - y_valid[:, None]

diff_2d.shape, diff_2d

(torch.Size([10000, 1]),
 tensor([[  22.75],
         [ -21.06],
         [-120.79],
         ...,
         [ -72.44],
         [ -80.48],
         [ -68.19]]))

In [None]:
# Alternative: drop the size-1 axis from res so both operands are 1-D.
diff_1d = res.squeeze() - y_valid
diff_1d.shape, diff_1d

(torch.Size([10000]),
 tensor([  22.75,  -21.06, -120.79,  ...,  -72.44,  -80.48,  -68.19]))

In [None]:
# Or index the single column directly:
#   res[:, 0] selects the first column -> shape (10000,)
#   res[0, :] selects the first row    -> shape (1,)


res.shape, res[:, 0].shape, res[0,:].shape

(torch.Size([10000, 1]), torch.Size([10000]), torch.Size([1]))

In [None]:
# Indexing with None inserts a new size-1 axis at that position: (10000, 1) -> (1, 10000, 1, 1).
# squeeze() with no arguments then removes every size-1 axis, leaving (10000,).
res.shape, res[None, : , None].shape, (res[None, : , None]).squeeze().shape

(torch.Size([10000, 1]), torch.Size([1, 10000, 1, 1]), torch.Size([10000]))

In [None]:
# So, taking column 0 of res gives a 1-D tensor that subtracts element-wise from y_valid.

(res[:,0] - y_valid).shape

torch.Size([10000])

In [None]:
# Cast the labels to float before they are used as regression targets by the loss below.
y_train = y_train.float()
y_valid = y_valid.float()

# Forward pass over the full training set.
preds = model(x_train)
preds.shape

 x:torch.Size([50000, 784]) 
 w:torch.Size([784, 50]) 
 b:torch.Size([50])
 x:torch.Size([50000, 50]) 
 w:torch.Size([50, 1]) 
 b:torch.Size([1])


torch.Size([50000, 1])

In [None]:
def mse(output, targ, verbose=True):
  """Mean squared error between column 0 of `output` and `targ`.

  Args:
    output: 2-D predictions; only column 0 is scored (shape (N, 1) in this notebook).
    targ: 1-D targets of shape (N,), matching `output[:, 0]` element-wise.
    verbose: when True (the default, which keeps the original behaviour) print the
      shapes of both arguments. Pass False to silence the trace, e.g. when the loss
      is evaluated repeatedly inside a loop.

  Returns:
    A 0-dim tensor holding the mean of the squared differences.
  """
  if verbose:
    print(f"output: {output.shape}, targ: {targ.shape}")
  # Index column 0 instead of subtracting directly: (N, 1) - (N,) would broadcast to (N, N).
  return (output[:, 0] - targ).pow(2).mean()

# Loss of the untrained network on the training set (weights are still the random initial values).
mse(preds, y_train)

output: torch.Size([50000, 1]), targ: torch.Size([50000])


tensor(4308.76)

## Gradients & backward pass

In [None]:
from sympy import symbols, diff

In [None]:
# Symbolic variables for checking derivatives with SymPy.
x,y = symbols('x y')
x,y

(x, y)

In [None]:
diff(x**2, x) # derivative of x**2 with respect to x

2*x

In [None]:
# Derivative of 3*x**2 + 9 with respect to x: the constant term drops out.
diff(3*x**2 +9, x)

6*x