In [1]:
import torch
from torch import tensor
from pathlib import Path
import pickle, gzip
import matplotlib as mpl, matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F

In [2]:
# Notebook-wide display and reproducibility settings.
torch.set_printoptions(precision=2, linewidth=140,sci_mode=False)  # compact tensor reprs: 2 decimals, no scientific notation
torch.manual_seed(42)  # fix the RNG seed so the random initialisations below are repeatable
mpl.rcParams['image.cmap'] = 'gray'  # default colormap for matplotlib images

In [3]:
# Location of the gzipped, pickled dataset (path is relative to the working directory).
data_path = Path('data/mnist.pkl.gz')

In [4]:
# Load the training and validation splits; the third element of the pickle is discarded.
# NOTE(review): pickle.load can execute arbitrary code -- only use it on a trusted data file.
with gzip.open(data_path, "rb") as f :
    ((x_train,y_train),(x_valid, y_valid),_) = pickle.load(f,encoding="latin-1")

In [5]:
# Convert each of the four loaded arrays into a torch tensor, rebinding the same names.
x_train, y_train, x_valid, y_valid = (
    tensor(arr) for arr in (x_train, y_train, x_valid, y_valid)
)

In [6]:
# n = number of training rows, m = number of features per row.
n,m = x_train.shape
# c = largest label value + 1; note this is a 0-dim tensor, not a Python int.
c = y_train.max()+1
n,m,c

(50000, 784, tensor(10))

In [7]:
# Number of hidden units (output width of the first linear layer).
nh = 50

In [8]:
# Parameters of a two-layer network: m inputs -> nh hidden units -> 1 output.
# torch.rand samples uniformly from [0, 1), so every initial weight is non-negative;
# both biases start at zero.
w1 = torch.rand(m,nh)
b1 = torch.zeros(nh)
w2 = torch.rand(nh, 1)
b2 = torch.zeros(1)

In [9]:
def lin(x, w, b):
    """Affine map: matrix-multiply `x` by `w`, then add the bias `b`."""
    projected = x @ w
    return projected + b

In [10]:
# Sanity check: the first linear layer should give nh activations per validation row.
t = lin(x_valid, w1, b1)
t.shape

torch.Size([10000, 50])

In [11]:
def relu(x):
    """Rectified linear unit: clamp every element of `x` to a minimum of 0."""
    return x.clamp(min=0.)

In [12]:
def model(xb):
    """Forward pass of the hand-built network: linear (w1, b1) -> relu -> linear (w2, b2).

    Reads the module-level parameters w1, b1, w2 and b2.
    """
    hidden = relu(lin(xb, w1, b1))
    return lin(hidden, w2, b2)

In [13]:
# Run the full forward pass on the validation set and check the output shape.
res = model(x_valid)
res.shape

torch.Size([10000, 1])

In [14]:
def mse(out,targ):
    """Mean squared error between the first column of `out` and `targ`."""
    residual = out[:, 0] - targ
    squared = residual.pow(2)
    return squared.mean()

In [15]:
# Predictions of the hand-built model on the training set.
preds = model(x_train)
# Cast the targets to float so they can be used as regression targets.
y_train,y_valid = y_train.float(),y_valid.float()

In [16]:
# MSE of the randomly initialised (untrained) model on the training set.
mse(preds,y_train)

tensor(1843195.12)

In [17]:
class Linear(nn.Module):
    """Fully connected layer computing ``x @ w + b``.

    Args:
        n_in: number of input features (rows of ``w``).
        n_out: number of output features (columns of ``w``, length of ``b``).
    """
    def __init__(self, n_in, n_out):
        super().__init__()
        # Wrap the tensors in nn.Parameter so they are registered with the module:
        # they then appear in .parameters()/.state_dict() and follow .to().
        # nn.Parameter has requires_grad=True by default, so gradients still accumulate in .grad.
        self.w = nn.Parameter(torch.rand(n_in, n_out))
        self.b = nn.Parameter(torch.rand(n_out))

    def forward(self, x):
        """Apply the affine map to ``x``; its last dimension must equal ``n_in``."""
        return x@self.w + self.b

In [23]:
class Model(nn.Module):
    """Small MLP (Linear -> ReLU -> Linear) whose forward pass returns the MSE loss.

    Args:
        n_in: number of input features.
        nh: number of hidden units.
        n_out: number of outputs of the final linear layer.
    """
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        # nn.ModuleList (rather than a plain list) registers the sub-layers, so their
        # parameters are visible through .parameters()/.state_dict() and follow .to().
        # Indexing and iteration work exactly as with a list.
        self.layers = nn.ModuleList([Linear(n_in, nh), nn.ReLU(), Linear(nh, n_out)])

    def forward(self, x, targ):
        """Run ``x`` through every layer and return the MSE against ``targ``.

        Defined as ``forward`` (not ``__call__``) so that nn.Module's own ``__call__``
        can run registered hooks; ``model(x, targ)`` is still the way to invoke it.
        ``targ`` gets a trailing axis added before the comparison
        (assumes it is 1-D with one value per row of ``x`` -- TODO confirm).
        """
        for l in self.layers: x = l(x)
        return F.mse_loss(x, targ[:,None])

In [24]:
# NOTE: this rebinds `model`, replacing the plain function of the same name defined earlier.
model = Model(m, nh, 1)
loss = model(x_train, y_train)  # calling the Model returns the MSE loss directly
loss.backward()  # populate .grad on the layers' w and b tensors

In [25]:
# Inspect the gradient of the loss with respect to the first layer's bias.
l0 = model.layers[0]
l0.b.grad

tensor([ 819.31, 1935.70, 1296.50, 1791.27, 1343.30,   43.01, 2397.94,  109.76,  452.63,  581.36,  267.11, 2411.60, 1506.75, 1428.59,
        1918.31,  259.10, 1286.14,  380.38, 1748.03,   34.61, 1761.98, 1926.91, 1739.22, 1702.04, 1340.79,  291.98, 2468.33, 1784.18,
         356.71,  967.13, 1290.01,  371.27, 2064.24, 1867.94, 1191.89, 2060.98, 1921.87, 1982.37,  813.83, 1334.99, 1724.84, 1238.74,
         573.66, 1243.55, 2012.00,  369.63,  235.69,   50.12,   25.87, 1395.90])

In [91]:
# NOTE(review): these statements repeat the earlier model-building cell, and the execution
# count is out of sequence -- consider removing this cell or re-running the notebook top to bottom.
model = Model(m, nh, 1)  # fresh instance with newly drawn random weights
loss = model(x_train, y_train)
loss.backward()

In [92]:
# Gradient of the loss with respect to the first layer's bias, for the rebuilt model
# (same statements as the earlier inspection cell).
l0 = model.layers[0]
l0.b.grad
     