In [5]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

### Matrix Multiplication - 01_matmul.ipynb

In [2]:
import torch

In [3]:
x = torch.rand((5, 784))
w = torch.rand(784, 10)

In [4]:
x.shape, w.shape

(torch.Size([5, 784]), torch.Size([784, 10]))

In [5]:
%timeit torch.matmul(x, w)

4.11 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [6]:
%timeit x@w

4.06 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
c = torch.zeros((5, 10))

In [8]:
def matmul(x, w):
    for i in range(x.shape[0]):
        for j in range(w.shape[1]):
            for k in range(w.shape[0]):
                c[i,j] += x[i, k]*w[k, j]
    return c

In [14]:
%timeit matmul(x, w)

516 ms ± 4.01 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
509000/4

127250.0

In [15]:
def matmul_elop(x, w):
    for i in range(x.shape[0]):
        for j in range(w.shape[1]):
            c[i,j] = (x[i, :]*w[:, j]).sum()
    return c

In [16]:
%timeit matmul_elop(x, w)

752 µs ± 5.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [13]:
509000/740

687.8378378378378

In [38]:
def matmul_elop_bc(x, w):
    for i in range(x.shape[0]):
        c[i,:] = (x[i, :].unsqueeze(1)*w).sum(dim=0)
    return c

In [39]:
%timeit matmul_elop_bc(x, w)

166 µs ± 611 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [41]:
matmul_elop_bc(x, w).shape

torch.Size([5, 10])

In [42]:
752/166

4.530120481927711

In [44]:
%timeit torch.einsum('ij,jk->ik', x, w)

23.5 µs ± 603 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [45]:
166/24

6.916666666666667

In [46]:
%timeit x.matmul(w)

3.94 µs ± 83.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


### 02_fully_connected.ipynb

In [1]:
import fastai

In [2]:
fastai.__version__

'1.0.51'

In [3]:
from pathlib import Path
from IPython.core.debugger import set_trace
from fastai import datasets
import pickle, gzip, math, torch, matplotlib as mpl
import matplotlib.pyplot as plt
from torch import tensor

MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'

In [6]:
#export
# from exp.nb_01 import *

def get_data():
    path = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(path, 'rb') as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

def normalize(x, m, s): return (x-m)/s

In [7]:
x_train,y_train,x_valid,y_valid = get_data()

In [9]:
x_train.shape, y_train.shape

(torch.Size([50000, 784]), torch.Size([50000]))

In [12]:
train_mean, train_std = x_train.mean(), x_train.std()

In [13]:
def normalize(x, mean, std):
    return (x-mean)/std

In [16]:
x_train = normalize(x_train, train_mean, train_std)
x_valid = normalize(x_valid, train_mean, train_std)

In [33]:
x_train.mean(), x_train.std()
# x_valid.mean(), x_valid.std()

(tensor(-6.2598e-06), tensor(1.))

In [18]:
y_train.max()

tensor(9)

#### Forward

In [21]:
ni= x_train.shape[1]

In [44]:
nh = 50
w1 = torch.randn((ni, nh))/math.sqrt(ni/2)
b1 = torch.zeros((nh))
w2 = torch.randn((nh, 1))/math.sqrt(nh/2)
b2 = torch.zeros(1)

In [40]:
w1.mean(), w1.std()

(tensor(0.0003), tensor(0.0505))

In [27]:
w1.shape

torch.Size([784, 50])

In [28]:
x_train.shape

torch.Size([50000, 784])

In [47]:
def lin(x, w, b):
    return x@w+b

def relu(x):
    return torch.clamp(x, 0.) - 0.5

In [48]:
t = relu(lin(x_train, w1, b1))
t.mean(), t.std()

In [50]:
from torch.nn import init

In [71]:
init.kaiming_normal_(w1, mode='fan_out')
# init.kaiming_uniform_(w1, mode='fan_out')

tensor([[ 0.0204, -0.1071,  0.0575,  ...,  0.0156, -0.0138,  0.0485],
        [-0.0912,  0.0439,  0.0484,  ..., -0.0351,  0.0066,  0.0752],
        [-0.0552,  0.0526,  0.0356,  ...,  0.0148,  0.0584,  0.0037],
        ...,
        [-0.0647, -0.0029,  0.0191,  ...,  0.0115, -0.0389, -0.0086],
        [-0.0685,  0.0834, -0.0691,  ..., -0.0095,  0.0548,  0.1023],
        [ 0.0209,  0.0162, -0.0214,  ..., -0.0051,  0.0751,  0.0189]])

In [73]:
w1.shape

torch.Size([784, 50])

In [72]:
t = relu(lin(x_train, w1, b1))
t.mean(), t.std()

(tensor(0.0301), tensor(0.7743))

In [76]:
torch.nn.Linear(ni, nh).weight.shape

torch.Size([50, 784])

In [90]:
torch.nn.Linear??
# torch.nn.functional.linear??
# init.uniform_??

In [91]:
torch.nn.modules.conv._ConvNd.reset_parameters??

In [97]:
def model(x):
    l1 = relu(lin(x, w1, b1))
    print(l1.mean(), l1.std())
    l2 = relu(lin(l1, w2, b2))
    print(l2.mean(), l2.std())
    return l2

In [98]:
%timeit -n 10 _=model(x_valid)

tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)
tensor(0.0253) tensor(0.7681)
tensor(0.3

In [104]:
def mse(out, tar): return (out.squeeze(-1)-tar.float()).pow(2).mean()

In [105]:
mse(model(x_valid), y_valid)

tensor(0.0253) tensor(0.7681)
tensor(0.3651) tensor(0.7439)


tensor(26.0104)

#### Backward pass

In [153]:
l1 = x_train@w1 + b1
l2 = relu(l1)
l3 = l2@w2 + b2
loss = mse(l3, y_train.float())

In [160]:
l1.shape, l2.shape, l3.shape, loss, y_train.shape, l3.squeeze().shape

(torch.Size([50000, 50]),
 torch.Size([50000, 50]),
 torch.Size([50000, 1]),
 tensor(22.8813),
 torch.Size([50000]),
 torch.Size([50000]))

In [170]:
l3.g = 2*(l3.squeeze()-y_train.float()).unsqueeze(-1)

In [171]:
l2.g = l3.g@w2.t()
w2.g = l3.g * l2

In [176]:
w2.shape

torch.Size([50, 1])

In [150]:
def loss_grad(inp, out):
    inp.g = 2.*(inp.squeeze()-out).unsqueeze(-1)

def linear_grad(inp, w, b, out):
    inp.g = out.g@w2.t()
    w.g = out.g*inp
    b.g = out.g

def relu_grad(inp, out):
    inp.g = out.g * (inp>0).float()

In [151]:
def fwd_back_pass(x, y):
    l1 = x@w1 + b1
    l2 = relu(l1)
    l3 = l2@w2 + b2
    loss = mse(l3, y)
    print(loss)
    
    loss_grad(l3, y)
    linear_grad(l2, w2, b2, l3)
    relu_grad(l1, l2)
    linear_grad(x, w1, b1, l1)

In [152]:
fwd_back_pass(x_train, y_train.float())

tensor(22.8813)


RuntimeError: size mismatch, m1: [50000 x 50], m2: [1 x 50] at /opt/conda/conda-bld/pytorch_1549630534704/work/aten/src/TH/generic/THTensorMath.cpp:940