In [13]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
#export
from exp.nb_util import *
import torch.nn.functional as F
import torch.nn as nn

In [22]:
mpl.rcParams['image.cmap'] = 'gray'

In [23]:
x_train,y_train,x_valid,y_valid = get_data()

In [24]:
n,m = x_train.shape
c = y_train.max()+1
nh = 100
n,m

(50000, 784)

In [25]:
??nn.Module

In [26]:
class Model(nn.Module):
    def __init__(self, n_in, n_h, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(nh, n_out)]
        
    def __call__(self, x):
        for l in self.layers: x=l(x)
        return x;

In [27]:
model = Model(784, 100, 10)

In [28]:
loss_func = F.cross_entropy

In [29]:
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

In [30]:
bs=64                  # batch size

xb = x_train[0:bs]     # a mini-batch from x
preds = model(xb)      # predictions
preds[0], preds.shape

(tensor([ 0.1063,  0.0203, -0.0922,  0.0924,  0.0612,  0.1220, -0.1918,  0.0160,
         -0.0114, -0.0395], grad_fn=<SelectBackward>), torch.Size([64, 10]))

In [31]:
lr = 0.5   # learning rate
epochs = 10

In [32]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            with torch.no_grad():
                for l in model.layers:
                    if hasattr(l, 'weight'):
                        l.weight -= l.weight.grad * lr
                        l.bias -= l.bias.grad * lr
                        l.weight.grad.zero_()
                        l.bias.grad.zero_()
        print(accuracy(model(x_train), y_train))

In [33]:
fit()

tensor(0.6769)
tensor(0.8410)
tensor(0.8690)
tensor(0.8936)
tensor(0.9069)
tensor(0.9165)
tensor(0.9175)
tensor(0.9248)
tensor(0.9180)
tensor(0.9089)


In [34]:
class Model(nn.Module):
    def __init__(self, n_in, n_h, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in, n_h);
        self.l2 = nn.Linear(nh, n_out);
        
    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)));

In [35]:
model = Model(784, 100, 10)

In [36]:
model

Model(
  (l1): Linear(in_features=784, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=10, bias=True)
)

In [37]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            with torch.no_grad():
                for p in model.parameters(): p -= p.grad * lr
                model.zero_grad()
        print(accuracy(model(x_train), y_train))

In [38]:
fit()

tensor(0.6676)
tensor(0.8411)
tensor(0.8747)
tensor(0.8914)
tensor(0.9076)
tensor(0.9156)
tensor(0.9118)
tensor(0.9296)
tensor(0.9062)
tensor(0.9204)


In [39]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = layers;
        for i,l in enumerate(self.layers): self.add_module(f'layer_{i}', l)
        
    def __call__(self, x):
        for layer in self.layers : x = layer(x)
        return x

In [40]:
layers = [nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10)]

In [41]:
model = Model(layers)

In [42]:
model

Model(
  (layer_0): Linear(in_features=784, out_features=100, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=100, out_features=10, bias=True)
)

In [43]:
fit()

tensor(0.6754)
tensor(0.8499)
tensor(0.8727)
tensor(0.8910)
tensor(0.9105)
tensor(0.9174)
tensor(0.9231)
tensor(0.9298)
tensor(0.9094)
tensor(0.9173)


In [44]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers);
        
    def __call__(self, x):
        for layer in self.layers : x = layer(x)
        return x

In [45]:
model = Model(layers)

In [46]:
model

Model(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)

In [47]:
model = nn.Sequential(nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10))

In [48]:
model

Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [49]:
fit()

tensor(0.6783)
tensor(0.8485)
tensor(0.8749)
tensor(0.8928)
tensor(0.9092)
tensor(0.9127)
tensor(0.9199)
tensor(0.9261)
tensor(0.9115)
tensor(0.9229)


In [50]:
??nn.Sequential

In [51]:
#export
class Optimizer():
    def __init__(self, params, lr=0.5):
        self.params, self.lr = list(params), lr
        
    def step(self):
        with torch.no_grad():
            for p in self.params: p -= p.grad * lr
    
    def zero_grad(self):
        for p in self.params: p.grad.data.zero_()

In [52]:
model = nn.Sequential(nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10))

In [53]:
opt = Optimizer(model.parameters())

In [54]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [55]:
fit()

tensor(0.6743)
tensor(0.8449)
tensor(0.8719)
tensor(0.8969)
tensor(0.9070)
tensor(0.9120)
tensor(0.9163)
tensor(0.9266)
tensor(0.9132)
tensor(0.9212)


In [56]:
from torch import optim

In [57]:
opt = optim.SGD(model.parameters(), lr=0.5)

In [58]:
fit()

tensor(0.9321)
tensor(0.9380)
tensor(0.9386)
tensor(0.9388)
tensor(0.9384)
tensor(0.9387)
tensor(0.9393)
tensor(0.9405)
tensor(0.9309)
tensor(0.9253)


In [59]:
#export
class DataSet():
    def __init__(self, x, y): self.x, self.y = x, y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i], self.y[i]

In [60]:
train_ds = DataSet(x_train, y_train)

In [61]:
len(train_ds)

50000

In [62]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            xb, yb = train_ds[epoch*batch: epoch*batch + bs]            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [63]:
fit()

tensor(0.9353)
tensor(0.9424)
tensor(0.9438)
tensor(0.9445)
tensor(0.9449)
tensor(0.9444)
tensor(0.9455)
tensor(0.9455)
tensor(0.9371)
tensor(0.9414)


In [64]:
#export
class DataLoader():
    def __init__(self, ds, bs): self.ds,self.bs = ds,bs
    def __iter__(self):
        for i in range(0, len(self.ds), self.bs): yield self.ds[i:i+self.bs]

In [65]:
train_dl = DataLoader(train_ds, bs)

In [66]:
next(iter(train_dl))

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
         1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5,
         9, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0]))

In [67]:
def fit():
    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [68]:
fit()

tensor(0.9306)
tensor(0.9697)
tensor(0.9798)
tensor(0.9858)
tensor(0.9878)
tensor(0.9897)
tensor(0.9911)
tensor(0.9924)
tensor(0.9935)
tensor(0.9940)


In [69]:
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler

In [70]:
DataLoader??

## Export

In [73]:
!python notebook2script.py rework_pytorch_model.ipynb

Converted rework_pytorch_model.ipynb to exp/nb_rework.py
