In [74]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
#export
from exp.nb_util import *
import torch.nn.functional as F
import torch.nn as nn

In [76]:
mpl.rcParams['image.cmap'] = 'gray'

In [77]:
x_train,y_train,x_valid,y_valid = get_data()

In [78]:
n,m = x_train.shape
c = y_train.max()+1
nh = 100
n,m

(50000, 784)

In [79]:
??nn.Module

In [80]:
class Model(nn.Module):
    def __init__(self, n_in, n_h, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(nh, n_out)]
        
    def __call__(self, x):
        for l in self.layers: x=l(x)
        return x;

In [81]:
model = Model(784, 100, 10)

In [82]:
loss_func = F.cross_entropy

In [312]:
#export
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

In [84]:
bs=64                  # batch size

xb = x_train[0:bs]     # a mini-batch from x
preds = model(xb)      # predictions
preds[0], preds.shape

(tensor([ 0.0883, -0.0970,  0.0027, -0.1198,  0.0389,  0.0816, -0.0881,  0.2678,
         -0.1058,  0.1064], grad_fn=<SelectBackward>), torch.Size([64, 10]))

In [85]:
lr = 0.5   # learning rate
epochs = 10

In [86]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            with torch.no_grad():
                for l in model.layers:
                    if hasattr(l, 'weight'):
                        l.weight -= l.weight.grad * lr
                        l.bias -= l.bias.grad * lr
                        l.weight.grad.zero_()
                        l.bias.grad.zero_()
        print(accuracy(model(x_train), y_train))

In [87]:
fit()

tensor(0.6728)
tensor(0.8376)
tensor(0.8752)
tensor(0.8888)
tensor(0.9100)
tensor(0.9168)
tensor(0.9198)
tensor(0.9315)
tensor(0.9130)
tensor(0.9184)


In [88]:
class Model(nn.Module):
    def __init__(self, n_in, n_h, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in, n_h);
        self.l2 = nn.Linear(nh, n_out);
        
    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)));

In [89]:
model = Model(784, 100, 10)

In [90]:
model

Model(
  (l1): Linear(in_features=784, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=10, bias=True)
)

In [91]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            with torch.no_grad():
                for p in model.parameters(): p -= p.grad * lr
                model.zero_grad()
        print(accuracy(model(x_train), y_train))

In [92]:
fit()

tensor(0.6711)
tensor(0.8442)
tensor(0.8733)
tensor(0.8820)
tensor(0.9090)
tensor(0.9128)
tensor(0.9132)
tensor(0.9311)
tensor(0.9109)
tensor(0.9215)


In [93]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = layers;
        for i,l in enumerate(self.layers): self.add_module(f'layer_{i}', l)
        
    def __call__(self, x):
        for layer in self.layers : x = layer(x)
        return x

In [94]:
layers = [nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10)]

In [95]:
model = Model(layers)

In [96]:
model

Model(
  (layer_0): Linear(in_features=784, out_features=100, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=100, out_features=10, bias=True)
)

In [97]:
fit()

tensor(0.6653)
tensor(0.8455)
tensor(0.8767)
tensor(0.8922)
tensor(0.9117)
tensor(0.9121)
tensor(0.9162)
tensor(0.9309)
tensor(0.9121)
tensor(0.9198)


In [98]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers);
        
    def __call__(self, x):
        for layer in self.layers : x = layer(x)
        return x

In [99]:
model = Model(layers)

In [100]:
model

Model(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)

In [101]:
model = nn.Sequential(nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10))

In [102]:
model

Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [103]:
fit()

tensor(0.6754)
tensor(0.8434)
tensor(0.8745)
tensor(0.8926)
tensor(0.9103)
tensor(0.9137)
tensor(0.9224)
tensor(0.9271)
tensor(0.9215)
tensor(0.9280)


In [104]:
??nn.Sequential

In [313]:
#export
class Optimizer():
    def __init__(self, params, lr=0.5):
        self.params, self.lr = list(params), lr
        
    def step(self):
        with torch.no_grad():
            for p in self.params: p -= p.grad * lr
    
    def zero_grad(self):
        for p in self.params: p.grad.data.zero_()

In [106]:
model = nn.Sequential(nn.Linear(784, nh), nn.ReLU(), nn.Linear(nh, 10))

In [107]:
opt = Optimizer(model.parameters())

In [108]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            from_ = epoch*batch;
            to = from_ + bs;
            
            xb = x_train[from_:to]
            yb = y_train[from_:to]
            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [109]:
fit()

tensor(0.6724)
tensor(0.8445)
tensor(0.8772)
tensor(0.8930)
tensor(0.9116)
tensor(0.9132)
tensor(0.9152)
tensor(0.9270)
tensor(0.9203)
tensor(0.9196)


In [304]:
#export
from torch import optim

In [305]:
opt = optim.SGD(model.parameters(), lr=0.5)

In [306]:
#export
class DataSet():
    def __init__(self, x, y): self.x, self.y = x, y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i], self.y[i]

In [300]:
train_ds = DataSet(x_train, y_train)

In [115]:
len(train_ds)

50000

In [116]:
def fit():
    for epoch in range(epochs):
        for batch in range(n//bs+1):
            xb, yb = train_ds[epoch*batch: epoch*batch + bs]            
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [117]:
# fit()

tensor(0.9429)
tensor(0.9464)
tensor(0.9458)
tensor(0.9465)
tensor(0.9451)
tensor(0.9454)
tensor(0.9471)
tensor(0.9454)
tensor(0.9434)
tensor(0.9394)


In [283]:
class DataLoader():
    def __init__(self, ds, bs): self.ds,self.bs = ds,bs
    def __iter__(self):
        for i in range(0, len(self.ds), self.bs): yield self.ds[i:i+self.bs]

In [119]:
train_dl = DataLoader(train_ds, bs)

In [120]:
next(iter(train_dl))

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
         1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5,
         9, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0]))

In [121]:
def fit():
    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss = loss_func(model(xb), yb)
            
            loss.backward()
            opt.step()
            opt.zero_grad()
        print(accuracy(model(x_train), y_train))

In [122]:
# fit()

tensor(0.9480)
tensor(0.9668)
tensor(0.9759)
tensor(0.9831)
tensor(0.9852)
tensor(0.9873)
tensor(0.9898)
tensor(0.9917)
tensor(0.9924)
tensor(0.9947)


In [307]:
#export
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler

In [129]:
DataLoader??

In [290]:
#export
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))

## DataBunch/ Learner

In [291]:
#export
class DataBunch():
    def __init__(self, train_dl, valid_dl, c):
        self.train_dl, self.valid_dl, self.c = train_dl, valid_dl, c
    @property    
    def train_ds(self): return self.train_dl.dataset
    @property
    def valid_ds(self): return self.test_dl.dataset

In [292]:
train_ds,valid_ds = DataSet(x_train, y_train),DataSet(x_valid, y_valid)

In [293]:
c = y_train.max().item()+1

In [294]:
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [314]:
#export
def get_model(data, lr=0.5, nh=50):
    m = data.train_ds.x.shape[1]
    model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,data.c))
    return model, optim.SGD(model.parameters(), lr=lr)

In [315]:
#export
class Learner():
    def __init__(self, model, opt, loss_func, data):
        self.model,self.opt,self.loss_func,self.data = model,opt,loss_func,data

In [189]:
learn = Learner(*get_model(data), loss_func, data)

In [193]:
def fit(epochs, learn):
    for epoch in range(epochs):
        learn.model.train()
        for xb, yb in learn.data.train_dl:
            loss = learn.loss_func(learn.model(xb), yb)
            
            loss.backward()
            learn.opt.step()
            learn.opt.zero_grad()
            
        learn.model.eval()
            
        with torch.no_grad():
            tot_loss,tot_acc = 0.,0.
            for xb,yb in learn.data.valid_dl:
                pred = learn.model(xb)
                tot_loss += learn.loss_func(pred, yb)
                tot_acc  += accuracy (pred,yb)
        nv = len(learn.data.valid_dl)
        print(epoch, tot_loss/nv, tot_acc/nv)

In [194]:
fit(10, learn)

0 tensor(0.3086) tensor(0.9281)
1 tensor(0.1087) tensor(0.9725)
2 tensor(0.0985) tensor(0.9747)
3 tensor(0.1084) tensor(0.9724)
4 tensor(0.1002) tensor(0.9745)
5 tensor(0.1029) tensor(0.9749)
6 tensor(0.1086) tensor(0.9767)
7 tensor(0.1105) tensor(0.9763)
8 tensor(0.1130) tensor(0.9759)
9 tensor(0.1085) tensor(0.9747)


In [172]:
t = accuracy(model(learn.data.train_ds.x), learn.data.train_ds.y)

In [199]:
def one_batch(xb, yb, cb):
    if not cb.begin_batch(xb,yb): return
    loss = cb.learn.loss_func(cb.learn.model(xb), yb)
    if not cb.after_loss(loss): return
    loss.backward()
    if cb.after_backward(): cb.learn.opt.step()
    if cb.after_step(): cb.learn.opt.zero_grad()

def all_batches(dl, cb):
    for xb,yb in dl:
        one_batch(xb, yb, cb)
        if cb.do_stop(): return

def fit(epochs, learn, cb):
    if not cb.begin_fit(learn): return
    for epoch in range(epochs):
        if not cb.begin_epoch(epoch): continue
        all_batches(learn.data.train_dl, cb)
        
        if cb.begin_validate():
            with torch.no_grad(): all_batches(learn.data.valid_dl, cb)
        if cb.do_stop() or not cb.after_epoch(): break
    cb.after_fit()

In [200]:
class Callback():
    def begin_fit(self, learn):
        self.learn = learn
        return True
    def after_fit(self): return True
    def begin_epoch(self, epoch):
        self.epoch=epoch
        return True
    def begin_validate(self): return True
    def after_epoch(self): return True
    def begin_batch(self, xb, yb):
        self.xb,self.yb = xb,yb
        return True
    def after_loss(self, loss):
        self.loss = loss
        return True
    def after_backward(self): return True
    def after_step(self): return True

In [198]:
class CallbackHandler():
    def __init__(self,cbs=None):
        self.cbs = cbs if cbs else []

    def begin_fit(self, learn):
        self.learn,self.in_train = learn,True
        learn.stop = False
        res = True
        for cb in self.cbs: res = res and cb.begin_fit(learn)
        return res

    def after_fit(self):
        res = not self.in_train
        for cb in self.cbs: res = res and cb.after_fit()
        return res
    
    def begin_epoch(self, epoch):
        learn.model.train()
        self.in_train=True
        res = True
        for cb in self.cbs: res = res and cb.begin_epoch(epoch)
        return res

    def begin_validate(self):
        self.learn.model.eval()
        self.in_train=False
        res = True
        for cb in self.cbs: res = res and cb.begin_validate()
        return res

    def after_epoch(self):
        res = True
        for cb in self.cbs: res = res and cb.after_epoch()
        return res
    
    def begin_batch(self, xb, yb):
        res = True
        for cb in self.cbs: res = res and cb.begin_batch(xb, yb)
        return res

    def after_loss(self, loss):
        res = self.in_train
        for cb in self.cbs: res = res and cb.after_loss(loss)
        return res

    def after_backward(self):
        res = True
        for cb in self.cbs: res = res and cb.after_backward()
        return res

    def after_step(self):
        res = True
        for cb in self.cbs: res = res and cb.after_step()
        return res
    
    def do_stop(self):
        try:     return learn.stop
        finally: learn.stop = False

In [206]:
class TestCallback(Callback):
    def begin_fit(self, learn):
        super().begin_fit(learn)
        self.n_iters=0;
        return True
    
    def after_step(self):
        super().after_step()
        self.n_iters+=1
        print(self.n_iters)
        if(self.n_iters==10): learn.stop = True
        return True
    

In [211]:
fit(1, learn, CallbackHandler([TestCallback()]))

1
2
3
4
5
6
7
8
9
10


In [215]:
#export
from typing import *

def listify(o):
    if o is None: return []
    if isinstance(o, list): return o
    if isinstance(o, Iterable): return list(o)
    return [o]

In [250]:
#export
import re

_camel_re1 = re.compile('(.)([A-Z][a-z]+)')
_camel_re2 = re.compile('([a-z0-9])([A-Z])')
def camel2snake(name):
    s1 = re.sub(_camel_re1, r'\1_\2', name)
    return re.sub(_camel_re2, r'\1_\2', s1).lower()

class Callback():
    _order=0
    def set_runner(self, run): self.run=run
        
    def __getattr__(self, k): return getattr(self.run, k)
    
    @property
    def name(self):
        name = re.sub(r'Callback$', '', self.__class__.__name__)
        return camel2snake(name or 'callback')

In [253]:
#export
class TrainEvalCallback(Callback):

    def begin_fit(self):
        self.run.n_epochs=0.
        self.run.n_iter=0
    
    def after_batch(self):
        if not self.in_train: return
        self.run.n_epochs += 1./self.iters
        self.run.n_iter   += 1
        
    def begin_epoch(self):
        self.run.n_epochs=self.epoch
        self.model.train()
        self.run.in_train=True

    def begin_validate(self):
        self.model.eval()
        self.run.in_train=False

In [261]:
#export
class Runner():
    def __init__(self, cbs=None, cb_funcs=None):
        cbs = listify(cbs)
        for cbf in listify(cb_funcs):
            cb = cbf()
            setattr(self, cb.name, cb)
            cbs.append(cb)
        self.stop,self.cbs = False, [TrainEvalCallback()]+cbs
        
    @property
    def opt(self):       return self.learn.opt
    @property
    def model(self):     return self.learn.model
    @property
    def loss_func(self): return self.learn.loss_func
    @property
    def data(self):      return self.learn.data
    
    def one_batch(self, xb, yb):
        self.xb,self.yb = xb,yb
        if self('begin_batch'): return
        self.pred = self.model(self.xb)
        if self('after_pred'): return
        self.loss = self.loss_func(self.pred, self.yb)
        if self('after_loss') or not self.in_train: return
        self.loss.backward()
        if self('after_backward'): return
        self.opt.step()
        if self('after_step'): return
        self.opt.zero_grad()

    def all_batches(self, dl):
        self.iters = len(dl)
        for xb,yb in dl:
            if self.stop: break
            self.one_batch(xb, yb)
            self('after_batch')
        self.stop=False
    
    def fit(self, epochs, learn):
        self.epochs, self.learn = epochs, learn
        
        try:
            for cb in self.cbs: cb.set_runner(self)
            if self('begin_fit'): return
            for epoch in range(epochs):
                self.epoch = epoch
                if not self('begin_epoch'): self.all_batches(self.data.train_dl)

                with torch.no_grad(): 
                    if not self('begin_validate'): self.all_batches(self.data.valid_dl)
                if self('after_epoch'): break
            
        finally:
            self('after_fit')
            self.learn = None
            
    def __call__(self, cb_name):
        for cb in sorted(self.cbs, key=lambda x: x._order):
            f = getattr(cb, cb_name, None)
            if f and f(): return True
        return False 

In [262]:
runner = Runner(); 
runner.fit(1, learn)

In [264]:
#export
class AvgStats():
    def __init__(self, metrics, in_train): self.metrics,self.in_train = listify(metrics),in_train
    
    def reset(self):
        self.tot_loss,self.count = 0.,0
        self.tot_mets = [0.] * len(self.metrics)
        
    @property
    def all_stats(self): return [self.tot_loss.item()] + self.tot_mets
    @property
    def avg_stats(self): return [o/self.count for o in self.all_stats]
    
    def __repr__(self):
        if not self.count: return ""
        return f"{'train' if self.in_train else 'valid'}: {self.avg_stats}"

    def accumulate(self, run):
        bn = run.xb.shape[0]
        self.tot_loss += run.loss * bn
        self.count += bn
        for i,m in enumerate(self.metrics):
            self.tot_mets[i] += m(run.pred, run.yb) * bn

class AvgStatsCallback(Callback):
    def __init__(self, metrics):
        self.train_stats,self.valid_stats = AvgStats(metrics,True),AvgStats(metrics,False)
        
    def begin_epoch(self):
        self.train_stats.reset()
        self.valid_stats.reset()
        
    def after_loss(self):
        stats = self.train_stats if self.in_train else self.valid_stats
        with torch.no_grad(): stats.accumulate(self.run)
    
    def after_epoch(self):
        print(self.train_stats)
        print(self.valid_stats)

In [318]:
callbacks = [AvgStatsCallback(accuracy)]
runner = Runner(callbacks)
learn = Learner(*get_model(data), loss_func, data)
runner.fit(3, learn)

train: [0.31406525390625, tensor(0.9044)]
valid: [0.22495751953125, tensor(0.9355)]
train: [0.14121029296875, tensor(0.9567)]
valid: [0.1189144775390625, tensor(0.9664)]
train: [0.10742591796875, tensor(0.9675)]
valid: [0.13239752197265625, tensor(0.9632)]


In [319]:
#export
def get_model_func(lr=0.5): return partial(get_model, lr=lr)

## Export

In [317]:
!python notebook2script.py model_pytorch_rework.ipynb

Converted model_pytorch_rework.ipynb to exp/nb_model.py
