In [1]:
from pathlib import Path
from IPython.core.debugger import set_trace
from fastai import datasets
import pickle, gzip, math, torch, matplotlib as mpl
import matplotlib.pyplot as plt
from torch import tensor
from torch import nn
from torch.nn import init

MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'

In [2]:
# import more libraries
from torch.functional import F
from torch import optim

In [3]:
def get_data():
    path = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(path, 'rb') as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

def normalize(x, m, s): return (x-m)/s

def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

In [4]:
x_train, y_train, x_valid, y_valid = get_data()

In [5]:
loss_func = F.cross_entropy
nh, bs = 50, 64
c = y_train.max().item() + 1
n, m = x_train.shape

### Dataset and DataLoader

In [6]:
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler

In [7]:
class Dataset():
  def __init__(self, x, y): self.x, self.y = x,y
  def __len__(self): return len(self.x)
  def __getitem__(self, i): return self.x[i], self.y[i]

In [8]:
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)

In [9]:
train_dl = DataLoader(train_ds, bs)
valid_dl = DataLoader(valid_ds, bs)

In [10]:
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))

### DataBunch/Learner

In [11]:
class DataBunch():
    def __init__(self, train_dl, valid_dl, c=None):
        self.train_dl,self.valid_dl,self.c = train_dl,valid_dl,c
        
    @property
    def train_ds(self): return self.train_dl.dataset
        
    @property
    def valid_ds(self): return self.valid_dl.dataset

In [12]:
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

### Model

In [13]:
def get_model(data, lr=0.5, nh=50):
    model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
    return model, optim.SGD(model.parameters(), lr=lr)

### Learner

In [14]:
class Learner():
    def __init__(self, model, opt, loss_func, data):
        self.model, self.opt, self.loss_func, self.data = model, opt, loss_func, data

In [15]:
learn = Learner(*get_model(data), loss_func, data)

### Traditional Fit

In [16]:
def fit(epochs, learn):
    for epoch in range(epochs):
        learn.model.train()
        for xb,yb in learn.data.train_dl:
            for xb, yb in learn.data.train_dl:
                loss = learn.loss_func(learn.model(xb), yb)
                loss.backward()
                learn.opt.step()
                learn.opt.zero_grad()
            
            learn.model.eval()
            with torch.no_grad():
                tot_loss, tot_acc = 0., 0.
                for xb,yb in learn.data.valid_dl:
                    pred = learn.model(xb)
                    tot_loss += learn.loss_func(pred, yb)
                    tot_acc += accuracy (pred,yb)
            nv = len(learn.data.valid_dl)
            print(epoch, tot_loss/nv, tot_acc/nv)
    return tot_loss/nv, tot_acc/nv

In [17]:
loss, acc = fit(1, learn)

0 tensor(0.3175) tensor(0.9058)
0 tensor(0.1624) tensor(0.9528)
0 tensor(0.1507) tensor(0.9528)
0 tensor(0.1069) tensor(0.9687)
0 tensor(0.0944) tensor(0.9731)
0 tensor(0.1022) tensor(0.9706)
0 tensor(0.0952) tensor(0.9737)
0 tensor(0.0982) tensor(0.9723)
0 tensor(0.1005) tensor(0.9741)
0 tensor(0.1681) tensor(0.9560)
0 tensor(0.1014) tensor(0.9741)
0 tensor(0.2174) tensor(0.9513)
0 tensor(0.1441) tensor(0.9662)
0 tensor(0.1158) tensor(0.9723)
0 tensor(0.1078) tensor(0.9741)
0 tensor(0.1237) tensor(0.9720)
0 tensor(0.1163) tensor(0.9732)
0 tensor(0.1280) tensor(0.9694)
0 tensor(0.1137) tensor(0.9744)
0 tensor(0.1212) tensor(0.9735)
0 tensor(0.1229) tensor(0.9727)
0 tensor(0.1255) tensor(0.9734)
0 tensor(0.1156) tensor(0.9760)
0 tensor(0.1169) tensor(0.9759)
0 tensor(0.1340) tensor(0.9739)
0 tensor(0.1151) tensor(0.9768)
0 tensor(0.1185) tensor(0.9763)
0 tensor(0.1184) tensor(0.9773)
0 tensor(0.1198) tensor(0.9764)
0 tensor(0.1192) tensor(0.9773)
0 tensor(0.1193) tensor(0.9774)
0 tensor

KeyboardInterrupt: 

## Callbacks

This was our training loop(without validation) from the previous notebook, with inner loop contents factored out:

```python
def one_batch(xb,yb):
    pred = model(xb)
    loss = loss_func(pred, yb)
    loss.backward()
    loss.step()
    loss.zero_gard()
```

```python
def fit():
    for epoch in range(epochs):
        for b in train_dl: one_batch(*b)
```

In [20]:
def one_batch(xb, yb, cb):
    if not cb.begin_batch(xb, yb): return
    loss = cb.learn.loss_func(cb.learn.model(xb), yb)
    if not cb.after_loss(loss): return
    loss.backward()
    if cb.after_backward(): cb.learn.opt.step() 
    if cb.after_step(): cb.learn.opt.zero_grad()
        
def all_batches(dl, cb):
    for xb, yb in dl:
        one_batch(xb, yb, cb)
        if cb.do_stop(): return
        
def fit(epochs, learn, cb):
    if not cb.begin_fit(learn): return
    for epoch in range(epochs):
        if not cb.begin_epoch(epoch): continue
        all_batches(learn.data.train_dl, cb)
    
        if cb.begin_validate():
            with torch.no_grad(): all_batches(learn.data.valid_dl, cb)
        if cb.do_stop() or not cb.after_epoch: break
    cb.after_fit()   

In [21]:
class Callback():
    def begin_fit(self, learn):
        self.learn = learn
        return True
    def after_fit(self): return True
    def begin_epoch(self, epoch):
        self.epoch = epoch
        return True
    def after_epoch(self): return True
    def begin_batch(self, xb, yb):
        self.xb, self.yb = xb, yb
        return True
    def after_loss(self, loss):
        self.loss = loss
        return True
    def after_backward(self): return True
    def after_step(self): return True

In [None]:
class CallbackHandler():
    def __init__(self, cbs=None):
        self.cbs = cbs if cbs else []