# Learner
The goal of the learner is to
have a framework that allows us to tray anything very quickly

In [1]:
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl, numpy as np
import torch
import pandas as pd,matplotlib.pyplot as plt
from pathlib import Path
from torch import tensor, nn
from torch.utils.data import DataLoader
from datasets import load_dataset
from torch.utils.data import default_collate
import torch.nn.functional as F

import fastcore.all as fc

from torch import optim

torch.manual_seed(1)

<torch._C.Generator at 0x2252d55b990>

In [2]:
from lib import *

Let's start again from fashion mnist

In [3]:
name = "fashion_mnist"
dsd = load_dataset(name)

In [4]:
dsd

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [5]:
x,y = 'image','label'

In [6]:
import torchvision.transforms.functional as TF

@inplace
def transformi(b): b[x] = [torch.flatten(TF.to_tensor(o)) for o in b[x]]

In [7]:
bs = 64
tds = dsd.with_transform(transformi)

Now we have recreated the situation of the same notebook of ae.
Let's see how to set up the Learner Class

In [8]:
class DataLoaders:
    def __init__(self, train_data_loader, valid_data_loader):
        self.train = train_data_loader
        self.valid = valid_data_loader

    @classmethod # static method
    def from_datasetDict(cls, datasetDict, batch_size): #, as_tuple=True):
        return cls (*[DataLoader(ds, batch_size, collate_fn=collate_dict(ds)) for ds in datasetDict.values()])
        # this return calls __init__
    # static method with cls allows the instanciation of the class
    # recall that DataLoader can use multiple workers
    # dont send anything to device here cuz huge overload 

In [9]:
bs = 32
dls = DataLoaders.from_datasetDict(tds, bs)
dt = dls.train
xb, yb = next(iter(dt))
xb.shape, yb[:bs]

(torch.Size([32, 784]),
 tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9, 1, 0, 6, 4, 3, 1, 4, 8,
         4, 3, 0, 2, 4, 4, 5, 3]))

# Learner
## First very simple approach to a generic Learner class for classification tasks
It will replace the fit() function.
It's main parts are:
- fit method
- one_epoch method
- one_batch method
- calc_stats

In [19]:
class Learner:
    def __init__(self, model, dls, loss_func, lr, opt_func=optim.SGD): fc.store_attr()
        
    def one_batch(self):
        self.xb, self.yb = to_device(self.batch)
        self.preds = model(self.xb)
        self.loss = self.loss_func(self.preds, self.yb)
        if self.model.training:
            self.loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        with torch.no_grad():
            self.calc_stats()
            
    def calc_stats(self): # over single batch
        acc = (self.preds.argmax(dim=1)==self.yb).float().sum() 
        self.accs.append(acc)
        n = len(self.xb)
        self.losses.append(self.loss*n) # takes loss not averaged over the batch
        self.ns.append(n) # stores size of batch
        
    def one_epoch(self, isTrain):
        self.model.training = isTrain
        dl = self.dls.train if isTrain else self.dls.valid
        for self.num, self.batch in enumerate(dl):
            self.one_batch()
        n = sum(self.ns) # sum of observations analyzed over multiple epochs
        txt = "Train step" if isTrain else "Validation step"
        print(f"Epoch: {self.epoch}, mode: {txt}, loss: {sum(self.losses)/n}, accuracy: {sum(self.accs)/n}")
        # why these metrics are over multiple epochs??
        
    def fit(self, n_epochs):
        self.accs, self.losses, self.ns = [], [], [] # list of metrics over every single batch analyzed over multiple epochs
        self.model.to(device) # device imported from lib # can't use to_device(device) here cuz it's for model!
        self.opt = self.opt_func(self.model.parameters(), self.lr)
        self.n_epochs = n_epochs
        for self.epoch in range(n_epochs):
            self.one_epoch(isTrain=True)
            self.one_epoch(isTrain=False)

In [20]:
m,nh = 28*28,50
model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))

learn = Learner(model, dls, F.cross_entropy, lr=0.2)
learn.fit(3)

Epoch: 0, mode: Train step, loss: 0.5637474656105042, accuracy: 0.7900333404541016
Epoch: 0, mode: Validation step, loss: 0.5492904782295227, accuracy: 0.7962714433670044
Epoch: 1, mode: Train step, loss: 0.4891236126422882, accuracy: 0.8192384839057922
Epoch: 1, mode: Validation step, loss: 0.486274778842926, accuracy: 0.8205785751342773
Epoch: 2, mode: Train step, loss: 0.4552265703678131, accuracy: 0.8321800231933594
Epoch: 2, mode: Validation step, loss: 0.45522287487983704, accuracy: 0.8325904607772827


The problem with this learner is that it is not very flexible, eg it can be used only for classification since it uses
a hardcoded way to compute whatever loss_func it takes in input (i.e. loss_func is always called with (xhat,y) while eg in AE
we want (xhat,xb). Plus it computes mandatory accuracy (sensless in AE).

So let's move step by step to make the Learner very flexible. Let's start to fix the metrics issue: let us create an interface/superclass for a generic Metric concept:

In [36]:
class Metric:
    '''
        Base class to be extended if particular metric is desired.
        If not extended it computes the weighted average of its input wrt batch_size 
    '''
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.vals, self.ns = [], []
        
    def add(self, input, target=None, batch_size=1):
        # adds (x_hats, y) for minibatch
        self.last = self.calc(input, target)
        self.vals.append(self.last)
        self.ns.append(batch_size)
    
    @property
    def value(self):
        ns = torch.tensor(self.ns)
        return (torch.tensor(self.vals)*ns).sum()/ns.sum()
    
    def calc(self, inputs, targets): 
        ''' method to be overwritten in derived class '''
        return inputs

In [38]:
# usage of Metric class:
loss = Metric() # suppose x_hat is a probability that has to be increased (supposing that last layer activation = sigmoid)
loss.add(.9, batch_size=32)
loss.add(.6, batch_size=2)

loss.value, (.9*32 +.6*2)/(32+2)

(tensor(0.8824), 0.8823529411764706)

In [33]:
class Accuracy(Metric):
    def calc(self, inputs, targets):
        return (inputs==targets).float().sum() 

In [34]:
# usage of Accuracy class:
acc = Accuracy()

x_hat_b1 = tensor([0,1,2,0,1]) 
y_b1 = tensor([0,0,2,1,1]) 

x_hat_b2 = tensor([1,1,2,0,0]) 
y_b2 = tensor([0,1,2,0,0]) 

acc.add(x_hat_b1, y_b1)
acc.add(x_hat_b2, y_b2)

acc.value

tensor(3.5000)

# Let's now add Callbacks

In [57]:
class with_cbs():
    '''
    a callable class that is used to decorate learner methods
    a decorator is called with the funct that it is decorating as input
    '''
    
    def __init__(self, method_name):  # method_name is what is passed in @with_cbs("method_name")
        self.method_name = method_name
        
    def __call__(self, f): 
        def _f(o, *args, **kwargs): # allows to forwards all inputs to original f; o its the ref to the learner
            try:
                o.callback(f'before_{self.method_name}') 
                f(o, *args, **kwargs)
                o.callback(f'before_{self.method_name}')
            except globals()(f'Cancel {self.method_name.title()} Exception'):
                pass
        return _f
    
# when the decorated function is called, it is CALLED/executed the function returned by the __call__ method 
# of its decorator. In with_cbs() thus:
# - it is executed PRE a callback 
# - it is executed the original decorated function
# - it is executed POST a callback 

In [58]:
def identity(*args): # whatever args it are passed to this func, it returns them
    if not args: 
        return
    x, *args = args
    if args:
        return (x,)+tuple(args)
    return x

In [59]:
identity(1), identity("a"), identity("a", 1, ["l"])

(1, 'a', ('a', 1, ['l']))

In [60]:
from operator import attrgetter

class Learner:
    def __init__(self, model, dls, loss_func, lr, callbacks, opt_func=optim.SGD): 
        fc.store_attr()
        for cb in callbacks:
            cb.learner = self # in each callback object create a reference to this learner
        
    def one_batch(self):
        self.predict()
        self.get_loss()
        if self.model.training:
            self.backward()
            self.step()
            self.zero_grad()
        
    def one_epoch(self, isTrain):
        self.model.training = isTrain
        self.dl = self.dls.train if isTrain else self.dls.valid
        self._one_epoch()
        
    def _one_epoch(self):
        for self.iter, self.batch in enumerate(self.dl):
            self.one_batch()

    def fit(self, n_epochs):
        self.n_epochs = n_epochs
        self.epochs = range(n_epochs)
        self.opt = self.opt_func(self.model.parameters(), self.lr)
        self._fit() # actually calls -> with_cbs.__call__(self, "fit") # self is this learner
    
    @with_cbs("fit")            
    def _fit(self):
        for self.epoch in self.epochs:
            self.one_epoch(isTrain=True)
            self.one_epoch(isTrain=False)
            
    def callback(self, method_name):
        for cb in sorted(self.callbacks, key=attrgetter('order')):
            getattr(cb, method_name, identity)() # if getattr does not find the method it will return identity
            
    

We have a decorator and the Learner with callbacks, let's look at an example

In [61]:
class Callback():
    order = 0

In [62]:
class DeviceCB(Callback):
    
    def before_fit(self):
        self.learner.model.to(device)
    
    def before_batch(self):
        self.learner.batch = to_device(device)

In [63]:
cbs = [DeviceCB()]
learner = Learner(model, dls, F.cross_entropy, lr=0.2, callbacks=cbs)
learner.fit(1)

TypeError: 'dict' object is not callable