# Fit function

based off jeremy howards great tutorial! https://pytorch.org/tutorials/beginner/nn_tutorial.html

## Setup

In [3]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
from matplotlib import pyplot as plt
import numpy as np

from pathlib import Path
import requests
import pickle
import gzip
import torch

In [5]:
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
FILENAME = "mnist.pkl.gz"

In [6]:
def unzip_data():
    with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
            ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
    
    return x_train, y_train, x_valid, y_valid

## PyTorch Datasets

abstract class where you need to implement

__len__ & __getitem__ methods 

In [7]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [8]:
class MnistDataset(Dataset):
    def __init__(self, is_valid=False):
        x_train, y_train, x_valid, y_valid = unzip_data()
        
        self.x = x_train if is_valid else x_valid
        self.y = y_train if is_valid else y_valid
        self.len = self.x.shape[0] 
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
        
    def __len__(self):
        return self.len

In [9]:
train_ds = MnistDataset()
valid_ds = MnistDataset(is_valid=True)

## PyTorch Dataloader

manages getting batches for our training loop from our dataloader

returns a python interator

In [14]:
class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))

In [15]:
dev = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")

def to_device(x, y):
    return x.to(dev), y.to(dev)

def get_data(bs):
    train_ds = MnistDataset()
    valid_ds = MnistDataset(is_valid=True)

    train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
    
    train_dl = WrappedDataLoader(train_dl, to_device)
    valid_dl = WrappedDataLoader(valid_dl, to_device)
    
    return train_dl, valid_dl

In [28]:
from torch import nn
import torch.nn.functional as F

loss_func = F.cross_entropy

def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

class Mnist_NN(nn.Module):
    def __init__(self, n_h):
        super().__init__()
        self.inp = nn.Linear(784, n_h)
        self.hid = nn.Linear(n_h, n_h)
        self.out = nn.Linear(n_h, 10)
        
    def forward(self, xb):
        ab1 = F.relu(self.inp(xb))
        ab2 = F.relu(self.hid(ab1))
        ab3 = F.relu(self.out(ab2))
        return ab3

In [29]:
from torch import optim

def get_model():
    model = Mnist_NN(100)
    opt = optim.SGD(model.parameters(), lr=0.02)
    return model, opt

In [108]:
F.cross_entropy??

## Write loss_batch and fit functions

In [158]:
to4dec = lambda a: np.around(a, decimals=4) if not a==None else None

def print_epoch_progress(epoch, train_loss=None, valid_loss=None, metrics=[]):
    metrics = [f"{key}: {to4dec(value)}" for key, value in metrics.items()]
    print(epoch, 'train loss: ', to4dec(train_loss),'valid loss: ', to4dec(valid_loss,), ' '.join(metrics))

In [159]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    y_predb = model(xb)
    loss = loss_func(y_predb, yb)
    
    # if validation dont update weights (you cant learn from them)
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), y_predb, yb

In [161]:
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        
        model.train()
        trn_losses,_,_ = zip(*[loss_batch(model, loss_func, xb, yb, opt) for xb, yb in train_dl ])

        model.eval()
        with torch.no_grad():
            val_losses, ypreds, ys = zip(*[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl])

        training_loss = np.sum(trn_losses) / len(trn_losses)
        validation_loss = np.sum(val_losses) / len(val_losses)
        
        accs = [ accuracy(yhat, y).item() for yhat, y in zip(ypreds, ys)]
        epoch_accuracy = np.sum(accs)/ len(accs)

        
        print_epoch_progress(epoch, training_loss, validation_loss, {'accuracy': epoch_accuracy})

In [162]:
model, opt = get_model()
fit(10, model, loss_func, opt, train_dl, valid_dl)

0 train loss:  2.2842 valid loss:  2.2541 accuracy: 0.2562
1 train loss:  2.1601 valid loss:  2.0338 accuracy: 0.3699
2 train loss:  1.8148 valid loss:  1.6558 accuracy: 0.4727
3 train loss:  1.4797 valid loss:  1.4214 accuracy: 0.5628
4 train loss:  1.2924 valid loss:  1.2929 accuracy: 0.5732
5 train loss:  1.1888 valid loss:  1.2171 accuracy: 0.5866
6 train loss:  1.1242 valid loss:  1.1688 accuracy: 0.5913
7 train loss:  1.0829 valid loss:  1.1347 accuracy: 0.5988
8 train loss:  1.0556 valid loss:  1.111 accuracy: 0.6027
9 train loss:  1.0298 valid loss:  1.1044 accuracy: 0.6017


###  CNN

In [375]:
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
        
    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.adaptive_avg_pool2d(xb, 1)
        return xb.view(-1, xb.size(1))
        

In [376]:
Mnist_CNN()

Mnist_CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(16, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)

In [377]:
model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

fit(3, model, loss_func, opt, train_dl, valid_dl)

0 1.4628131328582763 0.5412381329113924


KeyboardInterrupt: 

In [None]:
fit(3, model, loss_func, opt, train_dl, valid_dl)

In [None]:
F.adaptive_avg_pool2d??

### nn.Sequential

In [378]:
model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
)

In [379]:
xb, yb = next(iter(train_dl))

In [380]:
model(xb)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [16, 1, 3, 3], but got 2-dimensional input of size [64, 784] instead

In [264]:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

In [382]:
model = nn.Sequential(
    Lambda(lambda xb: xb.view(-1, 1, 28, 28)),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda xb: xb.view(-1, xb.size(1)))
)

In [383]:
opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

train_dl, valid_dl = get_data(x_train, y_train, 1024)

fit(3, model, loss_func, opt, train_dl, valid_dl)

0 2.3019332149505614 0.10640815198421479
1 2.3015339069366454 0.10640815198421479
2 2.3010899620056153 0.10640815198421479


# GPU!

In [292]:
sum(p.numel() for p in model.parameters())

3930

In [385]:
torch.cuda.is_available()

False

In [235]:
dev = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")

In [389]:
torch.Tensor([1.0]).to(torch.device("cpu"))

tensor([1.])

### We need all our tensor ops to be on the GPU 

Otherwise we will get an error 

This means we have to put our dataset onto the gpu as well as our model

In [269]:
train_dl, valid_dl = get_data(x_train, y_train, 64)
train_dl = WrappedDataLoader(train_dl, to_device)
valid_dl = WrappedDataLoader(valid_dl, to_device)

In [270]:
model = model.to(dev)
opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [272]:
%%time
fit(10, model, loss_func, opt, train_dl, valid_dl)

0 0.20664597454071046 0.9376977848101266
1 0.2013221917152405 0.9416534810126582
2 0.199952001953125 0.9433346518987342
3 0.18992808513641357 0.9444224683544303
4 0.18371812086105346 0.9486748417721519
5 0.1758351182937622 0.9511471518987342
6 0.16914637775421143 0.9523338607594937
7 0.1881415760040283 0.9475870253164557
8 0.18738684339523315 0.9472903481012658
9 0.17089933853149414 0.9521360759493671
CPU times: user 55.7 s, sys: 1min 52s, total: 2min 47s
Wall time: 1min 24s
