In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
# We use PyTorch's built-in model classes here, since we already built a fully connected NN from scratch.
# We implement a Dataset, a Dataloader and an Optimizer class from scratch,
# and finish the notebook with a fully repeatable training loop.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
# Download a dataset to train
from fastai import datasets
import gzip
import pickle

MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'

def get_data():
    """Download the pickled MNIST archive and return its train/validation arrays as tensors.

    Returns:
        A lazy ``map`` object yielding, in order, ``x_train``, ``y_train``,
        ``x_valid`` and ``y_valid`` converted with ``torch.tensor``. The third
        split stored in the archive is discarded. Because the result is a
        one-shot iterator it is meant to be unpacked immediately by the caller.
    """
    archive = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(archive, 'rb') as fh:
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only use this with a download source you trust.
        train_split, valid_split, _ = pickle.load(fh, encoding='latin-1')
    x_train, y_train = train_split
    x_valid, y_valid = valid_split
    arrays = (x_train, y_train, x_valid, y_valid)
    return map(torch.tensor, arrays)

x_train,y_train,x_valid,y_valid = get_data()

In [5]:
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape

(torch.Size([50000, 784]),
 torch.Size([50000]),
 torch.Size([10000, 784]),
 torch.Size([10000]))

In [6]:
# Fully connected classifier: 784 input features, one hidden layer of 200 units
# with ReLU, and 10 outputs (one score per class)
layers = [nn.Linear(784,200), nn.ReLU(), nn.Linear(200,10)]

# nn.Sequential applies the layers in the order given
model = nn.Sequential(*layers)
model

Sequential(
  (0): Linear(in_features=784, out_features=200, bias=True)
  (1): ReLU()
  (2): Linear(in_features=200, out_features=10, bias=True)
)

In [7]:
# Minibatch test: check that the slicing arithmetic covers every row,
# including a shorter final batch when len(x) is not a multiple of bs
epochs = 3
bs = 16

# Dummy data with 200 rows, i.e. 12 full batches of 16 plus one batch of 8
x = torch.randn(200,50)
y = torch.randn(200,1)

# (len(x)-1)//bs+1 is the number of batches (len(x)/bs rounded up)
for i in range((len(x)-1)//bs+1):
    print(x[i*bs:i*bs+bs].shape)

torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([16, 50])
torch.Size([8, 50])


## Basic Training Loop

In [8]:
# Hyperparameters read by the training loops in this notebook
epochs = 1  # number of passes over the training set
bs = 16  # minibatch size
lr = 0.1  # step size for the manual SGD update

# Loss applied to the model outputs and the integer class labels
loss_func = F.cross_entropy

In [9]:
#basic training loop with minibatches

# The epoch counter has its own name: previously both loops used `i`, so the
# batch index overwrote the epoch counter on every inner iteration.
for epoch in range(epochs):
    for i in range((len(x_train)-1)//bs+1):
        # Slice out the i-th minibatch (the last one may be shorter than bs)
        xb = x_train[i*bs:i*bs+bs]
        yb = y_train[i*bs:i*bs+bs]

        # Forward pass, loss, and backward pass to populate p.grad
        pred = model(xb)
        loss = loss_func(pred,yb)
        loss.backward()

        # Manual SGD update; no_grad keeps the in-place update out of autograd
        with torch.no_grad():
            for p in model.parameters(): p -= p.grad * lr
            # Clear gradients so the next minibatch does not accumulate onto them
            model.zero_grad()

    # Loss of the final minibatch of this epoch
    print(loss)

tensor(0.2529, grad_fn=<NllLossBackward>)


In [10]:
def accuracy(pred, yb):
    """Return the fraction of rows whose highest-scoring column matches the label.

    Args:
        pred: 2-D tensor of scores; the arg-max is taken along dim 1.
        yb: tensor of target class indices, compared element-wise with the arg-max.

    Returns:
        A 0-dim float tensor holding the mean of the per-row matches.
    """
    predicted_classes = pred.argmax(dim=1)
    hits = predicted_classes == yb
    return hits.float().mean()

accuracy(model(x_valid), y_valid)

tensor(0.9570)

## Optimizer

In [11]:
# Create Optimizer class

class Optimizer():
    """Minimal SGD optimizer: updates parameters and resets their gradients.

    Args:
        parameters: zero-argument callable returning an iterable of the
            tensors to optimize (pass the bound method ``model.parameters``,
            not its result), so a fresh iterator is obtained on every pass.
        lr: learning rate multiplied with each gradient in ``step``.
    """

    def __init__(self, parameters, lr=0.1):
        self.parameters = parameters
        self.lr = lr

    def step(self):
        """Apply one in-place update ``p -= p.grad * lr`` to every parameter.

        Uses the instance's own learning rate and parameters only; it no
        longer depends on notebook-level ``lr`` / ``model`` variables.
        Gradients are left in place: call ``zero_grad`` afterwards.
        """
        with torch.no_grad():
            for p in self.parameters():
                # Parameters that received no gradient are skipped
                if p.grad is None:
                    continue
                p -= p.grad * self.lr

    def zero_grad(self):
        """Zero the gradient of every parameter in place."""
        for p in self.parameters():
            # A gradient may be None (never populated, or cleared elsewhere)
            if p.grad is not None:
                p.grad.zero_()

In [12]:
# Build a freshly initialised model so the Optimizer class is exercised from scratch
layers = [nn.Linear(784,200), nn.ReLU(), nn.Linear(200,10)]

model = nn.Sequential(*layers)
# Pass the bound method itself (not model.parameters()): Optimizer calls it on every pass
opt = Optimizer(model.parameters, 0.1)

In [13]:
#basic training loop with minibatches

# The epoch counter has its own name: previously both loops used `i`, so the
# batch index overwrote the epoch counter on every inner iteration.
for epoch in range(epochs):
    for i in range((len(x_train)-1)//bs+1):
        # Slice out the i-th minibatch (the last one may be shorter than bs)
        xb = x_train[i*bs:i*bs+bs]
        yb = y_train[i*bs:i*bs+bs]

        pred = model(xb)
        loss = loss_func(pred,yb)
        loss.backward()

        # The Optimizer object replaces the hand-written update and reset
        opt.step()
        opt.zero_grad()

# Loss of the last minibatch, then accuracy on the validation set
print(loss)
accuracy(model(x_valid), y_valid)

tensor(0.1725, grad_fn=<NllLossBackward>)


tensor(0.9589)

## Dataset and Dataloader

In [14]:
class Dataset():
    """Pairs two index-aligned containers (inputs ``x`` and targets ``y``)."""

    def __init__(self, x,y):
        self.x = x
        self.y = y

    def __len__(self):
        # The length is taken from the inputs alone
        inputs = self.x
        return len(inputs)

    def __getitem__(self, i):
        # The same index (or slice) is applied to both containers
        inputs, targets = self.x, self.y
        return inputs[i], targets[i]

In [15]:
# Wrap the training tensors so a single index returns the matching (input, label) pair
ds_train = Dataset(x_train,y_train)

print(len(ds_train))

# Fetch one sample and inspect the shape of its input
x,y = ds_train[10]
x.shape

50000


torch.Size([784])

In [16]:
class Dataloader():
    """Iterates over a dataset in consecutive slices of ``bs`` items."""

    def __init__(self, dataset, bs=16):
        self.bs = bs
        self.dataset = dataset

    def __iter__(self):
        # Number of batches: dataset length divided by bs, rounded up
        n_batches = (len(self.dataset) - 1) // self.bs + 1
        for batch_no in range(n_batches):
            # Batch index is printed as each batch is produced
            print(batch_no)
            lo = batch_no * self.bs
            window = slice(lo, lo + self.bs)
            yield self.dataset[window]

In [17]:
def print_batch():
    """Build a Dataloader over ``ds_train`` with batch size 16 and print its first batch.

    Prints the inputs, their shape, the targets and their shape. Uses whichever
    ``Dataloader`` class is currently defined in the notebook.
    """
    loader = Dataloader(ds_train, 16)
    batches = iter(loader)

    # Only the first batch is shown
    x, y = next(batches)
    print(x, x.shape, y, y.shape)

print_batch()

0
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([16, 784]) tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7]) torch.Size([16])


In [18]:
# Refactor the __iter__ for loop

class Dataloader():
    """Yields the dataset batch by batch, in order.

    Each batch is the slice ``dataset[start:start+bs]``, so only one batch
    worth of items is requested from the dataset at a time.
    """

    def __init__(self, dataset, bs=16):
        self.bs = bs
        self.dataset = dataset

    def __iter__(self):
        # Stepping the range by bs gives the start offset of every batch directly
        batch_starts = range(0, len(self.dataset), self.bs)
        yield from (self.dataset[lo:lo + self.bs] for lo in batch_starts)

In [19]:
print_batch()

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([16, 784]) tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7]) torch.Size([16])


In [20]:
# Update training loop 
dl_train = Dataloader(ds_train, 16)

# model and opt are not re-created here, so this continues training the same model
for i in range(epochs):
    # The Dataloader yields (xb, yb) minibatches, replacing the manual slicing
    for xb, yb in dl_train:
        pred = model(xb)
        loss = loss_func(pred,yb)
        loss.backward()

        opt.step()
        opt.zero_grad()
            
# Loss of the last minibatch, then accuracy on the validation set
print(loss)
accuracy(model(x_valid), y_valid)

tensor(0.0822, grad_fn=<NllLossBackward>)


tensor(0.9674)

## Sampler

In [21]:
# NOTE(review): x is assigned here but not used by the rest of this cell
x = torch.Tensor(list(range(100)))

# Five random indices below 100; not the last expression, so this value is not displayed
torch.randperm(100)[:5]

# Indexing the Dataset with a tensor of indices returns the matching inputs and labels
ds_train[torch.randperm(100)[:5]]

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([7, 6, 1, 2, 9]))

In [22]:
class Sampler():
    """Yields batches of dataset indices in a fresh random order on every iteration."""

    def __init__(self, dataset, bs):
        # Only the dataset's length is kept, not the dataset itself
        self.bs = bs
        self.n = len(dataset)

    def __iter__(self):
        # A new permutation of 0..n-1 is drawn each time iteration starts
        order = torch.randperm(self.n)
        batch_starts = range(0, self.n, self.bs)
        yield from (order[lo:lo + self.bs] for lo in batch_starts)

In [23]:
sampler = Sampler(ds_train, 16)

# Pull the first two batches of shuffled indices from one pass of the sampler
batch = iter(sampler)
print(next(batch))
print(next(batch))

tensor([12607, 14062, 17362, 11569, 17652, 29281, 30774, 31564,  4993,  2927,
        14066, 37823, 44265,  1977, 35454, 23178])
tensor([33139,  5762, 32364, 17667, 24168, 39245, 38993, 37226, 41782, 24556,
        44577, 36693, 32249, 42138, 39617, 28805])


In [24]:
# Add Sampler to Dataloader

class Dataloader():
    """Yields batches from a dataset, either shuffled or in order.

    Args:
        dataset: object supporting ``len()`` plus indexing by a slice and by
            whatever index batches the sampler yields.
        bs: number of items per batch (the last batch may be shorter).
        sampler: class (or factory) called as ``sampler(dataset, bs)`` whose
            instance yields one batch of indices per iteration step.
        shuffle: when truthy, batches are drawn through the sampler;
            otherwise the dataset is sliced sequentially.
    """

    def __init__(self, dataset, bs=16, sampler=Sampler, shuffle=True):
        self.dataset = dataset
        self.bs = bs
        self.sampler = sampler(dataset, bs)
        self.shuffle = shuffle

    def __iter__(self):
        # Plain truthiness test instead of comparing against True
        if self.shuffle:
            for sample in self.sampler:
                yield self.dataset[sample]
        else:
            for i in range(0, len(self.dataset), self.bs):
                yield self.dataset[i:i+self.bs]

In [25]:
print_batch()

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([16, 784]) tensor([4, 6, 1, 5, 8, 7, 9, 1, 8, 4, 1, 5, 1, 2, 6, 4]) torch.Size([16])


In [26]:
# Rerun training loop 
# Rebuild the loader so it uses the latest Dataloader class (shuffle defaults to True)
dl_train = Dataloader(ds_train, 16)

# model and opt are not re-created here, so this continues training the same model
for i in range(epochs):
    for xb, yb in dl_train:
        pred = model(xb)
        loss = loss_func(pred,yb)
        loss.backward()

        opt.step()
        opt.zero_grad()
            
# Loss of the last minibatch, then accuracy on the validation set
print(loss)
accuracy(model(x_valid), y_valid)

tensor(0.0095, grad_fn=<NllLossBackward>)


tensor(0.9737)

## Integrating a Collate Function

In [27]:
def collate(b):
    """Combine a sequence of ``(x, y)`` sample pairs into one batch.

    Args:
        b: iterable of two-element samples, each holding an input tensor and a
            target tensor.

    Returns:
        A tuple ``(xs, ys)`` where each element is the ``torch.stack`` of the
        corresponding sample components along a new leading dimension.
    """
    # Transpose the list of pairs into one column of inputs and one of targets
    inputs, targets = zip(*b)
    stacked_inputs = torch.stack(inputs)
    stacked_targets = torch.stack(targets)
    return stacked_inputs, stacked_targets

In [28]:
# Take the first batch from a new loader and check the shapes of inputs and targets
dl_train = Dataloader(ds_train, 16)
xb,yb = next(iter(dl_train))

xb.shape, yb.shape

(torch.Size([16, 784]), torch.Size([16]))

In [29]:
# Add Collate to Dataloader

class Dataloader():
    """Yields collated batches from a dataset, either shuffled or in order.

    Every batch is built by fetching items one index at a time and passing the
    resulting list to ``collate_fn``, in both the shuffled and the sequential
    mode, so a custom ``collate_fn`` is always honoured.

    Args:
        dataset: object supporting ``len()`` and indexing by a single index.
        bs: number of items per batch (the last batch may be shorter).
        sampler: class (or factory) called as ``sampler(dataset, bs)`` whose
            instance yields one batch of indices per iteration step.
        shuffle: when truthy, index batches come from the sampler; otherwise
            consecutive index ranges are used.
        collate_fn: callable that turns a list of dataset items into a batch.
    """

    def __init__(self, dataset, bs=16, sampler=Sampler, shuffle=True, collate_fn=collate):
        self.dataset = dataset
        self.bs = bs
        self.sampler = sampler(dataset, bs)
        self.shuffle = shuffle
        self.collate_fn = collate_fn

    def _sequential_batches(self):
        """Yield consecutive index ranges of at most ``bs`` indices each."""
        n = len(self.dataset)
        for start in range(0, n, self.bs):
            yield range(start, min(start + self.bs, n))

    def __iter__(self):
        # Only the source of the indices differs between the two modes;
        # previously the sequential branch sliced the dataset and skipped collate_fn.
        index_batches = self.sampler if self.shuffle else self._sequential_batches()
        for idxs in index_batches:
            yield self.collate_fn([self.dataset[i] for i in idxs])

In [30]:
print_batch()

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([16, 784]) tensor([7, 0, 7, 7, 8, 6, 7, 3, 3, 5, 9, 2, 2, 6, 8, 0]) torch.Size([16])


In [31]:
# Rerun training loop 
# Rebuild the loader so it uses the latest Dataloader class (batches built via collate_fn)
dl_train = Dataloader(ds_train, 16)

# model and opt are not re-created here, so this continues training the same model
for i in range(epochs):
    for xb, yb in dl_train:
        pred = model(xb)
        loss = loss_func(pred,yb)
        loss.backward()

        opt.step()
        opt.zero_grad()
            
# Loss of the last minibatch, then accuracy on the validation set
print(loss)
accuracy(model(x_valid), y_valid)

tensor(0.0242, grad_fn=<NllLossBackward>)


tensor(0.9777)

In [32]:
# Fin