In [1]:
from fastai.vision.all import *
# untar_data returns the local folder of the MNIST sample dataset (per the fastai
# docs it downloads/extracts it on first use). The same folder is kept in `path`
# and registered as Path.BASE_PATH.
Path.BASE_PATH = path = untar_data(URLs.MNIST_SAMPLE)

#### Dataloader creation (TODO: extend to all digit categories; currently only 3s and 7s)

In [104]:
# Load every training image as a tensor, one per file. The category folders are
# visited in sorted name order ('3' before '7'): a raw directory listing is not
# guaranteed to come back in any particular order, and the labels built from the
# per-folder counts rely on all 3s coming before all 7s.
train_x_lst = [tensor(Image.open(file))
               for cat in (path/'train').ls().sorted()
               for file in cat.ls()]
# stack into one rank-3 tensor, flatten each image into a single row of 28*28
# values (rank 2), then convert to float and divide by 255 to normalize the
# greyscale values
train_x_tns = torch.stack(train_x_lst).view(-1, 28*28).float()/255

In [105]:
# Load every validation image as a tensor, one per file. The category folders are
# visited in sorted name order ('3' before '7'): a raw directory listing is not
# guaranteed to come back in any particular order, and the labels built from the
# per-folder counts rely on all 3s coming before all 7s.
valid_x_lst = [tensor(Image.open(file))
               for cat in (path/'valid').ls().sorted()
               for file in cat.ls()]
# stack into one rank-3 tensor, flatten each image into a single row of 28*28
# values (rank 2), then convert to float and divide by 255 to normalize the
# greyscale values
valid_x_tns = torch.stack(valid_x_lst).view(-1, 28*28).float()/255

In [106]:
# Number of image files in each split/digit folder, unpacked in the order
# train-3, train-7, valid-3, valid-7.
train_3s_count, train_7s_count, valid_3s_count, valid_7s_count = (
    len((path/split/digit).ls())
    for split in ('train', 'valid')
    for digit in ('3', '7')
)

In [120]:
# Labels are generated purely from the folder counts: a run of 1s (one per '3'
# image) followed by a run of 0s (one per '7' image).
# NOTE(review): this only lines up with x if the images were stacked with all 3s
# first and all 7s second - confirm against the order used when loading x.
# Indexing with [:, None] adds a trailing axis so y has the same rank as x.
train_y_tns = tensor([1] * train_3s_count + [0] * train_7s_count)[:, None]
valid_y_tns = tensor([1] * valid_3s_count + [0] * valid_7s_count)[:, None]

In [108]:
# Pair each image row with its label, so indexing a dataset yields an (x, y) tuple.
train_dset = [(x, y) for x, y in zip(train_x_tns, train_y_tns)]
valid_dset = [(x, y) for x, y in zip(valid_x_tns, valid_y_tns)]

In [118]:
# Training batches are reshuffled on every pass. The validation loader keeps a
# fixed order: shuffling brings no benefit when only measuring accuracy and
# makes the evaluation pass non-repeatable.
train_dl = DataLoader(train_dset, bs=256, shuffle=True)
valid_dl = DataLoader(valid_dset, bs=256, shuffle=False)

#### Model

In [270]:
class TwoHLLearner:
    """Binary classifier with two ReLU hidden layers, trained with plain mini-batch SGD.

    Each input row holds 28*28 features. The network produces one logit per
    example; its sigmoid is treated as the probability of the positive class
    (target value 1).
    """

    def __init__(self, dls, n_hs, lr=1):
        """Store the dataloaders and randomly initialise every weight and bias.

        dls  - tuple of train dataloader and valid dataloader respectively;
               each is iterated as (xb, yb) batches
        n_hs - tuple of neuron counts for hidden layer 1 and 2 respectively
        lr   - learning rate applied on every SGD step
        """
        n_h1, n_h2 = n_hs
        self.train_dl, self.valid_dl = dls
        self.lr = lr

        # initialize all parameters of the model and keep them in a tuple so the
        # training loop can iterate over them
        self.w1, self.b1 = self.init_param((28*28, n_h1)), self.init_param((1, n_h1))
        self.w2, self.b2 = self.init_param((n_h1, n_h2)), self.init_param((1, n_h2))
        # the shape is written as the tuple (1,); `(1)` would be the plain int 1
        self.w3, self.b3 = self.init_param((n_h2, 1)), self.init_param((1,))
        self.params = self.w1, self.b1, self.w2, self.b2, self.w3, self.b3

    def init_param(self, shape, scale=1):
        """Return a gradient-tracking tensor of `shape` drawn from N(0, 1) times `scale`."""
        return (torch.randn(shape) * scale).requires_grad_()

    def fit(self, num_epochs):
        """Train for `num_epochs` passes over the training dataloader."""
        for _ in range(num_epochs):
            self.train_epoch()

    def train_epoch(self):
        """Run one SGD pass over the training data, then print the validation accuracy."""
        for xb, yb in self.train_dl:
            # make prediction and get gradient of all parameters
            loss = self.loss_f(self.model(xb), yb)
            loss.backward()
            # apply the step outside autograd so the update itself is not
            # recorded, then reset each grad because backward() accumulates
            with torch.no_grad():
                for p in self.params:
                    p -= self.lr * p.grad
                    p.grad.zero_()
        print(self.valid_acc(), end='% ')

    def model(self, xb):
        """Forward pass (linear, ReLU, linear, ReLU, linear); returns raw logits."""
        res = torch.relu(xb @ self.w1 + self.b1)
        res = torch.relu(res @ self.w2 + self.b2)
        return res @ self.w3 + self.b3

    def loss_f(self, pred, target):
        """Mean distance between the sigmoid of `pred` and the target.

        Contributes 1 - p where target == 1 and p elsewhere, with p = sigmoid(pred).
        """
        pred = pred.sigmoid()
        return torch.where(target == 1, 1 - pred, pred).mean()

    def valid_acc(self):
        """Return validation accuracy as a percentage rounded to 2 decimals.

        Correct predictions are counted over the whole validation set rather
        than averaging per-batch accuracies, so a smaller final batch is not
        over-weighted. Returns NaN when the validation dataloader yields nothing.
        """
        n_correct, n_total = 0, 0
        # evaluation only: no gradients needed, so skip building the autograd graph
        with torch.no_grad():
            for xb, yb in self.valid_dl:
                pred = (self.model(xb).sigmoid() > 0.5).float()
                matches = pred == yb
                n_correct += matches.sum().item()
                n_total += matches.numel()
        if n_total == 0:
            return float('nan')
        return round(n_correct / n_total * 100, 2)

In [271]:
# two hidden layers with 32 and 16 neurons; learning rate left at its default
learner = TwoHLLearner(dls=(train_dl, valid_dl), n_hs=(32, 16))

In [272]:
# train for 10 epochs; the validation accuracy is printed after each one
learner.fit(num_epochs=10)

92.94% 94.4% 94.86% 95.49% 95.73% 95.78% 95.97% 96.18% 96.18% 96.16% 