# ANN from scratch

* Model: simple neural net
* Optimiser: SGD with mini batches
* Metric: accuracy
* Loss: cross entropy


SGD optimisation process:
1. Feedforward one batch
2. Calculate loss
3. Calculate gradients
4. Update weights & biases (step)
5. Repeat


References: 
* [Notebook 4 from Fast AI](https://github.com/fastai/fastbook/blob/master/04_mnist_basics.ipynb)
* [An introduction to Pytorch and Fastai v2 on the MNIST dataset](https://jonathan-sands.com/deep%20learning/fastai/pytorch/vision/classifier/2020/11/15/MNIST.html)
* [PyTorch tutorial](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html)

In [0]:
# --- Training hyperparameters ---
batch_size = 64  # mini-batch size handed to the DataLoaders
learning_rate = 1e-3  # step size used by the SGD optimiser
num_epochs = 5  # number of full passes over the training set

# --- Network shape ---
input_size = 28 * 28  # length of one flattened input image (assumes 28x28 pixels)
# NOTE(review): MNIST_SAMPLE presumably contains only two digit classes —
# confirm that 10 output units is intended.
num_classes = 10
# Layer widths from input to output: two hidden layers of 512 units each.
activations_per_layer = [input_size, 512, 512, num_classes]

In [0]:
!pip3 install torch

In [0]:
!pip install fastai

In [0]:
from fastai.vision.all import *
import torchvision
import torchvision.transforms as transforms

import torch.nn.functional as F

In [0]:
# Fix the seed (2*33 = 66) before any data splitting, shuffling or weight
# initialisation so that repeated runs of the notebook are comparable.
set_seed(2*33) # Set random seed for random, torch, and numpy (where available)

## Load data

In [0]:
# fastai's untar_data fetches and extracts the dataset archive and returns its
# local folder; `path` is used below as the root of the image directories.
path = untar_data(URLs.MNIST_SAMPLE)
# NOTE(review): BASE_PATH looks like a fastai setting for how Path objects are
# displayed — confirm it is really required by ImageFolder as stated here.
Path.BASE_PATH = path # important for torchvision.datasets.ImageFolder

In [0]:
# Per-image preprocessing, applied in this order:
#   1. collapse the image to a single grayscale channel,
#   2. convert it to a tensor,
#   3. normalise with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

In [0]:
# Load every image found under <path>/train, applying `transform` to each one.
full_dataset = torchvision.datasets.ImageFolder(
    root=(path/"train").as_posix(),
    transform=transform,
)

# Random 80/20 split: the first 80% (rounded down) is used for training and
# whatever remains becomes the validation set.
train_size = int(0.8 * len(full_dataset))
valid_size = len(full_dataset) - train_size
training_set, validation_set = torch.utils.data.random_split(
    dataset=full_dataset,
    lengths=[train_size, valid_size],
)

In [0]:
# Only the training batches are shuffled; validation keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=training_set, batch_size=batch_size, shuffle=True
)
validation_loader = torch.utils.data.DataLoader(
    dataset=validation_set, batch_size=batch_size
)
# Both loaders, keyed by phase name.
dataloaders = dict(train=train_loader, validation=validation_loader)

## Define model

In [0]:
def init_params(size, std=1.0):
  """Create a trainable tensor filled with scaled standard-normal samples.

  Args:
    size: Shape handed straight to ``torch.randn`` (an int or a tuple of ints).
    std: Factor every sample is multiplied by.

  Returns:
    A tensor of shape ``size`` with ``requires_grad`` switched on.
  """
  samples = torch.randn(size)
  scaled = samples * std
  return scaled.requires_grad_()

def relu(x):
  """Rectified linear unit: element-wise maximum of ``x`` and zero."""
  zero = tensor(0.0)
  return torch.max(x, zero)


class SimpleNN:
  """Fully connected network whose weights and biases live in a plain dict.

  Layer ``k`` (counting from 1) owns ``parameters['W<k>']`` and
  ``parameters['b<k>']``. ReLU follows every layer except the last one.
  """

  def __init__(self, activations_per_layer):
    """Allocate one weight matrix and one bias vector per layer transition.

    Args:
      activations_per_layer: Layer widths from input to output; each pair of
        consecutive entries gives the shape of one weight matrix.
    """
    self.activations_per_layer = activations_per_layer
    self.num_layers = len(activations_per_layer)
    self.parameters = {}
    layer_shapes = zip(activations_per_layer, activations_per_layer[1:])
    for index, (n_in, n_out) in enumerate(layer_shapes, start=1):
      # Weight first, then bias, so the random draws happen in a fixed order.
      self.parameters[f'W{index}'] = init_params((n_in, n_out))
      self.parameters[f'b{index}'] = init_params(n_out)
    print("Initialised neural network with:")
    for name, value in self.parameters.items():
      print(f'{name}: {value.shape}')

  def forward(self, xb):
    """Compute the network output for one batch.

    Args:
      xb: Input batch; it is reshaped to ``(-1, activations_per_layer[0])``
        and cast to float before the first layer.

    Returns:
      The output of the last layer, with no activation applied to it.
    """
    out = xb.reshape((-1, self.activations_per_layer[0])).float()
    last = self.num_layers - 1
    for index in range(1, self.num_layers):
      weight = self.parameters[f'W{index}']
      bias = self.parameters[f'b{index}']
      out = out @ weight + bias
      # Hidden layers are followed by ReLU; the final layer stays linear.
      if index != last:
        out = relu(out)
    return out

## Define SGD optimiser and required methods

In [0]:
def loss(preds, yb):
  """Cross-entropy between raw class scores and integer class labels.

  Args:
    preds: Unnormalised scores (logits) for each sample.
    yb: Target class indices for the same samples.

  Returns:
    The value produced by ``F.cross_entropy`` with its default settings.
  """
  # TODO: replace the library call with a from-scratch implementation.
  # Cross-entropy here is log-softmax followed by negative log-likelihood.
  return F.cross_entropy(input=preds, target=yb)


def train_epoch(feed_forward, opt, criterion, dl):
    """Run one pass of mini-batch gradient descent over ``dl``.

    Args:
        feed_forward: Callable mapping an input batch to predictions.
        opt: Optimiser exposing ``zero_grad()`` and ``step()``.
        criterion: Callable ``(preds, targets)`` whose result supports
            ``backward()``.
        dl: Iterable yielding ``(inputs, targets)`` batches.
    """
    for inputs, targets in dl:
        batch_loss = criterion(feed_forward(inputs), targets)
        # Clear stale gradients before back-propagating this batch's loss.
        opt.zero_grad()
        batch_loss.backward()
        opt.step()  # apply the parameter update

        
def validate_epoch(feed_forward, valid_dl):
  """Compute classification accuracy, in percent, over a validation set.

  The denominator is the number of samples actually iterated rather than
  ``len(valid_dl.dataset)``. The figure therefore stays correct when the
  loader does not yield every sample (e.g. ``drop_last=True`` or a subset
  sampler), and any iterable of ``(inputs, targets)`` batches is accepted.

  Args:
    feed_forward: Callable mapping an input batch to per-class scores, with
      classes along dimension 1.
    valid_dl: Iterable yielding ``(inputs, targets)`` batches; targets hold
      one class index per sample along dimension 0.

  Returns:
    Accuracy as a float between 0 and 100; ``0.0`` if no samples were seen.
  """
  correct = 0
  seen = 0
  with torch.no_grad():  # evaluation only: no gradients are needed
    for xb, yb in valid_dl:
      pred = feed_forward(xb)
      # The predicted class is the index of the highest score in each row.
      correct += (pred.argmax(1) == yb).sum().item()
      seen += yb.shape[0]
  if seen == 0:
    # Nothing to evaluate; avoid dividing by zero.
    return 0.0
  return 100 * (correct / seen)


def train_model(model, criterion, epochs, opt, dl, valid_dl):
    """Train ``model`` for ``epochs`` epochs, printing validation accuracy.

    Args:
        model: Object whose ``forward`` method maps a batch to predictions.
        criterion: Loss callable passed on to ``train_epoch``.
        epochs: Number of passes over ``dl``.
        opt: Optimiser passed on to ``train_epoch``.
        dl: Training batches.
        valid_dl: Validation batches, evaluated after every epoch.
    """
    forward = model.forward
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}\n-------------------------------")
        train_epoch(forward, opt, criterion, dl)
        accuracy = validate_epoch(forward, valid_dl)
        print(f"Validation Accuracy: {accuracy:>0.1f}%\n")

In [0]:
class SGDOptim:
  """Plain stochastic gradient descent over a dict of parameter tensors."""

  def __init__(self, params, learning_rate):
    """Store the parameters to optimise and the step size.

    Args:
      params: Mapping of name -> tensor; only its values are used.
      learning_rate: Factor each gradient is multiplied by before it is
        subtracted from its parameter.
    """
    self.params = params
    self.learning_rate = learning_rate

  def step(self, *args, **kwargs):
    """Apply ``p <- p - learning_rate * p.grad`` to every parameter.

    Parameters whose ``grad`` is ``None`` (no gradient has been computed for
    them yet) are skipped instead of raising, mirroring the guard in
    ``zero_grad``. Extra arguments are accepted and ignored.
    """
    for p in self.params.values():
      if p.grad is None:
        continue
      # Write through .data so the update itself is not tracked by autograd.
      p.data -= p.grad * self.learning_rate

  def zero_grad(self, *args, **kwargs):
    """Reset every existing gradient to zero in place.

    Extra arguments are accepted and ignored.
    """
    for p in self.params.values():
      if p.grad is not None:
        p.grad.zero_()

## Optimise parameters (train the model)

In [0]:
# Build the network, then hand its parameter dict to the optimiser.
simple_nn = SimpleNN(activations_per_layer=activations_per_layer)
opt = SGDOptim(params=simple_nn.parameters, learning_rate=learning_rate)

In [0]:
# Run the full training loop, reporting validation accuracy after each epoch.
train_model(
    model=simple_nn,
    criterion=loss,
    epochs=num_epochs,
    opt=opt,
    dl=train_loader,
    valid_dl=validation_loader,
)