# Coding up a Simple Neural Network from Scratch

In this demo we will code up a simple neural network using (i) PyTorch and (ii) from scratch. Then we will demonstrate that both produce _identical_ outcome.


## A simple DNN from scratch

In [1]:
import numpy as np
import random

class OurLinear:

    def __init__(self):
        self.w = random.uniform(-1, 1)
        self.b = random.uniform(-1, 1)
        
    def forward(self, x):
        self.x = x
        return x * self.w + self.b
    
    def backward(self, grad_out, lr):
        grad_w = grad_out * self.x
        grad_b = grad_out
        grad_in = grad_out * self.w
        self.w = self.w - lr * grad_w
        self.b = self.b - lr * grad_b
        return grad_in

In [2]:
class OurTanh:
        
    def forward(self, x):
        self.y = np.tanh(x)
        return self.y
    
    def backward(self, grad_out, lr):
        grad_in = grad_out * (1 - self.y**2)
        return grad_in

In [3]:
class OurDNN:

    def __init__(self, num_layers):
        self.layers = []
        for i in range(num_layers):
            self.layers.append(OurLinear())
            self.layers.append(OurTanh())
        
    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x
    
    def backward(self, grad_out, lr):
        for layer in self.layers:
            grad_out = layer.backward(grad_out, lr)
        return grad_out

## A simple DNN in PyTorch

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class PyDNN(torch.nn.Module):
    
    def __init__(self, num_layers):
        super(PyDNN, self).__init__()
        layers = []
        for i in range(num_layers):
            layers.append(nn.Linear(1, 1))
            layers.append(nn.Tanh())
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

## A simple mean squared loss from scratch

In [5]:
class OurMSELoss:

    def forward(self, x, y):
        self.x = x
        self.y = y
        return (y - x)**2
    
    def backward(self):
        grad_in = -2 * (self.y - self.x)
        return grad_in

## A simple dataset

In [6]:
from torch.utils import data

class SimpleDataset(data.Dataset):

    def __init__(self, func, num_samples=50):
        super(SimpleDataset, self).__init__()
        self.num_samples = num_samples
        xs = [random.uniform(-1, 1) for i in range(self.num_samples)]
        self.samples = [(torch.from_numpy(np.asarray([x], dtype=np.float32)), torch.from_numpy(np.asarray([func(x)], dtype=np.float32))) for x in xs]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]

### Methods to get and set DNN parameters

In [7]:
def get_py_params(py_dnn):
    params = {}
    params['ws'] = []
    params['bs'] = []
    for name, param in py_dnn.named_parameters():
        if param.requires_grad:
            if name.endswith('weight'):
                params['ws'].append(param.data[0].item())
            elif name.endswith('bias'):
                params['bs'].append(param.data[0].item())
    return params

def get_our_params(our_dnn):
    params = {}
    params['ws'] = []
    params['bs'] = []
    for layer in our_dnn.layers:
        if isinstance(layer, OurLinear):
            params['ws'].append(layer.w)
            params['bs'].append(layer.b)
    return params

def format_params(params):
    return 'params = {}'.format({k : [round(x, 3) for x in v] for k, v in params.items()})

def copy_params(py_dnn, our_dnn):
    py_params = get_py_params(py_dnn)
    for i in range(len(py_params['ws'])):
        our_dnn.layers[i].w = py_params['ws'][i]
        our_dnn.layers[i].b = py_params['bs'][i]

## Define the function we want to learn

In [8]:
def some_function_we_want_to_learn(x):
    return np.tanh(-0.6 * x - 0.9)

## Initialize model and data loader

In [9]:
from torch.utils.data import DataLoader

num_layers = 1
lr = 0.01
epoch_size = 50
num_epochs = 10

dataset = SimpleDataset(some_function_we_want_to_learn, num_samples = epoch_size * num_epochs)
dataloader = DataLoader(dataset, shuffle=False, batch_size=1)

py_dnn = PyDNN(num_layers)
our_dnn = OurDNN(num_layers)

## Initialize both models to the same random start state

In [10]:
# initialize to the same starting parameters
copy_params(py_dnn, our_dnn)

## Train PyDNN model

In [11]:
py_params = get_py_params(py_dnn)
py_loss = nn.MSELoss()
py_opt = optim.SGD(py_dnn.parameters(), lr=lr)
py_dnn.train()
batch_idx = 0
print('[before training]\t{}'.format(format_params(py_params)))
for _, (x, y) in enumerate(dataloader):
    py_opt.zero_grad()
    out = py_dnn(x)
    loss = py_loss(out, y)
    loss.backward()
    py_opt.step()
    batch_idx += 1
    py_params = get_py_params(py_dnn)
    if batch_idx % epoch_size == 0:
        print('[after batch {}] loss = {:.3f} {}'.format(batch_idx, loss.item(), format_params(py_params)))

[before training]	params = {'ws': [-0.157], 'bs': [-0.955]}
[after batch 50] loss = 0.002 params = {'ws': [-0.2], 'bs': [-0.916]}
[after batch 100] loss = 0.001 params = {'ws': [-0.24], 'bs': [-0.886]}
[after batch 150] loss = 0.004 params = {'ws': [-0.262], 'bs': [-0.875]}
[after batch 200] loss = 0.002 params = {'ws': [-0.29], 'bs': [-0.861]}
[after batch 250] loss = 0.007 params = {'ws': [-0.319], 'bs': [-0.843]}
[after batch 300] loss = 0.021 params = {'ws': [-0.345], 'bs': [-0.833]}
[after batch 350] loss = 0.009 params = {'ws': [-0.364], 'bs': [-0.831]}
[after batch 400] loss = 0.002 params = {'ws': [-0.383], 'bs': [-0.828]}
[after batch 450] loss = 0.013 params = {'ws': [-0.398], 'bs': [-0.826]}
[after batch 500] loss = 0.006 params = {'ws': [-0.412], 'bs': [-0.827]}


## Train the OurDNN model

In [12]:
our_params = get_our_params(our_dnn)
our_loss = OurMSELoss()
batch_idx = 0
print('[before training]\t{}'.format(format_params(our_params)))
for _, (x, y) in enumerate(dataloader):
    out = our_dnn.forward(x.item())
    loss = our_loss.forward(out, y.item())
    grad = our_loss.backward()
    our_dnn.backward(grad, lr)
    batch_idx += 1
    our_params = get_our_params(our_dnn)
    if batch_idx % epoch_size == 0:
        print('[after batch {}] loss = {:.3f} {}'.format(batch_idx, loss, format_params(our_params)))

[before training]	params = {'ws': [-0.157], 'bs': [-0.955]}
[after batch 50] loss = 0.002 params = {'ws': [-0.235], 'bs': [-0.898]}
[after batch 100] loss = 0.001 params = {'ws': [-0.298], 'bs': [-0.872]}
[after batch 150] loss = 0.002 params = {'ws': [-0.333], 'bs': [-0.87]}
[after batch 200] loss = 0.001 params = {'ws': [-0.37], 'bs': [-0.866]}
[after batch 250] loss = 0.003 params = {'ws': [-0.405], 'bs': [-0.857]}
[after batch 300] loss = 0.009 params = {'ws': [-0.435], 'bs': [-0.857]}
[after batch 350] loss = 0.004 params = {'ws': [-0.457], 'bs': [-0.862]}
[after batch 400] loss = 0.000 params = {'ws': [-0.476], 'bs': [-0.864]}
[after batch 450] loss = 0.005 params = {'ws': [-0.491], 'bs': [-0.865]}
[after batch 500] loss = 0.002 params = {'ws': [-0.504], 'bs': [-0.869]}
