In [1]:
import sys
sys.path.append('../')
from lib.layers import *
from lib.utils import *
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import numpy as np
from IPython import embed

In [2]:
# Download data
MNIST_MEAN = 0.1307
MNIST_STDEV = 0.3081

def get_mnist(get_train_set):
  return torchvision.datasets.MNIST(
      root="data",
      train=get_train_set,
      download=True,
      # For demonstration
      transform=torchvision.transforms.Compose([
          torchvision.transforms.ToTensor(),
          torchvision.transforms.Normalize((MNIST_MEAN,), (MNIST_STDEV,))
      ])
  )

trn_set = get_mnist(get_train_set=True)
val_set = get_mnist(get_train_set=False)  # Technically the test set

In [3]:
class NN(Network):

    def __init__(self):
        self.layers = [
            FC(28*28, 5, "relu"),
            FC(5, 10)
        ]

    def __call__(self, inputs):
        output = []
        inputs = inputs.reshape(-1, 28*28)
        for input in inputs:
            x = self.layers[0](input)
            x = self.layers[1](x)
            output.append(x)
        return np.array(output)

In [4]:
class NN2(Network):

    def __init__(self):
        self.layers = [
            FC(28*28, 64, "relu"),
            FC(64, 64, "relu"),
            FC(64, 10)
        ]

    def __call__(self, inputs):
        output = []
        inputs = inputs.reshape(-1, 28*28)
        for input in inputs:
            x = self.layers[0](input)
            x = self.layers[1](x)
            x = self.layers[2](x)
            output.append(x)
        return np.array(output)

In [5]:
# Data formatting
NUM_DATA = 1000
trn_set_np = []
trn_set_np_labels = []
for d in range(NUM_DATA):
    trn_set_np.append(np.array(trn_set[d][0][0]))
    trn_set_np_labels.append(np.array(trn_set[d][1]))
trn_set_np = np.array(trn_set_np)
trn_set_np_labels = np.array(trn_set_np_labels)
# trn_set_one_hot_labels = np.zeros((len(trn_set_np_labels), 10))
# for i, l in enumerate(trn_set_np_labels):
#     trn_set_one_hot_labels[i][l] = 1

# val_set_np = np.array(val_set[:][])
# val_set_np_labels = np.array(val_set.targets)

In [6]:
def cross_entropy_loss(outputs, labels):
    loss = 0
    for out, l in zip(outputs, labels):
        numerator = np.sum([np.e ** o for o in out])
        denominator = np.e ** out[l]
        loss += (numerator/denominator).log()
    return loss


In [7]:
# Training
nn = NN2()
timer = Timer()
NUM_EPOCHS = 30
BATCH_SIZE = 32
LEARNING_RATE = 0.001

for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    running_correct = 0.0
    batch_data = np.split(trn_set_np, len(trn_set_np) / BATCH_SIZE)
    batch_labels = np.split(trn_set_np_labels, len(trn_set_np) / BATCH_SIZE)
    timer.start()
    for d, l in zip(batch_data, batch_labels):
        nn.zero_grad()

        # Forward pass
        timer.start()
        outputs = nn(d)
        loss = cross_entropy_loss(outputs, l)
        timer.stop('forward')

        # Backward pass
        timer.start()
        loss.backward()
        timer.stop('backward')
        
        # Update weights
        for p in nn.parameters():
            p.val = p.val + -1 * LEARNING_RATE * p.grad

        # Metrics
        running_loss += loss.val
        outputs_vals = np.array([o.val for o in outputs.flatten()]).reshape(outputs.shape)
        running_correct += np.sum(np.argmax(outputs_vals, axis=1) == l)
    
    avg_trn_loss = running_loss / len(batch_data)
    avg_trn_acc = running_correct / len(trn_set_np)

    print(f"Epoch: {epoch} | Trn Loss: {avg_trn_loss} | Trn Acc: {avg_trn_acc} | Time elapsed: {timer.stop(None)}")
        


forward | 19.725831747055054


OverflowError: (34, 'Result too large')