# 3. A Concise Implementation of MLP

In [1]:
import time
import math
import random

import numpy as np
import pandas as pd
from IPython import display
import matplotlib.pyplot as plt

import torch
import torchvision
from torch import nn
from torch.utils import data
from torchvision import transforms

## Helper Functions

In [2]:
class Accumulator:
    '''
    Keep running totals for a fixed number of quantities.

    The totals live in `self.data` as a list of floats, one slot per quantity.
    '''

    def __init__(self, n):
        # One zero-initialised slot per tracked quantity.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        '''
        Add each positional argument to the slot at the same position.

        Slots and arguments are paired with zip, so the stored list ends up
        as long as the shorter of the two.
        '''
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        '''
        Set every slot back to zero, keeping the current number of slots.
        '''
        slot_count = len(self.data)
        self.data = [0.0] * slot_count

    def __getitem__(self, idx):
        # Delegate indexing (and slicing) to the underlying list.
        return self.data[idx]

## Import Data

In [3]:
def get_fashion_mnist_labels(labels):
    '''
    Translate numeric class indices into their text labels.

    Each element of `labels` is converted with int() and used as an index
    into the ten class names; the names are returned as a list in the same
    order as the input.
    '''
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names

In [4]:
def load_data(batch_size, resize=None, num_workers=4, root='../data'):
    '''
    Function to create the Fashion-MNIST training and test data iterators.

    Args:
        batch_size: number of samples per mini-batch for both iterators.
        resize: optional size passed to transforms.Resize; when given, images
            are resized before being converted to tensors.
        num_workers: number of worker processes used by each DataLoader.
            Defaults to 4; pass 0 to load data in the main process (useful on
            platforms where multi-process loading is problematic).
        root: directory where the dataset is stored (downloaded there if it
            is not already present). Defaults to '../data'.

    Returns:
        A (train_iter, test_iter) tuple of DataLoaders. The training iterator
        is shuffled, the test iterator is not.
    '''
    trans = [transforms.ToTensor()]
    if resize:
        # Resize must run before ToTensor, so it goes to the front of the pipeline.
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=num_workers),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=num_workers))

In [5]:
# Mini-batch size used for both the training and the test iterator.
batch_size = 256
# Build the Fashion-MNIST iterators (no resize, so images keep their stored size).
train_iter, test_iter = load_data(batch_size)

## Model

In [6]:
# Multilayer perceptron: flatten each input, one hidden layer of 256 units
# with ReLU, then a 10-way output layer (one output per class label).
# The 784 input features presumably correspond to 28x28 images - TODO confirm.
net = nn.Sequential(nn.Flatten(),
                    nn.Linear(784, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))

## Initialize Weights

In [7]:
def init_weights(m):
    '''
    Initialise the weights of a linear layer from a normal distribution.

    Only modules whose type is exactly nn.Linear are touched; their weight
    matrix is filled in place with samples of standard deviation 0.01.
    Biases and all other module types are left unchanged.
    '''
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0.0, std=0.01)

In [10]:
# Run init_weights on the modules of net so the linear layers get their custom weight initialisation.
net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

## Loss Function

In [11]:
# reduction='none' keeps the loss unreduced; the training loop reduces it
# itself (mean for the gradient step, sum for the running loss total).
loss = nn.CrossEntropyLoss(reduction='none')

## Optimizer

In [12]:
# Learning rate for stochastic gradient descent.
lr = 0.1
# Plain SGD over all parameters of the network.
trainer = torch.optim.SGD(net.parameters(), lr=lr)

## Evaluation

In [13]:
def accuracy(y_hat, y):
    '''
    Return the number of correct predictions as a float.

    When `y_hat` has more than one dimension and more than one column, it is
    treated as per-class scores and reduced to the index of the largest score
    in each row. Otherwise it is compared against `y` as given (after casting
    to the dtype of `y`).
    '''
    has_class_scores = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(dim=1) if has_class_scores else y_hat
    # Cast to the label dtype so the element-wise comparison is like-for-like.
    matches = predicted.to(y.dtype) == y
    return float(matches.to(y.dtype).sum())

In [14]:
def evaluate_accuracy(net, data_iter):
    '''
    Compute the accuracy of `net` over every mini-batch in `data_iter`.

    If `net` is a torch.nn.Module it is switched to evaluation mode first.
    Predictions are made with gradient tracking disabled. Returns the number
    of correct predictions divided by the number of labels seen.
    '''
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: correct predictions so far, slot 1: labels seen so far.
    totals = Accumulator(2)
    with torch.no_grad():
        for features, targets in data_iter:
            n_correct = accuracy(net(features), targets)
            totals.add(n_correct, targets.numel())
    n_correct_total, n_seen = totals[0], totals[1]
    return n_correct_total / n_seen

## Training

In [15]:
def train_epoch(net, train_iter, loss, updater):
    '''
    Run one pass over `train_iter`, updating the model after every mini-batch.

    If `net` is a torch.nn.Module it is switched to training mode first.
    `updater` may be a torch.optim.Optimizer (gradients are zeroed, the mean
    loss is back-propagated and step() is called) or any other callable (the
    summed loss is back-propagated and the callable receives the batch size).

    Returns a tuple (average loss per sample, fraction of correct predictions).
    '''
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed loss, correct predictions, number of samples.
    totals = Accumulator(3)
    for features, targets in train_iter:
        outputs = net(features)
        batch_loss = loss(outputs, targets)
        if uses_torch_optimizer:
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            batch_loss.sum().backward()
            updater(features.shape[0])
        totals.add(float(batch_loss.sum()), accuracy(outputs, targets), targets.numel())
    n_samples = totals[2]
    return totals[0] / n_samples, totals[1] / n_samples

In [16]:
def train(net, train_iter, test_iter, loss, num_epochs, updater):
    '''
    Train `net` for `num_epochs` epochs, printing metrics after each epoch.

    Each epoch runs train_epoch on `train_iter` and then evaluate_accuracy on
    `test_iter`. After the last epoch, sanity checks assert that the final
    training loss is below 0.5 and that the final training and test accuracy
    are greater than 0.7 and at most 1.

    Raises:
        ValueError: if `num_epochs` is less than 1, since there would be no
            metrics to report or check.
        AssertionError: if the final metrics miss the thresholds above.
    '''
    if num_epochs < 1:
        # Without at least one epoch the checks below would read variables
        # that were never assigned; fail early with a clear message instead.
        raise ValueError(f'num_epochs must be at least 1, got {num_epochs}')
    for epoch in range(num_epochs):
        train_metrics = train_epoch(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(f'Epoch {epoch+1}')
        print(f'Training loss: {train_metrics[0]}')
        print(f'Training accuracy: {train_metrics[1]}')
        print(f'Test accuracy: {test_acc}')
    # Sanity-check the metrics of the final epoch.
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

In [18]:
# Number of full passes over the training data.
num_epochs = 10
# Train the network; per-epoch loss and accuracy are printed as training progresses.
train(net, train_iter, test_iter, loss, num_epochs, trainer)

Epoch 1
Training loss: 1.0385905300776164
Training accuracy: 0.6374
Test accuracy: 0.7513
Epoch 2
Training loss: 0.592103688176473
Training accuracy: 0.7914833333333333
Test accuracy: 0.8021
Epoch 3
Training loss: 0.5178286349614462
Training accuracy: 0.8177666666666666
Test accuracy: 0.8016
Epoch 4
Training loss: 0.4807955680847168
Training accuracy: 0.8310166666666666
Test accuracy: 0.8245
Epoch 5
Training loss: 0.44789006741841636
Training accuracy: 0.8431833333333333
Test accuracy: 0.8288
Epoch 6
Training loss: 0.4307175532341003
Training accuracy: 0.8489333333333333
Test accuracy: 0.8389
Epoch 7
Training loss: 0.41489901943206786
Training accuracy: 0.8543666666666667
Test accuracy: 0.8305
Epoch 8
Training loss: 0.4013115811665853
Training accuracy: 0.8583333333333333
Test accuracy: 0.8322
Epoch 9
Training loss: 0.3918970505396525
Training accuracy: 0.8613333333333333
Test accuracy: 0.8443
Epoch 10
Training loss: 0.38094614283243816
Training accuracy: 0.8650833333333333
Test accura