# Multi-Layer Perceptron

In [None]:
import numpy as np
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
%matplotlib inline  
%config InlineBackend.figure_format='retina'
# Report which PyTorch build this notebook is running against.
print("PyTorch version:[%s]." % torch.__version__)

# Device Configuration: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("This notebook use [%s]." % device)

### Helper functions

In [None]:
def _make_activation(actv_cls):
    """Instantiate an activation class, in-place when the class supports it."""
    try:
        return actv_cls(inplace=True)
    except TypeError:
        # Activations such as nn.Tanh / nn.Sigmoid accept no `inplace` argument.
        return actv_cls()


def mlp(in_features=3, h_dims=(256, 256), actv=nn.ReLU, out_actv=nn.ReLU, dev=None):
    """
    Build a multi-layer perceptron as an `nn.Sequential`.

    One `nn.Linear` is created per entry of `h_dims`. Every linear layer except
    the last is followed by `actv`; the last one is followed by `out_actv` when
    that is given. Each layer's weight is replaced by samples drawn with
    `torch.randn`, while the bias keeps the default `nn.Linear` initialization.

    Args:
        in_features: Size of each input sample.
        h_dims: Output sizes of the successive linear layers; the last entry is
            the output size of the network. Must not be empty.
        actv: Activation class (not an instance) inserted after every layer but
            the last. A falsy value inserts no activation.
        out_actv: Activation class inserted after the last layer, or a falsy
            value (e.g. None) for a purely linear output.
        dev: Device on which the layers are created. When None, the
            module-level `device` is used.

    Returns:
        The assembled `nn.Sequential` model.

    Raises:
        ValueError: If `h_dims` is empty.
    """
    layer_dims = list(h_dims)
    if not layer_dims:
        raise ValueError("h_dims must contain at least one layer size")
    target = device if dev is None else dev

    layers = []
    last_idx = len(layer_dims) - 1
    for idx, h_dim in enumerate(layer_dims):
        # Sample the weight before constructing nn.Linear (which draws its own
        # default initialization) so the random stream order stays stable for
        # seeded runs.
        weight = torch.randn(h_dim, in_features, dtype=torch.float, device=target)
        linear = nn.Linear(in_features, h_dim).to(target)
        linear.weight = nn.Parameter(weight)
        layers.append(linear)

        layer_actv = actv if idx < last_idx else out_actv
        if layer_actv:
            layers.append(_make_activation(layer_actv))

        in_features = h_dim

    return nn.Sequential(*layers)

print("Ready.")

### Dataset

In [None]:
def _as_flat_unit_images(raw_images):
    """Divide raw pixel values by 255 and flatten every sample into one row."""
    scaled = raw_images.unsqueeze(1) / 255.
    return torch.flatten(scaled, 1)


# Training split; download=True asks torchvision to fetch the data.
train_dataset = datasets.MNIST('../data/mnist_data/', train=True, download=True)
x_train = _as_flat_unit_images(train_dataset.data)
y_train = train_dataset.targets
n_train = len(train_dataset.data)  # number of training samples

# Test split, preprocessed the same way as the training split.
test_dataset = datasets.MNIST("../data/mnist_data/", train=False, download=True)
x_test = _as_flat_unit_images(test_dataset.data)
y_test = test_dataset.targets

### Model

In [None]:
class MultiLayerPerceptronClsClass(object):
    """
    MLP for classification, bundled with its loss, optimizer and
    training / evaluation steps.

    Attributes:
        name: Label printed when the model is instantiated.
        x_dim: Input feature size.
        y_dim: Number of output classes.
        h_dims: Hidden layer sizes (stored as a private list copy).
        actv: Activation class used after the hidden layers.
        net: The network returned by `mlp`.
        loss: `nn.CrossEntropyLoss` instance.
        optim: Adam optimizer over `net`'s parameters (lr=0.001).
        main_vars: Total number of scalar parameters in `net`.
    """
    def __init__(self, name='mlp', x_dim=784, y_dim=10, h_dims=(128, 128), actv=nn.ReLU):
        """
        Store the hyperparameters, build the network and count its parameters.

        Args:
            name: Label for this model.
            x_dim: Input feature size.
            y_dim: Number of output classes.
            h_dims: Sizes of the hidden layers.
            actv: Activation class used after the hidden layers.
        """
        self.name = name
        self.x_dim = x_dim
        self.y_dim = y_dim
        # Copy so the caller's sequence (or the default) is never shared.
        self.h_dims = list(h_dims)
        self.actv = actv
        self.build_model()
        self.main_vars = sum(parameter.numel() for parameter in self.net.parameters())
        print("[%s] instantiated."%(self.name))

    def build_model(self):
        """
        Create the network, the cross-entropy loss and the Adam optimizer.

        The network has no output activation: `nn.CrossEntropyLoss` is applied
        directly to the outputs of the last linear layer.
        """
        self.net = mlp(in_features=self.x_dim, h_dims=self.h_dims+[self.y_dim],
                         actv=self.actv, out_actv=None)
        self.loss = nn.CrossEntropyLoss()
        self.optim = optim.Adam(self.net.parameters(), lr=0.001)

    def update(self, x_batch, y_batch):
        """
        Run one optimization step on a mini-batch.

        Args:
            x_batch: Input batch passed to the network.
            y_batch: Target class indices for the batch.

        Returns:
            The batch loss as a tensor detached from the autograd graph, so
            callers can accumulate it without keeping graph nodes alive.
        """
        y_pred = self.net(x_batch)
        cost_val = self.loss(y_pred, y_batch)
        self.optim.zero_grad()
        cost_val.backward()
        self.optim.step()
        return cost_val.detach()

    def test(self, test_x, test_y, batch_size):
        """
        Compute the classification accuracy on a labelled set.

        Args:
            test_x: Inputs, indexable along the first dimension.
            test_y: Target class indices aligned with `test_x`.
            batch_size: Number of samples evaluated per forward pass.

        Returns:
            Accuracy in percent (0-100) as a float.

        Raises:
            ValueError: If `test_x` is empty or `batch_size` is not positive.
        """
        n_test = len(test_x)
        if n_test == 0:
            raise ValueError("test_x must contain at least one sample")
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")

        # Evaluate on whatever device the network's parameters live on.
        net_device = next(self.net.parameters()).device
        total = 0
        correct = 0
        with torch.no_grad():
            # Accuracy does not depend on sample order, so walk the data in
            # sequential slices instead of shuffling.
            for start in range(0, n_test, batch_size):
                stop = start + batch_size
                x_batch = test_x[start:stop].to(net_device)
                y_batch = test_y[start:stop].to(net_device)
                y_pred = self.net(x_batch)
                _, predicted = torch.max(y_pred, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()
        return 100 * correct / total
print ("Ready.")

### Instantiate Model

In [None]:
# Build the classifier with the constructor's default arguments
# (784 inputs, two hidden layers of 128 units, 10 output classes).
M = MultiLayerPerceptronClsClass()

In [None]:
# List every parameter of the network with its index, name and shape.
for v_idx, named_param in enumerate(M.net.named_parameters()):
    name, var = named_param
    print(v_idx, name, var.shape)

### Loop

In [None]:
# Training hyperparameters: number of epochs, mini-batch size, and how often
# (in epochs) the test accuracy is reported.
max_epoch, batch_size, print_every = 20, 128, 1

# Number of mini-batches per epoch; the last one may be smaller.
# The built-in int is used because the `np.int` alias no longer exists in
# current NumPy releases.
max_iter = int(np.ceil(n_train / batch_size))
for epoch in range(max_epoch):
    p_idx = np.random.permutation(n_train)  # fresh shuffle every epoch
    cost_val_sum, cnt = 0.0, 0
    for it in range(max_iter):
        b_idx = p_idx[batch_size*it:batch_size*(it+1)]
        x_batch, y_batch = x_train[b_idx].to(device), y_train[b_idx].to(device)
        cost_val = M.update(x_batch=x_batch, y_batch=y_batch)
        # Accumulate a plain Python float so no tensors (or autograd graph
        # nodes) pile up over the epoch; weight by the batch size so the
        # epoch average is a per-sample average.
        cost_val_sum += cost_val.item() * len(b_idx)
        cnt += len(b_idx)
    cost_val_avg = cost_val_sum / cnt
    if ((epoch % print_every) == 0) or (epoch == (max_epoch - 1)):
        accr_val = M.test(x_test, y_test, batch_size)
        print("epoch:[%d/%d] cost:[%.3f] test_accuracy:[%.3f]" %
              (epoch, max_epoch, cost_val_avg, accr_val))
print("Done.")