# Multi-Layer Perceptron

In [1]:
import numpy as np
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
%matplotlib inline  
%config InlineBackend.figure_format='retina'
print("PyTorch version:[%s]." % torch.__version__)

# Device Configuration
# Use the first CUDA GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("This notebook use [%s]." % device)

PyTorch version:[1.3.1].
This notebook use [cpu].


### Helper functions

In [2]:
def truncated_normal(size, threshold=1, dtype=torch.float, requires_grad=True):
    """
    Sample a tensor from a standard normal truncated to [-threshold, threshold].

    Args:
        size: shape of the sample, forwarded to ``truncnorm.rvs``.
        threshold: symmetric cut-off applied on both sides of zero.
        dtype: torch dtype the samples are cast to.
        requires_grad: value assigned to the returned tensor's ``requires_grad``.

    Returns:
        A ``torch.Tensor`` of shape ``size`` and type ``dtype``.
    """
    # Imported here so the notebook does not require SciPy unless this helper
    # is actually used (its call sites in `mlp` are commented out).
    from scipy.stats import truncnorm

    samples = truncnorm.rvs(-threshold, threshold, size=size)
    values = torch.from_numpy(samples).type(dtype)
    values.requires_grad = requires_grad
    return values

class mlp(nn.Module):
    def __init__(self, in_features=784, h_dims=[256, 256], 
                    actv=nn.ReLU, out_actv=nn.ReLU, USE_DROPOUT=False):
        """
        Multi-layer perceptron 

        The network is a single ``nn.Sequential``. Every entry of ``h_dims``
        except the last becomes ``Linear -> actv [-> Dropout]``; the last
        entry becomes the output ``Linear``, optionally followed by
        ``out_actv``.

        Args:
            in_features: width of the input vectors.
            h_dims: layer widths; the final entry is the output width.
                Must contain at least one entry.
            actv: activation class used after each hidden layer.
            out_actv: activation class applied to the output, or a falsy
                value (e.g. ``None``) to return the raw linear output.
            USE_DROPOUT: when true, add ``nn.Dropout()`` after each hidden
                activation.

        Raises:
            ValueError: if ``h_dims`` is empty.
        """
        super(mlp, self).__init__()

        h_dims = list(h_dims)
        if not h_dims:
            raise ValueError("h_dims must contain at least one layer width")

        layers = []
        for h_dim in h_dims[:-1]:
            # To initialise from a truncated normal instead of the default:
            # linear.weight = nn.Parameter(truncated_normal(size=(h_dim, in_features)))
            layers.append(nn.Linear(in_features, h_dim))
            layers.append(self._make_activation(actv))
            if USE_DROPOUT:
                layers.append(nn.Dropout())
            in_features = h_dim

        layers.append(nn.Linear(in_features, h_dims[-1]))
        if out_actv:
            layers.append(self._make_activation(out_actv))

        # The attribute name (typo included) is part of the parameter names,
        # e.g. "moedl.0.weight", so it is kept as is.
        self.moedl = nn.Sequential(*layers)

    @staticmethod
    def _make_activation(factory):
        """Instantiate an activation, using inplace=True when it is accepted."""
        try:
            return factory(inplace=True)
        except TypeError:
            # Activations such as nn.Tanh / nn.Sigmoid take no `inplace` flag.
            return factory()
    
    def forward(self, X):
        """Run X through the sequential stack and return its output."""
        return self.moedl(X)

Ready.


### Dataset

In [3]:
def _flat_unit_pixels(dataset):
    """Divide dataset.data by 255 and flatten every axis after the first."""
    scaled = dataset.data.unsqueeze(1) / 255.
    return torch.flatten(scaled, 1)


# Training split: flattened inputs, labels, and the number of samples.
train_dataset = datasets.MNIST('../data/mnist_data/', train=True, download=True)
x_train = _flat_unit_pixels(train_dataset)
y_train = train_dataset.targets
n_train = train_dataset.data.shape[0]

# Test split, prepared the same way.
test_dataset = datasets.MNIST("../data/mnist_data/", train=False, download=True)
x_test = _flat_unit_pixels(test_dataset)
y_test = test_dataset.targets

### Model

In [4]:
class MultiLayerPerceptronClsClass(object):
    """
    MLP for classification

    Bundles the network (``self.net``), a cross-entropy loss (``self.loss``)
    and an Adam optimizer (``self.optim``), and exposes one-step training
    (``update``) and accuracy evaluation (``test``).
    """
    def __init__(self, name='mlp', x_dim=784, y_dim=10, h_dims=[128,128], actv=nn.ReLU):
        """
        Initialize

        Args:
            name: label used in the instantiation message.
            x_dim: input width.
            y_dim: number of output classes.
            h_dims: hidden layer widths.
            actv: activation class for the hidden layers.
        """
        self.name = name
        self.x_dim = x_dim
        self.y_dim = y_dim
        # Copy so the shared default list is never aliased by an instance.
        self.h_dims = list(h_dims)
        self.actv = actv
        self.build_model()
        # Total number of scalar parameters in the network.
        self.main_vars = sum(parameter.numel() for parameter in self.net.parameters())
        print("[%s] instantiated."%(self.name))
        
    def build_model(self):
        """
        Build model

        Creates the network (hidden widths followed by a ``y_dim`` output with
        no output activation), the loss, and the optimizer.
        """
        self.net = mlp(in_features=self.x_dim, h_dims=self.h_dims+[self.y_dim],
                         actv=self.actv, out_actv=None)
        self.loss = nn.CrossEntropyLoss()
        self.optim = optim.Adam(self.net.parameters(), lr=0.001)
    
    def update(self, x_batch, y_batch):
        """
        Update model 

        Runs one forward/backward pass and one optimizer step on the batch.

        Returns:
            The batch loss as a tensor detached from the autograd graph, so
            callers can accumulate it without keeping graph references alive.
        """
        y_pred = self.net(x_batch)
        cost_val = self.loss(y_pred, y_batch)
        self.optim.zero_grad()
        cost_val.backward()
        self.optim.step()
        return cost_val.detach()
    
    def test(self, test_x, test_y, batch_size):
        """
        Test the model

        Evaluates classification accuracy on ``(test_x, test_y)`` in
        mini-batches of ``batch_size``, without tracking gradients.

        Args:
            test_x: inputs, indexable along the first axis.
            test_y: integer class labels aligned with ``test_x``.
            batch_size: positive number of samples per forward pass.

        Returns:
            Accuracy in percent (``100 * correct / total``).
        """
        # Size the loop from the argument, not from a notebook-level global.
        n_test = len(test_x)
        # Evaluate on whatever device the network currently lives on.
        first_param = next(self.net.parameters(), None)
        total = 0
        correct = 0
        # Switch to eval mode for the measurement and restore the prior mode
        # afterwards so an interleaved training loop is unaffected.
        was_training = self.net.training
        self.net.eval()
        try:
            with torch.no_grad():
                # Accuracy does not depend on sample order, so walk the data
                # sequentially instead of through a random permutation.
                for start in range(0, n_test, batch_size):
                    x_batch = test_x[start:start+batch_size]
                    y_batch = test_y[start:start+batch_size]
                    if first_param is not None:
                        x_batch = x_batch.to(first_param.device)
                        y_batch = y_batch.to(first_param.device)
                    y_pred = self.net(x_batch)
                    _, predicted = torch.max(y_pred, 1)
                    total += y_batch.size(0)
                    correct += (predicted == y_batch).sum().item()
        finally:
            self.net.train(was_training)
        val_acc = (100 * correct / total)
        return val_acc
print("Ready.")

Ready.


### Instantiate Model

In [5]:
# Build the classifier with its constructor defaults
# (x_dim=784, y_dim=10, h_dims=[128,128], ReLU activations).
M = MultiLayerPerceptronClsClass()
# Place the network on the device selected in the setup cell.
M.net = M.net.to(device)

[mlp] instantiated.


In [6]:
# Print the index, name and shape of every parameter tensor in the network.
for v_idx, named_var in enumerate(M.net.named_parameters()):
    name, var = named_var
    print(v_idx, name, var.shape)

0 moedl.0.weight torch.Size([128, 784])
1 moedl.0.bias torch.Size([128])
2 moedl.2.weight torch.Size([128, 128])
3 moedl.2.bias torch.Size([128])
4 moedl.4.weight torch.Size([10, 128])
5 moedl.4.bias torch.Size([10])


### Training Loop

In [7]:
max_epoch,batch_size,print_every = 20,128,1

# np.int was removed in NumPy 1.24; the builtin int gives the same result.
max_iter = int(np.ceil(n_train/batch_size)) # number of iterations
for epoch in range(max_epoch):
    # Visit the training set in a fresh random order every epoch.
    p_idx = np.random.permutation(n_train)
    cost_val_sum,cnt = 0.0,0
    for it in range(max_iter):
        b_idx = p_idx[batch_size*(it):batch_size*(it+1)]
        x_batch,y_batch = x_train[b_idx].to(device), y_train[b_idx].to(device)
        cost_val = M.update(x_batch=x_batch,y_batch=y_batch)
        # Accumulate a plain float so no autograd history builds up across
        # the epoch; weight by batch size because the last batch may be short.
        cost_val_sum += float(cost_val)*len(b_idx)
        cnt += len(b_idx)
    cost_val_avg = cost_val_sum / cnt
    if ((epoch%print_every)==0) or (epoch==(max_epoch-1)):
        accr_val = M.test(x_test, y_test, batch_size)
        print ("epoch:[%d/%d] cost:[%.3f] test_accuracy:[%.3f]"%
               (epoch+1,max_epoch,cost_val_avg,accr_val))
print ("Done.")

epoch:[1/20] cost:[119.244] test_accuracy:[80.330]
epoch:[2/20] cost:[26.327] test_accuracy:[85.920]
epoch:[3/20] cost:[17.132] test_accuracy:[88.060]
epoch:[4/20] cost:[12.645] test_accuracy:[89.360]
epoch:[5/20] cost:[9.816] test_accuracy:[90.490]
epoch:[6/20] cost:[7.815] test_accuracy:[90.900]
epoch:[7/20] cost:[6.324] test_accuracy:[91.140]
epoch:[8/20] cost:[5.239] test_accuracy:[92.100]
epoch:[9/20] cost:[4.349] test_accuracy:[92.320]
epoch:[10/20] cost:[3.611] test_accuracy:[92.700]
epoch:[11/20] cost:[3.036] test_accuracy:[92.640]
epoch:[12/20] cost:[2.537] test_accuracy:[92.820]
epoch:[13/20] cost:[2.131] test_accuracy:[93.240]
epoch:[14/20] cost:[1.788] test_accuracy:[93.210]
epoch:[15/20] cost:[1.530] test_accuracy:[93.440]
epoch:[16/20] cost:[1.298] test_accuracy:[93.490]
epoch:[17/20] cost:[1.075] test_accuracy:[93.810]
epoch:[18/20] cost:[0.925] test_accuracy:[93.640]
epoch:[19/20] cost:[0.758] test_accuracy:[93.540]
epoch:[20/20] cost:[0.636] test_accuracy:[93.620]
Done