# Multi Layered Perceptron

In [2]:
import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim 

from data_utils import load_mnist

### Defining Model 

In [3]:
def build_model(input_dim, output_dim):
    '''
    Input: network dimensions of input and output
    Output : model
    '''
    model = nn.Sequential(
                nn.Linear(input_dim, 512, bias = False),
                nn.ReLU(),
                nn.Dropout(),
                nn.Linear(512, 512, bias = False),
                nn.ReLU(),
                nn.Dropout(),
                nn.Linear(512, output_dim, bias = False)
    )
    return model

### Training Function

In [4]:
def train(model, loss, optimizer, x_val, y_val):
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output

### Prediction Function

In [5]:
def predict(model, x_val):
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)

### Training

In [6]:
torch.manual_seed(42)
trX, teX, trY, teY = load_mnist(onehot=False)
trX = torch.from_numpy(trX).float()
teX = torch.from_numpy(teX).float()
trY = torch.from_numpy(trY).long()



In [7]:
n_examples, n_features = trX.size()
n_classes = 10
model = build_model(n_features, n_classes)
loss = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.Adam(model.parameters())
batch_size = 100

In [8]:
model

Sequential(
  (0): Linear(in_features=784, out_features=512, bias=False)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=512, out_features=512, bias=False)
  (4): ReLU()
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=512, out_features=10, bias=False)
)

In [7]:
train_loss = []
for i in range(50):
    cost = 0.
    num_batches = n_examples // batch_size
    for k in range(num_batches):
        start, end = k * batch_size, (k + 1) * batch_size
        cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
    predY = predict(model, teX)
    train_loss.append(cost/num_batches)
    print("Epoch %d, cost = %f, acc = %.2f%%"
          % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))

Epoch 1, cost = 0.352064, acc = 94.13%
Epoch 2, cost = 0.164834, acc = 95.27%
Epoch 3, cost = 0.130883, acc = 95.71%
Epoch 4, cost = 0.109014, acc = 96.02%
Epoch 5, cost = 0.095283, acc = 96.22%
Epoch 6, cost = 0.086254, acc = 96.44%
Epoch 7, cost = 0.082451, acc = 96.72%
Epoch 8, cost = 0.076097, acc = 96.61%
Epoch 9, cost = 0.070434, acc = 96.76%
Epoch 10, cost = 0.068656, acc = 96.76%
Epoch 11, cost = 0.065097, acc = 96.98%
Epoch 12, cost = 0.059400, acc = 96.64%
Epoch 13, cost = 0.059059, acc = 96.93%
Epoch 14, cost = 0.057031, acc = 97.08%
Epoch 15, cost = 0.056099, acc = 96.93%
Epoch 16, cost = 0.055068, acc = 96.98%
Epoch 17, cost = 0.049694, acc = 96.81%
Epoch 18, cost = 0.052037, acc = 97.00%
Epoch 19, cost = 0.051153, acc = 97.09%
Epoch 20, cost = 0.047949, acc = 97.20%
Epoch 21, cost = 0.050635, acc = 97.26%
Epoch 22, cost = 0.045552, acc = 97.17%
Epoch 23, cost = 0.047034, acc = 97.22%
Epoch 24, cost = 0.046436, acc = 97.09%
Epoch 25, cost = 0.040770, acc = 97.36%
Epoch 26,