<a href="https://colab.research.google.com/github/sc22lg/ML-Notebooks/blob/MLP_in_Pytorch/Pytorch_MLP_on_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLP For MNIST

In [85]:
try:
  %pip install fancy_einsum
  %pip install einops
  %pip install keras
except:
  print("Package failed to install")

from keras.datasets import mnist
import torch as t
import einops
from fancy_einsum import einsum
import torch.nn as nn
import numpy as np
import math
from dataclasses import dataclass




###Investigate MNIST dataset:

In [86]:
(train_X, train_y), (test_X, test_y) = mnist.load_data()

#printing the shapes of the vectors
print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [87]:
# expand each element of train_y to a 10-element tensor
# with a 1 in the position corresponding to that element's number
train_y_tensor = t.nn.functional.one_hot(t.tensor(train_y).to(t.long), num_classes=10).to(t.float)
print('original: ' + str(train_y[0]) + '\n new: ' + str(train_y_tensor[0]))
#repeat for test_y
test_y_tensor = t.nn.functional.one_hot(t.tensor(test_y).to(t.long), num_classes=10).to(t.float)

original: 5
 new: tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])


###Model Setup:

In [88]:
@dataclass
class Config:
  batch:int = 1
  d_img:int = 28
  n_layer:int = 56
  n_out:int = 10

cfg = Config()

###MLP Layer:

In [89]:
class MLPLayer(nn.Module):
  def __init__(self, cfg):
    super(MLPLayer, self).__init__()
    self.cfg = cfg
    self.in_W = nn.Parameter(t.randn(cfg.d_img, cfg.n_layer), requires_grad=True)
    self.in_B = nn.Parameter(t.randn(cfg.n_layer), requires_grad=True)
    self.out_W = nn.Parameter(t.randn(cfg.n_layer, cfg.n_out), requires_grad=True)
    self.out_B = nn.Parameter(t.randn(cfg.n_out), requires_grad=True)

  def forward(self, data_in):
    #data_in format: [batch, d_img, d_img]
    first_layer = einsum('batch d_img d_img, d_img n_layer -> batch n_layer', data_in, self.in_W) + self.in_B
    #apply ReLU function
    post_ReLu = t.maximum(first_layer, t.zeros_like(first_layer))
    #second matrix:
    second_layer = einsum('batch n_layer, n_layer n_out -> batch n_out', post_ReLu, self.out_W) + self.out_B
    return second_layer

###Prediction Extraction

In [90]:
class Softmax(nn.Module):
  def __init__(self):
    super(Softmax, self).__init__()

  def forward(self, MLP_result):
    return nn.functional.softmax(MLP_result, dim=1)


###MLP Body

In [91]:
class MLP(nn.Module):
  def __init__(self, cfg):
    super(MLP, self).__init__()
    self.cfg = cfg
    self.layer = MLPLayer(cfg)
    self.Predictor = Softmax()

  def forward(self, data_in):
    #data_in format: [batch, d_img, d_img]
    MLP_result = self.layer(data_in)
    return self.Predictor.forward(MLP_result)

###Testing Forward Propagation

In [92]:
MLP = MLP(cfg)
for i in range(10):
  #create tensor of data, set as floats, add extra 'batch' dimension, run through network
  prediction = MLP.forward(t.unsqueeze(t.tensor(train_X[i]).float(), 0))
  print('Prediction: ' + str(prediction) + 'Actual: ' + str(train_y[i]))

Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 5
Prediction: tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 0
Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 4
Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 1
Prediction: tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 9
Prediction: tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 2
Prediction: tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 1
Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 3
Prediction: tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 1
Prediction: tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 4


###Training

In [93]:
optimiser = t.optim.SGD(MLP.parameters(), lr=0.01)

In [94]:
log_every = 100
for i in range(len(test_X)):
  #create tensor of data, set as floats, add extra 'batch' dimension, run through network
  prediction = MLP.forward(t.unsqueeze(t.tensor(train_X[i]).float(), 0))
  #find negative log likelyhood loss of prediction and corresponding test_y
  loss = nn.functional.binary_cross_entropy(prediction, train_y_tensor[i].unsqueeze(0))
  # train network with loss
  loss.backward()
  optimiser.step()
  if i % log_every == 0:
    print('Loss: ' + str(loss))

Loss: tensor(0., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(0., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<BinaryCrossEntropyBackward0>)
Loss: tensor(20., grad_fn=<Binary

###Testing:

In [95]:
#Test model with test_X and test_Y data, output correct guess %
correct_guess = 0
for i in range(len(test_X)):
  prediction = MLP.forward(t.unsqueeze(t.tensor(test_X[i]).float(), 0))
  if t.argmax(prediction) == test_y[i]:
    correct_guess += 1
print('Score: ' +  str(correct_guess) + '/' + str(len(test_X)) + ', ' + str(correct_guess/len(test_X)*100) + '%')

Score: 1010/10000, 10.100000000000001%
