<a href="https://colab.research.google.com/github/sc22lg/ML-Notebooks/blob/MLP_in_Pytorch/Pytorch_MLP_on_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLP For MNIST

In [22]:
try:
  %pip install fancy_einsum
  %pip install einops
  %pip install keras
except:
  print("Package failed to install")

from keras.datasets import mnist
import torch as t
import einops
from fancy_einsum import einsum
import torch.nn as nn
import numpy as np
import math
from dataclasses import dataclass




###Investigate MNIST dataset:

In [8]:
(train_X, train_y), (test_X, test_y) = mnist.load_data()

#printing the shapes of the vectors
print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [9]:
print('X_train 0: ' + str(train_X[0]))
print('Y_train 0: ' + str(train_y[0]))

X_train 0: [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136
  175  26 166 255 247 127   0   0   0   0]
 [  0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253
  225 172 253 242 195  64   0   0   0   0]
 [  0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251
   93  82  82  56  39   0   0   0   0   0]
 [  0   0   0   0   0   0   0  18 219 253 253 253 253

###Model Setup:

In [48]:
@dataclass
class Config:
  batch:int = 1
  d_img:int = 28
  n_layer:int = 56
  n_out:int = 10

cfg = Config()

###MLP Layer:

In [49]:
class MLPLayer(nn.Module):
  def __init__(self, cfg):
    super(MLPLayer, self).__init__()
    self.cfg = cfg
    self.in_W = nn.Parameter(t.randn(cfg.d_img, cfg.n_layer), requires_grad=True)
    self.in_B = nn.Parameter(t.randn(cfg.n_layer), requires_grad=True)
    self.out_W = nn.Parameter(t.randn(cfg.n_layer, cfg.n_out), requires_grad=True)
    self.out_B = nn.Parameter(t.randn(cfg.n_out), requires_grad=True)

  def forward(self, data_in):
    #data_in format: [batch, d_img, d_img]
    first_layer = einsum('batch d_img d_img, d_img n_layer -> batch n_layer', data_in, self.in_W) + self.in_B
    #apply ReLU function
    post_ReLu = t.maximum(first_layer, t.zeros_like(first_layer))
    #second matrix:
    second_layer = einsum('batch n_layer, n_layer n_out -> batch n_out', post_ReLu, self.out_W) + self.out_B
    return second_layer

###Prediction Extraction

In [50]:
class PredictionExtractor(nn.Module):
  def __init__(self):
    super(PredictionExtractor, self).__init__()

  def applySoftmax(self, MLP_result):
    return nn.functional.softmax(MLP_result, dim=1)


###MLP Body

In [51]:
class MLP(nn.Module):
  def __init__(self, cfg):
    super(MLP, self).__init__()
    self.cfg = cfg
    self.layer = MLPLayer(cfg)
    self.Predictor = PredictionExtractor()

  def forward(self, data_in):
    #data_in format: [batch, d_img, d_img]
    MLP_result = self.layer(data_in)
    return self.Predictor.applySoftmax(MLP_result)

###Testing Forward Propagation

In [52]:
MLP = MLP(cfg)
for i in range(len(train_X)):
  prediction = MLP.forward(t.unsqueeze(t.tensor(train_X[i]).float(), 0))
  print('Prediction: ' + str(prediction) + 'Actual: ' + str(train_y[i]))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Prediction: tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], grad_fn=<SoftmaxBackward0>)Actual: 5
Prediction: tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], grad_fn=<SoftmaxBackward0>)Actual: 4
Prediction: tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 1
Prediction: tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 5
Prediction: tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 1
Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 7
Prediction: tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 9
Prediction: tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 0
Prediction: tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<SoftmaxBackward0>)Actual: 6
Prediction: tensor([[0., 1., 0., 0.

KeyboardInterrupt: 