<a href="https://colab.research.google.com/github/sagsarkar/ML_Coding/blob/main/feed_forward_mnist_sgd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

In [2]:
# Load scikit-learn's bundled handwritten-digits dataset. Note: this is the
# small digits set shipped with scikit-learn, not the original MNIST download.
mnist = load_digits()

# Standardise every feature column (zero mean, unit variance); labels are
# kept as the integer class ids provided by the dataset.
labels = mnist.target
scaler = StandardScaler()
features = scaler.fit_transform(mnist.data)

# Dataset dimensions used by the model and the training loop
num_data_points = features.shape[0]
feature_dim = features.shape[1]
num_classes = np.unique(labels).size

In [3]:
class Dense():
    """Fully connected (affine) layer using a column-vector convention.

    Attributes:
        W: weight matrix of shape (output_size, input_size).
        bias: bias column of shape (output_size, 1).
        input: the most recent argument passed to ``forward`` (cached for
            use by ``backward``).
    """

    def __init__(self, input_size, output_size):
        """Create the layer with standard-normal random weights and bias."""
        self.input_size = input_size
        self.output_size = output_size
        self.W = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input_data):
        """Return ``W @ input_data + bias``.

        Args:
            input_data: array of shape (input_size, 1).

        Returns:
            Array of shape (output_size, 1).
        """
        # Cache the argument itself; the previous code read the unrelated
        # name `input` (builtin / leaked global) and used a row-vector formula
        # that did not match `backward`.
        self.input = input_data
        return np.dot(self.W, self.input) + self.bias

    def backward(self, output_gradient, learning_rate=1e-3):
        """Apply one gradient-descent step and propagate the gradient.

        Args:
            output_gradient: gradient of the loss with respect to this
                layer's output, shape (output_size, 1).
            learning_rate: step size for the parameter update.

        Returns:
            Gradient of the loss with respect to the layer input,
            shape (input_size, 1).
        """
        dW = np.dot(output_gradient, self.input.T)
        db = output_gradient
        # Must be computed with the weights used in the forward pass,
        # i.e. before W is updated below.
        input_gradient = np.dot(self.W.T, output_gradient)

        self.W -= learning_rate * dW
        self.bias -= learning_rate * db

        return input_gradient

In [4]:
class Sigmoid:
  """Element-wise logistic activation layer.

  Each instance exposes ``activation`` (the logistic function) and
  ``activation_prime`` (its derivative) as callables, and caches the last
  forward input in ``self.input`` for the backward pass.
  """

  @staticmethod
  def _logistic(z):
    """Logistic function 1 / (1 + e^-z), applied element-wise."""
    return 1 / (1 + np.exp(-z))

  @staticmethod
  def _logistic_slope(z):
    """Derivative of the logistic function evaluated at z."""
    value = Sigmoid._logistic(z)
    return value * (1 - value)

  def __init__(self):
    self.activation = self._logistic
    self.activation_prime = self._logistic_slope

  def forward(self, input):
    """Remember the pre-activation values and return their logistic."""
    self.input = input
    return self.activation(input)

  def backward(self, output_gradient, learning_rate=None):
    """Chain rule: scale the incoming gradient by the local derivative.

    ``learning_rate`` is accepted only so the signature matches trainable
    layers; it is not used.
    """
    local_slope = self.activation_prime(self.input)
    return np.multiply(output_gradient, local_slope)

In [5]:
class SoftmaxWithCrossEntropy:
  """Softmax output layer with a combined cross-entropy backward pass."""

  def forward(self, input):
    """Return the softmax of ``input``, normalised over all its elements.

    The result is also stored in ``self.output`` for use by ``backward``.
    """
    scores = np.asarray(input)
    # Softmax is invariant to adding a constant to every score, so shift by
    # the maximum: the largest exponent becomes exp(0) = 1 and np.exp can no
    # longer overflow to inf (which previously produced inf/inf = nan).
    shift = np.max(scores) if scores.size else 0.0
    exp_scores = np.exp(scores - shift)
    self.output = exp_scores / np.sum(exp_scores)
    return self.output

  def backward(self, y_true):
    """Return ``self.output - y_true``.

    For a softmax followed by natural-log cross-entropy this difference is
    the gradient of the loss with respect to the layer's input.
    """
    input_gradient = self.output - y_true
    return input_gradient

In [6]:
# Layer widths: input features -> 32 -> 16 -> one output per class
hidden_dim1, hidden_dim2 = 32, 16
output_dim = num_classes

# Assemble Dense layers in order, with a Sigmoid between consecutive
# Dense layers (none after the last one; softmax is applied separately).
layer_widths = [feature_dim, hidden_dim1, hidden_dim2, output_dim]
network = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
  if network:
    network.append(Sigmoid())
  network.append(Dense(fan_in, fan_out))

output_layer = SoftmaxWithCrossEntropy()

In [7]:
def cross_entropy_loss(y_pred, oh_label):
  """Cross-entropy (in bits, i.e. base-2 log) between a label and a prediction.

  Args:
    y_pred: predicted probabilities; the first axis indexes classes.
    oh_label: label weights (typically one-hot) with the same shape as
      ``y_pred``.

  Returns:
    ``-sum_i oh_label[i] * log2(y_pred[i])`` summed over the first axis, so a
    pair of (num_classes, 1) columns yields an array of shape (1,).
  """
  y_pred = np.asarray(y_pred, dtype=float)
  oh_label = np.asarray(oh_label)
  # Classes with zero label weight contribute nothing (0 * log p := 0).
  # Substituting 1.0 for their probability makes log2 return exactly 0 and
  # avoids the 0 * log2(0) = nan that a zero predicted probability caused.
  safe_pred = np.where(oh_label != 0, y_pred, 1.0)
  return -np.sum(oh_label * np.log2(safe_pred), axis=0)

In [8]:
def get_accuracy(true_classes_per_epoch, predictions_per_epoch):
  """Return the percentage (0-100) of predictions equal to the true class.

  The denominator is the number of samples passed in, so the function no
  longer depends on a notebook-level global and also works for subsets.

  Args:
    true_classes_per_epoch: sequence of true class ids.
    predictions_per_epoch: sequence of predicted class ids, same shape.

  Returns:
    Accuracy in percent; 0.0 when no samples are given.

  Raises:
    ValueError: if the two inputs do not have the same shape.
  """
  truth = np.asarray(true_classes_per_epoch)
  guesses = np.asarray(predictions_per_epoch)
  if truth.shape != guesses.shape:
    raise ValueError(
        "true classes and predictions must have the same shape, got "
        "{} and {}".format(truth.shape, guesses.shape))
  if truth.size == 0:
    return 0.0
  return 100 * np.sum(guesses == truth) / truth.size

In [9]:
# Per-sample stochastic gradient descent over the whole dataset.
classes = np.arange(num_classes)
NUM_EPOCHS = 200
for epoch_no in range(NUM_EPOCHS):
  sample_losses = []
  predictions_per_epoch = []
  true_classes_per_epoch = []
  for idx in range(num_data_points):
    # Column vector for the current sample. Deliberately NOT named `input`:
    # rebinding that builtin at notebook scope hides bugs in code that
    # refers to `input` by mistake.
    sample = features[idx].reshape(-1, 1)
    true_class = labels[idx]
    # One-hot column: 1 at the true class, 0 elsewhere
    oh_label = np.where(classes == true_class, 1, 0).reshape(-1, 1)

    # Forward pass through the hidden stack, then the softmax output layer
    x = sample
    for layer in network:
      x = layer.forward(x)

    y_pred = output_layer.forward(x)
    predicted_class = np.argmax(y_pred)
    predictions_per_epoch.append(predicted_class)
    true_classes_per_epoch.append(true_class)

    # Loss is recorded for reporting only; the gradient comes from the
    # output layer's backward pass.
    loss = cross_entropy_loss(y_pred, oh_label)
    sample_losses.append(loss)

    # Backward pass: each layer updates its own parameters and returns the
    # gradient for the layer before it.
    output_gradient = output_layer.backward(oh_label)
    for layer in reversed(network):
      output_gradient = layer.backward(output_gradient)

  # Metrics are gathered while the weights are still changing within the epoch
  loss_per_epoch = np.mean(np.array(sample_losses))
  accuracy_per_epoch = get_accuracy(true_classes_per_epoch, predictions_per_epoch)

  # Report every 10th epoch (the printed epoch number is zero-based)
  if (epoch_no+1) % 10 == 0:
    values = {'epoch_no': epoch_no, 'loss_per_epoch': loss_per_epoch, 'accuracy_per_epoch': accuracy_per_epoch}
    print("{epoch_no}: {loss_per_epoch}, {accuracy_per_epoch}".format(**values))

9: 2.233545802301231, 49.91652754590985
19: 1.5460926933414634, 69.17084028937117
29: 1.1838421817633955, 77.90762381747356
39: 0.9480992686814801, 83.58375069560378
49: 0.7828895772687312, 87.0895937673901
59: 0.6614192295531732, 89.64941569282136
69: 0.5693775096464049, 91.37451307735114
79: 0.49818849693546585, 92.65442404006677
89: 0.4418897911316802, 93.37785197551474
99: 0.39580766125338895, 94.04563160823595
109: 0.3570576575192365, 94.93600445186422
119: 0.3249037596569477, 95.60378408458541
129: 0.2980170534517782, 95.88202559821926
139: 0.27500085078415865, 96.38286032276015
149: 0.2549722529600767, 96.60545353366723
159: 0.23735921795278095, 96.99499165275459
169: 0.2217410452473486, 97.27323316638842
179: 0.20771011151949242, 97.6627712854758
189: 0.19498106024615916, 97.94101279910963
199: 0.18337847173076974, 98.10795770728993
