<a href="https://colab.research.google.com/github/stewartjollymore/MNIST_NNs/blob/main/Fully_Connected_singlepass_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical


#Load MNIST and arrange it the way the network below expects:
#every image becomes a (1, 784) float32 row vector with values divided by 255
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#Training images: flatten each 28x28 image, cast to float32, then rescale
x_train = x_train.reshape(x_train.shape[0], 1, 28*28).astype('float32') / 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# test data (10000 samples) gets exactly the same treatment
x_test = x_test.reshape(x_test.shape[0], 1, 28*28).astype('float32') / 255
y_test = to_categorical(y_test)

In [None]:
#Layer dimensions of the fully connected network: 784 -> 100 -> 50 -> 10.
#Each layer's input width is tied to the previous layer's output width so the
#two can never drift apart.

input_size1 = 28*28
output_size1 = 100
input_size2 = output_size1
output_size2 = 50
input_sizefinal = output_size2
output_sizefinal = 10

In [None]:
import numpy as np

#Initialize weights and biases

#Seed NumPy's global generator so that Restart & Run All reproduces the same
#starting parameters, and therefore the same error values further down.
np.random.seed(0)

#Every parameter is drawn uniformly from [-0.5, 0.5): rand() samples [0, 1)
#and the 0.5 shift centres the values on zero.
#Weights are (inputs, outputs); biases are (1, outputs) row vectors.

w1 = np.random.rand(input_size1, output_size1) - 0.5
b1 = np.random.rand(1, output_size1) - 0.5

w2 = np.random.rand(input_size2, output_size2) - 0.5
b2 = np.random.rand(1, output_size2) - 0.5

w3 = np.random.rand(input_sizefinal, output_sizefinal) - 0.5
b3 = np.random.rand(1, output_sizefinal) - 0.5

In [None]:
import tensorflow as tf
import keras as ks

#Define activations for forward prop and backprop

def sigmd(x):
  """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

  The exponential is only ever taken of a non-positive number, so large
  negative inputs do not overflow (and raise a RuntimeWarning) the way a
  direct 1/(1+np.exp(-x)) does.

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    Sigmoid values in [0, 1] with the same shape as x; a NumPy scalar when
    x is a scalar.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # in (0, 1]: may underflow to 0, never overflows
  # x >= 0 uses 1/(1+e^-x); x < 0 uses the equivalent form e^x/(1+e^x)
  result = np.where(x >= 0, 1/(1+decay), decay/(1+decay))
  return result[()]  # unwraps a 0-d array to a scalar; leaves n-d arrays as they are

def gradient(x):
  """Derivative of the sigmoid at x, i.e. sigmd(x) * (1 - sigmd(x)).

  x is passed through sigmd here, so callers supply the value *before*
  activation rather than an already-activated output.
  """
  activated = sigmd(x)
  return activated*(1-activated)


In [None]:
#Forward pass of a single training sample through the three layers

single_xtrain, single_ytrain = x_train[0], y_train[0]

#First hidden layer: affine transform, then the sigmoid activation neuron
layer1 = single_xtrain.dot(w1) + b1
act_layer1 = sigmd(layer1)

#Second hidden layer: affine transform, then the sigmoid activation neuron
layer2 = act_layer1.dot(w2) + b2
act_layer2 = sigmd(layer2)

#Output layer: affine transform, then the sigmoid activation neuron
layer3 = act_layer2.dot(w3) + b3
output = sigmd(layer3)

In [None]:
#define the error metrics

def mse(y_true, y_pred):
    """Mean of the squared element-wise differences between y_true and y_pred."""
    residual = y_true - y_pred
    return np.mean(np.power(residual, 2))

def mse_grad(y_true, y_pred):
    """Gradient of mse with respect to y_pred: 2*(y_pred-y_true)/y_true.size."""
    element_count = y_true.size
    return 2*(y_pred-y_true)/element_count

In [None]:
#Measuring the mean squared error of the first forward pass
output_error = mse(y_true=single_ytrain, y_pred=output)
output_error

0.36345142888109305

In [None]:
#Backward propagation for the single sample, followed by one gradient-descent step

#Step size applied to every parameter update
learning_rate = 0.1

#Gradient of the MSE loss with respect to the network output.
#mse_grad already divides by the number of output elements, so the weight and
#bias updates below need no further normalisation.
output_error = mse_grad(single_ytrain, output)

#Backprop through output layer.
#gradient() applies sigmd to its argument itself, so it must receive the
#pre-activation value (layer3), not the already-activated output.
output_grad = output_error * gradient(layer3)
layer3_error = np.dot(output_grad, w3.T)

#Backprop through second hidden layer (layer3_error is the loss gradient
#with respect to act_layer2)
layer3_grad = layer3_error * gradient(layer2)
layer2_error = np.dot(layer3_grad, w2.T)

#Backprop through first hidden layer (layer2_error is the loss gradient
#with respect to act_layer1)
layer2_grad = layer2_error * gradient(layer1)
layer1_error = np.dot(layer2_grad, w1.T)

#Gradient back to the input. The input is not passed through an activation,
#so there is no sigmoid derivative to multiply by.
layer1_grad = layer1_error

#Weight gradients: (input that fed the layer).T dot (delta of that same layer).
#Each result has the same shape as the weight matrix it updates.
w3_update = np.dot(act_layer2.T, output_grad)
w2_update = np.dot(act_layer1.T, layer3_grad)
w1_update = np.dot(single_xtrain.T, layer2_grad)

#Updating the new weights by magnitude of the learning rate
w3_new = w3 - learning_rate * w3_update
w2_new = w2 - learning_rate * w2_update
w1_new = w1 - learning_rate * w1_update

#Updating the new biases. A bias is added directly to the pre-activation, so
#its gradient is the delta of its own layer.
b3_new = b3 - learning_rate * output_grad
b2_new = b2 - learning_rate * layer3_grad
b1_new = b1 - learning_rate * layer2_grad


In [None]:
#Second forward pass, this time with the updated weights and biases,
#followed by the error calculation

#First hidden layer
layer1_new = single_xtrain.dot(w1_new) + b1_new
act_layer1_new = sigmd(layer1_new)

#Second hidden layer
layer2_new = act_layer1_new.dot(w2_new) + b2_new
act_layer2_new = sigmd(layer2_new)

#Output layer
layer3_new = act_layer2_new.dot(w3_new) + b3_new
output_new = sigmd(layer3_new)

#Mean squared error after the update (displayed as the cell output)
mse(y_true=single_ytrain, y_pred=output_new)

0.3487951706994674