# MNIST
Our objective is to build a neural network that classifies the digits of the MNIST dataset. The network has an input layer with 784 nodes, one per image pixel, followed by two layers of 10 nodes each. Its structure is outlined below, where $X$ is the input, $A^{[0]}$ denotes the input layer (layer 0, equal to $X$), $Z^{[1]}$ is the pre-activation (unactivated) output of layer 1, $A^{[1]}$ is the activated output of layer 1, and so forth. The weights and biases are denoted by $W$ and $b$ respectively:


<div align="center">

$A^{[0]}=X$

$Z^{[1]}=W^{[1]}A^{[0]}+b^{[1]}$

$A^{[1]}=\text{ReLU}(Z^{[1]})$

$Z^{[2]}=W^{[2]}A^{[1]}+b^{[2]}$

$A^{[2]}=\text{softmax}(Z^{[2]})$
</div>




You have the flexibility to create any function within or outside the class, and to modify parameters as needed.

In [67]:
#importing libraries
import pandas as pd
import numpy as np
import sys
from keras.datasets import mnist
import matplotlib.pyplot as plt
np.set_printoptions(threshold=sys.maxsize)

### Required functions

In [68]:
# activation and loss functions
def ReLU(x):
    """Apply the rectified linear unit element-wise.

    Every negative entry of ``x`` is replaced by 0; non-negative entries
    pass through unchanged.
    """
    rectified = np.maximum(0, x)
    return rectified

def derivative_ReLU(x):
    """Return the element-wise derivative of ReLU evaluated at ``x``.

    The result is 1 where ``x`` is strictly positive and 0 everywhere else
    (including at exactly 0).
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def softmax(x):
    """Compute the softmax of ``x`` along its last axis.

    The maximum along the last axis is subtracted before exponentiating so
    that large inputs do not overflow; this shift does not change the result.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials, axis=-1, keepdims=True)
    return exponentials / normaliser
# Scratch array left over from a manual softmax check (see the disabled
# prints below); no other visible cell reads it.
weights = np.zeros(100000)
# print(softmax(weights))
# print(weights.shape)

In [86]:
#complete the class of neural network

class NN:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Samples are stored as rows, so the forward pass is
    ``A1 = ReLU(X @ W1 + b1)`` followed by ``A2 = softmax(A1 @ W2 + b2)``.
    Both layers have 10 units.
    """

    def __init__(self, X):
        """Create randomly initialised parameters sized for ``X``.

        Args:
            X: 2-D array of shape (n_samples, n_features). Only the feature
                count is used, to size ``W1``.
        """
        _, n_features = X.shape
        self.W1 = np.random.randn(n_features, 10) * 0.01
        self.b1 = np.zeros((1, 10))
        self.W2 = np.random.randn(10, 10) * 0.01
        self.b2 = np.zeros((1, 10))
        # Activations cached by forward_propagation; None until the first pass.
        self.A1 = None
        self.A2 = None
        # Hidden-layer error term cached by backward_propagation.
        self.z = None

    def forward_propagation(self, X):
        """Run the forward pass on ``X`` and cache ``A1`` and ``A2``."""
        self.A1 = ReLU(np.dot(X, self.W1) + self.b1)
        self.A2 = softmax(np.dot(self.A1, self.W2) + self.b2)

    def one_hot(self, targets):
        """Return a (len(targets), 10) matrix with a single 1 per row.

        Row ``i`` has its 1 in column ``targets[i]``, so ``targets`` must hold
        integer class indices valid for a width of 10.
        """
        one_hot_matrix = np.zeros((len(targets), 10))
        one_hot_matrix[np.arange(len(targets)), targets] = 1
        return one_hot_matrix

    def backward_propagation(self, X, y, n=None):
        """Compute the gradients for the batch used in the last forward pass.

        Stores ``dW1``, ``db1``, ``dW2``, ``db2`` (and the hidden error ``z``)
        on the instance.

        Args:
            X: The same input batch that was passed to forward_propagation.
            y: One-hot targets with the same shape as ``A2``.
            n: Divisor used to average the gradients. Defaults to the number
                of rows in ``X``.
        """
        if n is None:
            n = X.shape[0]

        output_error = self.A2 - y
        self.dW2 = (1 / n) * np.dot(self.A1.T, output_error)
        self.db2 = (1 / n) * np.sum(output_error, axis=0)

        # The ReLU derivative is 1 exactly where A1 = ReLU(Z1) is positive, so
        # the cached activation provides the mask without recomputing
        # X @ W1 + b1 on every call.
        self.z = np.dot(output_error, self.W2.T) * (self.A1 > 0)
        self.dW1 = (1 / n) * np.dot(X.T, self.z)
        self.db1 = (1 / n) * np.sum(self.z, axis=0)

    def update_params(self, lr):
        """Take one gradient-descent step of size ``lr`` on all parameters."""
        self.W1 -= lr * self.dW1
        self.b1 -= lr * self.db1
        self.W2 -= lr * self.dW2
        self.b2 -= lr * self.db2

    def get_predictions(self):
        """Placeholder; not implemented yet."""
        pass

    def get_accuracy(self):
        """Placeholder; not implemented yet."""
        pass

    def gradient_descent(self, X, y, epochs, lr, n=None):
        """Train with full-batch gradient descent and return class predictions.

        Args:
            X: Training inputs, one sample per row.
            y: One-hot training targets.
            epochs: Number of forward/backward/update iterations.
            lr: Learning rate.
            n: Divisor used to average the gradients. Defaults to the number
                of rows in ``X``.

        Returns:
            1-D array with the predicted class index for every row of ``X``,
            computed with the final parameters.
        """
        for _ in range(epochs):
            self.forward_propagation(X)
            self.backward_propagation(X, y, n)
            self.update_params(lr)

        # Refresh the activations so the returned predictions reflect the
        # final parameters rather than the ones from before the last update.
        # This also makes epochs == 0 return predictions instead of failing.
        self.forward_propagation(X)
        return np.argmax(self.A2, axis=1)

    def make_predictions(self):
        """Placeholder; not implemented yet."""
        pass

    def show_prediction(self):
        """Placeholder for showing the prediction and actual label of an image."""
        pass


## main

In [69]:
# Load the MNIST train/test split (images and integer labels) through Keras.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

### Preprocessing the data


In [84]:
# Standardise the pixel values (zero mean, unit variance per image column) and
# flatten every image into a 784-element row. Note that the result is NOT
# confined to [0, 1]; it is centred on 0.
#
# The statistics come from the training set only and are reused for the test
# set: both splits must go through the same transform, and estimating it from
# the test images would leak test information into the evaluation.

# Reducing over axes (0, 1) keeps one mean/std per position on the last axis
# (the image column when the input is shaped (N, 28, 28)).
miu = np.mean(X_train, axis=(0, 1), keepdims=True)
stds = np.std(X_train, axis=(0, 1), keepdims=True)

# Legacy names kept in case later cells reference them; they now alias the
# training statistics, which are the ones actually applied to X_test.
mius, stdse = miu, stds

# The small constant guards against division by zero for constant columns.
X_train = (X_train - miu) / (stds + 1e-7)
X_test = (X_test - miu) / (stds + 1e-7)

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

### Model Training

In [92]:
#training model using gradient descent
# Hyper-parameters for full-batch gradient descent.
epochs = 500
lr = 0.01
n = 60000

# Training inputs and integer labels (the first n samples).
b = X_train[:n]
c = Y_train[:n]

# Build the network, one-hot encode the labels and train; z receives the
# training-set predictions returned by gradient_descent.
a = NN(b)
y = a.one_hot(c)
z = a.gradient_descent(b, y, epochs, lr, n)

In [93]:
# Training labels followed by the training-set predictions from gradient_descent.
print(c)
print(z)

# Test accuracy: run the trained network on the test set and count the matching
# labels in one vectorised comparison instead of a Python-level index loop.
a.forward_propagation(X_test)
k = np.argmax(a.A2, axis=1)
p = np.count_nonzero(k == Y_test)
print(p / len(k))

[5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9 4 0 9 1 1 2 4 3 2 7 3 8 6 9 0 5 6
 0 7 6 1 8 7 9 3 9 8 5 9 3 3 0 7 4 9 8 0 9 4 1 4 4 6 0 4 5 6 1 0 0 1 7 1 6
 3 0 2 1 1 7 9 0 2 6 7 8 3 9 0 4 6 7 4 6 8 0 7 8 3 1 5 7 1 7 1 1 6 3 0 2 9
 3 1 1 0 4 9 2 0 0 2 0 2 7 1 8 6 4 1 6 3 4 5 9 1 3 3 8 5 4 7 7 4 2 8 5 8 6
 7 3 4 6 1 9 9 6 0 3 7 2 8 2 9 4 4 6 4 9 7 0 9 2 9 5 1 5 9 1 2 3 2 3 5 9 1
 7 6 2 8 2 2 5 0 7 4 9 7 8 3 2 1 1 8 3 6 1 0 3 1 0 0 1 7 2 7 3 0 4 6 5 2 6
 4 7 1 8 9 9 3 0 7 1 0 2 0 3 5 4 6 5 8 6 3 7 5 8 0 9 1 0 3 1 2 2 3 3 6 4 7
 5 0 6 2 7 9 8 5 9 2 1 1 4 4 5 6 4 1 2 5 3 9 3 9 0 5 9 6 5 7 4 1 3 4 0 4 8
 0 4 3 6 8 7 6 0 9 7 5 7 2 1 1 6 8 9 4 1 5 2 2 9 0 3 9 6 7 2 0 3 5 4 3 6 5
 8 9 5 4 7 4 2 7 3 4 8 9 1 9 2 8 7 9 1 8 7 4 1 3 1 1 0 2 3 9 4 9 2 1 6 8 4
 7 7 4 4 9 2 5 7 2 4 4 2 1 9 7 2 8 7 6 9 2 2 3 8 1 6 5 1 1 0 2 6 4 5 8 3 1
 5 1 9 2 7 4 4 4 8 1 5 8 9 5 6 7 9 9 3 7 0 9 0 6 6 2 3 9 0 7 5 4 8 0 9 4 1
 2 8 7 1 2 6 1 0 3 0 1 1 8 2 0 3 9 4 0 5 0 6 1 7 7 8 1 9 2 0 5 1 2 2 7 3 5
 4 9 7 1 8 3 9 6 0 3 1 1 

### Viewing Results


In [73]:
#viewing prediction for 10 random images in dataset