<a href="https://colab.research.google.com/github/kailashnirola/python_notebooks/blob/main/mnistnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network from Scratch for the MNIST Dataset

In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist


In [None]:
# Load MNIST as (images, labels) pairs for the train and test splits.
(x_train,y_train),(x_test,y_test)= mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
def preprocess(x,y):
  """Flatten and scale images, and one-hot encode labels.

  Args:
    x: array of images reshapeable to rows of 28*28 values.
    y: array of integer class labels in the range 0..9.

  Returns:
    (flat, one_hot): `flat` has shape (n, 784) with every value divided
    by 255.0; `one_hot` has shape (n, 10) with a single 1.0 per row.
  """
  flat = x.reshape(-1, 28*28) / 255.0

  # Build the one-hot matrix by writing a 1.0 into each row's label column.
  labels = y.reshape(-1).astype(int)
  one_hot = np.zeros((labels.size, 10))
  one_hot[np.arange(labels.size), labels] = 1.0
  return flat, one_hot

# Flatten/scale the images and one-hot encode the labels for both splits.
# NOTE: x_train/y_train (and the test arrays) are rebound here, so re-running
# this cell without re-running the load cell would rescale and re-split data
# that has already been processed.
x_train, y_train = preprocess(x_train, y_train)
x_test, y_test = preprocess(x_test, y_test)

# Hold out the first `val_size` training samples as the validation set;
# the remainder stays as the training set.
val_size = 10000
x_val, x_train = x_train[:val_size], x_train[val_size:]
y_val, y_train = y_train[:val_size], y_train[val_size:]

# Layer widths: 784 flattened pixels -> 128 -> 128 -> 10 classes.
input_size, hidden_size1, hidden_size2, output_size = 784, 128, 128, 10

In [None]:
def initialize_parameters():
  """Draw random initial weights and biases for the three dense layers.

  Layer widths come from the module-level `input_size`, `hidden_size1`,
  `hidden_size2` and `output_size`. Each weight matrix is standard-normal
  noise scaled by sqrt(2 / fan_in); each bias vector is unscaled
  standard-normal noise.

  Returns:
    Tuple (W1, b1, W2, b2, W3, b3).
  """
  layer_dims = (input_size, hidden_size1, hidden_size2, output_size)
  params = []
  # Draw order is W then b for each layer, so results under a fixed seed
  # follow the sequence W1, b1, W2, b2, W3, b3.
  for fan_in, fan_out in zip(layer_dims[:-1], layer_dims[1:]):
    he_scale = np.sqrt(2/fan_in)
    params.append(np.random.randn(fan_in, fan_out)*he_scale)
    params.append(np.random.randn(fan_out))
  return tuple(params)

def relu(z):
  """Rectified linear unit: element-wise maximum of 0 and `z`."""
  zero_floor = 0
  activated = np.maximum(zero_floor, z)
  return activated

def sigmoid(z):
  """Row-wise softmax of a 2-D array of scores.

  Despite the name, this is not the logistic sigmoid: each row is
  exponentiated and divided by its own sum, so every output row sums to 1.
  The row maximum is subtracted first so that np.exp never sees large
  positive values.

  Args:
    z: 2-D array; normalisation runs along axis 1.

  Returns:
    Array of the same shape as `z`.
  """
  row_peak = np.max(z, axis=1, keepdims=True)
  numer = np.exp(z - row_peak)
  denom = np.sum(numer, axis=1, keepdims=True)
  return numer / denom

def forward(W1,b1,W2,b2,W3,b3,x):
  """Run a forward pass through the three-layer network.

  Two hidden layers use `relu`; the output layer uses `sigmoid` (which in
  this file normalises each row to sum to 1).

  Args:
    W1, b1, W2, b2, W3, b3: weights and biases of the three layers.
    x: input batch, one sample per row.

  Returns:
    Tuple (Z1, A1, Z2, A2, Z3, A3) of each layer's pre-activation and
    activation, in layer order.
  """
  # Hidden layer 1
  pre1 = np.dot(x, W1) + b1
  act1 = relu(pre1)

  # Hidden layer 2
  pre2 = np.dot(act1, W2) + b2
  act2 = relu(pre2)

  # Output layer
  pre3 = np.dot(act2, W3) + b3
  act3 = sigmoid(pre3)

  return pre1, act1, pre2, act2, pre3, act3

def compute_loss(Y_,Y):
  """Mean cross-entropy between predictions `Y_` and targets `Y`.

  Args:
    Y_: predicted probabilities, one row per sample.
    Y: target array of the same shape (one-hot rows in this notebook).

  Returns:
    Sum of -Y * log(Y_ + 1e-8) over all entries, divided by the number
    of rows in `Y`.
  """
  n_rows = Y.shape[0]
  # The small constant keeps log() finite when a predicted probability is 0.
  log_probs = np.log(Y_ + 1e-8)
  total = np.sum(Y * log_probs)
  return -total / n_rows

def backward(X,Y,Z1,A1,Z2,A2,Z3,A3,W2,W3):
  """Back-propagate through the three-layer network.

  Args:
    X: input batch, one sample per row.
    Y: targets with the same shape as `A3`.
    Z1, A1, Z2, A2, Z3, A3: values returned by the forward pass
      (`Z3` is accepted but not used).
    W2, W3: weights of the second and third layers.

  Returns:
    Tuple (dW1, db1, dW2, db2, dW3, db3) of gradients, already divided
    by the batch size.
  """
  batch = X.shape[0]

  # Output layer: the error signal is (prediction - target), averaged here
  # so that every downstream gradient is already a batch mean.
  delta3 = (A3 - Y) / batch
  grad_W3 = A2.T.dot(delta3)
  grad_b3 = delta3.sum(axis=0)

  # Second hidden layer: gradient only flows where the ReLU input was positive.
  delta2 = delta3.dot(W3.T) * (Z2 > 0)
  grad_W2 = A1.T.dot(delta2)
  grad_b2 = delta2.sum(axis=0)

  # First hidden layer.
  delta1 = delta2.dot(W2.T) * (Z1 > 0)
  grad_W1 = X.T.dot(delta1)
  grad_b1 = delta1.sum(axis=0)

  return grad_W1, grad_b1, grad_W2, grad_b2, grad_W3, grad_b3

def update_params(W1,b1,W2,b2,W3,b3,dW1,db1,dW2,db2,dW3,db3,lr):
  """Apply one gradient-descent step to every parameter.

  Each parameter has `lr` times its gradient subtracted from it.

  Args:
    W1, b1, W2, b2, W3, b3: current parameters.
    dW1, db1, dW2, db2, dW3, db3: matching gradients.
    lr: learning rate.

  Returns:
    Tuple (W1, b1, W2, b2, W3, b3) after the step.
  """
  weights = (W1, b1, W2, b2, W3, b3)
  gradients = (dW1, db1, dW2, db2, dW3, db3)
  stepped = []
  for param, grad in zip(weights, gradients):
    # Augmented assignment: NumPy arrays are modified in place, so the
    # caller's arrays change too.
    param -= lr * grad
    stepped.append(param)
  return tuple(stepped)


def accuracy(X,Y,W1,b1,W2,b2,W3,b3):
  """Fraction of samples whose predicted class matches the target class.

  Args:
    X: input batch, one sample per row.
    Y: targets, one row per sample; the class is the arg-max column.
    W1, b1, W2, b2, W3, b3: network parameters passed to `forward`.

  Returns:
    Mean of the element-wise match between predicted and target classes.
  """
  # Only the final activation (last item of forward's result) is needed.
  probs = forward(W1,b1,W2,b2,W3,b3,X)[-1]
  predicted_cls = np.argmax(probs, axis=1)
  target_cls = np.argmax(Y, axis=1)
  hits = predicted_cls == target_cls
  return np.mean(hits)

In [None]:
def train(X_train,y_train,x_val,y_val,epochs,batch_size,lr):
  """Train the network with mini-batch gradient descent.

  Parameters are freshly drawn by `initialize_parameters`. Every epoch the
  training set is reshuffled, processed in consecutive slices of
  `batch_size` rows (the last slice may be shorter), and the validation
  accuracy is printed.

  Args:
    X_train, y_train: training inputs and targets (NumPy arrays, one
      sample per row).
    x_val, y_val: validation inputs and targets, used only for reporting.
    epochs: number of passes over the training set.
    batch_size: number of rows per mini-batch.
    lr: learning rate forwarded to `update_params` for every mini-batch.

  Returns:
    Tuple (W1, b1, W2, b2, W3, b3) of trained parameters.
  """
  W1,b1,W2,b2,W3,b3 = initialize_parameters()
  n_sample = X_train.shape[0]

  for epoch in range(epochs):
    # Fresh random order each epoch; the same permutation is applied to
    # inputs and targets so rows stay aligned.
    perm = np.random.permutation(n_sample)
    X_shuf,y_shuf = X_train[perm],y_train[perm]

    for start in range(0,n_sample,batch_size):
      stop = start+batch_size
      X_batch,y_batch = X_shuf[start:stop],y_shuf[start:stop]

      Z1,A1,Z2,A2,Z3,A3 = forward(W1,b1,W2,b2,W3,b3,X_batch)

      dW1,db1,dW2,db2,dW3,db3 = backward(X_batch,y_batch,Z1,A1,Z2,A2,Z3,A3,W2,W3)

      # Use the caller-supplied learning rate rather than a fixed constant.
      W1,b1,W2,b2,W3,b3 = update_params(W1,b1,W2,b2,W3,b3,dW1,db1,dW2,db2,dW3,db3,lr=lr)

    acc = accuracy(x_val,y_val,W1,b1,W2,b2,W3,b3)
    print(f"{epoch+1}/{epochs}, Validation accuracy: {acc:.4f}")

  return W1,b1,W2,b2,W3,b3

In [None]:
# Sanity check: show the shape of the one-hot encoded test labels.
print(y_test.shape)

(10000, 10)


In [None]:
# Hyperparameters for this run.
epochs=10
batch_size=64
lr=0.1

# Seed NumPy's global RNG so that the np.random calls behind weight
# initialisation and per-epoch shuffling give the same results on every
# Restart & Run All.
seed=42
np.random.seed(seed)

W1,b1,W2,b2,W3,b3 = train(x_train,y_train,x_val,y_val,epochs,batch_size,lr)

# Final evaluation on the held-out test split.
test_acc= accuracy(x_test,y_test,W1,b1,W2,b2,W3,b3)
print(f'Test accuracy:{test_acc:.4f}')

1/10, Validation accuracy: 0.9239
2/10, Validation accuracy: 0.9559
3/10, Validation accuracy: 0.9595
4/10, Validation accuracy: 0.9684
5/10, Validation accuracy: 0.9696
