<a href="https://colab.research.google.com/github/saithepaithewhyyy/saipraneeth_aimlbcs_210900/blob/main/digit_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Handwritten digit recognition using numpy

In this notebook, I build a neural network from the ground up using only NumPy; the model predicts which digit a handwritten image shows.

The model uses the standard MNIST dataset to train and predict

In [1]:
#importing the required libraries

import numpy as np
import matplotlib.pyplot as plt
import math
import tensorflow as tf

%matplotlib inline

In [2]:
#importing and storing the required dataset

from keras.datasets import mnist 
(X,Y), (X_test, Y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
#splitting the dataset into training and validation sets
rand=np.arange(60000)
np.random.shuffle(rand)
train_no=rand[:50000]

val_no=np.setdiff1d(rand,train_no)

X_train,X_val=X[train_no,:,:],X[val_no,:,:]
Y_train,Y_val=Y[train_no],Y[val_no]

In [4]:
#defining the sigmoid function and its derivative 

def sigmoid(x):
    return 1/(np.exp(-x)+1)    

def d_sigmoid(x):
    return (np.exp(-x))/((np.exp(-x)+1)**2)

#defining the softmax function and its derivative

def softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)
def d_softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)*(1-exp_element/np.sum(exp_element,axis=0))

In [5]:
#mat function takes matrix size and returns a matrix with randomized weights
def mat(x,y):
    layer=np.random.uniform(-1.,1.,size=(x,y))/np.sqrt(x*y)
    return layer.astype(np.float32)

np.random.seed(42)
l1=mat(28*28,128) # matrix of size 784*128 corresponding to the first layer
l2=mat(128,10) # matrix of size 128*10 corresponding to the output layer

In [6]:
# forward and backward propagation

def forwardbackward(x,y):
    targets = np.zeros((len(y),10), np.float32)
    targets[range(targets.shape[0]),y] = 1
    
    
    #input matrix is multiplied with the weights and passed through the sigmoid functions
    x_l1=x.dot(l1)
    x_sigmoid=sigmoid(x_l1)
    x_l2=x_sigmoid.dot(l2)
    out=softmax(x_l2)
   
    #calculating errors and updating l2
    error=2*(out-targets)/out.shape[0]*d_softmax(x_l2)
    update_l2=x_sigmoid.T@error
    
    #calculating errors and updating l1
    error=((l2).dot(error.T)).T*d_sigmoid(x_l1)
    update_l1=x.T@error

    return out,update_l1,update_l2 

In [7]:
#full-batch gradient descent would take too long, so train with mini-batch SGD: batch size 128, learning rate 0.001

epochs=10000
lr=0.001
batch=128

losses,accuracies,val_accuracies=[],[],[]

for i in range(epochs):
    #creating a random sample with size 128
    sample=np.random.randint(0,X_train.shape[0],size=(batch))
    x=X_train[sample].reshape((-1,28*28))
    y=Y_train[sample]
 

    out,update_l1,update_l2=forwardbackward(x,y)
    #calculating the accuracy and adding it to the accuracies array
    #category stores the most likely number 
    category=np.argmax(out,axis=1)
    accuracy=(category==y).mean()
    accuracies.append(accuracy)
    #calculating the loss using mean squared error and adding it to the losses array
    loss=((category-y)**2).mean()
    losses.append(loss.item())
    
    #updating the weights
    l1=l1-lr*update_l1
    l2=l2-lr*update_l2
    
    #printing out the data
    if(i%100==0):    
        X_val=X_val.reshape((-1,28*28))
        val_out=np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)),axis=1)
        val_acc=(val_out==Y_val).mean()
        val_accuracies.append(val_acc.item())
    if(i%500==0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')


For 0th epoch: train accuracy: 0.047 | validation accuracy:0.072
For 500th epoch: train accuracy: 0.672 | validation accuracy:0.655
For 1000th epoch: train accuracy: 0.781 | validation accuracy:0.744
For 1500th epoch: train accuracy: 0.734 | validation accuracy:0.769
For 2000th epoch: train accuracy: 0.844 | validation accuracy:0.784
For 2500th epoch: train accuracy: 0.789 | validation accuracy:0.794
For 3000th epoch: train accuracy: 0.781 | validation accuracy:0.801
For 3500th epoch: train accuracy: 0.836 | validation accuracy:0.807
For 4000th epoch: train accuracy: 0.797 | validation accuracy:0.812
For 4500th epoch: train accuracy: 0.828 | validation accuracy:0.815
For 5000th epoch: train accuracy: 0.828 | validation accuracy:0.817
For 5500th epoch: train accuracy: 0.812 | validation accuracy:0.820
For 6000th epoch: train accuracy: 0.844 | validation accuracy:0.822
For 6500th epoch: train accuracy: 0.781 | validation accuracy:0.822
For 7000th epoch: train accuracy: 0.820 | validation

In [8]:
accuracy*100

81.25

In [9]:
val_acc*100

82.43

# As can be seen, the accuracy is pretty low. This can be improved if more complex functions are used for activation and loss