[View in Colaboratory](https://colab.research.google.com/github/manharsharma007/Neural-Network-For-Digit-Recognition-Using-Numpy/blob/master/NN_revised.ipynb)

In [0]:
#Libraries

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

In [0]:
##Neural network class

class DigitClassifier():
    """Feed-forward network with one sigmoid hidden layer and a sigmoid output layer.

    The network has no bias terms and is trained by full-batch gradient
    descent on the sum-of-squares cost ``0.5 * sum((target - output) ** 2)``.

    Attributes set by ``__init__``:
        inputLayerSize, hiddenLayerSize, outputLayerSize: layer widths.
        learning_rate: gradient-descent step size.
        W1: input-to-hidden weights, shape (inputLayerSize, hiddenLayerSize).
        W2: hidden-to-output weights, shape (hiddenLayerSize, outputLayerSize).

    Attributes cached by ``forward``: ``y``/``yHat`` (hidden pre-activation and
    activation) and ``z``/``zHat`` (output pre-activation and activation).
    ``computeGradients`` additionally stores the cost in ``J``.
    """

    def __init__(self, inputLayerSize=64, hiddenLayerSize=20,
                 outputLayerSize=10, learning_rate=0.01, seed=None):
        """Store the hyperparameters and draw standard-normal initial weights.

        Args:
            inputLayerSize: number of input features (default 64).
            hiddenLayerSize: number of hidden units (default 20).
            outputLayerSize: number of output units (default 10).
            learning_rate: gradient-descent step size (default 0.01).
            seed: optional seed for reproducible weights. When ``None`` the
                weights come from NumPy's global generator, so a caller's
                ``np.random.seed(...)`` keeps working as before.
        """
        # Hyperparameters
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.learning_rate = learning_rate

        # Weights (parameters)
        if seed is None:
            randn = np.random.randn
        else:
            randn = np.random.RandomState(seed).randn
        self.W1 = randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, input_data):
        """Propagate ``input_data`` through the network.

        Args:
            input_data: one sample (1-D, length inputLayerSize) or a batch
                (2-D, one sample per row).

        Returns:
            The output activations; also cached in ``self.zHat`` together
            with the intermediate values ``self.y``, ``self.yHat``, ``self.z``.
        """
        self.y = np.dot(input_data, self.W1)
        self.yHat = self.sigmoid(self.y)

        self.z = np.dot(self.yHat, self.W2)
        self.zHat = self.sigmoid(self.z)

        return self.zHat

    def cost(self, input_data, output_data):
        """Return the sum-of-squares cost using the weights stored on the instance.

        Runs a forward pass (refreshing the cached activations) and sums the
        squared error over the first axis: for a batch this yields one cost
        per output unit, for a single 1-D sample a scalar.
        """
        self.zHat = self.forward(input_data)

        residual = np.asarray(output_data) - self.zHat
        return 0.5 * np.sum(residual ** 2, axis=0)

    def averageCost(self, cost):
        """Return the mean of ``cost`` along its first axis.

        Args:
            cost: a scalar or a sequence/array of cost values.

        Raises:
            ZeroDivisionError: if ``cost`` is empty.
        """
        costs = np.atleast_1d(np.asarray(cost))
        if len(costs) == 0:
            raise ZeroDivisionError("cannot average an empty cost sequence")
        return np.mean(costs, axis=0)

    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow."""
        z = np.asarray(z, dtype=float)
        # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))). logaddexp computes the
        # log term stably, so large negative z no longer overflows in exp().
        return np.exp(-np.logaddexp(0.0, -z))

    def sigmoidPrime(self, z):
        """Derivative of the sigmoid function."""
        activation = self.sigmoid(z)
        return activation * (1 - activation)

    def computeGradients(self, input_data, output_data):
        """Back-propagate the sum-of-squares cost.

        Args:
            input_data: a batch (2-D) or a single sample (1-D, treated as a
                batch of one).
            output_data: targets matching ``input_data``.

        Returns:
            ``(dJdW2, dJdW1)``: gradients of the total cost with respect to
            ``W2`` and ``W1``. The cost itself is stored in ``self.J``.
        """
        # Promote a single sample to a one-row batch so the transposes below
        # produce weight-shaped gradients.
        input_data = np.atleast_2d(input_data)
        output_data = np.atleast_2d(output_data)

        # cost() performs the forward pass, so every cached activation used
        # below is already up to date; no separate forward() call is needed.
        self.J = self.cost(input_data, output_data)

        delta3 = (self.zHat - output_data) * self.sigmoidPrime(self.z)
        dJdW2 = np.dot(self.yHat.T, delta3)

        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.y)
        dJdW1 = np.dot(input_data.T, delta2)

        return dJdW2, dJdW1

    def train(self, input_examples, output_examples, max_iterations=1000000,
              rounds=None):
        """Fit the weights with full-batch gradient descent.

        Every step uses the whole of ``input_examples``. A round keeps
        stepping while the average cost is lower than the best seen so far
        and ends at the first step that is not (or after ``max_iterations``
        steps). The next round resumes from the current weights, so
        ``rounds`` bounds how many non-improving steps are tolerated in total.
        The average cost is printed at the end of each round.

        Note: the cost is measured before each weight update, so the printed
        value describes the weights as they were before the last step.

        Args:
            input_examples: training inputs, one sample per row.
            output_examples: matching target rows.
            max_iterations: cap on steps per round (default 1000000).
            rounds: number of rounds; defaults to the number of samples.
        """
        input_examples = np.atleast_2d(input_examples)
        output_examples = np.atleast_2d(output_examples)
        if rounds is None:
            rounds = len(input_examples)

        best_cost = None  # lowest average cost seen so far; None until the first step
        for _ in range(rounds):
            current_cost = None
            for _ in range(max_iterations):
                dJdW2, dJdW1 = self.computeGradients(input_examples,
                                                     output_examples)

                self.W2 = self.W2 - (self.learning_rate * dJdW2)
                self.W1 = self.W1 - (self.learning_rate * dJdW1)

                current_cost = self.averageCost(self.J)
                if best_cost is None or best_cost > current_cost:
                    best_cost = current_cost
                else:
                    break

            # Nothing to report when max_iterations allowed no step at all.
            if current_cost is not None:
                print(current_cost)

In [0]:
# Load scikit-learn's bundled handwritten-digits dataset; the cells below read
# its `data` (feature rows) and `target` (digit labels) attributes.
digits = load_digits()

In [0]:
# One-hot encode the labels: row k is all zeros except for a 1 in the column
# given by digits.target[k].
output_data = np.zeros((len(digits.data), 10), dtype=int)
output_data[np.arange(len(digits.target)), digits.target] = 1

In [0]:
# Create the network; its weights are drawn at random in __init__, so it is
# untrained until `train` is called.
digit_classifier = DigitClassifier()

In [0]:
# Train on the first 1000 samples (commented out; without it the prediction below comes from random, untrained weights)
#digit_classifier.train(digits.data[0:1000], output_data[0:1000])

In [56]:
# Predicted digit for sample 7: the index of the largest output activation.
np.argmax(digit_classifier.forward(digits.data[7]))

7

In [0]:
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)