# Defining Neural Network Architecture


## Architecture:
- 784 neurons (input layer) + 1 bias, ReLU activation
- 16 neurons (hidden layer 1) + 1 bias, ReLU activation
- 16 neurons (hidden layer 2) + 1 bias, ReLU activation
- 10 neurons (output layer), softmax activation



In [2]:
import numpy as np

In [None]:
class DNN:
    """Fully connected 784-16-16-10 classifier trained by mini-batch gradient descent.

    Layout: the input vector is passed through ReLU, followed by two hidden
    layers of 16 ReLU units and a 10-way softmax output. Every weight matrix
    carries its bias in column 0, which is why each activation vector gets a
    constant 1 prepended before it is multiplied.

    Attributes:
        learning_rate: Step size used by ``fit``.
        theta_matrix_1: Weights input -> hidden 1, shape (16, 785).
        theta_matrix_2: Weights hidden 1 -> hidden 2, shape (16, 17).
        theta_matrix_3: Weights hidden 2 -> output, shape (10, 17).
        delta_matrix_1, delta_matrix_2, delta_matrix_3: Gradient accumulators
            with the same shapes as the matching ``theta_matrix_*``; they are
            summed over a batch by ``backpropogation`` and cleared by ``fit``.
    """

    def __init__(self, learning_rate, seed=None):
        """Create the network with randomly initialised weights.

        Args:
            learning_rate: Step size for the gradient-descent update.
            seed: Optional seed for the weight initialisation, for
                reproducible runs. ``None`` draws fresh entropy.
        """
        self.learning_rate = learning_rate

        # All-zero weights make every hidden unit identical and, with ReLU,
        # leave the hidden layers without any gradient, so the weights are
        # drawn randomly instead.
        rng = np.random.default_rng(seed)
        self.theta_matrix_1 = self._init_weights(rng, 16, 784)
        self.theta_matrix_2 = self._init_weights(rng, 16, 16)
        self.theta_matrix_3 = self._init_weights(rng, 10, 16)

        # Activations of the most recent forward pass (bias row included);
        # kept on the instance so separate networks never share state.
        self._a1 = None
        self._a2 = None
        self._a3 = None
        self._a4 = None

        self._reset_gradients()

    @staticmethod
    def _init_weights(rng, fan_out, fan_in):
        """Return a (fan_out, fan_in + 1) He-initialised matrix with zero bias column."""
        weights = rng.standard_normal((fan_out, fan_in + 1)) * np.sqrt(2.0 / fan_in)
        weights[:, 0] = 0.0
        return weights

    @staticmethod
    def _with_bias(activations):
        """Prepend the constant bias unit (a row holding 1) to a column vector."""
        return np.vstack((np.ones((1, 1)), activations))

    def _reset_gradients(self):
        """Clear the per-batch gradient accumulators."""
        self.delta_matrix_3 = np.zeros_like(self.theta_matrix_3)
        self.delta_matrix_2 = np.zeros_like(self.theta_matrix_2)
        self.delta_matrix_1 = np.zeros_like(self.theta_matrix_1)

    def relu(self, inp: np.ndarray):
        """Return the element-wise ``max(inp, 0)`` with the shape of ``inp``."""
        return np.maximum(inp, 0)

    def softmax(self, inp: np.ndarray):
        """Return the softmax of ``inp`` taken over all of its elements.

        The maximum is subtracted first; this leaves the result unchanged
        mathematically but keeps ``exp`` from overflowing on large logits.
        """
        exponentials = np.exp(inp - np.max(inp))
        return exponentials / np.sum(exponentials)

    def forwardpropogation(self, x: np.ndarray):
        """Run one sample through the network and cache its activations.

        Args:
            x: A single sample with 784 values in total; it is flattened
                into a column vector.

        Returns:
            The (10, 1) column of class probabilities.
        """
        z1 = np.asarray(x, dtype=float).reshape(-1, 1)
        self._a1 = self._with_bias(self.relu(z1))

        z2 = np.matmul(self.theta_matrix_1, self._a1)
        self._a2 = self._with_bias(self.relu(z2))

        z3 = np.matmul(self.theta_matrix_2, self._a2)
        self._a3 = self._with_bias(self.relu(z3))

        z4 = np.matmul(self.theta_matrix_3, self._a3)
        self._a4 = self.softmax(z4)
        return self._a4

    def Cost(self, hx: np.ndarray, y: np.ndarray):
        """Return the cross-entropy ``-sum(y * log(hx))`` for one sample.

        Args:
            hx: Predicted probabilities as a column vector.
            y: Target vector with one entry per class.
        """
        y = np.reshape(y, (-1, 1))
        # Clip so that a probability that underflowed to 0 yields a large
        # finite cost rather than inf/nan.
        return -np.sum(y * np.log(np.clip(hx, 1e-12, 1.0)))

    def backpropogation(self, y: np.ndarray, hx: np.ndarray):
        """Add the gradient of the current sample to the accumulators.

        Uses the activations cached by the latest ``forwardpropogation``
        call, so it must be invoked right after the forward pass that
        produced ``hx``.

        Args:
            y: Target vector; reshaped to the shape of ``hx``.
            hx: Output of the matching forward pass.

        Raises:
            RuntimeError: If no forward pass has been run yet.
        """
        if self._a3 is None:
            raise RuntimeError("forwardpropogation must be called before backpropogation")

        y = np.reshape(y, hx.shape)
        # Softmax combined with cross-entropy gives this simple output error.
        delta_vector_4 = hx - y
        # Hidden layers are ReLU, whose derivative is 1 where the unit is
        # active and 0 elsewhere. Bias columns are skipped because no error
        # flows back through the constant bias unit.
        delta_vector_3 = np.matmul(self.theta_matrix_3[:, 1:].T, delta_vector_4) * (self._a3[1:] > 0)
        delta_vector_2 = np.matmul(self.theta_matrix_2[:, 1:].T, delta_vector_3) * (self._a2[1:] > 0)

        self.delta_matrix_3 += np.matmul(delta_vector_4, self._a3.T)
        self.delta_matrix_2 += np.matmul(delta_vector_3, self._a2.T)
        self.delta_matrix_1 += np.matmul(delta_vector_2, self._a1.T)

    def fit(self, x_train: np.ndarray, y_train: np.ndarray, epochs=10, batch_size=100):
        """Train with mini-batch gradient descent.

        Args:
            x_train: Sequence of samples, each accepted by
                ``forwardpropogation``.
            y_train: Sequence of target vectors, one per sample, each with
                10 entries.
            epochs: Number of passes over the training data.
            batch_size: Number of samples per weight update. The final batch
                may be smaller; no sample is dropped.

        Raises:
            ValueError: If the inputs differ in length or ``batch_size`` is
                smaller than 1.
        """
        if len(x_train) != len(y_train):
            raise ValueError("x_train and y_train must contain the same number of samples")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        samples = list(zip(x_train, y_train))
        batches = [
            samples[start:start + batch_size]
            for start in range(0, len(samples), batch_size)
        ]

        for _ in range(epochs):
            for batch in batches:
                for x, y in batch:
                    hx = self.forwardpropogation(x)
                    self.backpropogation(y, hx)

                # Step against the batch-averaged gradient to lower the cost.
                step = self.learning_rate / len(batch)
                self.theta_matrix_3 = self.theta_matrix_3 - step * self.delta_matrix_3
                self.theta_matrix_2 = self.theta_matrix_2 - step * self.delta_matrix_2
                self.theta_matrix_1 = self.theta_matrix_1 - step * self.delta_matrix_1

                self._reset_gradients()

    def predict(self, x_test: np.ndarray):
        """Return the (10, 1) class-probability column for a single sample."""
        return self.forwardpropogation(x_test)