# Defining Neural Network Architecture


## Architecture :
- 784 neurons (input layer) + 1 bias and relu activation
- 16 neurons + 1 bias and relu activation
- 16 neurons + 1 bias and relu activation
- 10 neurons (output layer) and softmax activation



In [72]:
import numpy as np
import pickle

In [73]:
class DNN:
    def __init__(self, learning_rate, input_layer, hidden_layer_1, hidden_layer_2, output_layer):
        self.learning_rate = learning_rate
        self.a1 = np.zeros((input_layer, 1))
        self.a2 = np.zeros((hidden_layer_1, 1))
        self.a3 = np.zeros((hidden_layer_2, 1))
        self.a4 = np.zeros((output_layer, 1))

        self.weight_1 = np.random.randn(hidden_layer_1, input_layer)
        self.weight_2 = np.random.randn(hidden_layer_2, hidden_layer_1)
        self.weight_3 = np.random.randn(output_layer, hidden_layer_2)

        self.bias_1 = np.random.randn(hidden_layer_1, 1)
        self.bias_2 = np.random.randn(hidden_layer_2, 1)
        self.bias_3 = np.random.randn(output_layer, 1)
    
    def relu(self, inp):
        a = []
        for i in inp:
            b = max(i, 0)
            if(b == 0):
                b = np.array([0])
            a.append(b)
        return(np.array(a))
    
    def softmax(self, inp):
        denominator = np.sum(np.exp(inp))
        a = []
        for i in inp:
            numerator = np.exp(i)
            probability = numerator/denominator
            a.append(probability)
        return(np.array(a))
    
    def forwardpropogation(self, x:np.array):
        self.a1 = self.relu(np.reshape(x, (x.shape[0], 1)))

        z2 = np.matmul(self.weight_1, self.a1) + self.bias_1
        self.a2 = self.relu(z2)

        z3 = np.matmul(self.weight_2, self.a2) + self.bias_2
        self.a3 = self.relu(z3)

        z4 = np.matmul(self.weight_3, self.a3) + self.bias_3
        self.a4 = self.softmax(z4)
        return(self.a4)
    
    def fit(self, x_train:np.array, y_train:np.array, batch_size=100, epochs=10):
        training_data = []
        for i,j in zip(x_train, y_train):
            training_data.append((i,j))
        
        train_data = []
        b = int(len(x_train)/batch_size)
        for i in range(batch_size):
            train_data.append(training_data[:b])
            training_data = training_data[b:]
        
        for i in range(epochs):
            print(f'epochs : {i+1}')
            for data in train_data:
                m = len(data)
                sum_bias = [np.zeros(self.bias_1.shape), np.zeros(self.bias_2.shape), np.zeros(self.bias_3.shape)]
                sum_weights = [np.zeros(self.weight_1.shape), np.zeros(self.weight_2.shape), np.zeros(self.weight_3.shape)]
                for item in data:
                    x = item[0]
                    y = item[1]

                    hx = self.forwardpropogation(x)
                    delta_4 = hx - np.reshape(y, (y.shape[0], 1))
                    delta_3 = np.matmul(self.weight_3.T, delta_4)
                    delta_2 = np.matmul(self.weight_2.T, delta_3)

                    sum_bias[0] += delta_2
                    sum_bias[1] += delta_3
                    sum_bias[2] += delta_4

                    delta_matrix_3 = self.delta_matrix(self.a3, delta_4)
                    delta_matrix_2 = self.delta_matrix(self.a2, delta_3)
                    delta_matrix_1 = self.delta_matrix(self.a1, delta_2)

                    sum_weights[0] += delta_matrix_1
                    sum_weights[1] += delta_matrix_2
                    sum_weights[2] += delta_matrix_3

                D_w_1 = (1/m)*sum_weights[0]
                D_w_2 = (1/m)*sum_weights[1]
                D_w_3 = (1/m)*sum_weights[2]

                D_b_1 = (1/m)*sum_bias[0]
                D_b_2 = (1/m)*sum_bias[1]
                D_b_3 = (1/m)*sum_bias[2]

                self.weight_1 = self.weight_1 - self.learning_rate*D_w_1
                self.weight_2 = self.weight_2 - self.learning_rate*D_w_2
                self.weight_3 = self.weight_3 - self.learning_rate*D_w_3

                self.bias_1 = self.bias_1 - self.learning_rate*D_b_1
                self.bias_2 = self.bias_2 - self.learning_rate*D_b_2
                self.bias_3 = self.bias_3 - self.learning_rate*D_b_3
    
    def delta_matrix(self, activation_layer, delta_vector):
        a = activation_layer.flatten()
        d = delta_vector.flatten()
        l = []
        for i in d:
            r = a*i
            l.append(r)
        return(np.array(l))

    def save(self, path=''):
        weights_list = [self.weight_1, self.weight_2, self.weight_3]
        bias_list = [self.bias_1, self.bias_2, self.bias_3]
        model_list = [weights_list, bias_list]
        pickle.dump(model_list, open(f'{path}/model.pkl'))
    
    def load(self, path=''):
        with open(path, 'rb') as file:
            model_list = pickle.load(file)

            weights_list = model_list[0]
            bias_list = model_list[1]

            self.weight_1 = weights_list[0]
            self.weight_2 = weights_list[1]
            self.weight_3 = weights_list[2]

            self.bias_1 = bias_list[0]
            self.bias_2 = bias_list[1]
            self.bias_3 = bias_list[2]
    
    def predict(self, x_test:np.array):
        hx = self.forwardpropogation(x_test)
        return(hx.flatten())
    
    def accuracy(self, hx:np.array, y:np.array):
        l_hx = []
        for i in hx:
            l_hx.append(np.argmax(i))

        l_y = []
        for i in y:
            l_y.append(np.argmax(i))
        
        correct_count = 0
        for i,j in zip(l_y,l_hx):
            if(i == j):
                correct_count += 1
        
        num_samples = len(l_hx)
        accuracy = correct_count/num_samples
        return(accuracy)

In [74]:
model = DNN(0.01, 784, 16, 16, 10)

# Load Data

In [75]:
x_train = np.load('data/preprocessed/x_train.npy')
x_test = np.load('data/preprocessed/x_test.npy')

y_train = np.load('data/preprocessed/y_train.npy')
y_test = np.load('data/preprocessed/y_test.npy')

# Start Training

In [76]:
model.fit(x_train, y_train, epochs=20)

epochs : 1
epochs : 2
epochs : 3
epochs : 4
epochs : 5
epochs : 6
epochs : 7
epochs : 8
epochs : 9
epochs : 10
epochs : 11
epochs : 12
epochs : 13
epochs : 14
epochs : 15
epochs : 16
epochs : 17
epochs : 18
epochs : 19
epochs : 20


# Test the Model

In [77]:
x = x_test[0]

In [78]:
prediction = model.predict(x)

In [79]:
prediction

array([1.62112991e-15, 8.78515365e-13, 1.97993212e-05, 9.95678563e-01,
       3.02699723e-06, 3.90281970e-03, 2.45118646e-14, 2.48397329e-31,
       1.50954459e-04, 2.44836784e-04])

In [80]:
np.argmax(prediction)

3

In [81]:
y_test[0]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

In [86]:
prediction = model.predict(x_test[1])
np.argmax(prediction)

1

In [87]:
y_test[1]

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])

In [88]:
hx = []
for x in x_test:
    p = model.predict(x)
    hx.append(p)
hx

[array([1.62112991e-15, 8.78515365e-13, 1.97993212e-05, 9.95678563e-01,
        3.02699723e-06, 3.90281970e-03, 2.45118646e-14, 2.48397329e-31,
        1.50954459e-04, 2.44836784e-04]),
 array([2.30423064e-11, 9.98189279e-01, 1.48118997e-18, 6.46340739e-04,
        4.77097310e-05, 5.50114488e-04, 7.58262161e-11, 2.92449633e-17,
        5.45993944e-05, 5.11956839e-04]),
 array([2.93761791e-07, 9.99981309e-01, 8.46742812e-21, 3.94253085e-07,
        1.77320122e-23, 1.17485908e-06, 1.50790151e-05, 2.61053614e-07,
        1.48773662e-06, 5.37154244e-11]),
 array([1.51257821e-03, 3.77709383e-36, 2.16598127e-04, 9.93354931e-08,
        2.50605874e-01, 1.68117158e-03, 5.09661855e-03, 1.79984484e-03,
        1.31927547e-04, 7.38955288e-01]),
 array([1.18013718e-002, 1.28651704e-010, 2.88384707e-101, 5.59525454e-014,
        2.86312756e-102, 3.99195360e-001, 5.88999111e-001, 1.54493550e-033,
        4.15715863e-006, 1.75766874e-062]),
 array([4.61147413e-06, 9.98867229e-01, 3.16292913e-20, 4.50

In [89]:
c = 0
for i,j in zip(hx, y_test):
    a = np.argmax(i)
    b = np.argmax(j)
    if(a == b):
        c += 1

acc = c/len(y_test)

In [90]:
acc = model.accuracy(hx, y_test)

In [91]:
acc

0.7586428571428572