<a href="https://colab.research.google.com/github/nahid0335/ANN-from-scratch-using-numpy/blob/main/ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [1]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time
import copy

# **Load Dataset**

In [2]:
# Download the 'mnist_784' dataset (version 1) from OpenML as a (data, target) pair.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
# Rescale the features by 1/255 and store them as float32
# (presumably raw pixel intensities in 0..255, giving values in [0, 1] -- confirm).
x = (x/255).astype('float32')
# One-hot encode the class labels so each target is a vector with one entry per class.
y = to_categorical(y)

In [3]:
# Convert the feature container to a plain NumPy array
# (x is presumably a pandas DataFrame at this point -- it must expose .to_numpy()).
x = x.to_numpy()
# Hold out 15% of the samples for validation; random_state=42 makes the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

In [4]:
class DeepNeuralNetwork():
    """Small fully connected network trained one sample at a time.

    The architecture is fixed at four layers: input -> hidden 1 (sigmoid)
    -> hidden 2 (sigmoid) -> output (softmax). No bias terms are used.

    Args:
        sizes: Sequence whose first four entries are the node counts of the
            input, first hidden, second hidden and output layers.
        epochs: Number of full passes over the training data in ``train``.
        l_rate: Step size applied to every weight update.
    """

    def __init__(self, sizes, epochs=10, l_rate=0.001):
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate
        # Weights ('W1'..'W3') live in this dictionary; forward_pass also
        # caches the per-layer values ('A0'..'A3', 'Z1'..'Z3') in it.
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        """Return the logistic sigmoid of ``x`` or, optionally, its derivative.

        Both branches are evaluated through ``np.logaddexp`` so that large
        magnitude inputs neither overflow nor produce ``nan``:

        * ``sigmoid(x)  = exp(-log(1 + exp(-x)))``
        * ``sigmoid'(x) = sigmoid(x) * sigmoid(-x)``

        Args:
            x: Scalar or array of pre-activations.
            derivative: When true, return the derivative evaluated at ``x``.

        Returns:
            Value(s) with the same shape as ``x``.
        """
        log_inv_sigmoid = np.logaddexp(0, -x)  # log(1 + exp(-x))
        if derivative:
            # 1 - sigmoid(x) == sigmoid(-x), whose negative log is
            # logaddexp(0, x); combining the logs keeps precision at both tails.
            return np.exp(-log_inv_sigmoid - np.logaddexp(0, x))
        return np.exp(-log_inv_sigmoid)

    def softmax(self, x, derivative=False):
        """Return the softmax of ``x`` along axis 0, or its element-wise derivative.

        The maximum of ``x`` is subtracted before exponentiating to avoid
        overflow. With ``derivative=True`` the result is ``p * (1 - p)``,
        i.e. only the diagonal of the softmax Jacobian.
        """
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            return probs * (1 - probs)
        return probs

    def initialization(self):
        """Create the three weight matrices with scaled Gaussian values.

        Each matrix has shape ``(nodes_out, nodes_in)`` and is scaled by
        ``sqrt(1 / nodes_out)``.

        Returns:
            Dict with keys ``'W1'``, ``'W2'`` and ``'W3'``.
        """
        # number of nodes in each layer
        input_layer = self.sizes[0]
        hidden_1 = self.sizes[1]
        hidden_2 = self.sizes[2]
        output_layer = self.sizes[3]

        # The draw order (W1, W2, W3) is kept so seeded runs stay reproducible.
        params = {
            'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3': np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer),
        }
        return params

    def forward_pass(self, x_train):
        """Propagate one sample through the network.

        Intermediate pre-activations (``'Z*'``) and activations (``'A*'``)
        are stored in ``self.params`` for use by ``backward_pass``.

        Args:
            x_train: Input vector for a single sample.

        Returns:
            The softmax output of the last layer.
        """
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to hidden layer 1
        params['Z1'] = np.dot(params['W1'], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])

        # hidden layer 1 to hidden layer 2
        params['Z2'] = np.dot(params['W2'], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])

        # hidden layer 2 to output layer
        params['Z3'] = np.dot(params['W3'], params['A2'])
        params['A3'] = self.softmax(params['Z3'])

        return params['A3']

    def backward_pass(self, y_train, output):
        """Compute weight gradients for the sample last seen by ``forward_pass``.

        The output error is ``2 * (output - y_train) / n`` multiplied
        element-wise by the softmax derivative, then propagated backwards
        through the two sigmoid layers.

        Args:
            y_train: Target vector for the sample.
            output: Network output returned by ``forward_pass`` for that sample.

        Returns:
            Dict mapping ``'W1'``, ``'W2'``, ``'W3'`` to gradients with the
            same shapes as the corresponding weight matrices.
        """
        params = self.params
        change_w = {}

        # Calculate W3 update
        error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
        change_w['W3'] = np.outer(error, params['A2'])

        # Calculate W2 update
        error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
        change_w['W2'] = np.outer(error, params['A1'])

        # Calculate W1 update
        error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
        change_w['W1'] = np.outer(error, params['A0'])

        return change_w

    def update_network_parameters(self, changes_to_w):
        """Apply one gradient-descent step: ``W -= l_rate * gradient``."""
        for key, value in changes_to_w.items():
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        """Return the fraction of samples whose predicted class matches the target.

        The predicted and true classes are the arg-max of the network output
        and of the target vector, respectively.
        """
        predictions = [
            np.argmax(self.forward_pass(x)) == np.argmax(y)
            for x, y in zip(x_val, y_val)
        ]
        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        """Train for ``self.epochs`` epochs, printing validation accuracy after each.

        Weights are updated after every individual sample, in the order the
        samples are given. The reported time is cumulative since the start
        of training.
        """
        start_time = time.time()
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))

In [5]:
# Build a 784-128-64-10 network: 784 inputs, hidden layers of 128 and 64 nodes, 10 outputs;
# train for 150 epochs with a learning rate of 0.001.
dnn = DeepNeuralNetwork(sizes=[784, 128, 64, 10],epochs=150,l_rate=0.001)

In [6]:
# Run training; validation accuracy and cumulative elapsed time are printed after each epoch.
dnn.train(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 58.03s, Accuracy: 24.50%
Epoch: 2, Time Spent: 115.63s, Accuracy: 34.27%
Epoch: 3, Time Spent: 172.76s, Accuracy: 37.06%
Epoch: 4, Time Spent: 229.60s, Accuracy: 38.14%
Epoch: 5, Time Spent: 285.85s, Accuracy: 39.99%
Epoch: 6, Time Spent: 342.12s, Accuracy: 42.93%
Epoch: 7, Time Spent: 400.10s, Accuracy: 46.42%
Epoch: 8, Time Spent: 457.76s, Accuracy: 49.64%
Epoch: 9, Time Spent: 514.33s, Accuracy: 52.32%
Epoch: 10, Time Spent: 570.65s, Accuracy: 55.19%
Epoch: 11, Time Spent: 627.39s, Accuracy: 57.86%
Epoch: 12, Time Spent: 685.34s, Accuracy: 60.18%
Epoch: 13, Time Spent: 741.76s, Accuracy: 62.20%
Epoch: 14, Time Spent: 798.56s, Accuracy: 63.75%
Epoch: 15, Time Spent: 855.14s, Accuracy: 64.89%
Epoch: 16, Time Spent: 911.16s, Accuracy: 65.99%
Epoch: 17, Time Spent: 966.85s, Accuracy: 67.20%
Epoch: 18, Time Spent: 1023.63s, Accuracy: 68.09%
Epoch: 19, Time Spent: 1079.44s, Accuracy: 68.82%
Epoch: 20, Time Spent: 1136.23s, Accuracy: 69.31%
Epoch: 21, Time Spent: 1193