# Importing Libraries

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pickle

# Load preprocessed data

In [2]:
X = np.load('data/preprocessed/features.npy')
Y = np.load('data/preprocessed/labels.npy')

# Split the data into training and testing set

In [3]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=42)

# Normalize the data

In [4]:
x_train = x_train/x_train.max()
x_test = x_test/x_test.max()

# One hot encoding the labels

In [5]:
y_train

array([0., 6., 9., ..., 9., 9., 6.])

In [6]:
def one_hot_encode(arr:np.array):
    a = arr.max()
    one_hot = np.zeros((len(arr), a))
    final = []
    for i,j in zip(arr, one_hot):
        one_hot_row = np.insert(j, i, 1)
        final.append(one_hot_row)
    one_hot = np.array(final)
    return(one_hot)


In [7]:
y_train = y_train.astype(int)
y_test = y_test.astype(int)

In [8]:
y_train = one_hot_encode(y_train)
y_test = one_hot_encode(y_test)

# Building Neural Network

In [23]:
class DNN:
    def __init__(self, Learing_rate:float):
        ''' 
        Defining architeture of neural network

        784 neurons (input layer)
        392 neurons (tanh activation)
        392 neurons (tanh activation)
        186 neurons (tanh activation)
        10 neurons (output layer) (sigmoid activation)
        '''
        self.LearningRate = Learing_rate

        self.theta_matrix_1 = np.zeros((392, 785))
        self.theta_matrix_2 = np.zeros((392, 393))
        self.theta_matrix_3 = np.zeros((186, 393))
        self.theta_matrix_4 = np.zeros((10, 187))

        self.delta_matrix_4 = np.zeros((10, 187))
        self.delta_matrix_3 = np.zeros((186, 393))
        self.delta_matrix_2 = np.zeros((392, 393))
        self.delta_matrix_1 = np.zeros((392, 785))
    
    def g(self, inp):
        return(1/(1+np.exp(-inp)))
    
    def tanh(self, inp):
        return((np.exp(inp) - np.exp(-inp))/(np.exp(inp) + np.exp(-inp)))
    
    def relu(self, inp):
        return np.maximum(inp, 0)
    
    def softmax(self, inp):
        l = []
        for i in inp:
            l.append(np.exp(i)/(sum(np.exp(inp)) - np.exp(i)))
        a = np.array(l)
        a.shape = inp.shape
        return(a)

    def forwardpropogation(self, X:np.array):
        global a1,a2,a3,a4,a5
        a1 = X.T
        a1 = np.insert(a1, 0, 1)

        z2 = np.matmul(self.theta_matrix_1, a1)
        a2 = self.relu(z2)
        a2 = np.insert(a2, 0, 1)

        z3 = np.matmul(self.theta_matrix_2, a2)
        a3 = self.relu(z3)
        a3 = np.insert(a3, 0, 1)

        z4 = np.matmul(self.theta_matrix_3, a3)
        a4 = self.relu(z4)
        a4 = np.insert(a4, 0, 1)

        z5 = np.matmul(self.theta_matrix_4, a4)
        a5 = self.softmax(z5)
        hx = a5
        return(hx)
    
    def backpropogation(self, output_layer:np.array, Y:np.array):
        delta_5 = output_layer - Y.T
        delta_4 = np.matmul(self.theta_matrix_4.T, delta_5)
        delta_3 = np.matmul(self.theta_matrix_3.T, delta_4[1:])
        delta_2 = np.matmul(self.theta_matrix_2.T, delta_3[1:])

        self.delta_matrix_4 += self.delta_matrix(delta_5, a4)
        self.delta_matrix_3 += self.delta_matrix(delta_4[1:], a3)
        self.delta_matrix_2 += self.delta_matrix(delta_3[1:], a2)
        self.delta_matrix_1 += self.delta_matrix(delta_2[1:], a1)

    def delta_matrix(self, delta_vector:np.array, activation_layer:np.array):
        final_delta_matrix = []
        for i in delta_vector:
            a = i*activation_layer
            final_delta_matrix.append(a.T)
        return(np.array(final_delta_matrix))

    def GradientDescent(self, theta_matrix:np.array, delta_matrix:np.array, m:int, lamb):
        new_delta = (1/m)*delta_matrix

        D_bias = new_delta[:, 0]
        D_weights = new_delta[:, 1:] + theta_matrix[:, 1:]*lamb

        new_bias_theta = theta_matrix[:, 0] - self.LearningRate*D_bias
        new_weights_theta = theta_matrix[:, 1:] - self.LearningRate*D_weights

        new_bias_theta.shape = (new_bias_theta.shape[0], 1)

        new_theta_matrix = np.concatenate((new_bias_theta, new_weights_theta), axis=1)
        return(new_theta_matrix)
    
    def fit(self, x_train:np.array, y_train:np.array, batch_size:int, epochs = 10):
        m = len(x_train)
        training_data = []
        for i,j in zip(x_train,y_train):
            training_data.append((i,j))
        
        b = int(m/batch_size)
        final_train_data = []

        for i in range(b):
            final_train_data.append(training_data[:batch_size])
            training_data = training_data[batch_size:]
        
        for i in range(epochs):
            print(f'epochs : {i}')
            for data in final_train_data:
                x_train = np.array([j[0] for j in data])
                y_train = np.array([j[1] for j in data])
                for k,h in zip(x_train, y_train):
                    hx = self.forwardpropogation(k)
                    self.backpropogation(hx, h)
                t1 = self.GradientDescent(self.theta_matrix_1, self.delta_matrix_1, m, 0.001)
                t2 = self.GradientDescent(self.theta_matrix_2, self.delta_matrix_2, m, 0.001)
                t3 = self.GradientDescent(self.theta_matrix_3, self.delta_matrix_3, m, 0.001)
                t4 = self.GradientDescent(self.theta_matrix_4, self.delta_matrix_4, m, 0.001)

                self.theta_matrix_1 = t1
                self.theta_matrix_2 = t2
                self.theta_matrix_3 = t3
                self.theta_matrix_4 = t4

    
    def predict(self, x_test:np.array):
        hx = self.forwardpropogation(x_test)
        return(hx)
    
    def Save(self, path:str):
        l = [self.theta_matrix_1, self.theta_matrix_2, self.theta_matrix_3, self.theta_matrix_4]
        with open(path, 'wb') as f:
            pickle.dump(l, f)
    
    def Load(self, path):
        with open(path, 'rb') as file:
            theta_vectors = pickle.load(file)
            self.theta_matrix_1 = theta_vectors[0]
            self.theta_matrix_2 = theta_vectors[1]
            self.theta_matrix_3 = theta_vectors[2]
            self.theta_matrix_4 = theta_vectors[3]

# Training

In [24]:
model = DNN(0.01)

In [44]:
model.fit(x_train, y_train, 100, epochs=10)

epochs : 0
epochs : 1
epochs : 2
epochs : 3
epochs : 4
epochs : 5
epochs : 6
epochs : 7
epochs : 8
epochs : 9


In [45]:
model.Save('/mnt/c/MachineLearning/DNNScratch/SavedModel/model.pkl')

# Loading model, no need to train every time i open this file : )

In [25]:
model.Load('SavedModel/model.pkl')

In [32]:
i = x_test[10]

In [33]:
predictions = model.predict(i)

In [34]:
predictions

array([0.10971629, 0.12042861, 0.10998997, 0.11133679, 0.10952186,
       0.10849868, 0.10991029, 0.11210873, 0.10971694, 0.10997691])