In [358]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.datasets import fetch_openml

In [359]:
mnist = fetch_openml('mnist_784', version=1)

  warn(


In [360]:
# Extract the features (pixel values) and labels from the dataset
X = mnist.data.values.astype('float32')
y = mnist.target.values.astype('int64')

<h3>Standardize the data:</h3>

In [361]:
# Define a small epsilon value to add to the standard deviation to avoid division by zero
eps = 1e-8

# Calculate the standard deviation of each feature and replace any zero values with eps
std_dev = np.std(X, axis=0)
std_dev[std_dev == 0] = eps

# Normalize the data by subtracting the mean and dividing by the standard deviation
X = (X - np.mean(X, axis=0)) / std_dev

<h3>Divide data into training and test:</h3>

In [362]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [363]:
from keras.utils.np_utils import to_categorical

y_train = to_categorical(y_train)

<h3>Dynamic Neural Network Implementation:</h3>

In [364]:
import numpy as np

class Layer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size)
        self.bias = np.random.randn(1, output_size)

    def feed_forward(self, input):
        self.input = input
          
        return np.dot(self.input, self.weights) + self.bias
    
    def backpropagation(self, output_gradient, learning_rate):
        # output_gradient = dE/dY 
        # dE/dW = (dE/dY)*X.T
        dw = np.dot(self.input.T, output_gradient)
        self.weights -= learning_rate*dw
        # dE/dB = dE/dY(output_gradient)
        self.bias -= learning_rate*output_gradient
        # dE/dX = W.T*(dE/dY)(output_gradient)
        return np.dot(output_gradient, self.weights.T)
    
class Activation(Layer):
    def __init__(self):
        pass

    def feed_forward(self, input):
        self.input = input
        return sigmoid(self.input)
    
    def backpropagation(self, output_gradient, learning_rate):
        return sigmoid_prime(self.input)*output_gradient

def sigmoid(linear_pred):
    return (1 / (1 + np.exp(-linear_pred)))

def sigmoid_prime(x):
    return sigmoid(x)*(1- sigmoid(x)) 

def mse(y_true, y_pred):
    squared_error = np.square(np.subtract(y_true, y_pred))

    return squared_error.mean()

def mse_prime(y_true, y_pred):
    return(2*(y_pred-y_true)/y_true.size)

class NN:
    def __init__(self, num_of_layers, size_of_layers, epochs = 1000, learning_rate = 0.1):
        self.num_of_layers = num_of_layers
        self.size_of_layers = size_of_layers
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.network = []

    def train(self, X, Y):
        self.network = []
        last_output = X.shape[1]
        for i in range(self.num_of_layers):
            # We initialize every layer with the input size which is equal to the output of the previous layer
            self.network.append(Layer(last_output, self.size_of_layers[i]))
            last_output = self.size_of_layers[i]
            self.network.append(Activation())

        for i in range(self.epochs):
            error = 0
            for x, y in zip(X, Y):
                ### Feed Forward ###
                output = x
                output = output.reshape((1, x.shape[0]))
                
                for layer in self.network:
                    output = layer.feed_forward(output)
                    if output.shape[0] != 1: 
                        output = np.transpose(output)

                # Calculate the error for each sample
                error += mse(y, output)

                ### Backpropagation ###
                output_gradient = mse_prime(y, output)
                for layer in reversed(self.network):
                    output_gradient = layer.backpropagation(output_gradient, self.learning_rate)

            error /= X.shape[0]

            print("{}/{} loss: {} ".format(i+1, self.epochs, error))

    def predict(self, X):
        output_result = []
        for x in X:
            output = x
            output = output.reshape((1, x.shape[0]))
            for layer in self.network:
                output = layer.feed_forward(output)
                if output.shape[0] != 1: 
                    output = np.transpose(output)
            
            output_result.append(output)

        return output_result

In [365]:
def accuracy(y_pred, y):
    return np.sum((y_pred == y))/len(y)

In [366]:
accuracies = {}

<h3>NN with only 2 layers => 1 hidden layer and 1 output layer</h3>

In [367]:
nn1 = NN(num_of_layers = 2, size_of_layers =[20, 10], epochs=50)

In [368]:
nn1.train(X_train, y_train)

  return (1 / (1 + np.exp(-linear_pred)))


1/50 loss: 0.07327014354635457 
2/50 loss: 0.0449111279610398 
3/50 loss: 0.0342251433359864 
4/50 loss: 0.02902979230249089 
5/50 loss: 0.026027144899373154 
6/50 loss: 0.023981029060812727 
7/50 loss: 0.02248376744531853 
8/50 loss: 0.021354027809687263 
9/50 loss: 0.02044767336678559 
10/50 loss: 0.01970549328035553 
11/50 loss: 0.01907641668105688 
12/50 loss: 0.018518856108391973 
13/50 loss: 0.018039045430829348 
14/50 loss: 0.017605674733962337 
15/50 loss: 0.017218574784650584 
16/50 loss: 0.016864691675553246 
17/50 loss: 0.016551501329355862 
18/50 loss: 0.016266715609448017 
19/50 loss: 0.01598969656479519 
20/50 loss: 0.01574587181083787 
21/50 loss: 0.015534482951133568 
22/50 loss: 0.015325277484819322 
23/50 loss: 0.015130464131561378 
24/50 loss: 0.01494469593967693 
25/50 loss: 0.014769446719160731 
26/50 loss: 0.014572559853752005 
27/50 loss: 0.014402282563260456 
28/50 loss: 0.014248388750981339 
29/50 loss: 0.0141036482776228 
30/50 loss: 0.013968360484645824 
31/5

In [369]:
y_pred1 = nn1.predict(X_test)

  return (1 / (1 + np.exp(-linear_pred)))


In [370]:
predicted_labels1 = np.ravel(np.argmax(y_pred1, axis=2))

In [371]:
acc1 = accuracy(predicted_labels1, y_test)
accuracies["NN with 2 layers"] = acc1
acc1

0.8965

<h3>NN with 3 layers=> 2 hidden layers</h3>
<h5># of neurons in first layer < # of neurons in second layer</h5>

In [372]:
nn2 = NN(num_of_layers = 3, size_of_layers =[20, 30, 10], epochs=50)

In [373]:
nn2.train(X_train, y_train)

  return (1 / (1 + np.exp(-linear_pred)))


1/50 loss: 0.07105491334390256 
2/50 loss: 0.04138520963462935 
3/50 loss: 0.030785621260748467 
4/50 loss: 0.02571909991615781 
5/50 loss: 0.022913192383723347 
6/50 loss: 0.021139536799921293 
7/50 loss: 0.019838436926637906 
8/50 loss: 0.018849824557446233 
9/50 loss: 0.018025017940869446 
10/50 loss: 0.017353957487981218 
11/50 loss: 0.016797284830442363 
12/50 loss: 0.016323427389784873 
13/50 loss: 0.015892885928703273 
14/50 loss: 0.015498518751479026 
15/50 loss: 0.015154556107462348 
16/50 loss: 0.014831187738860357 
17/50 loss: 0.014524998782623142 
18/50 loss: 0.014257289878065531 
19/50 loss: 0.013994553952003524 
20/50 loss: 0.013759504588459301 
21/50 loss: 0.013550606170606815 
22/50 loss: 0.013353094443944339 
23/50 loss: 0.01316042516549546 
24/50 loss: 0.01298872594264445 
25/50 loss: 0.012813461319130803 
26/50 loss: 0.012632635692051945 
27/50 loss: 0.01246434622091689 
28/50 loss: 0.01232331058688973 
29/50 loss: 0.012182501401092807 
30/50 loss: 0.0120430678217423

In [374]:
y_pred2 = nn2.predict(X_test)

  return (1 / (1 + np.exp(-linear_pred)))


In [375]:
predicted_labels2 = np.ravel(np.argmax(y_pred2, axis=2))

In [376]:
acc2 = accuracy(predicted_labels2, y_test)
accuracies["NN with 3 layers (#1st layer < #2nd layer)"] = acc2
acc2

0.8982142857142857

<h3>NN with 3 layers=> 2 hidden layers</h3>
<h5># of neurons in first layer > # of neurons in second layer</h5>

In [377]:
nn3 = NN(num_of_layers = 3, size_of_layers =[30, 20, 10], epochs=50)

In [378]:
nn3.train(X_train, y_train)

  return (1 / (1 + np.exp(-linear_pred)))


1/50 loss: 0.07639697446015814 
2/50 loss: 0.04730401247335965 
3/50 loss: 0.03242271740186554 
4/50 loss: 0.02694724371553464 
5/50 loss: 0.02375423153931089 
6/50 loss: 0.021541144481008172 
7/50 loss: 0.01993157443712929 
8/50 loss: 0.018670080536234087 
9/50 loss: 0.017656549840873385 
10/50 loss: 0.016837744427874026 
11/50 loss: 0.0161586735910201 
12/50 loss: 0.015587390436058425 
13/50 loss: 0.015092295503989456 
14/50 loss: 0.014651974178703788 
15/50 loss: 0.014266481663385622 
16/50 loss: 0.01391415854386508 
17/50 loss: 0.013593892944592791 
18/50 loss: 0.013293634114018214 
19/50 loss: 0.013017935414982409 
20/50 loss: 0.012775219325890097 
21/50 loss: 0.012546267964237237 
22/50 loss: 0.012323464250346356 
23/50 loss: 0.012109956525952622 
24/50 loss: 0.011912346429453755 
25/50 loss: 0.011731813155955846 
26/50 loss: 0.011560556735306222 
27/50 loss: 0.011398318918539504 
28/50 loss: 0.011249122100109235 
29/50 loss: 0.011108663411473756 
30/50 loss: 0.01097316199616569 

In [379]:
y_pred3 = nn3.predict(X_test)

  return (1 / (1 + np.exp(-linear_pred)))


In [380]:
predicted_labels3 = np.ravel(np.argmax(y_pred3, axis=2))

In [381]:
acc3 = accuracy(predicted_labels3, y_test)
accuracies["NN with 3 layers (#1st layer > #2nd layer)"] = acc3
acc3

0.8981428571428571

In [382]:
accuracies

{'NN with 2 layers': 0.8965,
 'NN with 3 layers (#1st layer < #2nd layer)': 0.8982142857142857,
 'NN with 3 layers (#1st layer > #2nd layer)': 0.8981428571428571}