In [2]:
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt


## Load MNIST Dataset:


In [3]:
# Load the MNIST dataset using fetch_openml
mnist = fetch_openml('mnist_784', version=1)

# Extract the features (pixel values) and labels from the dataset
X = mnist.data.values.astype('float32')
y = mnist.target.values.astype('int64')

## Standardize Dataset:

In [45]:
# Define a small epsilon value to add to the standard deviation to avoid division by zero
eps = 1e-8

# Calculate the standard deviation of each feature and replace any zero values with eps
std_dev = np.std(X, axis=0)
std_dev[std_dev == 0] = eps

# Normalize the data by subtracting the mean and dividing by the standard deviation
X = (X - np.mean(X, axis=0)) / std_dev

## Split Dataset:

In [46]:
from sklearn.model_selection import train_test_split

X_train, X_test,y_train, y_test = train_test_split(X , y ,test_size=0.2, random_state=42)

## Apply one-hot encoding

In [47]:
# Reshape the input data
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Convert y_train and y_test to one-hot encoded vectors
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

## Implement Dynamic Neural Network from scratch

In [48]:

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid activation function
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Mean Squared Error (MSE) loss function
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

# Neural Network class
class NeuralNetwork:
    def __init__(self, x, y ,num_of_layers, size_of_layers):

        num_features = x.shape[1]

        size_of_layers.insert(0, num_features)
        # Size (number of neurons) of each layer 
        self.size_of_layers = size_of_layers

        # Number of layers in the network
        num_of_layers = len(size_of_layers) 
        self.num_of_layers = num_of_layers
       
        
        # Initialize random weights between layers
        self.weights = [np.random.randn(size_of_layers[i], size_of_layers[i+1]) for i in range(num_of_layers-1)]

        # Initialize random biases for each layer
        self.biases = [np.random.randn(size_of_layers[i+1]) for i in range(num_of_layers-1)]

        # Store the outputs of each layer
        self.outputs = [np.zeros(size) for size in size_of_layers]

        # Store the errors in each layer (excluding the input layer)
        self.errors = [np.zeros(size) for size in size_of_layers[1:]]
        
    
    def forward(self, x):

        # Set the input layer output as the input data
        self.outputs[0] = x  

        for i in range(self.num_of_layers-1):

            # Compute the weighted sum of inputs and biases
            z = np.dot(self.outputs[i], self.weights[i]) + self.biases[i]

            # Apply the sigmoid activation function
            self.outputs[i+1] = sigmoid(z)  

            # Return the output of the last layer
        return self.outputs[-1]  
    
    def backward(self, x, y_true, learning_rate):
      num_samples = x.shape[0]
      
      # Perform forward propagation to get the predicted output
      y_pred = self.forward(x)
      
      # Compute the error between predicted and true output
      error = (y_true - y_pred) / num_samples
      
      # Compute the delta (error * derivative of sigmoid)
      delta = error * sigmoid_derivative(y_pred)
      
      for i in range(self.num_of_layers-2, -1, -1):
          # Store the error in the current layer
          self.errors[i] = delta
          
          # Compute the delta for the previous layer using the weights and derivative of sigmoid
          delta = np.dot(delta, self.weights[i].T) * sigmoid_derivative(self.outputs[i])
          
          # Compute the gradient of weights using outer product of outputs and errors
          grad_w = np.dot(self.outputs[i].T, self.errors[i]) / num_samples
          
          # Gradient of biases is equal to the mean of errors along the samples
          grad_b = np.mean(self.errors[i], axis=0)
          
          # Update the weights using the learning rate and gradient
          self.weights[i] += learning_rate * grad_w
          
          # Update the biases using the learning rate and gradient
          self.biases[i] += learning_rate * grad_b

    def train(self, X_train, y_train, epochs, learning_rate):


      for epoch in range(epochs):

        # Perform backward propagation and update weights and biases for training set
        self.backward(X_train, y_train, learning_rate)
              
        if epoch % 100 == 0:
          
          # Compute the mean squared error loss for the training set
          train_loss = mse(y_train, self.forward(X_train))
          print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}")
          
      train_accuracy = self.accuracy(X_train, y_train)
      # Compute the training accuracy
      print(f"Train Accuracy: {train_accuracy:.4f}")
    
    def predict(self, X):

        # Perform forward propagation and round the predicted output to the nearest integer (0 or 1)
        return np.round(self.forward(X))

        
    def accuracy(self, X, y):
        # Compute the accuracy by comparing predicted output with true output
        y_pred = self.predict(X)
        return np.mean(y_pred == y)
        

def NN(x, y, num_of_layers, size_of_layers):
  
    nn = NeuralNetwork(x ,y ,num_of_layers=num_of_layers, size_of_layers=size_of_layers)
    nn.train(x, y, epochs=1000, learning_rate=0.1)
    return nn


### **Build NN with only 2 layers => 1 hidden layer and 1 output layer**

In [18]:
# Test NN function on MNIST dataset
nn1 = NN(X_train, y_train, num_of_layers=2, size_of_layers=[20 , 10])
accuracy1_test = nn1.accuracy(X_test, y_test)


print("Test Accuracy1 :", accuracy1_test)


  return 1 / (1 + np.exp(-x))


Epoch 0: Train Loss = 0.3562
Epoch 100: Train Loss = 0.3562
Epoch 200: Train Loss = 0.3562
Epoch 300: Train Loss = 0.3562
Epoch 400: Train Loss = 0.3562
Epoch 500: Train Loss = 0.3562
Epoch 600: Train Loss = 0.3562
Epoch 700: Train Loss = 0.3561
Epoch 800: Train Loss = 0.3561
Epoch 900: Train Loss = 0.3561
Train Accuracy: 0.5459
Test Accuracy1 : 0.5461928571428571


### **Build NN with 3 layers=> 2 hidden layers**

### **Where # of neurons in first layer < # of neurons in second layer and 1 output layer**

In [19]:
nn2 = NN(X_train, y_train, num_of_layers=3, size_of_layers=[20, 30, 10])
accuracy2_test = nn2.accuracy(X_test, y_test)
print("Test Accuracy2 :", accuracy2_test)

  return 1 / (1 + np.exp(-x))


Epoch 0: Train Loss = 0.4102
Epoch 100: Train Loss = 0.4102
Epoch 200: Train Loss = 0.4102
Epoch 300: Train Loss = 0.4101
Epoch 400: Train Loss = 0.4101
Epoch 500: Train Loss = 0.4101
Epoch 600: Train Loss = 0.4101
Epoch 700: Train Loss = 0.4101
Epoch 800: Train Loss = 0.4100
Epoch 900: Train Loss = 0.4100
Train Accuracy: 0.4966
Test Accuracy2 : 0.4982714285714286


### **Build NN with 3 layers=> 2 hidden layers**

### **Where # of neurons in first layer > # of neurons in second layer and 1 output layer**

In [49]:
nn3 = NN(X_train, y_train, num_of_layers=3, size_of_layers=[30, 20, 10])
accuracy3_test = nn3.accuracy(X_test, y_test)
print("Test Accuracy3 :", accuracy3_test)

  return 1 / (1 + np.exp(-x))


Epoch 0: Train Loss = 0.2901
Epoch 100: Train Loss = 0.2901
Epoch 200: Train Loss = 0.2901
Epoch 300: Train Loss = 0.2901
Epoch 400: Train Loss = 0.2901
Epoch 500: Train Loss = 0.2901
Epoch 600: Train Loss = 0.2901
Epoch 700: Train Loss = 0.2900
Epoch 800: Train Loss = 0.2900
Epoch 900: Train Loss = 0.2900
Train Accuracy: 0.6130
Test Accuracy3 : 0.6115785714285714
