# **Neural Network From Scratch**

In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from keras.datasets import mnist
from sklearn.metrics import confusion_matrix


Dataset Loading

In [2]:
# Load MNIST dataset
# mnist.load_data() returns a pair of (images, labels) tuples: first the
# training split, then the test split.
data = mnist.load_data()
X_train, y_train = data[0]
X_test, y_test = data[1]

# Keep the array shapes in named variables so they can be reported below
train_shape, label_train_shape = X_train.shape, y_train.shape
test_shape, label_test_shape = X_test.shape, y_test.shape

print("Shape of training images:", train_shape)
print("Shape of training labels:", label_train_shape)
print("Shape of test images:", test_shape)
print("Shape of test labels:", label_test_shape)


Shape of training images: (60000, 28, 28)
Shape of training labels: (60000,)
Shape of test images: (10000, 28, 28)
Shape of test labels: (10000,)


Data Normalization

In [3]:
# Data normalization
# Cast the images to float32 and divide by 255 to scale the pixel values down.
# NOTE: this rebinds X_train / X_test, so re-running the cell divides again.
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255

# Flatten the training and test data
# Keep the first axis (one row per image) and collapse the remaining axes.
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

print(f'Flattened training data shape: {X_train_flattened.shape}')
print(f'Flattened test data shape: {X_test_flattened.shape}')



Flattened training data shape: (60000, 784)
Flattened test data shape: (10000, 784)


One-Hot Encoding

In [4]:
# One-hot encoding for labels
# Build a (num_samples, num_classes) matrix of zeros and set a single 1 per row
# at the column given by that sample's integer label.
num_classes = 10
one_hot_encoded = np.zeros((len(y_train), num_classes))

# Vectorized assignment: pair every row index with its label and write all the
# ones in a single NumPy operation instead of a Python-level loop over samples.
one_hot_encoded[np.arange(len(y_train)), y_train] = 1

ReLU Activation Function

In [5]:
# ReLU activation function
def relu_activation(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        The element-wise maximum of 0 and ``x`` (negative entries become 0,
        other entries are passed through).
    """
    rectified = np.maximum(0, x)
    return rectified


Softmax Activation Function

In [6]:
# Softmax activation function
def softmax_activation(x):
    """Compute the softmax along axis 0.

    The maximum along axis 0 is subtracted before exponentiating so that the
    largest exponent is exp(0); this avoids overflow for large inputs without
    changing the result.

    Args:
        x: Array of scores; normalization runs along axis 0, so every slice
            along that axis sums to 1 in the output.

    Returns:
        Array with the same shape as ``x`` holding the normalized exponentials.
    """
    column_max = np.max(x, axis=0, keepdims=True)
    exp_x = np.exp(x - column_max)
    normalizer = exp_x.sum(axis=0, keepdims=True)
    return exp_x / normalizer


Initialize the Weights for Each Layer Using Random Values
Scaled by the He Initialization Factor (sqrt(2/input_dim)) for Better Convergence in Deep Networks.

In [7]:
# Initialize weights
def initialize_params(input_dim, output_dim):
    """Create the weight matrix and bias vector for one dense layer.

    Weights are drawn from NumPy's global standard-normal generator and scaled
    by sqrt(2 / input_dim) (He initialization); biases start at zero.

    Args:
        input_dim: Number of inputs feeding the layer.
        output_dim: Number of units in the layer.

    Returns:
        Tuple ``(weights, biases)`` with shapes ``(output_dim, input_dim)``
        and ``(output_dim, 1)``.
    """
    he_scale = np.sqrt(2.0 / input_dim)
    standard_normal = np.random.randn(output_dim, input_dim)
    return standard_normal * he_scale, np.zeros((output_dim, 1))

# Seed NumPy's global random generator before drawing the initial weights so
# that a fresh "Restart Kernel -> Run All" reproduces the same training run.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Define sizes for each layer
input_layer_size = 784   # length of one flattened 28 x 28 image
hidden_layer_size = 532
output_layer_size = 10   # one output unit per class

# Initialize weights and biases for hidden and output layers
# (these module-level arrays are read by the forward/backward pass and updated
# in place by the training loop)
weights_hidden_layer, bias_hidden_layer = initialize_params(input_layer_size, hidden_layer_size)
weights_output_layer, bias_output_layer = initialize_params(hidden_layer_size, output_layer_size)


Forward Propagation

In [8]:
# Forward propagation
def forward_pass(X):
    """Run the two-layer network on a batch of inputs.

    Reads the module-level ``weights_hidden_layer``, ``bias_hidden_layer``,
    ``weights_output_layer`` and ``bias_output_layer``.

    Args:
        X: 2-D array of inputs. It is transposed before the first matrix
            product, so rows are presumably samples and columns features.

    Returns:
        Tuple ``(output_layer_output, hidden_layer_output)``: the softmax
        output of the final layer and the ReLU output of the hidden layer.
        Both keep the layer units on axis 0 and the batch on axis 1.
    """
    # Hidden layer: affine transform of the transposed batch, then ReLU
    hidden_pre_activation = weights_hidden_layer.dot(X.T) + bias_hidden_layer
    hidden_activation = relu_activation(hidden_pre_activation)

    # Output layer: affine transform of the hidden activations, then softmax
    output_scores = weights_output_layer.dot(hidden_activation) + bias_output_layer
    output_probabilities = softmax_activation(output_scores)

    return output_probabilities, hidden_activation


Back Propagation

In [9]:
# Backpropagation function
def backpropagation(X, true_labels, predicted_output, hidden_output):
    """Compute batch-averaged gradients for both layers.

    Reads the module-level ``weights_output_layer`` to push the output error
    back to the hidden layer.

    Args:
        X: Input batch; ``X.shape[0]`` is used as the averaging denominator.
        true_labels: Target matrix; it is transposed to line up with
            ``predicted_output`` (presumably one-hot rows per sample).
        predicted_output: Network output returned by the forward pass.
        hidden_output: Hidden-layer ReLU output returned by the forward pass.

    Returns:
        Tuple ``(gradient_weights_output, gradient_bias_output,
        gradient_weights_hidden, gradient_bias_hidden)``.
    """
    batch_size = X.shape[0]

    # Error at the output layer: prediction minus target
    delta_output = predicted_output - true_labels.T

    # Error at the hidden layer: back-project through the output weights and
    # zero it wherever the ReLU output was not positive
    relu_mask = hidden_output > 0
    delta_hidden = weights_output_layer.T.dot(delta_output) * relu_mask

    # Output-layer gradients, averaged over the batch
    grad_w_out = delta_output.dot(hidden_output.T) / batch_size
    grad_b_out = delta_output.sum(axis=1, keepdims=True) / batch_size

    # Hidden-layer gradients, averaged over the batch
    grad_w_hid = delta_hidden.dot(X) / batch_size
    grad_b_hid = delta_hidden.sum(axis=1, keepdims=True) / batch_size

    return grad_w_out, grad_b_out, grad_w_hid, grad_b_hid



**Model Training**

In [10]:
#  Training function
def train_neural_network(X_train_data, one_hot_targets, epochs, lr):
    """Train the network with gradient descent on the whole dataset per step.

    Every epoch runs one forward pass over all of ``X_train_data``, one
    backpropagation step, and one in-place update of the module-level
    weight and bias arrays. The cross-entropy loss of that epoch's forward
    pass (i.e. before the update) is printed.

    Args:
        X_train_data: Training inputs passed to ``forward_pass``.
        one_hot_targets: Target matrix matching the transposed predictions.
        epochs: Number of update steps to perform.
        lr: Learning rate multiplying each gradient.

    Returns:
        None. The trained parameters live in the module-level globals.
    """
    global weights_output_layer, bias_output_layer, weights_hidden_layer, bias_hidden_layer
    for ep in range(epochs):
        # Forward pass on the full training set
        predictions, hidden_layer_output = forward_pass(X_train_data)

        # Cross-entropy of this forward pass; 1e-8 guards against log(0)
        log_probabilities = np.log(predictions.T + 1e-8)
        per_sample_log_likelihood = np.sum(one_hot_targets * log_probabilities, axis=1)
        loss_value = -np.mean(per_sample_log_likelihood)

        # Gradients are computed with the current (pre-update) weights
        gradients = backpropagation(
            X_train_data, one_hot_targets, predictions, hidden_layer_output)
        grad_w_output, grad_b_output, grad_w_hidden, grad_b_hidden = gradients

        # Gradient-descent step, applied in place to the global parameters
        weights_output_layer -= lr * grad_w_output
        bias_output_layer -= lr * grad_b_output
        weights_hidden_layer -= lr * grad_w_hidden
        bias_hidden_layer -= lr * grad_b_hidden

        print(f"Epoch {ep + 1}/{epochs} - Loss: {loss_value:.4f}")


Execution

In [11]:
#  training parameters
epochs_to_run = 250          # number of gradient-descent steps
learning_rate_value = 0.03   # step size applied to every gradient

# Run training; the learned parameters are stored in the module-level arrays
train_neural_network(
    X_train_flattened,
    one_hot_encoded,
    epochs=epochs_to_run,
    lr=learning_rate_value,
)



Epoch 1/250 - Loss: 2.3899
Epoch 2/250 - Loss: 2.3325
Epoch 3/250 - Loss: 2.2823
Epoch 4/250 - Loss: 2.2365
Epoch 5/250 - Loss: 2.1937
Epoch 6/250 - Loss: 2.1531
Epoch 7/250 - Loss: 2.1142
Epoch 8/250 - Loss: 2.0767
Epoch 9/250 - Loss: 2.0405
Epoch 10/250 - Loss: 2.0053
Epoch 11/250 - Loss: 1.9712
Epoch 12/250 - Loss: 1.9379
Epoch 13/250 - Loss: 1.9055
Epoch 14/250 - Loss: 1.8740
Epoch 15/250 - Loss: 1.8432
Epoch 16/250 - Loss: 1.8132
Epoch 17/250 - Loss: 1.7839
Epoch 18/250 - Loss: 1.7554
Epoch 19/250 - Loss: 1.7276
Epoch 20/250 - Loss: 1.7004
Epoch 21/250 - Loss: 1.6739
Epoch 22/250 - Loss: 1.6481
Epoch 23/250 - Loss: 1.6229
Epoch 24/250 - Loss: 1.5983
Epoch 25/250 - Loss: 1.5743
Epoch 26/250 - Loss: 1.5509
Epoch 27/250 - Loss: 1.5281
Epoch 28/250 - Loss: 1.5059
Epoch 29/250 - Loss: 1.4842
Epoch 30/250 - Loss: 1.4631
Epoch 31/250 - Loss: 1.4424
Epoch 32/250 - Loss: 1.4223
Epoch 33/250 - Loss: 1.4028
Epoch 34/250 - Loss: 1.3837
Epoch 35/250 - Loss: 1.3650
Epoch 36/250 - Loss: 1.3469
E

In [17]:
# Prediction function
def predict_labels(X_test_data):
    """Predict a class index for every input in the batch.

    Args:
        X_test_data: Inputs forwarded unchanged to ``forward_pass``.

    Returns:
        1-D array with, for each column of the network output, the index of
        the largest value along axis 0.
    """
    # Only the output-layer result is needed; the hidden activations are dropped
    output_probs = forward_pass(X_test_data)[0]
    return np.argmax(output_probs, axis=0)

# Generate predictions using the test data
test_predictions = predict_labels(X_test_flattened)

# accuracy
# Fraction of test samples whose predicted class equals the true label
test_accuracy = np.mean(test_predictions == y_test)
print(f"Accuracy on the test set: {test_accuracy:.2%}")


Accuracy on the test set: 88.76%


In [18]:
# Confusion matrix
# Rows correspond to the true labels and columns to the predicted labels
confusion_mat = confusion_matrix(y_true=y_test, y_pred=test_predictions)
print("Confusion Matrix:")
print(confusion_mat)


Confusion Matrix:
[[ 944    0    3    3    1    5   17    1    6    0]
 [   0 1103    6    4    0    2    5    1   14    0]
 [  17   15  871   19   17    0   20   25   40    8]
 [   5    1   19  883    1   36    6   18   28   13]
 [   5    6    3    0  888    2   11    1    8   58]
 [  16   12    4   51   19  713   25    9   30   13]
 [  19    3    8    1   14   19  887    1    6    0]
 [   4   27   30    3   12    1    2  907    4   38]
 [   7   13   11   34   12   29   15   13  823   17]
 [  11   10   10   10   61   12    0   30    8  857]]
