<a href="https://colab.research.google.com/github/malak-elbanna/NN_from_scratch/blob/main/NN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
from sklearn.metrics import confusion_matrix

In [None]:
# Load MNIST through Keras; load_data() returns (images, labels) pairs for
# the training split and the test split, unpacked here into four arrays.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Sanity check: show the dimensions of every split before preprocessing.
print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')
print(f'X_test:  {X_test.shape}')
print(f'y_test:  {y_test.shape}')

X_train: (60000, 28, 28)
y_train: (60000,)
X_test:  (10000, 28, 28)
y_test:  (10000,)


# Normalization

In [None]:
# Scale the raw integer pixel values to float32 by dividing by 255
# (presumably mapping 0-255 intensities onto [0, 1]).
# The dtype guard makes this cell idempotent: once an array has been converted
# to float it is left alone, so re-running the cell cannot divide by 255 twice.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype('float32') / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255.0

# Flatten each 2D 28x28-pixel image into a 1D array of 784 pixels

In [None]:
# Collapse every image into one row vector. The first axis (samples) is kept;
# -1 lets NumPy infer the per-sample length from the remaining dimensions.
X_train_f = X_train.reshape((len(X_train), -1))
X_test_f = X_test.reshape((len(X_test), -1))

# One-hot encoding from scratch

In [None]:
# One-hot encode the training labels: row i is all zeros except for a 1 in
# the column given by y_train[i] (10 columns, one per class).
# Integer-array indexing sets every (row, label) position in a single
# vectorized assignment instead of a Python loop over each sample.
one_hot = np.zeros((y_train.shape[0], 10))
one_hot[np.arange(y_train.shape[0]), y_train] = 1

# ReLU

In [None]:
def relu(z):
    """Rectified linear unit.

    Returns the element-wise maximum of 0 and ``z``: negative entries
    become 0, non-negative entries pass through unchanged.
    """
    rectified = np.maximum(0, z)
    return rectified

# Softmax

In [None]:
def softmax(z):
    """Softmax taken along axis 0.

    Each column of ``z`` is normalised independently, so every column of
    the result sums to 1.
    """
    # Subtract the per-column maximum first so the largest exponent is
    # exp(0) = 1; this avoids overflow without changing the result.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exponentials = np.exp(shifted)
    column_totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_totals

# Initialize Weights and Biases

In [None]:
def he_initialization(size_layer, size_next_layer):
    """Draw a He-initialised weight matrix.

    Args:
        size_layer: number of units feeding into the weights (fan-in).
        size_next_layer: number of units the weights feed into.

    Returns:
        Array of shape (size_next_layer, size_layer) holding standard-normal
        samples from NumPy's global RNG, scaled by sqrt(2 / size_layer).
    """
    samples = np.random.randn(size_next_layer, size_layer)
    scale = np.sqrt(2. / size_layer)
    return samples * scale

# Layer sizes: 784 inputs (one per pixel of a flattened 28x28 image),
# a single hidden layer of 532 units, and 10 outputs (one per class).
input_layer = 784
hidden_layer = 532
output_layer = 10

# Seed NumPy's global RNG before drawing the weights so the initial
# parameters are identical on every Restart & Run All.
np.random.seed(42)

# Weights are stored as (units_out, units_in); biases are column vectors
# so they broadcast across the sample axis in the forward pass.
W_hidden = he_initialization(input_layer, hidden_layer)
b_hidden = np.zeros((hidden_layer, 1))

W_output = he_initialization(hidden_layer, output_layer)
b_output = np.zeros((output_layer, 1))

# Forward

In [None]:
def forward(X):
    """Run one forward pass through the two-layer network.

    Reads the module-level parameters W_hidden, b_hidden, W_output and
    b_output. ``X`` is transposed before the first matrix product, so samples
    are expected along the rows of ``X`` and end up along the columns of both
    returned arrays.

    Returns:
        (output_softmax, hidden_relu): the softmax of the output layer and
        the ReLU activations of the hidden layer.
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden_act = relu(W_hidden.dot(X.T) + b_hidden)

    # Output layer: affine transform followed by softmax.
    probs = softmax(W_output.dot(hidden_act) + b_output)

    return probs, hidden_act

# Backpropagation

In [None]:
def backpropagation(X, one_hot, output, hidden_relu):
    """Compute batch-averaged gradients for both layers.

    Args:
        X: input batch, one sample per row.
        one_hot: one-hot targets, one sample per row (transposed internally
            to line up with ``output``).
        output: network output as returned by ``forward``.
        hidden_relu: hidden-layer activations as returned by ``forward``.

    Returns:
        (gradient_Woutput, gradient_boutput, gradient_Whidden,
        gradient_bhidden), each divided by the number of rows in ``X``.
        Reads the module-level W_output to propagate the error backwards.
    """
    n_samples = X.shape[0]

    # Output layer: error is prediction minus target.
    delta_out = output - one_hot.T
    grad_W_out = np.dot(delta_out, hidden_relu.T) / n_samples
    grad_b_out = np.sum(delta_out, axis=1, keepdims=True) / n_samples

    # Hidden layer: push the error back through W_output and zero it
    # wherever the ReLU was inactive (activation not > 0).
    relu_mask = hidden_relu > 0
    delta_hidden = np.dot(W_output.T, delta_out) * relu_mask
    grad_W_hidden = np.dot(delta_hidden, X) / n_samples
    grad_b_hidden = np.sum(delta_hidden, axis=1, keepdims=True) / n_samples

    return grad_W_out, grad_b_out, grad_W_hidden, grad_b_hidden

# Training

In [None]:
def train(X_train_f, one_hot, n_epochs, lr):
    """Train the network with full-batch gradient descent.

    Every epoch runs one forward pass over all of ``X_train_f``, computes
    the gradients with ``backpropagation`` and updates the module-level
    parameters W_output, b_output, W_hidden and b_hidden in place.

    Args:
        X_train_f: training inputs, one sample per row.
        one_hot: one-hot training targets, one sample per row.
        n_epochs: number of passes over the training data.
        lr: step size applied to each gradient.

    Prints the loss once per epoch and returns nothing. The printed loss is
    computed from the forward-pass output obtained before that epoch's
    parameter update.
    """
    global W_output, b_output, W_hidden, b_hidden

    for epoch_idx in range(n_epochs):
        probs, hidden_act = forward(X_train_f)
        dW_out, db_out, dW_hid, db_hid = backpropagation(
            X_train_f, one_hot, probs, hidden_act
        )

        # Gradient-descent step on every parameter.
        W_output -= lr * dW_out
        b_output -= lr * db_out
        W_hidden -= lr * dW_hid
        b_hidden -= lr * db_hid

        # Mean cross-entropy over the batch; the 1e-8 offset keeps log()
        # away from zero.
        log_probs = np.log(probs.T + 1e-8)
        per_sample = np.sum(one_hot * log_probs, axis=1)
        loss = -np.mean(per_sample)
        print(f"Epoch {epoch_idx + 1}/{n_epochs} - Loss: {loss:.4f}")

In [None]:
# Training hyperparameters.
n_epochs = 200  # number of full-batch passes over the training set
lr = 0.03       # gradient-descent step size

# NOTE: train() updates the global weights in place, so re-running this cell
# continues from the current weights rather than restarting. Re-run the
# weight-initialization cell first to train from scratch.
train(X_train_f, one_hot, n_epochs=n_epochs, lr=lr)

Epoch 1/200 - Loss: 0.3738
Epoch 2/200 - Loss: 0.3736
Epoch 3/200 - Loss: 0.3734
Epoch 4/200 - Loss: 0.3731
Epoch 5/200 - Loss: 0.3729
Epoch 6/200 - Loss: 0.3727
Epoch 7/200 - Loss: 0.3724
Epoch 8/200 - Loss: 0.3722
Epoch 9/200 - Loss: 0.3720
Epoch 10/200 - Loss: 0.3717
Epoch 11/200 - Loss: 0.3715
Epoch 12/200 - Loss: 0.3713
Epoch 13/200 - Loss: 0.3711
Epoch 14/200 - Loss: 0.3708
Epoch 15/200 - Loss: 0.3706
Epoch 16/200 - Loss: 0.3704
Epoch 17/200 - Loss: 0.3702
Epoch 18/200 - Loss: 0.3699
Epoch 19/200 - Loss: 0.3697
Epoch 20/200 - Loss: 0.3695
Epoch 21/200 - Loss: 0.3693
Epoch 22/200 - Loss: 0.3690
Epoch 23/200 - Loss: 0.3688
Epoch 24/200 - Loss: 0.3686
Epoch 25/200 - Loss: 0.3684
Epoch 26/200 - Loss: 0.3682
Epoch 27/200 - Loss: 0.3679
Epoch 28/200 - Loss: 0.3677
Epoch 29/200 - Loss: 0.3675
Epoch 30/200 - Loss: 0.3673
Epoch 31/200 - Loss: 0.3671
Epoch 32/200 - Loss: 0.3669
Epoch 33/200 - Loss: 0.3666
Epoch 34/200 - Loss: 0.3664
Epoch 35/200 - Loss: 0.3662
Epoch 36/200 - Loss: 0.3660
E

# Prediction

In [None]:
def predict(X_test):
    """Return the predicted class index for every sample in ``X_test``.

    Runs ``forward`` and takes the argmax over axis 0 of its output, i.e.
    the row index of the largest value in each column.
    """
    probs, _ = forward(X_test)
    return np.argmax(probs, axis=0)

In [None]:
# Evaluate on the held-out test set: accuracy is the fraction of predicted
# labels that match the true labels.
predictions = predict(X_test_f)

total_samples = y_test.shape[0]
correct_predictions = (predictions == y_test).sum()
accuracy = correct_predictions / total_samples

print(f"Accuracy on test set: {accuracy:.2%}")

Accuracy on test set: 91.29%


In [None]:
# Per-class breakdown of the test predictions (scikit-learn convention:
# rows are true labels, columns are predicted labels).
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
conf_matrix

array([[ 955,    0,    2,    2,    0,    6,   11,    1,    3,    0],
       [   0, 1107,    1,    5,    0,    0,    4,    2,   16,    0],
       [  10,    3,  901,   20,   19,    1,   14,   19,   38,    7],
       [   4,    0,   21,  910,    2,   26,    2,   13,   22,   10],
       [   1,    4,    4,    2,  911,    1,   10,    2,    9,   38],
       [  12,    3,    4,   38,    8,  770,   17,    7,   27,    6],
       [  14,    3,    4,    2,   15,   19,  897,    1,    3,    0],
       [   4,   16,   25,    4,   13,    0,    0,  936,    5,   25],
       [   9,    7,    9,   27,   10,   21,   12,   13,  850,   16],
       [  10,    5,    3,   13,   45,   14,    1,   19,    7,  892]])