<a href="https://colab.research.google.com/github/mohamed7456/ML-Course-Assignments/blob/main/notebooks/04_neural_network_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Neural Networks from Scratch**

## **Imports**

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
from keras.datasets import mnist

## Implementation

In [None]:
def sigmoid(x):
    """Logistic activation function, evaluated element-wise.

    Mathematically this is ``1 / (1 + exp(-x))``, but it is evaluated so that
    ``np.exp`` only ever receives non-positive arguments.  Inputs with a large
    magnitude therefore saturate cleanly to 0 or 1 instead of overflowing and
    emitting a ``RuntimeWarning``.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The activations, in the closed interval [0, 1], with the same shape
        as ``x`` (a NumPy scalar when ``x`` is a scalar).
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x).   x < 0: e^x / (1 + e^x), the same value
    # rewritten so that the exponent stays negative.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d arrays so scalar input still yields a scalar.
    return result[()] if result.ndim == 0 else result

def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    activation = sigmoid(x)
    return activation * (1 - activation)

In [None]:
def initialize_parameters(input_size, hidden_size, output_size, seed=0, scale=0.01):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and multiplied by
    ``scale``; biases start at zero.  The draw uses a private
    ``np.random.RandomState(seed)`` rather than reseeding NumPy's global
    generator, so calling this function does not reset the random stream used
    by any other code.  With the default ``seed=0`` the weights are the same
    values that ``np.random.seed(0)`` followed by ``np.random.randn`` yields.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        seed: Seed for the private generator (default 0).
        scale: Factor applied to the normally distributed weights
            (default 0.01).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes
        ``(input_size, hidden_size)``, ``(1, hidden_size)``,
        ``(hidden_size, output_size)`` and ``(1, output_size)``.
    """
    rng = np.random.RandomState(seed)
    # W1 must be drawn before W2 to keep the same values as the global-seed version.
    W1 = rng.randn(input_size, hidden_size) * scale
    b1 = np.zeros((1, hidden_size))
    W2 = rng.randn(hidden_size, output_size) * scale
    b2 = np.zeros((1, output_size))
    return W1, b1, W2, b2

In [None]:
# layers
def input_layer(X):
    """Input layer: hands the samples on to the network unchanged."""
    A0 = X
    return A0

def hidden_layer(X, W1, b1):
    """Hidden layer: affine map ``X . W1 + b1`` followed by a sigmoid.

    Returns:
        ``(A1, Z1)``: the activations and the pre-activation values they
        were computed from.
    """
    pre_activation = np.dot(X, W1) + b1
    return sigmoid(pre_activation), pre_activation

def output_layer(A1, W2, b2):
    """Output layer: sigmoid of the affine map ``A1 . W2 + b2``.

    Returns:
        ``A2``, the network outputs.
    """
    return sigmoid(np.dot(A1, W2) + b2)

In [None]:
def forward(X, W1, b1, W2, b2):
    """Run one forward pass through the input, hidden and output layers.

    Returns:
        ``(A0, A1, A2, Z1)``: the network inputs, hidden activations,
        output activations and hidden pre-activations, in that order.
    """
    inputs = input_layer(X)
    hidden_act, hidden_pre = hidden_layer(inputs, W1, b1)
    outputs = output_layer(hidden_act, W2, b2)
    return inputs, hidden_act, outputs, hidden_pre

In [None]:
def backpropagation(X, Y, A0, A1, A2, Z1, W1, W2):
    """Compute the parameter gradients for one batch.

    The hidden-layer sigmoid derivative is evaluated as ``A1 * (1 - A1)``
    from the activations the forward pass already produced, instead of
    re-evaluating the sigmoid of ``Z1`` twice on every call.

    NOTE(review): the output error is taken as ``A2 - Y``.  For a sigmoid
    output that is the pre-activation gradient of the binary cross-entropy
    loss, not of the mean squared error that ``train`` prints.

    Args:
        X: Batch of samples; only ``X.shape[0]`` (the sample count) is used.
        Y: Targets, broadcastable against ``A2``.
        A0: Network inputs as returned by the forward pass.
        A1: Hidden activations; must equal the sigmoid of ``Z1``.
        A2: Output activations.
        Z1: Hidden pre-activations.  No longer read; kept so existing call
            sites keep working.
        W1: Input-to-hidden weights.  Not read; kept for the same reason.
        W2: Hidden-to-output weights.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` of gradients averaged over the batch.
    """
    m = X.shape[0]  # samples count

    # Output layer.
    dZ2 = A2 - Y
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    # Hidden layer: sigmoid'(Z1) == A1 * (1 - A1) because A1 = sigmoid(Z1).
    dZ1 = np.dot(dZ2, W2.T) * (A1 * (1 - A1))
    dW1 = np.dot(A0.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2

In [None]:
def train(X_train, Y_train, learning_rate, epochs, hidden_size, output_size, log_every=1):
    """Train the two-layer network with full-batch gradient descent.

    Every epoch runs one forward pass over all of ``X_train``, one
    backpropagation pass, and one parameter update.

    Args:
        X_train: 2-D array of samples, one row per sample.
        Y_train: Targets with shape ``(n_samples, output_size)``.
        learning_rate: Step size applied to every gradient.
        epochs: Number of update steps.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        log_every: Print the loss every ``log_every`` epochs and on the last
            epoch.  The default of 1 prints every epoch; 0 or ``None``
            disables printing.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.

    Raises:
        ValueError: If ``Y_train`` does not have shape
            ``(n_samples, output_size)``.
    """
    n_samples, input_size = X_train.shape[0], X_train.shape[1]

    # Fail early: a wrongly shaped target (e.g. raw integer labels) could
    # otherwise broadcast silently against the network output.
    expected_shape = (n_samples, output_size)
    if np.shape(Y_train) != expected_shape:
        raise ValueError(
            f'Y_train must have shape {expected_shape} (one row per sample, '
            f'one column per output unit); got {np.shape(Y_train)}'
        )

    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        # Forward
        A0, A1, A2, Z1 = forward(X_train, W1, b1, W2, b2)

        # Backpropagation
        dW1, db1, dW2, db2 = backpropagation(X_train, Y_train, A0, A1, A2, Z1, W1, W2)

        # Gradient-descent step
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        if log_every and (epoch % log_every == 0 or epoch == epochs - 1):
            # Mean squared error of the outputs computed before this
            # epoch's update.
            loss = np.mean(np.square(A2 - Y_train))
            print(f'Epoch {epoch}, Loss: {loss}')

    return W1, b1, W2, b2

In [None]:
# Load MNIST Dataset
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Preprocess: flatten every image into one row, cast to float32 and divide by 255
X_train, X_test = (
    images.reshape(len(images), -1).astype('float32') / 255.0
    for images in (X_train, X_test)
)

# One-hot encoding: row k of the identity matrix encodes label k
num_classes = 10
Y_train, Y_test = (np.eye(num_classes)[labels] for labels in (Y_train, Y_test))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Define hyperparameters
learning_rate = 1e-1  # gradient-descent step size passed to train()
epochs = 1_000        # number of full-batch update steps
hidden_size = 128     # units in the hidden layer
output_size = 10      # output units; same value as num_classes

In [None]:
# Fit the network on the full training set with the hyperparameters defined above
W1, b1, W2, b2 = train(
    X_train,
    Y_train,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_size=hidden_size,
    output_size=output_size,
)

Epoch 0, Loss: 0.2482475298806857
Epoch 1, Loss: 0.10206361322409302
Epoch 2, Loss: 0.09286188151947906
Epoch 3, Loss: 0.09088767773156761
Epoch 4, Loss: 0.09029677751846125
Epoch 5, Loss: 0.09008874974571611
Epoch 6, Loss: 0.09000771479882452
Epoch 7, Loss: 0.08997336555031521
Epoch 8, Loss: 0.08995728930799567
Epoch 9, Loss: 0.08994865343663946
Epoch 10, Loss: 0.08994312289060018
Epoch 11, Loss: 0.08993890568176291
Epoch 12, Loss: 0.08993524410794969
Epoch 13, Loss: 0.08993181231379567
Epoch 14, Loss: 0.08992846809388064
Epoch 15, Loss: 0.08992514837572135
Epoch 16, Loss: 0.08992182451121424
Epoch 17, Loss: 0.08991848292873829
Epoch 18, Loss: 0.0899151166855635
Epoch 19, Loss: 0.08991172175377594
Epoch 20, Loss: 0.08990829537856057
Epoch 21, Loss: 0.08990483534982344
Epoch 22, Loss: 0.08990133967853212
Epoch 23, Loss: 0.08989780645312291
Epoch 24, Loss: 0.08989423377618684
Epoch 25, Loss: 0.08989061973689942
Epoch 26, Loss: 0.08988696239925169
Epoch 27, Loss: 0.08988325979713892
Epoc

In [None]:
# Test the model
_, _, A2, _ = forward(X_test, W1, b1, W2, b2)
predictions = A2.argmax(axis=1)      # index of the largest output per sample
true_labels = Y_test.argmax(axis=1)  # column index of the 1 in each one-hot row

conf_matrix = confusion_matrix(true_labels, predictions)
accuracy = accuracy_score(true_labels, predictions)

print("Confusion Matrix:", conf_matrix, sep="\n")
print("Accuracy:", accuracy)

Confusion Matrix:
[[ 961    0    2    1    0    7    6    1    2    0]
 [   0 1105    0    4    0    2    4    1   19    0]
 [  15   27  874   17   21    2   25   17   33    1]
 [   4    3   26  894    1   26    3   23   27    3]
 [   1    8    5    0  875    2   19    2    6   64]
 [  25   11    6   84   14  660   31   11   44    6]
 [  23    3   16    2    8   18  886    0    2    0]
 [   6   29   29    0    7    1    0  926    6   24]
 [  12   20   18   39   11   28   17    9  801   19]
 [  16   12    9    9  105   15    0   59    8  776]]
Accuracy: 0.8758
