In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# define X and y, here y is the target value
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
y = np.array([[0], [1], [1], [0]])

In [4]:
seeding = np.random.seed(42)
input_dim = 2
hidden_dim = 2
output_dim = 1

In [14]:
# the weights between input and hidden layer
weights_input_hidden = np.random.rand(input_dim, hidden_dim)
print(f"weight of input layer to hidden layer:\n {weights_input_hidden}")
print("--------------------------------")

# there are two bias hidden for the hidden layer, there are two neurons in the hidden layer each has its own bias.
bias_hidden = np.random.rand(1, hidden_dim)
print(f"Bias of hidden layer:\n {bias_hidden}")
print("--------------------------------")

# the weights between hidden to output layer
weights_hidden_output = np.random.rand(hidden_dim, output_dim)
print(f"weight of hidden layer to output layer:\n {weights_hidden_output}")
print("--------------------------------")

# here is one bias for the output layer 
bias_output = np.random.rand(1, output_dim)
print(f"Bias for output layer: \n {bias_output}")
print("--------------------------------")

weight of input layer to hidden layer:
 [[0.43194502 0.29122914]
 [0.61185289 0.13949386]]
--------------------------------
Bias of hidden layer:
 [[0.29214465 0.36636184]]
--------------------------------
weight of hidden layer to output layer:
 [[0.45606998]
 [0.78517596]]
--------------------------------
Bias for output layer: 
 [[0.19967378]]
--------------------------------


In [15]:
# activation function using sigmoid, 
# this activation function is sometime called as an threshold, but most commonly called as activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivation(z):
    return z * (1 - z)

In [16]:
# train the MLP
epochs = 10000 # number of iterations
learning_rate = 0.1  # too much learning rate is an issue becaouse it jumps the step and too low is also a problem

In [21]:
for epoch in range(epochs):
    # ------FORWARD PROPOGATION------
    # --- data flow from input layer to hidden layer, here the calculation of the hidden layer
    #     using the formula: data_of_input_layer * weight_connecting_to_input_hidden + bias_of_hidden_layer---
    hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
    
    # checking the hidden layer output with the activation function
    hidden_layer_output = sigmoid(hidden_layer_input) 

    # --- data flow from hidden layer to output layer, here is the calucation of the output layer
    #     using the formula: hidden_layer_output * weight_connecting_to_hidden_to_output_layer + bias_of_output ---
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    
    # checking the hidden layer output with the activation function
    predicted_output = sigmoid(output_layer_input) # final output of the forward propogation, but predicted one.
    
    # ------computer loss (mean squared error)------
    loss = np.mean((y - predicted_output) ** 2)

    # ------this is error of the output i.e target_value - predicted_output_
    error_output = y - predicted_output

    # ------BACK PROPOGATION------
    # --- delta or error_signal measures how wrong the neuron is, how far it is from the target
    #     this is dealta_predicted_output, calculated with:
    #     error_of_output * sigmoid_derivation_of_predicted_output ---
    # --- Calculate delta for the output layer (error signal) ---
    d_predicted_output = error_output * sigmoid_derivation(predicted_output)
    
    # --- Calculate delta for the hidden layer---
    # --- np.dot(d_predicted_output, weights_hidden_output.T) i.e.
    #     delta_predicted_output * weights_hidden_to_output.T, 
    #     here T => transpose of matrix beacuse column cannot multiply with column so ".T " (transpose) is applied to make it in a row,
    #     to make the calculation row * column. ---
    error_hidden_layer = np.dot(d_predicted_output, weights_hidden_output.T)
    # --- Calculate delta for the hidden layer (error signal) ---
    # **sigmoid_derivation(hidden_layer_output) => actaul hidden layer output from the forward propogation
    d_hidden_layer = error_hidden_layer * sigmoid_derivation(hidden_layer_output)

    # ------UPDATE WEIGHT AND BIASES------
    # Update weights and biases for the input to hidden layer
    # --- adjusting weight transpose of X(input) * delta_hidden_layer * learning_rate
    weights_input_hidden += np.dot(X.T, d_hidden_layer) * learning_rate
    bias_hidden += np.sum(d_hidden_layer, axis=0, keepdims=True) * learning_rate

    # Update weights and biases for the hidden to output layer
    # --- adjusting weight transpose of hidden_layer_output(forward_propogation) * delta_predicted_output * learning_rate
    weights_hidden_output += np.dot(hidden_layer_output.T, d_predicted_output) * learning_rate
    bias_output += np.sum(d_predicted_output, axis=0, keepdims=True) * learning_rate

    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

print(f" delta predicted output: \n{d_predicted_output}")
print("---------------------------------------------------")
print(f" weihght of hidden layer to output layer: \n{weights_hidden_output.T}")

Epoch 0, Loss: 0.00040311685366260514
Epoch 1000, Loss: 0.00039092072581335945
Epoch 2000, Loss: 0.00037942540295763516
Epoch 3000, Loss: 0.0003685727602412615
Epoch 4000, Loss: 0.0003583108900281633
Epoch 5000, Loss: 0.0003485932955817068
Epoch 6000, Loss: 0.00033937820669781624
Epoch 7000, Loss: 0.00033062799634776097
Epoch 8000, Loss: 0.0003223086813929868
Epoch 9000, Loss: 0.0003143894935987066
 delta predicted output: 
[[-0.00036521]
 [ 0.00027351]
 [ 0.00027339]
 [-0.00029366]]
---------------------------------------------------
 weihght of hidden layer to output layer: 
[[  9.57580507 -10.28162036]]


In [9]:
# Test the trained MLP
print("\nFinal Predictions:")
for i in range(len(X)):
    hidden_layer_input = np.dot(X[i], weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_input)

    print(f"Input: {X[i]}, Predicted Output: {predicted_output[0][0]:.3f}, Actual: {y[i][0]}")


Final Predictions:
Input: [0 0], Predicted Output: 0.053, Actual: 0
Input: [0 1], Predicted Output: 0.952, Actual: 1
Input: [1 0], Predicted Output: 0.952, Actual: 1
Input: [1 1], Predicted Output: 0.052, Actual: 0
