Aim of this program:
Implement a neural network to solve XOR Problem
Implement the Backpropagation Algorithm to train a neural network
Illustrate the optimization of Weight and Bias using gradient descent

Import Required Libraries

In [1]:
# Importing the required library
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global random number generator so that the np.random.rand
# draws used later for weight/bias initialisation are the same on every
# fresh run of the notebook.
np.random.seed(1)

Initialize Inputs: Define the inputs and expected outputs for an XOR network

In [2]:
# XOR truth table. The four input patterns are listed one per row and then
# transposed, so every column of X is one sample -> shape (2, 4).
X = np.transpose(np.array([(0, 0), (0, 1), (1, 0), (1, 1)]))
# Desired XOR output for each column of X, kept two-dimensional -> shape (1, 4).
d = np.array([
    [0, 1, 1, 0],
])

Declare the network parameters:
1. Number of input neurons
2. Hidden layer neurons
3. Output neurons
4. Learning Rate
5. Number of epochs

In [3]:
def initialize_network_parameters(input_size=2, hidden_size=2, output_size=1,
                                  lr=0.1, epochs=150000):
    """Create random starting parameters for a one-hidden-layer network.

    Called without arguments this reproduces the 2-2-1 XOR configuration
    (learning rate 0.1, 150000 epochs). The arguments allow other layer
    sizes and training settings without editing the function body.

    Parameters
    ----------
    input_size : int
        Number of input neurons.
    hidden_size : int
        Number of hidden neurons.
    output_size : int
        Number of output neurons.
    lr : float
        Learning rate, returned unchanged.
    epochs : int
        Number of training epochs, returned unchanged.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, lr, epochs)`` where ``w1`` has shape
        ``(hidden_size, input_size)``, ``b1`` has shape ``(hidden_size, 1)``,
        ``w2`` has shape ``(output_size, hidden_size)`` and ``b2`` has shape
        ``(output_size, 1)``.

    Raises
    ------
    ValueError
        If any layer size is not a positive integer.
    """
    for label, size in (("input_size", input_size),
                        ("hidden_size", hidden_size),
                        ("output_size", output_size)):
        # A zero-sized layer would silently yield empty arrays, so reject it here.
        if not isinstance(size, (int, np.integer)) or size < 1:
            raise ValueError("%s must be a positive integer, got %r" % (label, size))

    # np.random.rand samples uniformly from [0, 1); scaling by 2 and shifting
    # by -1 maps that to [-1, 1).
    # The draw order (w1, b1, w2, b2) is kept fixed so that a given seed of
    # the global NumPy RNG always yields the same parameters.
    w1 = np.random.rand(hidden_size, input_size) * 2 - 1   # input -> hidden weights
    b1 = np.random.rand(hidden_size, 1) * 2 - 1            # hidden-layer bias
    w2 = np.random.rand(output_size, hidden_size) * 2 - 1  # hidden -> output weights
    b2 = np.random.rand(output_size, 1) * 2 - 1            # output-layer bias

    return w1, b1, w2, b2, lr, epochs

In [4]:
def _sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    return 1 / (1 + np.exp(-z))


# Fresh random weights/biases plus the training hyper-parameters
w1, b1, w2, b2, lr, epochs = initialize_network_parameters()

# Full-batch training: every epoch processes all columns of X at once
error_list = []
for epoch in range(epochs):
    # ---- Forward pass ----
    z1 = np.dot(w1, X) + b1      # hidden-layer pre-activation
    a1 = _sigmoid(z1)            # hidden-layer activation
    z2 = np.dot(w2, a1) + b2     # output-layer pre-activation
    a2 = _sigmoid(z2)            # network output

    # ---- Backward pass ----
    # error = expected - actual, so the deltas below already point in the
    # direction that reduces the error and are *added* to the parameters.
    error = d - a2
    da2 = dz2 = error * (a2 * (1 - a2))   # output delta (sigmoid derivative is a * (1 - a))
    # The hidden delta must be computed with w2 as it was BEFORE this
    # epoch's update, hence it is evaluated ahead of the update step.
    da1 = np.dot(w2.T, dz2)
    dz1 = da1 * (a1 * (1 - a1))

    # ---- Parameter steps (summed over the batch) ----
    step_w2 = np.dot(dz2, a1.T)
    step_b2 = np.sum(dz2, axis=1, keepdims=True)
    step_w1 = np.dot(dz1, X.T)
    step_b1 = np.sum(dz1, axis=1, keepdims=True)

    w2 += lr * step_w2
    b2 += lr * step_b2
    w1 += lr * step_w1
    b1 += lr * step_b1

    # ---- Progress report once every 10000 epochs ----
    if (epoch + 1) % 10000 != 0:
        continue
    mean_abs_error = np.average(abs(error))
    print("Epoch: %d, Average error: %0.05f" % (epoch, mean_abs_error))
    error_list.append(mean_abs_error)

Epoch: 9999, Average error: 0.05180
Epoch: 19999, Average error: 0.03042
Epoch: 29999, Average error: 0.02339
Epoch: 39999, Average error: 0.01964
Epoch: 49999, Average error: 0.01723
Epoch: 59999, Average error: 0.01551
Epoch: 69999, Average error: 0.01422
Epoch: 79999, Average error: 0.01320
Epoch: 89999, Average error: 0.01237
Epoch: 99999, Average error: 0.01167
Epoch: 109999, Average error: 0.01108
Epoch: 119999, Average error: 0.01057
Epoch: 129999, Average error: 0.01012
Epoch: 139999, Average error: 0.00972
Epoch: 149999, Average error: 0.00937


In [5]:
# Testing the trained network: one forward pass with the final weights
z1 = np.dot(w1, X) + b1  # Weighted sum for hidden layer
a1 = 1 / (1 + np.exp(-z1))  # Sigmoid activation for hidden layer

z2 = np.dot(w2, a1) + b2  # Weighted sum for output layer
a2 = 1 / (1 + np.exp(-z2))  # Sigmoid activation for output layer

# Recompute the error from THIS forward pass. The `error` variable left over
# from the training loop was calculated before the last weight update, so it
# does not correspond to the `a2` reported below.
final_error = d - a2

# Print results
print('Final output after training:', a2)
print('Ground truth', d)
print('Error after training:', final_error)
print('Average error: %0.05f' % np.average(abs(final_error)))


Final output after training: [[0.00864946 0.99018566 0.99018444 0.00919656]]
Ground truth [[0 1 1 0]]
Error after training: [[-0.00864949  0.00981437  0.00981559 -0.0091966 ]]
Average error: 0.00937
