In [1]:
# 2. Write a program (WAP) to implement a multi-layer perceptron (MLP) network with one hidden layer using NumPy in Python. Demonstrate that it can learn the XOR Boolean function.

import numpy as np

In [2]:
class MLP:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output layer.

    Training is full-batch gradient descent on the squared error, implemented
    with NumPy only.

    Parameters
    ----------
    input_size : int
        Number of input features (columns of ``X``).
    hidden_size : int
        Number of units in the hidden layer.
    output_size : int
        Number of output units (columns of ``y``).
    learning_rate : float, default 0.1
        Step size applied to every weight and bias update.
    epochs : int, default 10000
        Number of full-batch passes performed by :meth:`train`.
    seed : int or None, default None
        Seed for the weight initialisation. ``None`` draws from NumPy's global
        random state (the original behaviour); an integer uses a private
        ``RandomState`` so the initial weights are reproducible and the global
        state is left untouched.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, epochs=10000, seed=None):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Both the ``np.random`` module and ``RandomState`` expose ``randn``.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Weights start as standard-normal draws; biases start at zero.
        self.W1 = rng.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    @staticmethod
    def _as_batch(X):
        """Return ``X`` as a 2-D float array (a single 1-D sample becomes one row)."""
        return np.atleast_2d(np.asarray(X, dtype=float))

    def _as_targets(self, y):
        """Return ``y`` as a float array of shape ``(n_samples, output_size)``."""
        return np.asarray(y, dtype=float).reshape(-1, self.output_size)

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The input is clipped to ``[-500, 500]`` before exponentiation so that
        ``np.exp`` cannot overflow for large-magnitude pre-activations. Inside
        that range the result is the same as the unclipped formula.
        """
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative expressed through the sigmoid's output.

        Note that ``x`` must already be an activation ``sigmoid(z)``, not the
        pre-activation ``z``: the formula used is ``x * (1 - x)``.
        """
        return x * (1 - x)

    def forward(self, X):
        """Run a forward pass and return the output activations.

        The intermediate values ``z1``, ``a1``, ``z2`` and ``a2`` are cached on
        the instance because :meth:`backward` reads ``a1`` and ``a2``.
        """
        X = self._as_batch(X)
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y):
        """Apply one gradient-descent update using the cached forward pass.

        ``forward(X)`` must have been called with the same ``X`` immediately
        before, since the deltas are computed from the cached ``a1``/``a2``.
        ``y`` may be given as shape ``(n,)`` or ``(n, output_size)``.
        """
        X = self._as_batch(X)
        y = self._as_targets(y)

        # The error is (target - prediction), so adding the products below
        # moves the parameters down the squared-error gradient.
        output_error = y - self.a2
        output_delta = output_error * self.sigmoid_derivative(self.a2)

        # Propagate the output delta back through W2 (pre-update values).
        hidden_error = output_delta.dot(self.W2.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.a1)

        # Updates are summed over the batch, not averaged.
        self.W2 += self.a1.T.dot(output_delta) * self.learning_rate
        self.b2 += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.W1 += X.T.dot(hidden_delta) * self.learning_rate
        self.b1 += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y):
        """Run ``self.epochs`` full-batch updates, printing the MSE every 1000 epochs.

        The weights are updated in place, so calling ``train`` again continues
        from the current weights instead of starting over. The printed loss is
        computed from the forward pass made before that epoch's update.
        """
        X = self._as_batch(X)
        y = self._as_targets(y)
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)
            if epoch % 1000 == 0:
                loss = np.mean(np.square(y - self.a2))
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        """Return the output activations for ``X``.

        This is a plain forward pass, so it also overwrites the cached
        activations on the instance.
        """
        return self.forward(X)

In [3]:
# XOR truth table: one row per combination of the two binary inputs.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

# Target column: bitwise XOR of the two input columns, kept as shape (4, 1).
y = X[:, :1] ^ X[:, 1:]

In [4]:
# Network architecture for the two-input, one-output XOR problem.
input_size = 2
hidden_size = 4
output_size = 1

# MLP draws its initial weights from NumPy's global random state, so seed it
# right before construction to make Restart & Run All give the same weights
# (and therefore the same training log) every time.
SEED = 42
np.random.seed(SEED)

mlp = MLP(input_size=input_size, hidden_size=hidden_size, output_size=output_size, learning_rate=0.1, epochs=10000)

In [8]:
# Fit the network on the XOR table.
# NOTE: train() updates mlp's weights in place, so re-running this cell
# continues training from the current weights; re-run the cell that
# constructs `mlp` first to start from a fresh initialisation.
mlp.train(X, y)

# Evaluate on the same four inputs, then show the raw sigmoid outputs.
predictions = mlp.predict(X)
print("\nPredictions after training:")
predictions

Epoch 0, Loss: 0.00225393489829456
Epoch 1000, Loss: 0.0019442206323379706
Epoch 2000, Loss: 0.0017057789411882608
Epoch 3000, Loss: 0.0015170525038492107
Epoch 4000, Loss: 0.00136427224842055
Epoch 5000, Loss: 0.0012382616820502703
Epoch 6000, Loss: 0.0011326855222450582
Epoch 7000, Loss: 0.00104303807793501
Epoch 8000, Loss: 0.0009660325334299373
Epoch 9000, Loss: 0.000899218116024192

Predictions after training:


array([[0.01476871],
       [0.97020023],
       [0.97061914],
       [0.03733032]])

In [9]:
# Convert the sigmoid outputs to hard 0/1 labels using a 0.5 threshold.
# This overwrites `predictions`, so the raw probabilities are no longer
# available under that name after this cell runs.
predictions = np.greater(predictions, 0.5).astype(int)
predictions

array([[0],
       [1],
       [1],
       [0]])

In [11]:
'''
Explanation:
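Each call to train() runs 10,000 full-batch epochs on the four-row XOR dataset.
The loss gradually decreased as the network learned the XOR function. Because train() updates the weights in place, the log above (whose epoch-0 loss is already about 0.002) indicates that the training cell was re-run on an already-trained model rather than on a freshly initialised one.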
The output predictions after training are close to [0, 1, 1, 0], which is the expected output for the XOR truth table.
Tuning Parameters:
You can experiment with the hidden_size (number of neurons in the hidden layer) and learning_rate to improve performance and convergence speed.
The epochs value controls how many times the entire dataset is passed through the network during training. If necessary, you can adjust this number to reach a lower loss.
''';