Implement a simple 3-layer neural network from scratch using Python and numpy. The network should have 2 input neurons, 2 hidden neurons, and 1 output neuron, using the sigmoid activation for both hidden and output layers. Train it on the XOR dataset using backpropagation for 100 epochs, printing the loss every 10 epochs. After training, print
the predicted outputs for the XOR inputs

In [1]:
import numpy as np
import pandas as pd


In [4]:
data = pd.read_csv("Xor_Dataset.csv")
data


Unnamed: 0,X,Y,Z
0,0,0,0
1,0,1,1
2,1,1,0
3,1,1,0
4,0,0,0
...,...,...,...
9995,0,0,0
9996,0,1,1
9997,1,1,0
9998,1,1,0


In [5]:
data.columns.tolist()

['X', 'Y', 'Z']

In [6]:
X = data[['X', 'Y']].values  # Input features from columns 'X' and 'Y'
y = data[['Z']].values

In [12]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [8]:
input_neurons = 2    # Input layer
hidden_neurons = 2   # Hidden layer
output_neurons = 1   # Output layer
learning_rate = 0.1
epochs = 100

In [9]:
# Weight and bias initialization
np.random.seed(0)  
weights_input_hidden = np.random.uniform(size=(input_neurons, hidden_neurons))
weights_hidden_output = np.random.uniform(size=(hidden_neurons, output_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_neurons))
bias_output = np.random.uniform(size=(1, output_neurons))

In [13]:
for epoch in range(epochs):
    
    hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = relu(hidden_layer_input)  
    
    final_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(final_layer_input)  
    
    
    loss = np.mean((y - predicted_output) ** 2)
    
    
    error_output = y - predicted_output
    delta_output = error_output * sigmoid_derivative(predicted_output)
    
    error_hidden_layer = delta_output.dot(weights_hidden_output.T)
    delta_hidden_layer = error_hidden_layer * relu_derivative(hidden_layer_output)
    
    
    weights_hidden_output += hidden_layer_output.T.dot(delta_output) * learning_rate
    weights_input_hidden += X.T.dot(delta_hidden_layer) * learning_rate
    bias_output += np.sum(delta_output, axis=0, keepdims=True) * learning_rate
    bias_hidden += np.sum(delta_hidden_layer, axis=0, keepdims=True) * learning_rate
    
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")


Epoch 10/100, Loss: 0.5048
Epoch 20/100, Loss: 0.5048
Epoch 30/100, Loss: 0.5048
Epoch 40/100, Loss: 0.5048
Epoch 50/100, Loss: 0.5048
Epoch 60/100, Loss: 0.5048
Epoch 70/100, Loss: 0.5048
Epoch 80/100, Loss: 0.5048
Epoch 90/100, Loss: 0.5048
Epoch 100/100, Loss: 0.5048


In [14]:
print("\nPredicted outputs after training:")
print(predicted_output)


Predicted outputs after training:
[[3.74327234e-19]
 [3.74327234e-19]
 [3.74327234e-19]
 ...
 [3.74327234e-19]
 [3.74327234e-19]
 [3.74327234e-19]]
