In [1]:
import numpy as np

# XOR truth table: every combination of two binary inputs (one pair per row),
# with the target column derived as the bitwise XOR of the two input columns.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
Y = X[:, :1] ^ X[:, 1:]

In [3]:
# Hyperparameters and parameter initialization
input_size = 2
hidden_size = 4  # A common choice for this problem
output_size = 1
learning_rate = 0.1
epochs = 60000
seed = 42  # Fixed so a fresh kernel draws the same initial weights every run

# Seeded generator: with the unseeded global RNG, each kernel restart produced
# different weights and therefore a different loss curve.
# NOTE(review): confirm training still converges with this seed; pick another if not.
rng = np.random.default_rng(seed)

# Weights are drawn from a standard normal distribution; biases start at zero.
# Weights from input to hidden layer (W1) and biases (b1)
W1 = rng.standard_normal((input_size, hidden_size))
b1 = np.zeros((1, hidden_size))
# Weights from hidden to output layer (W2) and biases (b2)
W2 = rng.standard_normal((hidden_size, output_size))
b2 = np.zeros((1, output_size))

# Activation function and its derivative
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    Evaluated as exp(-log(1 + exp(-x))) via np.logaddexp, so large negative
    inputs do not overflow np.exp (the naive form emits a RuntimeWarning once
    x drops below roughly -709).

    Args:
        x: Scalar or NumPy array of real numbers.

    Returns:
        Values in [0, 1] with the same shape as x.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Slope of the sigmoid, written in terms of the sigmoid's own output.

    Note that x is expected to be an activation value s = sigmoid(z), not the
    pre-activation z: the function returns s * (1 - s). The training loop in
    this notebook passes the layer activations A1 and A2.

    Args:
        x: Scalar or NumPy array of sigmoid outputs.

    Returns:
        x * (1 - x), element-wise.
    """
    complement = 1 - x
    return x * complement

In [4]:
def forward_pass(X):
    """Run the inputs through the hidden layer and then the output layer.

    Reads the current module-level parameters W1, b1, W2 and b2, and applies
    the sigmoid activation after each affine transform.

    Args:
        X: Input array whose last dimension matches the first dimension of W1
           (the notebook passes the (4, 2) XOR input table).

    Returns:
        A tuple (A1, A2): the hidden-layer activations and the output-layer
        activations.
    """
    # Hidden layer: affine transform followed by sigmoid
    hidden_activation = sigmoid(np.dot(X, W1) + b1)

    # Output layer: same pattern, fed by the hidden activations
    output_activation = sigmoid(np.dot(hidden_activation, W2) + b2)
    return hidden_activation, output_activation

In [5]:
losses = []  # Loss value recorded at every epoch, in order

for i in range(epochs):
    # Forward pass through both layers with the current parameters
    A1, A2 = forward_pass(X)

    # Mean squared error between the network output and the targets
    residual = A2 - Y
    loss = np.mean(residual ** 2)
    losses.append(loss)

    # Backpropagation, output layer: error scaled by the sigmoid slope.
    # The constant 2/N factor of the MSE derivative is not applied here.
    dZ2 = residual * sigmoid_derivative(A2)
    dW2 = A1.T.dot(dZ2)
    db2 = dZ2.sum(axis=0, keepdims=True)

    # Backpropagation, hidden layer: uses W2 as it was before this epoch's update
    dZ1 = dZ2.dot(W2.T) * sigmoid_derivative(A1)
    dW1 = X.T.dot(dZ1)
    db1 = dZ1.sum(axis=0, keepdims=True)

    # Gradient-descent step; `-=` on an ndarray updates the parameter in place,
    # so the module-level W1/b1/W2/b2 read by forward_pass are modified.
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad

    # Report the loss every 5000 epochs to verify it's decreasing
    if not i % 5000:
        print(f"Epoch {i}, Loss: {loss:.6f}")

Epoch 0, Loss: 0.315488
Epoch 5000, Loss: 0.005612
Epoch 10000, Loss: 0.001782
Epoch 15000, Loss: 0.001027
Epoch 20000, Loss: 0.000715
Epoch 25000, Loss: 0.000546
Epoch 30000, Loss: 0.000440
Epoch 35000, Loss: 0.000368
Epoch 40000, Loss: 0.000316
Epoch 45000, Loss: 0.000277
Epoch 50000, Loss: 0.000246
Epoch 55000, Loss: 0.000222


In [6]:
# Evaluate the trained network on the four XOR inputs
_, A2 = forward_pass(X)
predictions = A2.round()  # round each output to the nearest integer

print("\nFinal Predictions:")
print(f"Inputs:\n{X}")
print(f"Expected Outputs:\n{Y}")
print(f"Predicted Outputs:\n{predictions}")

# Compare element-wise against the targets and reduce to a single flag
correct_predictions = np.all(predictions == Y)
print(f"\nModel correctly classifies all XOR inputs: {correct_predictions}")


Final Predictions:
Inputs:
[[0 0]
 [0 1]
 [1 0]
 [1 1]]
Expected Outputs:
[[0]
 [1]
 [1]
 [0]]
Predicted Outputs:
[[0.]
 [1.]
 [1.]
 [0.]]

Model correctly classifies all XOR inputs: True
