In [2]:
import numpy as np

# Activation Function: Sigmoid & its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Real scalar or NumPy array.

    Returns:
        NumPy float scalar or array shaped like ``x``, with values in [0, 1].
    """
    # For large-magnitude negative x, exp(-x) overflows to inf and the
    # quotient becomes 0.0, which is the correct saturated value. Silence
    # only that spurious overflow warning; the computed values are unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Return the sigmoid's slope expressed through its output value.

    ``x`` is expected to already be a sigmoid activation ``s = sigmoid(z)``;
    the derivative with respect to ``z`` is then ``s * (1 - s)``. Passing the
    pre-activation ``z`` itself does not yield the derivative.
    """
    complement = 1 - x
    return x * complement

# XOR truth table: one row per combination of the two binary inputs
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# Target column vector, one XOR result per row of X
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Layer sizes of the 2-4-1 network
input_neurons, hidden_neurons, output_neurons = 2, 4, 1

# Optimisation settings
learning_rate = 0.5  # Step size applied to every parameter update
epochs = 10000       # Number of full-batch training iterations

# Parameters are drawn uniformly from [-1, 1). The seed is fixed and the
# draw order (W1, b1, W2, b2) is kept so repeated runs start identically.
np.random.seed(42)
W1 = np.random.uniform(low=-1, high=1, size=(input_neurons, hidden_neurons))   # Input -> Hidden weights
b1 = np.random.uniform(low=-1, high=1, size=(1, hidden_neurons))               # Hidden layer bias
W2 = np.random.uniform(low=-1, high=1, size=(hidden_neurons, output_neurons))  # Hidden -> Output weights
b2 = np.random.uniform(low=-1, high=1, size=(1, output_neurons))               # Output layer bias

# --- Training Process (Feedforward + Backpropagation) ---
# Full-batch gradient descent: every epoch uses all rows of X at once.
for epoch in range(epochs):

    # --- Forward Pass ---
    hidden_input = np.dot(X, W1) + b1  # Weighted sum for hidden layer
    hidden_output = sigmoid(hidden_input)  # Hidden layer activations

    final_input = np.dot(hidden_output, W2) + b2  # Weighted sum for output layer
    y_pred = sigmoid(final_input)  # Network output

    # --- Compute Error ---
    # Target minus prediction: with this sign the parameter updates below
    # are additive (+=) rather than subtractive.
    error = y - y_pred
    loss = np.mean(error**2)  # Mean Squared Error (MSE), used for logging only

    # --- Backpropagation ---
    # sigmoid_derivative is applied to the activations themselves
    # (y_pred / hidden_output), not to the pre-activation sums.
    d_output = error * sigmoid_derivative(y_pred)  # Output error gradient

    # Propagate through W2 before W2 is modified by the update step below
    error_hidden = d_output.dot(W2.T)
    d_hidden = error_hidden * sigmoid_derivative(hidden_output)  # Hidden error gradient

    # --- Update Weights and Biases ---
    W2 += hidden_output.T.dot(d_output) * learning_rate  # Update weights (Hidden -> Output)
    b2 += np.sum(d_output, axis=0, keepdims=True) * learning_rate  # Update bias (Output Layer)

    W1 += X.T.dot(d_hidden) * learning_rate  # Update weights (Input -> Hidden)
    b1 += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate  # Update bias (Hidden Layer)

    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.5f}")

# --- Final Predictions after Training ---
# Re-run the forward pass with the final parameters: the y_pred left over from
# the loop predates the last weight update, and does not exist at all when
# epochs == 0.
hidden_input = np.dot(X, W1) + b1
hidden_output = sigmoid(hidden_input)
final_input = np.dot(hidden_output, W2) + b2
y_pred = sigmoid(final_input)

print("\nFinal Predictions:")
print(y_pred.round())  # Rounding to 0 or 1


Epoch 0, Loss: 0.31540
Epoch 1000, Loss: 0.00900
Epoch 2000, Loss: 0.00213
Epoch 3000, Loss: 0.00116
Epoch 4000, Loss: 0.00079
Epoch 5000, Loss: 0.00059
Epoch 6000, Loss: 0.00048
Epoch 7000, Loss: 0.00040
Epoch 8000, Loss: 0.00034
Epoch 9000, Loss: 0.00030

Final Predictions:
[[0.]
 [1.]
 [1.]
 [0.]]
