In [5]:
import numpy as np

# Define sigmoid activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Args:
        x: A number or NumPy array; arrays are processed element-wise.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x`` for array input.
    """
    # For very negative x, exp(-x) overflows to inf and 1 / (1 + inf) is 0.0,
    # which is the correct limiting value. Only the overflow report is
    # silenced here; the computed numbers are unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

# Define derivative of sigmoid (takes the sigmoid's output, not its input)
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope written in terms of its output.

    Args:
        x: A value (or array of values) that is already a sigmoid output,
            i.e. ``x = sigmoid(z)``. Passing the pre-activation ``z`` itself
            does not give the derivative.

    Returns:
        ``x * (1 - x)``, computed element-wise for array input.
    """
    complement = 1 - x
    return x * complement

# XOR dataset: one row per input pair, one label per row
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Labels are the XOR of the two input columns, kept as a (4, 1) column so
# they line up row-for-row with X.
y = np.bitwise_xor(X[:, :1], X[:, 1:])

# Neural network architecture: 2 inputs -> 2 hidden units -> 1 output
input_layer_size, hidden_layer_size, output_layer_size = 2, 2, 1

# Initialize weights and biases with uniform draws from [0, 1).
# The seed makes every run start from the same parameters. The four draws
# share NumPy's global generator, so their order determines the values.
np.random.seed(42)
weights1 = np.random.uniform(0.0, 1.0, (input_layer_size, hidden_layer_size))
bias1 = np.random.uniform(0.0, 1.0, (1, hidden_layer_size))

weights2 = np.random.uniform(0.0, 1.0, (hidden_layer_size, output_layer_size))
bias2 = np.random.uniform(0.0, 1.0, (1, output_layer_size))

# Step size for each update and number of passes over the dataset
learning_rate = 0.1
epochs = 10000

# Train the neural network with full-batch gradient descent
for epoch in range(epochs):
    # Forward pass: input -> hidden layer -> output layer
    hidden_input = X.dot(weights1) + bias1
    hidden_output = sigmoid(hidden_input)

    output_input = hidden_output.dot(weights2) + bias2
    output = sigmoid(output_input)

    # Backward pass. `error` is target minus prediction, so each delta is
    # (up to a constant factor) the negative gradient of the squared error;
    # that is why the updates below add rather than subtract.
    error = y - output
    output_delta = sigmoid_derivative(output) * error

    # Propagate the output delta back through weights2 (pre-update values)
    hidden_error = np.dot(output_delta, weights2.T)
    hidden_delta = sigmoid_derivative(hidden_output) * hidden_error

    # Gradient step; each bias update sums its delta over all samples
    weights2 += learning_rate * np.dot(hidden_output.T, output_delta)
    bias2 += learning_rate * output_delta.sum(axis=0, keepdims=True)

    weights1 += learning_rate * np.dot(X.T, hidden_delta)
    bias1 += learning_rate * hidden_delta.sum(axis=0, keepdims=True)

    # Report the mean squared error every 1000 epochs. `error` was measured
    # before this epoch's update was applied.
    if epoch % 1000 == 0:
        loss = np.square(error).mean()
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final prediction: one more forward pass using the trained parameters
print("\nFinal predictions after training:")
trained_hidden = sigmoid(np.dot(X, weights1) + bias1)
predictions = sigmoid(np.dot(trained_hidden, weights2) + bias2)
# Round each sigmoid output to the nearest class label (0 or 1)
print(predictions.round())

# Expected output: the XOR targets, for side-by-side comparison
print("\nExpected output:")
print(y)


Epoch 0, Loss: 0.3247
Epoch 1000, Loss: 0.2406
Epoch 2000, Loss: 0.1960
Epoch 3000, Loss: 0.1207
Epoch 4000, Loss: 0.0305
Epoch 5000, Loss: 0.0125
Epoch 6000, Loss: 0.0074
Epoch 7000, Loss: 0.0051
Epoch 8000, Loss: 0.0038
Epoch 9000, Loss: 0.0031

Final predictions after training:
[[0.]
 [1.]
 [1.]
 [0.]]

Expected output:
[[0]
 [1]
 [1]
 [0]]
