In [1]:
import numpy as np

# ReLU activation function and its derivative
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

# Initialize parameters
input_size = 4  # Example with 4 binary inputs (can be modified)
hidden_layer_1_size = 4  # Number of neurons in the first hidden layer
hidden_layer_2_size = 4  # Number of neurons in the second hidden layer
output_size = 1  # One output

# Randomly initialize weights and biases
np.random.seed(42)  # For reproducibility
weights_input_hidden_1 = np.random.rand(input_size, hidden_layer_1_size)
bias_hidden_1 = np.random.rand(1, hidden_layer_1_size)
weights_hidden_1_hidden_2 = np.random.rand(hidden_layer_1_size, hidden_layer_2_size)
bias_hidden_2 = np.random.rand(1, hidden_layer_2_size)
weights_hidden_2_output = np.random.rand(hidden_layer_2_size, output_size)
bias_output = np.random.rand(1, output_size)

# Training dataset (4 binary inputs and 1 binary output)
X = np.array([[0, 0, 0, 0],
              [0, 0, 0, 1],
              [0, 0, 1, 0],
              [0, 0, 1, 1],
              [0, 1, 0, 0],
              [0, 1, 0, 1],
              [0, 1, 1, 0],
              [0, 1, 1, 1],
              [1, 0, 0, 0],
              [1, 0, 0, 1],
              [1, 0, 1, 0],
              [1, 0, 1, 1],
              [1, 1, 0, 0],
              [1, 1, 0, 1],
              [1, 1, 1, 0],
              [1, 1, 1, 1]])

y = np.array([[0],
              [0],
              [0],
              [1],
              [0],
              [1],
              [1],
              [0],
              [1],
              [0],
              [1],
              [1],
              [1],
              [0],
              [1],
              [0]])

# Hyperparameters
learning_rate = 0.1
epochs = 10000  # Number of training steps

# Training loop
for epoch in range(epochs):
    # Forward propagation
    hidden_input_1 = np.dot(X, weights_input_hidden_1) + bias_hidden_1
    hidden_output_1 = relu(hidden_input_1)

    hidden_input_2 = np.dot(hidden_output_1, weights_hidden_1_hidden_2) + bias_hidden_2
    hidden_output_2 = relu(hidden_input_2)

    final_input = np.dot(hidden_output_2, weights_hidden_2_output) + bias_output
    final_output = relu(final_input)

    # Compute error
    error = y - final_output

    # Backpropagation
    d_output = error * relu_derivative(final_output)
    error_hidden_layer_2 = d_output.dot(weights_hidden_2_output.T)
    d_hidden_2 = error_hidden_layer_2 * relu_derivative(hidden_output_2)

    error_hidden_layer_1 = d_hidden_2.dot(weights_hidden_1_hidden_2.T)
    d_hidden_1 = error_hidden_layer_1 * relu_derivative(hidden_output_1)

    # Update weights and biases
    weights_hidden_2_output += hidden_output_2.T.dot(d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate

    weights_hidden_1_hidden_2 += hidden_output_1.T.dot(d_hidden_2) * learning_rate
    bias_hidden_2 += np.sum(d_hidden_2, axis=0, keepdims=True) * learning_rate

    weights_input_hidden_1 += X.T.dot(d_hidden_1) * learning_rate
    bias_hidden_1 += np.sum(d_hidden_1, axis=0, keepdims=True) * learning_rate

    # Print error at intervals
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Error: {np.mean(np.abs(error))}')

# Output the final weight matrices, bias values, and number of steps
print("\nFinal Weights and Biases:")
print("Weights (Input -> Hidden Layer 1):")
print(weights_input_hidden_1)
print("Biases (Hidden Layer 1):")
print(bias_hidden_1)
print("Weights (Hidden Layer 1 -> Hidden Layer 2):")
print(weights_hidden_1_hidden_2)
print("Biases (Hidden Layer 2):")
print(bias_hidden_2)
print("Weights (Hidden Layer 2 -> Output):")
print(weights_hidden_2_output)
print("Biases (Output):")
print(bias_output)
print("\nTraining complete in", epochs, "steps.")


Epoch 0, Error: 4.553949439300682
Epoch 1000, Error: 0.5
Epoch 2000, Error: 0.5
Epoch 3000, Error: 0.5
Epoch 4000, Error: 0.5
Epoch 5000, Error: 0.5
Epoch 6000, Error: 0.5
Epoch 7000, Error: 0.5
Epoch 8000, Error: 0.5
Epoch 9000, Error: 0.5

Final Weights and Biases:
Weights (Input -> Hidden Layer 1):
[[-1.74038491 -3.04054814 -0.44615765 -4.81364832]
 [-1.87395831 -3.67495489 -1.07274634 -4.32873996]
 [-1.54438964 -3.34089939 -1.17460195 -4.52065331]
 [-1.16128893 -3.55020842 -0.92881394 -4.918756  ]]
Biases (Hidden Layer 1):
[[-3.24368736 -6.17085593 -1.54448386 -8.78828114]]
Weights (Hidden Layer 1 -> Hidden Layer 2):
[[ -0.67577982  -5.08514734  -0.0706907   -9.22793212]
 [ -1.08069748  -5.45034329  -0.23336395 -10.93639009]
 [ -0.31114989  -3.61981251   0.35293411  -6.5620347 ]
 [ -1.56510666  -5.66557203   0.5062782  -11.33809231]]
Biases (Hidden Layer 2):
[[-0.58459575 -3.51034487  0.43366729 -6.18544612]]
Weights (Hidden Layer 2 -> Output):
[[-19.55138561]
 [-24.59454383]
 [-27