In [1]:
import numpy as np

# Sigmoid activation function and its derivative
def sigmoid(x):
    """Logistic sigmoid, ``1 / (1 + exp(-x))``, applied element-wise.

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        The sigmoid of ``x``; for an array input the result has the same
        shape. Values lie in [0, 1].
    """
    # For large negative x, exp(-x) overflows to inf and NumPy reports it
    # (a RuntimeWarning by default, an exception under over="raise"). The
    # quotient 1 / (1 + inf) is 0.0, which is the correct limit, so only that
    # benign overflow is silenced; the returned values are unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Slope of the sigmoid, expressed in terms of the sigmoid's own output.

    Args:
        x: Value(s) that have already been passed through ``sigmoid`` (an
            activation, not the raw pre-activation sum).

    Returns:
        ``x * (1 - x)``, computed element-wise.
    """
    complement = 1 - x
    return x * complement

# Network layout: layer widths, input side first.
# NOTE: input_size has to equal the number of columns in the training
# inputs, because the first weight matrix is multiplied by them directly.
input_size = 4           # width of one input row
hidden_layer_1_size = 4  # neurons in the first hidden layer
hidden_layer_2_size = 4  # neurons in the second hidden layer
output_size = 1          # a single output neuron

# Seed NumPy's global RNG so every run starts from the same parameters.
np.random.seed(42)

# Each parameter array is filled with uniform samples from [0, 1). The arrays
# are drawn in network order (weights, then bias, layer by layer); reordering
# these lines would hand different values to each array.
weights_input_hidden_1 = np.random.random_sample((input_size, hidden_layer_1_size))
bias_hidden_1 = np.random.random_sample((1, hidden_layer_1_size))

weights_hidden_1_hidden_2 = np.random.random_sample((hidden_layer_1_size, hidden_layer_2_size))
bias_hidden_2 = np.random.random_sample((1, hidden_layer_2_size))

weights_hidden_2_output = np.random.random_sample((hidden_layer_2_size, output_size))
bias_output = np.random.random_sample((1, output_size))

# Training inputs: all 16 four-bit patterns, one per row, in ascending binary
# order (row k holds the bits of k, most significant bit first).
X = (np.arange(16).reshape(-1, 1) >> np.arange(3, -1, -1)) & 1

# Target bit for each row of X, listed in the same row order (four rows per
# line) and stored as a 16x1 column.
y = np.array([0, 0, 0, 1,
              0, 1, 1, 0,
              1, 0, 1, 1,
              1, 0, 1, 0]).reshape(-1, 1)

# Training settings
epochs = 10_000      # number of training steps (one update per pass over the data)
learning_rate = 0.1  # scale factor applied to every parameter update

# Full-batch training: every epoch pushes all rows of X through the network,
# back-propagates the residual, and then adjusts all weights and biases.
for epoch in range(epochs):
    # --- Forward pass: affine map followed by sigmoid, layer by layer ---
    hidden_input_1 = X.dot(weights_input_hidden_1) + bias_hidden_1
    hidden_output_1 = sigmoid(hidden_input_1)

    hidden_input_2 = hidden_output_1.dot(weights_hidden_1_hidden_2) + bias_hidden_2
    hidden_output_2 = sigmoid(hidden_input_2)

    final_input = hidden_output_2.dot(weights_hidden_2_output) + bias_output
    final_output = sigmoid(final_input)

    # Signed residual (target minus prediction). Because of this sign the
    # updates below are added to the parameters rather than subtracted.
    error = y - final_output

    # Progress report: mean absolute residual, measured before this epoch's
    # parameter update is applied.
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Error: {np.mean(np.abs(error))}')

    # --- Backward pass ---
    # sigmoid_derivative receives activations (sigmoid outputs), not the
    # pre-activation sums. All deltas are derived from the current weights
    # before any parameter is modified.
    d_output = sigmoid_derivative(final_output) * error

    error_hidden_layer_2 = np.dot(d_output, weights_hidden_2_output.T)
    d_hidden_2 = sigmoid_derivative(hidden_output_2) * error_hidden_layer_2

    error_hidden_layer_1 = np.dot(d_hidden_2, weights_hidden_1_hidden_2.T)
    d_hidden_1 = sigmoid_derivative(hidden_output_1) * error_hidden_layer_1

    # --- Parameter step (in place), input side first ---
    # Weight updates pair each layer's input activations with its deltas;
    # bias updates sum the deltas over all training rows.
    weights_input_hidden_1 += learning_rate * np.dot(X.T, d_hidden_1)
    bias_hidden_1 += learning_rate * d_hidden_1.sum(axis=0, keepdims=True)

    weights_hidden_1_hidden_2 += learning_rate * np.dot(hidden_output_1.T, d_hidden_2)
    bias_hidden_2 += learning_rate * d_hidden_2.sum(axis=0, keepdims=True)

    weights_hidden_2_output += learning_rate * np.dot(hidden_output_2.T, d_output)
    bias_output += learning_rate * d_output.sum(axis=0, keepdims=True)

# Report the trained parameters in network order, then the step count.
final_parameters = (
    ("Weights (Input -> Hidden Layer 1):", weights_input_hidden_1),
    ("Biases (Hidden Layer 1):", bias_hidden_1),
    ("Weights (Hidden Layer 1 -> Hidden Layer 2):", weights_hidden_1_hidden_2),
    ("Biases (Hidden Layer 2):", bias_hidden_2),
    ("Weights (Hidden Layer 2 -> Output):", weights_hidden_2_output),
    ("Biases (Output):", bias_output),
)

print("\nFinal Weights and Biases:")
for heading, values in final_parameters:
    # Heading on its own line, followed by the array's default text form.
    print(heading)
    print(values)
print("\nTraining complete in", epochs, "steps.")


Epoch 0, Error: 0.4985001162704968
Epoch 1000, Error: 0.41165719712013593
Epoch 2000, Error: 0.313415928512748
Epoch 3000, Error: 0.2321051021522549
Epoch 4000, Error: 0.190604290694326
Epoch 5000, Error: 0.06471784420692181
Epoch 6000, Error: 0.037827459014003015
Epoch 7000, Error: 0.029104738980480126
Epoch 8000, Error: 0.024473002346377375
Epoch 9000, Error: 0.021491533239580113

Final Weights and Biases:
Weights (Input -> Hidden Layer 1):
[[-0.34290916  5.42415121  0.85473954  4.83775385]
 [-2.81941168 -3.93700275 -3.24698204  3.03216907]
 [-0.79567608 -0.33838964 -4.07629344  4.21822834]
 [ 4.93270822 -1.45698394 -1.1766753  -3.47345558]]
Biases (Hidden Layer 1):
[[ 1.69416534 -2.16326098  2.49831625 -2.87039742]]
Weights (Hidden Layer 1 -> Hidden Layer 2):
[[ 1.01955728 -4.9901241  -2.17864339 -3.62165601]
 [ 0.38110237  6.08109286  1.4543349   4.36722918]
 [ 0.35663235 -1.8787198   5.71932661 -1.4935324 ]
 [-0.51586418  4.89167315  6.40422985  3.6036918 ]]
Biases (Hidden Layer 2