In [2]:
import numpy as np

# Define the sigmoid activation function and its derivative
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        The sigmoid of ``x`` (between 0 and 1), with the same shape as ``x``.
    """
    # For very negative x, exp(-x) overflows to inf and the quotient becomes
    # exactly 0.0, which is the correct saturated value. Only the overflow
    # warning is silenced; the arithmetic itself is unchanged.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Slope of the sigmoid, expressed through the sigmoid's output value.

    ``x`` is expected to already be a sigmoid activation ``s`` (the training
    loop passes layer outputs here), for which the derivative is s * (1 - s).
    """
    one_minus_activation = 1 - x
    return x * one_minus_activation

# Network architecture: 4 binary inputs -> 3 hidden neurons -> 2 binary outputs
input_size, hidden_size, output_size = 4, 3, 2

# Seed NumPy's global generator so every run starts from the same parameters
np.random.seed(42)

# Shapes are listed in draw order; the order decides which random numbers
# each array receives, so it must stay as is.
layer_shapes = (
    (input_size, hidden_size),   # weights: input -> hidden
    (1, hidden_size),            # biases: hidden
    (hidden_size, output_size),  # weights: hidden -> output
    (1, output_size),            # biases: output
)
(weights_input_hidden,
 bias_hidden,
 weights_hidden_output,
 bias_output) = [np.random.rand(*shape) for shape in layer_shapes]

# Training dataset: every 4-bit input pattern (rows count up from 0000 to 1111,
# most significant bit first) paired with its 2-bit target.
X = np.array([[(pattern >> shift) & 1 for shift in (3, 2, 1, 0)]
              for pattern in range(16)])

# Targets, listed in the same row order as X (four rows per line).
y = np.array([
    [0, 0], [0, 1], [0, 1], [0, 0],   # inputs 0000-0011
    [1, 0], [1, 1], [1, 0], [1, 1],   # inputs 0100-0111
    [0, 0], [0, 1], [0, 1], [0, 0],   # inputs 1000-1011
    [1, 0], [1, 1], [1, 0], [1, 1],   # inputs 1100-1111
])

# Hyperparameters
learning_rate = 0.1
epochs = 10000  # Number of training steps

# Full-batch gradient descent: each epoch runs one forward pass over all rows
# of X, backpropagates the error, and updates all four parameter arrays.
for epoch in range(epochs):
    # Forward pass: input -> hidden -> output, sigmoid at both layers
    hidden_input = X.dot(weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    final_input = hidden_output.dot(weights_hidden_output) + bias_output
    final_output = sigmoid(final_input)

    # Residual between targets and predictions
    error = y - final_output

    # Backward pass: scale each layer's error by the sigmoid slope at its
    # output. The hidden-layer error uses the output weights from before
    # this epoch's update.
    d_output = error * sigmoid_derivative(final_output)
    error_hidden_layer = np.dot(d_output, weights_hidden_output.T)
    d_hidden = error_hidden_layer * sigmoid_derivative(hidden_output)

    # Parameter step: error is (target - prediction), so the deltas are added
    weights_hidden_output += learning_rate * np.dot(hidden_output.T, d_output)
    bias_output += learning_rate * d_output.sum(axis=0, keepdims=True)
    weights_input_hidden += learning_rate * np.dot(X.T, d_hidden)
    bias_hidden += learning_rate * d_hidden.sum(axis=0, keepdims=True)

    # Progress report every 1000 epochs
    if not epoch % 1000:
        print(f'Epoch {epoch}, Error: {np.mean(np.abs(error))}')

# Report the learned parameters and how many training steps were run
print("\nFinal Weights and Biases:")
for heading, values in (
    ("Weights (Input -> Hidden):", weights_input_hidden),
    ("Biases (Hidden):", bias_hidden),
    ("Weights (Hidden -> Output):", weights_hidden_output),
    ("Biases (Output):", bias_output),
):
    print(heading)
    print(values)
print("\nTraining complete in", epochs, "steps.")


Epoch 0, Error: 0.49537535373920194
Epoch 1000, Error: 0.13960045555162653
Epoch 2000, Error: 0.11117557539987555
Epoch 3000, Error: 0.08123538014937164
Epoch 4000, Error: 0.05831225759810506
Epoch 5000, Error: 0.044891924098222444
Epoch 6000, Error: 0.036515490421267
Epoch 7000, Error: 0.031145751614458782
Epoch 8000, Error: 0.02746112826755291
Epoch 9000, Error: 0.02477222741169262

Final Weights and Biases:
Weights (Input -> Hidden):
[[-0.10836342  0.01845419  0.02074127]
 [ 3.64287327  7.45130211 -7.18147407]
 [-4.88809444  2.47871824  4.35666521]
 [ 6.80507616 -5.53542515  7.23491534]]
Biases (Hidden):
[[-4.42966949 -0.72485164  0.23153349]]
Weights (Hidden -> Output):
[[ 7.06944461  6.95665875]
 [ 8.43282347  7.32300409]
 [-5.92377728  9.46859284]]
Biases (Output):
[[ -4.41248914 -12.75371008]]

Training complete in 10000 steps.
