In [1]:
import numpy as np

# Tanh activation function and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

# Initialize parameters
input_size = 4  # Example with 4 binary inputs (can be modified)
hidden_layer_1_size = 4  # Number of neurons in the first hidden layer
hidden_layer_2_size = 4  # Number of neurons in the second hidden layer
output_size = 1  # One output

# Randomly initialize weights and biases
np.random.seed(42)  # For reproducibility
weights_input_hidden_1 = np.random.rand(input_size, hidden_layer_1_size)
bias_hidden_1 = np.random.rand(1, hidden_layer_1_size)
weights_hidden_1_hidden_2 = np.random.rand(hidden_layer_1_size, hidden_layer_2_size)
bias_hidden_2 = np.random.rand(1, hidden_layer_2_size)
weights_hidden_2_output = np.random.rand(hidden_layer_2_size, output_size)
bias_output = np.random.rand(1, output_size)

# Training dataset (4 binary inputs and 1 binary output)
X = np.array([[0, 0, 0, 0],
              [0, 0, 0, 1],
              [0, 0, 1, 0],
              [0, 0, 1, 1],
              [0, 1, 0, 0],
              [0, 1, 0, 1],
              [0, 1, 1, 0],
              [0, 1, 1, 1],
              [1, 0, 0, 0],
              [1, 0, 0, 1],
              [1, 0, 1, 0],
              [1, 0, 1, 1],
              [1, 1, 0, 0],
              [1, 1, 0, 1],
              [1, 1, 1, 0],
              [1, 1, 1, 1]])

y = np.array([[0],
              [0],
              [0],
              [1],
              [0],
              [1],
              [1],
              [0],
              [1],
              [0],
              [1],
              [1],
              [1],
              [0],
              [1],
              [0]])

# Hyperparameters
learning_rate = 0.1
epochs = 10000  # Number of training steps

# Training loop
for epoch in range(epochs):
    # Forward propagation
    hidden_input_1 = np.dot(X, weights_input_hidden_1) + bias_hidden_1
    hidden_output_1 = tanh(hidden_input_1)

    hidden_input_2 = np.dot(hidden_output_1, weights_hidden_1_hidden_2) + bias_hidden_2
    hidden_output_2 = tanh(hidden_input_2)

    final_input = np.dot(hidden_output_2, weights_hidden_2_output) + bias_output
    final_output = tanh(final_input)

    # Compute error
    error = y - final_output

    # Backpropagation
    d_output = error * tanh_derivative(final_output)
    error_hidden_layer_2 = d_output.dot(weights_hidden_2_output.T)
    d_hidden_2 = error_hidden_layer_2 * tanh_derivative(hidden_output_2)

    error_hidden_layer_1 = d_hidden_2.dot(weights_hidden_1_hidden_2.T)
    d_hidden_1 = error_hidden_layer_1 * tanh_derivative(hidden_output_1)

    # Update weights and biases
    weights_hidden_2_output += hidden_output_2.T.dot(d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate

    weights_hidden_1_hidden_2 += hidden_output_1.T.dot(d_hidden_2) * learning_rate
    bias_hidden_2 += np.sum(d_hidden_2, axis=0, keepdims=True) * learning_rate

    weights_input_hidden_1 += X.T.dot(d_hidden_1) * learning_rate
    bias_hidden_1 += np.sum(d_hidden_1, axis=0, keepdims=True) * learning_rate

    # Print error at intervals
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Error: {np.mean(np.abs(error))}')

# Output the final weight matrices, bias values, and number of steps
print("\nFinal Weights and Biases:")
print("Weights (Input -> Hidden Layer 1):")
print(weights_input_hidden_1)
print("Biases (Hidden Layer 1):")
print(bias_hidden_1)
print("Weights (Hidden Layer 1 -> Hidden Layer 2):")
print(weights_hidden_1_hidden_2)
print("Biases (Hidden Layer 2):")
print(bias_hidden_2)
print("Weights (Hidden Layer 2 -> Output):")
print(weights_hidden_2_output)
print("Biases (Output):")
print(bias_output)
print("\nTraining complete in", epochs, "steps.")


Epoch 0, Error: 0.49549958176470377
Epoch 1000, Error: 0.27187912120523217
Epoch 2000, Error: 0.13430105001562184
Epoch 3000, Error: 0.1544110682243778
Epoch 4000, Error: 0.08557973042684029
Epoch 5000, Error: 0.05538755169212213
Epoch 6000, Error: 0.04806104697213179
Epoch 7000, Error: 0.025396340277358542
Epoch 8000, Error: 0.02780523187023
Epoch 9000, Error: 0.02930037813235073

Final Weights and Biases:
Weights (Input -> Hidden Layer 1):
[[ 3.69213073  0.08115265  2.83015802 -3.16469236]
 [ 1.20937891  1.78193163 -2.10670865 -0.99767639]
 [ 1.8341537  -0.76882937 -4.55730814  3.57429704]
 [ 1.19203229  1.0366979  -7.11908192  1.66391987]]
Biases (Hidden Layer 1):
[[-2.2412069  -0.94370347  3.95624343  0.01767551]]
Weights (Hidden Layer 1 -> Hidden Layer 2):
[[ 4.02715274 -0.85195846  1.55317008  2.67430183]
 [-1.28499528  2.26287489 -3.58621461  1.40350946]
 [ 3.1376507   0.10445367  0.92167102  2.57337249]
 [ 2.59989418  0.39743163 -0.09938081  2.97110205]]
Biases (Hidden Layer 2)