In [1]:
import numpy as np

# Network layout: 1 input neuron -> 2 hidden neurons -> 1 output neuron

# --- Trainable parameters (initial values) ---
# Hidden neuron 1: weight and bias
W1 = 0.5
B1 = 0.1
# Hidden neuron 2: weight and bias
W2 = -0.3
B2 = 0.2
# Output neuron: one weight per hidden neuron, plus a bias
W3 = 0.7
W4 = -0.6
B3 = 0.05

# --- Hyperparameters ---
alpha = 0.01  # learning rate (step size of each gradient-descent update)
epochs = 100  # how many times the training loop runs

# --- Training data: one (input, target) pair ---
X = 2
Y_true = 1.5

def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``.

    ``x`` may be a scalar or anything else ``np.maximum`` accepts; the
    NumPy result is returned as-is.
    """
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU with respect to its input.

    The result is 1 where ``x > 0`` and 0 everywhere else; the derivative
    at exactly 0 is taken to be 0.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    int or numpy.ndarray
        A plain Python ``int`` for scalar input, or an integer array with the
        same shape as ``x`` for array-like input, so the function can be
        applied to the same inputs as ``relu``.
    """
    # Scalars (including 0-d arrays) keep the simple int result.
    if np.ndim(x) == 0:
        return 1 if x > 0 else 0
    # `x > 0` on an array has no single truth value, so build the mask element-wise.
    return (np.asarray(x) > 0).astype(int)

# Gradient-descent training on the single (X, Y_true) example.
# Each iteration: forward pass -> loss -> backward pass -> parameter update.
for epoch in range(epochs):
    # ---- Forward pass ----
    # Hidden layer: pre-activations first, then ReLU on each neuron
    Z1 = W1 * X + B1
    Z2 = W2 * X + B2
    A1 = relu(Z1)
    A2 = relu(Z2)

    # Output neuron is linear: weighted sum of hidden activations plus bias
    Z_out = W3 * A1 + W4 * A2 + B3
    Y_pred = Z_out

    # Half squared error; the 0.5 makes the derivative simply (Y_pred - Y_true)
    loss = 0.5 * (Y_true - Y_pred) ** 2

    # ---- Backward pass ----
    # Derivative of the loss w.r.t. the prediction
    dL_dY_pred = Y_pred - Y_true

    # Output-layer gradients
    dL_dW3, dL_dW4, dL_dB3 = dL_dY_pred * A1, dL_dY_pred * A2, dL_dY_pred

    # Push the error back to the hidden activations. W3 and W4 are read here,
    # before the update step below modifies them.
    dL_dA1 = dL_dY_pred * W3
    dL_dA2 = dL_dY_pred * W4

    # Through the ReLU: the gradient is zeroed where the pre-activation is not positive
    dL_dZ1 = dL_dA1 * relu_derivative(Z1)
    dL_dZ2 = dL_dA2 * relu_derivative(Z2)

    # Hidden-layer weight and bias gradients
    dL_dW1, dL_dB1 = dL_dZ1 * X, dL_dZ1
    dL_dW2, dL_dB2 = dL_dZ2 * X, dL_dZ2

    # ---- Parameter update (plain gradient descent) ----
    W1 = W1 - alpha * dL_dW1
    B1 = B1 - alpha * dL_dB1
    W2 = W2 - alpha * dL_dW2
    B2 = B2 - alpha * dL_dB2
    W3 = W3 - alpha * dL_dW3
    W4 = W4 - alpha * dL_dW4
    B3 = B3 - alpha * dL_dB3

    # Report the loss (computed before this epoch's update) every 10th epoch
    if not epoch % 10:
        print(f"Epoch {epoch}: Loss = {loss:.4f}")

# Summary of the learned parameters
print("\nFinal Weights & Biases:")
print(f"W1: {W1:.4f}, B1: {B1:.4f}")
print(f"W2: {W2:.4f}, B2: {B2:.4f}")
print(f"W3: {W3:.4f}, W4: {W4:.4f}, B3: {B3:.4f}")


Epoch 0: Loss = 0.2312
Epoch 10: Loss = 0.0806
Epoch 20: Loss = 0.0236
Epoch 30: Loss = 0.0062
Epoch 40: Loss = 0.0015
Epoch 50: Loss = 0.0004
Epoch 60: Loss = 0.0001
Epoch 70: Loss = 0.0000
Epoch 80: Loss = 0.0000
Epoch 90: Loss = 0.0000

Final Weights & Biases:
W1: 0.6824, B1: 0.1912
W2: -0.3000, B2: 0.2000
W3: 0.8554, W4: -0.6000, B3: 0.1683
