In [2]:
import numpy as np

# --- 1. Define Activation and Loss Functions ---
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise.

    The exponential is only ever evaluated on non-positive values, so large
    negative inputs do not overflow ``np.exp`` (the naive formula computes
    ``exp(-x)``, which overflows and triggers a RuntimeWarning there).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x``; a NumPy scalar
        when ``x`` is a scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1]: it may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; arrays pass through.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid derivative in terms of its output.

    ``x`` is expected to already be a sigmoid activation ``a = sigmoid(z)``.
    The function does not apply ``sigmoid`` itself, so passing a raw
    pre-activation ``z`` does not yield ``sigmoid'(z)``.
    """
    complement = 1 - x
    return x * complement

def mse_loss(y_true, y_pred):
    """Return the mean of the squared differences between target and prediction.

    The mean is taken over every element of ``y_true - y_pred``.
    """
    residual = y_true - y_pred
    squared_error = residual ** 2
    return np.mean(squared_error)

# --- 2. Initial Setup (Simplified 2-Layer Network) ---
# Fixed seed so the random initial weights are the same on every run.
np.random.seed(1)

# Network shape: 2 inputs -> 2 hidden neurons -> 1 output neuron.
input_size, hidden_size, output_size = 2, 2, 1

# Weights are drawn uniformly from [-0.5, 0.5). W1 is drawn before W2 so the
# seeded random stream is consumed in a fixed order.
W1 = np.random.uniform(-0.5, 0.5, (input_size, hidden_size))   # (2, 2)
W2 = np.random.uniform(-0.5, 0.5, (hidden_size, output_size))  # (2, 1)

# Biases start at zero, stored as a single row per layer.
B1 = np.zeros(shape=(1, hidden_size))  # (1, 2)
B2 = np.zeros(shape=(1, output_size))  # (1, 1)

# Single training sample taken from the XOR truth table: (0, 1) -> 1.
X = np.array([[0, 1]])
y_true = np.array([[1]])

print("--- Multi-layer Perceptron (MLP) & Backpropagation Demonstration ---")
print(f"Initial W1:\n{np.round(W1, 4)}")
print(f"Initial W2:\n{np.round(W2, 4)}")

# --- 3. FORWARD PASS ---
print("\n[STEP 1: FORWARD PASS]")

# Hidden layer: affine transform of the input, then the sigmoid non-linearity.
Z1 = X.dot(W1) + B1  # hidden pre-activation
A1 = sigmoid(Z1)     # hidden activation, fed to the output layer
print(f"Hidden Layer Output (A1): {np.round(A1, 4)}")

# Output layer: the same affine + sigmoid pattern applied to the hidden activation.
Z2 = A1.dot(W2) + B2  # output pre-activation
y_pred = sigmoid(Z2)  # network prediction
print(f"Final Prediction (y_pred): {np.round(y_pred, 4)}")

# Mean squared error between the target and the prediction, before any update.
loss = mse_loss(y_true, y_pred)
print(f"Initial MSE Loss: {np.round(loss, 4)}")

# --- 4. BACKWARD PASS (The Core of Backpropagation) ---
print("\n[STEP 2: BACKWARD PASS (Gradient Calculation)]")

# A. Output Layer Error (dLoss/dZ2), by the chain rule:
#    dLoss/dZ2 = dLoss/dy_pred * dy_pred/dZ2
#    dLoss/dy_pred is taken as (y_pred - y_true), which is the gradient of
#    0.5 * (y_true - y_pred)**2. The exact gradient of mse_loss carries an
#    extra constant factor 2 / y_pred.size that is not applied here; being a
#    constant, it only rescales the effective learning rate.
#    dy_pred/dZ2 = y_pred * (1 - y_pred); sigmoid_derivative takes the
#    activation (y_pred), not the pre-activation (Z2).
dLoss_dZ2 = (y_pred - y_true) * sigmoid_derivative(y_pred) # (1, 1)
print(f"dLoss/dZ2: {dLoss_dZ2.round(4)}")

# B. Output Layer Gradients (dLoss/dW2, dLoss/dB2)
#    Z2 = A1 @ W2 + B2, so:
#    dLoss/dW2 = A1.T @ dLoss/dZ2
#    dLoss/dB2 = dLoss/dZ2 summed over the sample axis
dLoss_dW2 = np.dot(A1.T, dLoss_dZ2) # (2, 1)
dLoss_dB2 = np.sum(dLoss_dZ2, axis=0, keepdims=True) # (1, 1)
print(f"dLoss/dW2:\n{dLoss_dW2.round(4)}")

# C. Hidden Layer Error (dLoss/dZ1)
#    dLoss/dA1 = dLoss/dZ2 @ W2.T
#    dLoss/dZ1 = dLoss/dA1 * dA1/dZ1, with dA1/dZ1 = A1 * (1 - A1)
dLoss_dA1 = np.dot(dLoss_dZ2, W2.T) # (1, 2)
dLoss_dZ1 = dLoss_dA1 * sigmoid_derivative(A1) # (1, 2)
print(f"dLoss/dZ1: {dLoss_dZ1.round(4)}")

# D. Hidden Layer Gradients (dLoss/dW1, dLoss/dB1)
#    Z1 = X @ W1 + B1, so:
#    dLoss/dW1 = X.T @ dLoss/dZ1
#    dLoss/dB1 = dLoss/dZ1 summed over the sample axis
dLoss_dW1 = np.dot(X.T, dLoss_dZ1) # (2, 2)
dLoss_dB1 = np.sum(dLoss_dZ1, axis=0, keepdims=True) # (1, 2)
print(f"dLoss/dW1:\n{dLoss_dW1.round(4)}")

# --- 5. WEIGHT UPDATE (Gradient Descent) ---
print("\n[STEP 3: WEIGHT UPDATE]")
learning_rate = 0.1

# One gradient-descent step: parameter <- parameter - learning_rate * gradient.
W2_new = W2 - learning_rate * dLoss_dW2
W1_new = W1 - learning_rate * dLoss_dW1
# Biases take the same step so every trainable parameter is updated; they are
# computed for completeness and not printed, keeping the console output as is.
B2_new = B2 - learning_rate * dLoss_dB2
B1_new = B1 - learning_rate * dLoss_dB1

# The printed "change" is learning_rate * gradient, i.e. the amount that is
# SUBTRACTED from the old weights to obtain the new ones.
print(f"Change in W1 (dLoss/dW1 * LR):\n{(dLoss_dW1 * learning_rate).round(4)}")
print(f"New W1:\n{W1_new.round(4)}")
print(f"Change in W2 (dLoss/dW2 * LR):\n{(dLoss_dW2 * learning_rate).round(4)}")
print(f"New W2:\n{W2_new.round(4)}")

--- Multi-layer Perceptron (MLP) & Backpropagation Demonstration ---
Initial W1:
[[-0.083   0.2203]
 [-0.4999 -0.1977]]
Initial W2:
[[-0.3532]
 [-0.4077]]

[STEP 1: FORWARD PASS]
Hidden Layer Output (A1): [[0.3776 0.4507]]
Final Prediction (y_pred): [[0.4214]]
Initial MSE Loss: 0.3348

[STEP 2: BACKWARD PASS (Gradient Calculation)]
dLoss/dZ2: [[-0.1411]]
dLoss/dW2:
[[-0.0533]
 [-0.0636]]
dLoss/dZ1: [[0.0117 0.0142]]
dLoss/dW1:
[[0.     0.    ]
 [0.0117 0.0142]]

[STEP 3: WEIGHT UPDATE]
Change in W1 (dLoss/dW1 * LR):
[[0.     0.    ]
 [0.0012 0.0014]]
New W1:
[[-0.083   0.2203]
 [-0.5011 -0.1991]]
Change in W2 (dLoss/dW2 * LR):
[[-0.0053]
 [-0.0064]]
New W2:
[[-0.3479]
 [-0.4013]]
