In [2]:
import numpy as np
import matplotlib.pyplot as plt
import math

**Chain Rule**

- If z = f(y) and y = g(x), then:
- dz/dx = (dz/dy) * (dy/dx)

In [3]:
# Example: z = (x² + 1)³
# Let y = x² + 1, then z = y³
# dz/dx = dz/dy * dy/dx = 3y² * 2x = 3(x² + 1)² * 2x

def z(x):
    return (x**2 + 1)**3

def dz_dx(x):
    return 3 * (x**2 + 1)**2 * 2*x

def numerical_derivative(f, x, h=1e-5):
    return (f(x + h) - f(x - h)) / (2 * h)

x = 2
exact = dz_dx(x)
approx = numerical_derivative(z, x)
print(f"Exact: {exact}")
print(f"Approximate: {approx:.6f}")

Exact: 300
Approximate: 300.000000


In [4]:
# Example
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# formula -> σ′(x) = σ(x)⋅(1−σ(x))
def sigmoid_derivative(x): 
    return sigmoid(x) * (1 - sigmoid(x))

def forward_and_backward(x, w):
    k = w * x
    output = sigmoid(k)
    print(f"Forward Output: {output:.4f}")
    
    d_output_d_k = sigmoid_derivative(x)
    d_k_d_w = x
    d_output_dw = d_output_d_k * d_k_d_w
    
    return output, d_output_dw
    
x_input = 2.0
weight = 0.5
output_val, gradient_chain_rule = forward_and_backward(x_input, weight)


def check_function(w):
    return sigmoid(w * x_input)

approx_gradient = numerical_derivative(check_function, weight)

print("-" * 30)
print(f"Gradient (Chain Rule): {gradient_chain_rule:.6f}")
print(f"Gradient (Numerical) : {approx_gradient:.6f}")

Forward Output: 0.7311
------------------------------
Gradient (Chain Rule): 0.209987
Gradient (Numerical) : 0.393224


**Backpropagation**

In [13]:
def forward_pass(x, w1, b1, w2, b2):
    z1 = w1 @ x + b1
    a1 = np.maximum(0, z1)
    z2 = w2 @ a1 + b2
    return z2, a1, z1

# Loss function: MSE
def loss(y_pred, y_true):
    return 0.5 * np.sum((y_pred - y_true)**2)

def backward_pass(x, y_true, w1, b1, w2, b2):
    z2, a1, z1 = forward_pass(x, w1, b1, w2, b2)
    
    # Output layer gradient
    dL_dz2 = z2 - y_true
    
    # Backpropagate through layers (chain rule)
    dL_dw2 = dL_dz2 @ a1.T
    dL_db2 = dL_dz2
    
    dL_da1 = w2.T @ dL_dz2
    dL_dz1 = dL_da1 * (z1 > 0)
    
    dL_dw1 = dL_dz1 @ x.T
    dL_db1 = dL_dz1
    
    return dL_dw1, dL_db1, dL_dw2, dL_db2

x = np.array([[1.0], [2.0]])
y_true = np.array([[0.5]])
w1 = np.random.randn(3, 2)
b1 = np.random.randn(3, 1)
w2 = np.random.randn(1, 3)
b2 = np.random.randn(1, 1)

grads = backward_pass(x, y_true, w1, b1, w2, b2)
grads = backward_pass(x, y_true, w1, b1, w2, b2)
print("Shapes:")
print("dW1:", grads[0].shape, "dB1:", grads[1].shape, "dW2:", grads[2].shape, "dB2:", grads[3].shape)
print("Gradients computed using chain rule (backpropagation)")

Shapes:
dW1: (3, 2) dB1: (3, 1) dW2: (1, 3) dB2: (1, 1)
Gradients computed using chain rule (backpropagation)
