In [29]:
import numpy as np

def main():
    """Run one hand-written forward and backward pass of a tiny ReLU network.

    The network maps a scalar input to a scalar output through one hidden
    layer of two ReLU units:

        a1 = W1 @ x + b1
        h1 = ReLU(a1)
        f  = W2 @ h1 + b2
        L  = 1/2 * (f - y)^2

    The input, target and all parameters are fixed constants defined in the
    body. Every intermediate value and every gradient is printed to stdout.

    Returns:
        None.
    """
    # -------- 1. Data ----------
    x = 2.0                                   # input scalar
    y = 3.0                                   # target scalar

    # make x a tiny 1D vector to match matrix math
    x_vec = np.array([x])                     # shape (1,)

    # -------- 2. Parameters ------
    # first layer: W1 in R^{2x1}, b1 in R^2
    W1 = np.array([[1.0],
                   [-1.0]])                   # shape (2,1)
    b1 = np.array([0.5, -0.5])                # shape (2,)

    # second layer: W2 in R^{1x2}, b2 is a scalar
    W2 = np.array([[2.0, -1.0]])              # shape (1,2)
    b2 = 0.1                                  # scalar

    # -------- 3. Forward Pass ------
    # a1 = W1 x + b1
    a1 = W1 @ x_vec + b1                      # shape (2,)
    # h1 = ReLU(a1)
    h1 = np.maximum(0.0, a1)                  # shape (2,)
    # f = W2 h1 + b2
    f = (W2 @ h1 + b2).item()                 # scalar
    # L = 1/2 (f - y)^2
    L = 0.5 * (f - y)**2                      # scalar

    print("===== Forward Pass =====")
    print(f"x             ={x}")
    print(f"y             ={y}")
    print(f"a1            ={a1}")
    print(f"h1            ={h1}")
    print(f"f             ={f}")
    print(f"L             ={L}")
    print()

    # ------- 4. Backward Pass ------
    # step 1: delta_f = dL/df = f - y
    delta_f = f - y                           # scalar
    print("===== Backward Pass ======")
    print(f"delta_f = dL/df = {delta_f}")     # ~2.1

    # step 2: dL/dW2 = delta_f * h1^T
    dL_dW2 = delta_f * h1.reshape(1, -1)      # shape (1,2)
    # step 2b: dL/db2 = delta_f
    dL_db2 = delta_f

    print(f"dL/dW2 = {dL_dW2}")               # [[5.25 0.]]
    print(f"dL/db2 = {dL_db2}")               # ~2.1

    # step 3: delta_h1 = dL/dh1 = W2^T * delta_f
    delta_h1 = (W2.T * delta_f).reshape(-1)   # (2,1) flattened to (2,)
    print(f"delta_h1 = dL/dh1 = {delta_h1}")  # [4.2 -2.1]

    # step 4: through ReLU: delta_a1 = delta_h1 * ReLU'(a1) (elementwise)
    active = a1 > 0                           # True where the unit fired
    relu_prime = active.astype(float)         # 1 where a1>0 else 0
    # Select rather than multiply: a negative upstream gradient times a
    # zero mask would produce IEEE negative zero (printed as "-0.").
    delta_a1 = np.where(active, delta_h1, 0.0)
    print(f"ReLU'(a1)     ={relu_prime}")     # [1. 0.]
    print(f"delta_a1   = dL/da1 =  {delta_a1}")  # [4.2 0.]

    # step 5: dL/dW1 = delta_a1 x^T (outer product with the input vector)
    dL_dW1 = np.outer(delta_a1, x_vec)        # shape (2,1)
    # step 5b: dL/db1 = delta_a1
    dL_db1 = delta_a1

    print(f"dL/dW1 =  {dL_dW1}")              # [[8.4],[0. ]]
    print(f"dL/db1 =  {dL_db1}")              # [4.2 0.]
    
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()
    

===== Forward Pass =====
x             =2.0
y             =3.0
a1            =[ 2.5 -2.5]
h1            =[2.5 0. ]
f             =5.1
L             =2.204999999999999

===== Backward Pass ======
delta_f = dL/df = 2.0999999999999996
dL/dW2 = [[5.25 0.  ]]
dL/db2 = 2.0999999999999996
delta_h1 = dL/dh1 = [ 4.2 -2.1]
ReLU'(a1)     =[1. 0.]
delta_a1   = dL/da1 =  [ 4.2 -0. ]
dL/dW1 =  [[ 8.4]
 [-0. ]]
dL/db1 =  [ 4.2 -0. ]
