#WorkFlow

Simple (vanilla) gradient descent:

Computed the forward pass (Y_hat = W*X + B).

Calculated the loss (half the Mean Squared Error: L = 1/(2n) * sum((Y_hat - Y)^2); the 1/2 factor cancels when differentiating).

Computed gradients manually (dW and dB).

Updated the parameters using the gradient descent rule (W_new = W - lr * dW, B_new = B - lr * dB).

Repeated for multiple iterations.

In [1]:
import numpy as np

# Training data: inputs and their true labels (edit the values to experiment).
X = np.array([1, 2, 3, 4, 5, 6], dtype=int)
Y = np.array([5, 8, 11, 14, 17, 20], dtype=int)

# Model parameters (slope W, intercept B) and hyperparameters.
W, B = 0.0, 0.0
learning_rate = 0.01
n_iterations = 50
n = len(X)

# Batch gradient descent on the linear model Y_hat = W*X + B.
# Every iteration prints the full worked calculation, row by row.
for step in range(1, n_iterations + 1):
    print(f"\n================ Iteration {step} ================")

    # ---- Step 1: forward pass and per-sample error (vectorised) ----
    print("Step 1: Forward Pass and Error Computation")
    print(" X | Y  | Y_hat = W*X + B | Error = Y_hat - Y | dW_contrib = Error*X | dB_contrib = Error")
    print("--------------------------------------------------------------------------------------------")

    predictions = W * X + B
    residuals = predictions - Y
    grad_w_terms = residuals * X   # per-sample contribution to dW
    grad_b_terms = residuals       # per-sample contribution to dB

    # Show each row of the table as an explicit equation.
    rows = zip(X, Y, predictions, residuals, grad_w_terms, grad_b_terms)
    for x_val, y_val, pred, resid, w_term, b_term in rows:
        print(f"{x_val:>2} | {y_val:>2} | {pred:>6.2f} = {W:.2f}*{x_val} + {B:.2f} | "
              f"{resid:>6.2f} = {pred:.2f} - {y_val} | {w_term:>10.2f} = {resid:.2f}*{x_val} | {b_term:>9.2f} = {resid:.2f}")

    # ---- Step 2: average the contributions to get the gradients ----
    grad_w_total = np.sum(grad_w_terms)
    grad_b_total = np.sum(grad_b_terms)
    dW = (1/n) * grad_w_total
    dB = (1/n) * grad_b_total

    print("\nStep 2: Compute Average Gradients")
    print(f"dW = (1/n) * sum(dW_contrib) = (1/{n}) * {grad_w_total:.2f} = {dW:.4f}")
    print(f"dB = (1/n) * sum(dB_contrib) = (1/{n}) * {grad_b_total:.2f} = {dB:.4f}")

    # ---- Step 3: gradient descent update ----
    W_new = W - learning_rate * dW
    B_new = B - learning_rate * dB
    print("\nStep 3: Update Parameters")
    print(f"W_new = W - lr*dW = {W:.4f} - {learning_rate}*{dW:.4f} = {W_new:.4f}")
    print(f"B_new = B - lr*dB = {B:.4f} - {learning_rate}*{dB:.4f} = {B_new:.4f}")

    # ---- Step 4: loss of the parameters used in this iteration's forward pass ----
    # The errors were computed before the update, so this is the pre-update loss.
    squared_error_sum = np.sum(residuals**2)
    loss = (1/(2*n)) * squared_error_sum
    print("\nStep 4: Compute Loss")
    print(f"L = 1/(2n) * sum(Error^2) = 1/(2*{n}) * {squared_error_sum:.2f} = {loss:.4f}")

    # Carry the updated parameters into the next iteration.
    W, B = W_new, B_new



Step 1: Forward Pass and Error Computation
 X | Y  | Y_hat = W*X + B | Error = Y_hat - Y | dW_contrib = Error*X | dB_contrib = Error
--------------------------------------------------------------------------------------------
 1 |  5 |   0.00 = 0.00*1 + 0.00 |  -5.00 = 0.00 - 5 |      -5.00 = -5.00*1 |     -5.00 = -5.00
 2 |  8 |   0.00 = 0.00*2 + 0.00 |  -8.00 = 0.00 - 8 |     -16.00 = -8.00*2 |     -8.00 = -8.00
 3 | 11 |   0.00 = 0.00*3 + 0.00 | -11.00 = 0.00 - 11 |     -33.00 = -11.00*3 |    -11.00 = -11.00
 4 | 14 |   0.00 = 0.00*4 + 0.00 | -14.00 = 0.00 - 14 |     -56.00 = -14.00*4 |    -14.00 = -14.00
 5 | 17 |   0.00 = 0.00*5 + 0.00 | -17.00 = 0.00 - 17 |     -85.00 = -17.00*5 |    -17.00 = -17.00
 6 | 20 |   0.00 = 0.00*6 + 0.00 | -20.00 = 0.00 - 20 |    -120.00 = -20.00*6 |    -20.00 = -20.00

Step 2: Compute Average Gradients
dW = (1/n) * sum(dW_contrib) = (1/6) * -315.00 = -52.5000
dB = (1/n) * sum(dB_contrib) = (1/6) * -75.00 = -12.5000

Step 3: Update Parameters
W_new = 