In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [67]:
from reusable_modules.metrics import MSE, MAE, RMSE, R2_Score
from reusable_modules.gd_poly import  gradient_descent 
from reusable_modules.constants import (
   learning_rates , iteration_counts , degrees )

In [68]:
df = pd.read_csv('data/zuu crew scores.csv')
df = df[df['CourseName'] == 'Foundations of ML']
df.head(5)

Unnamed: 0,MemberName,EducationLevel,Attendance,TotalHours,AssignmentsCompleted,HackathonParticipation,GitHubScore,PeerReviewScore,CourseName,CapstoneScore
0,Theekshana Rathnayake,3,79.9,43.7,2,0,62.8,5.0,Foundations of ML,45.3
1,Mayura Sandakalum Sellapperuma,2,76.8,95.6,6,0,87.4,2.7,Foundations of ML,78.8
2,Amila Narangoda,3,96.6,75.9,8,0,98.4,2.8,Foundations of ML,65.4
4,Tharusha Vihanga,2,83.2,24.0,6,0,41.8,4.2,Foundations of ML,40.1
7,Chamath Perera,3,86.5,88.0,5,0,23.9,1.3,Foundations of ML,68.2


In [69]:
FEATURES = ["Attendance", "TotalHours", "AssignmentsCompleted",
            "HackathonParticipation", "PeerReviewScore"]
TARGET = "CapstoneScore"


In [70]:
X_full = df[FEATURES].values
y_full = df[TARGET].values

print (X_full.shape)
print (y_full.shape)

(72, 5)
(72,)


In [71]:
def train_test_split_np(X, y, test_size=0.2, seed=42):
    rng = np.random.default_rng(seed)
    m = X.shape[0]
    idx = np.arange(m); rng.shuffle(idx)
    m_test = int(np.round(m * test_size))
    test_idx = idx[:m_test]; train_idx = idx[m_test:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

X_train_raw, X_test_raw, y_train, y_test = train_test_split_np(X_full, y_full, test_size=0.2, seed=42)

In [None]:
mu = X_train_raw.mean(axis=0)                 # (p,)
sd = X_train_raw.std(axis=0) + 1e-12          # (p,)
X_train = (X_train_raw - mu) / sd
X_test  = (X_test_raw  - mu) / sd

print(X_train.shape)
print(X_test.shape)

In [73]:
def create_polynomial_features_multi(X, degree):
    X = np.asarray(X, dtype=float)             
    m, p = X.shape
    cols = [np.ones((m, 1), dtype=float)]      
    for d in range(1, degree + 1):
        for j in range(p):
            cols.append((X[:, j:j+1]) ** d)
    return np.hstack(cols) 

In [74]:
def r4(x):
    return float(np.round(x, 4))

results = {}   # key: (deg, α, iters) -> {"beta":..., "cost_history":...}
rows = []

for deg in degrees:
    # build polynomial design matrices ONCE per degree
    Xtr = create_polynomial_features_multi(X_train, deg)
    Xte = create_polynomial_features_multi(X_test,  deg)

    for ALPHA in learning_rates:
        for N_ITER in iteration_counts:
            beta0 = np.zeros(Xtr.shape[1], dtype=float)

            cost_history, beta = gradient_descent(Xtr, y_train, beta0, lr=ALPHA, n_iter=N_ITER)
            final_cost = cost_history[-1] if len(cost_history) else np.inf

            # skip diverged runs
            if not np.isfinite(final_cost):
                print(f"SKIP (diverged): degree={deg}, α={ALPHA}, iters={N_ITER}")
                continue

            # predictions
            yhat_tr = Xtr.dot(beta)
            yhat_te = Xte.dot(beta)

            # store raw results
            results[(deg, ALPHA, N_ITER)] = {"beta": beta, "cost_history": cost_history}

            # append rounded metrics (4 decimals)
            rows.append({
                "Degree": deg,
                "α": r4(ALPHA),
                "Iterations": N_ITER,
                "Final Cost (Train)": r4(final_cost),

                "Train MSE":  r4(MSE(y_train, yhat_tr)),
                "Train MAE":  r4(MAE(y_train, yhat_tr)),
                "Train RMSE": r4(RMSE(y_train, yhat_tr)),
                "Train R²":   r4(R2_Score(y_train, yhat_tr)),

                "Test MSE":   r4(MSE(y_test,  yhat_te)),
                "Test MAE":   r4(MAE(y_test,  yhat_te)),
                "Test RMSE":  r4(RMSE(y_test,  yhat_te)),
                "Test R²":    r4(R2_Score(y_test,  yhat_te)),
            })

Iteration : 0: Cost : 1687.5113
Iteration : 100: Cost : 1684.1549
Iteration : 200: Cost : 1680.8052
Iteration : 300: Cost : 1677.4623
Iteration : 400: Cost : 1674.1260
Iteration : 500: Cost : 1670.7964
Iteration : 600: Cost : 1667.4735
Iteration : 700: Cost : 1664.1572
Iteration : 800: Cost : 1660.8476
Iteration : 900: Cost : 1657.5446
Iteration : 0: Cost : 1687.5113
Iteration : 100: Cost : 1684.1549
Iteration : 200: Cost : 1680.8052
Iteration : 300: Cost : 1677.4623
Iteration : 400: Cost : 1674.1260
Iteration : 500: Cost : 1670.7964
Iteration : 600: Cost : 1667.4735
Iteration : 700: Cost : 1664.1572
Iteration : 800: Cost : 1660.8476
Iteration : 900: Cost : 1657.5446
Iteration : 1000: Cost : 1654.2483
Iteration : 1100: Cost : 1650.9585
Iteration : 1200: Cost : 1647.6753
Iteration : 1300: Cost : 1644.3987
Iteration : 1400: Cost : 1641.1287
Iteration : 1500: Cost : 1637.8652
Iteration : 1600: Cost : 1634.6082
Iteration : 1700: Cost : 1631.3578
Iteration : 1800: Cost : 1628.1138
Iteration

  cost = (1 / (2 * m)) * np.sum((y_hat - Y) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  beta = beta - lr * gradients


Iteration : 8600: Cost : nan
Iteration : 8700: Cost : nan
Iteration : 8800: Cost : nan
Iteration : 8900: Cost : nan
Iteration : 9000: Cost : nan
Iteration : 9100: Cost : nan
Iteration : 9200: Cost : nan
Iteration : 9300: Cost : nan
Iteration : 9400: Cost : nan
Iteration : 9500: Cost : nan
Iteration : 9600: Cost : nan
Iteration : 9700: Cost : nan
Iteration : 9800: Cost : nan
Iteration : 9900: Cost : nan
SKIP (diverged): degree=2, α=1.0, iters=10000
Iteration : 0: Cost : 1687.3447
Iteration : 100: Cost : 1667.4609
Iteration : 200: Cost : 1647.8381
Iteration : 300: Cost : 1628.4728
Iteration : 400: Cost : 1609.3617
Iteration : 500: Cost : 1590.5013
Iteration : 600: Cost : 1571.8883
Iteration : 700: Cost : 1553.5193
Iteration : 800: Cost : 1535.3911
Iteration : 900: Cost : 1517.5004
Iteration : 0: Cost : 1687.3447
Iteration : 100: Cost : 1667.4609
Iteration : 200: Cost : 1647.8381
Iteration : 300: Cost : 1628.4728
Iteration : 400: Cost : 1609.3617
Iteration : 500: Cost : 1590.5013
Iteratio

In [75]:

summary_df = (
    pd.DataFrame(rows)
      .sort_values(by=["Test RMSE", "Final Cost (Train)"], ascending=[True, True])
      .reset_index(drop=True)
)
summary_df

Unnamed: 0,Degree,α,Iterations,Final Cost (Train),Train MSE,Train MAE,Train RMSE,Train R²,Test MSE,Test MAE,Test RMSE,Test R²
0,3,0.1,1000,6.1946,12.3892,2.8088,3.5198,0.9568,38.1278,4.8997,6.1748,0.8618
1,3,0.1,5000,6.1946,12.3892,2.8088,3.5198,0.9568,38.129,4.8997,6.1749,0.8618
2,3,0.1,10000,6.1946,12.3892,2.8088,3.5198,0.9568,38.129,4.8997,6.1749,0.8618
3,2,0.1,1000,7.882,15.7641,3.0771,3.9704,0.9451,39.062,4.7021,6.25,0.8584
4,2,0.1,5000,7.882,15.7641,3.0771,3.9704,0.9451,39.062,4.7021,6.25,0.8584
5,2,0.1,10000,7.882,15.7641,3.0771,3.9704,0.9451,39.062,4.7021,6.25,0.8584
6,1,0.001,10000,8.6146,17.2291,3.3627,4.1508,0.94,41.5852,5.1401,6.4487,0.8493
7,1,0.1,1000,8.6146,17.2291,3.3631,4.1508,0.94,41.5934,5.1417,6.4493,0.8493
8,1,0.1,5000,8.6146,17.2291,3.3631,4.1508,0.94,41.5934,5.1417,6.4493,0.8493
9,1,0.1,10000,8.6146,17.2291,3.3631,4.1508,0.94,41.5934,5.1417,6.4493,0.8493
