In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
# Read the crew score sheet and keep only the rows whose CourseName is
# 'Foundations of ML'; the callable is evaluated against the freshly read frame.
df = pd.read_csv('zuu crew scores.csv')[
    lambda frame: frame['CourseName'] == 'Foundations of ML'
]
# Preview the first five rows of the filtered frame.
df.head(5)

Unnamed: 0,MemberName,EducationLevel,Attendance,TotalHours,AssignmentsCompleted,HackathonParticipation,GitHubScore,PeerReviewScore,CourseName,CapstoneScore
0,Theekshana Rathnayake,3,79.9,43.7,2,0,62.8,5.0,Foundations of ML,45.3
1,Mayura Sandakalum Sellapperuma,2,76.8,95.6,6,0,87.4,2.7,Foundations of ML,78.8
2,Amila Narangoda,3,96.6,75.9,8,0,98.4,2.8,Foundations of ML,65.4
4,Tharusha Vihanga,2,83.2,24.0,6,0,41.8,4.2,Foundations of ML,40.1
7,Chamath Perera,3,86.5,88.0,5,0,23.9,1.3,Foundations of ML,68.2


In [10]:
# Name of the column the models are fitted to predict.
TARGET = "CapstoneScore"

# Predictor column names, in the order used when the feature matrix is built.
# The last entry may be swapped for "GitHubScore" if you prefer.
FEATURES = [
    "Attendance", "TotalHours",
    "AssignmentsCompleted", "HackathonParticipation",
    "PeerReviewScore",
]

In [11]:
# Feature matrix: one row per record in df, one float column per FEATURES entry.
X_raw = df[FEATURES].to_numpy().astype(float)
# Target vector as floats, in the same row order as X_raw.
y = df[TARGET].to_numpy().astype(float)

In [12]:
# Column-wise mean and population standard deviation (ddof=0) of the raw features.
mu_base = np.mean(X_raw, axis=0)
sigma_base = np.std(X_raw, axis=0, ddof=0)

# A column with zero spread would divide by zero below; give it a divisor of 1
# so its standardized values simply come out as 0.
sigma_base_safe = sigma_base.copy()
sigma_base_safe[sigma_base == 0] = 1.0

# Standardized (z-scored) features, same shape as X_raw: (m, 5).
Z = (X_raw - mu_base) / sigma_base_safe

In [None]:
def compute_cost(X, y, beta):
    """Return the half mean-squared-error cost of the linear model ``X.dot(beta)``.

    Parameters
    ----------
    X : array with a ``.dot`` method
        Design matrix; ``X.dot(beta)`` must line up element-wise with ``y``.
    y : sequence
        Observed targets; ``len(y)`` is used as the sample count.
    beta : array
        Coefficient vector.

    Returns
    -------
    The value ``(1 / (2 * len(y))) * sum((X.dot(beta) - y) ** 2)``.
    """
    n_samples = len(y)
    residuals = X.dot(beta) - y
    scale = 1 / (2 * n_samples)
    return scale * np.sum(np.square(residuals))

def gradient_descent(X, y, beta, lr, n_iter, log_every=100):
    """Fit linear-model coefficients by batch gradient descent on the half-MSE cost.

    Each iteration applies ``beta <- beta - lr * (1/m) * X.T.dot(X.dot(beta) - y)``
    and records the cost ``(1/(2*m)) * sum((X.dot(beta) - y)**2)`` of the
    *updated* coefficients.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix with one row per sample.
    y : numpy.ndarray
        Targets; ``len(y)`` is used as the sample count ``m``.
    beta : numpy.ndarray
        Starting coefficients. The array passed in is not modified; a new
        array is created on every update.
    lr : float
        Learning rate (step size).
    n_iter : int
        Number of update steps to run.
    log_every : int or None, optional
        Print the cost whenever the iteration index is a multiple of this
        value (default 100, the interval that used to be hard-coded).
        Pass ``None`` or ``0`` to run without printing.

    Returns
    -------
    (cost_history, beta)
        ``cost_history`` is a list with one cost per iteration; ``beta`` is the
        final coefficient array.
    """
    m = len(y)
    should_log = log_every is not None and log_every > 0
    cost_history = []

    # The residual of the current beta feeds both this step's gradient and the
    # previous step's cost, so it is computed once per iteration and reused
    # instead of running the forward pass twice.
    residual = X.dot(beta) - y
    for i in range(n_iter):
        gradients = (1/m) * X.T.dot(residual)
        beta = beta - lr * gradients

        residual = X.dot(beta) - y
        # Same half-MSE formula as compute_cost, evaluated on the residual
        # that is already in hand for the next step.
        cost = (1/(2*m)) * np.sum(residual**2)
        cost_history.append(cost)

        if should_log and i % log_every == 0:
            print(f"Iteration {i}: Cost {cost:.4f}")
    return cost_history, beta

def predict(X, beta):
    """Return the linear-model predictions ``X.dot(beta)``.

    Parameters
    ----------
    X : array with a ``.dot`` method
        Design matrix, one row per sample.
    beta : array
        Coefficient vector.
    """
    y_hat = X.dot(beta)
    return y_hat


In [16]:
from itertools import combinations_with_replacement
def polynomial_features_matrix(Z, degree):
    """Build a polynomial design matrix from the columns of ``Z``.

    The first column is all ones (bias). It is followed, for each power from 1
    up to ``degree``, by one column per combination-with-replacement of the
    column indices of ``Z``; each such column is the row-wise product of the
    selected columns.

    Parameters
    ----------
    Z : numpy.ndarray
        Two-dimensional feature array of shape ``(m, n)``.
    degree : int
        Highest total degree to generate; values below 1 yield only the bias.

    Returns
    -------
    numpy.ndarray
        Array with ``m`` rows holding the bias and all generated product columns.
    """
    n_rows, n_feats = Z.shape
    feature_ids = range(n_feats)
    columns = [np.ones((n_rows, 1))]  # bias
    for power in range(1, degree + 1):
        columns.extend(
            np.prod(Z[:, list(idx)], axis=1, keepdims=True)
            for idx in combinations_with_replacement(feature_ids, power)
        )
    return np.hstack(columns)

def train_poly_full_dataset(degree, lr=0.01, n_iter=6000, rescale_poly=True):
    """Train a polynomial regression of the given degree and score it on the same data.

    Reads the module-level arrays ``Z`` (features) and ``y`` (targets), expands
    ``Z`` with ``polynomial_features_matrix``, fits coefficients with
    ``gradient_descent`` starting from zeros, and evaluates the fit on the
    very data it was trained on.

    Parameters
    ----------
    degree : int
        Polynomial degree passed to ``polynomial_features_matrix``.
    lr : float
        Learning rate passed to ``gradient_descent``.
    n_iter : int
        Number of gradient-descent iterations.
    rescale_poly : bool
        When true, every non-bias polynomial column is standardized (mean 0,
        population std 1; zero-spread columns are divided by 1 instead).

    Returns
    -------
    dict
        Keys ``degree``, ``beta``, ``cost_history``, ``mse``, ``rmse``, ``r2``,
        ``y_pred`` and ``n_features`` (column count including the bias).
    """
    design = polynomial_features_matrix(Z, degree)

    if rescale_poly:
        # Standardize everything except the leading bias column, then put a
        # fresh column of ones back in front.
        terms = design[:, 1:]
        term_mean = terms.mean(axis=0)
        term_std = terms.std(axis=0, ddof=0)
        divisor = np.where(term_std == 0, 1.0, term_std)
        bias = np.ones((design.shape[0], 1))
        design = np.c_[bias, (terms - term_mean) / divisor]

    start = np.zeros(design.shape[1])
    cost_history, beta = gradient_descent(design, y, start, lr, n_iter)
    y_pred = predict(design, beta)

    # Fit metrics on the training data.
    squared_error = (y - y_pred) ** 2
    mse = np.mean(squared_error)
    ss_res = np.sum(squared_error)
    ss_tot = np.sum((y - np.mean(y)) ** 2)

    return {
        "degree": degree,
        "beta": beta,
        "cost_history": cost_history,
        "mse": mse,
        "rmse": np.sqrt(mse),
        "r2": 1 - ss_res/ss_tot,
        "y_pred": y_pred,
        "n_features": design.shape[1]
    }

# Fit one model per (degree, learning rate) pair; each run uses 7000 iterations.
results = {}
for deg, alpha in ((1, 0.01), (2, 0.01), (3, 0.003)):
    print(f"\n=== Training Polynomial degree {deg} (lr={alpha}) ===")
    results[deg] = train_poly_full_dataset(degree=deg, lr=alpha, n_iter=7000)

# Print the fit metrics of every trained degree, in training order.
print("\n=== Model comparison ===")
for d, r in results.items():
    print(f"Degree {d}: R²={r['r2']:.4f}, RMSE={r['rmse']:.4f}, "
          f"MSE={r['mse']:.4f}, Features={r['n_features']}")


=== Training Polynomial degree 1 (lr=0.01) ===
Iteration : 0: Cost : 1628.3791
Iteration : 100: Cost : 227.5618
Iteration : 200: Cost : 40.3059
Iteration : 300: Cost : 14.9248
Iteration : 400: Cost : 11.4194
Iteration : 500: Cost : 10.9214
Iteration : 600: Cost : 10.8476
Iteration : 700: Cost : 10.8360
Iteration : 800: Cost : 10.8340
Iteration : 900: Cost : 10.8337
Iteration : 1000: Cost : 10.8336
Iteration : 1100: Cost : 10.8336
Iteration : 1200: Cost : 10.8336
Iteration : 1300: Cost : 10.8336
Iteration : 1400: Cost : 10.8336
Iteration : 1500: Cost : 10.8336
Iteration : 1600: Cost : 10.8336
Iteration : 1700: Cost : 10.8336
Iteration : 1800: Cost : 10.8336
Iteration : 1900: Cost : 10.8336
Iteration : 2000: Cost : 10.8336
Iteration : 2100: Cost : 10.8336
Iteration : 2200: Cost : 10.8336
Iteration : 2300: Cost : 10.8336
Iteration : 2400: Cost : 10.8336
Iteration : 2500: Cost : 10.8336
Iteration : 2600: Cost : 10.8336
Iteration : 2700: Cost : 10.8336
Iteration : 2800: Cost : 10.8336
Iter