In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

# Read the train/test splits; "price" is the regression target column.
target = "price"
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# One-hot encode each split on its own, then force the test matrix onto the
# training columns (same as Problem 2/3): with join="left" only the training
# columns are kept, and any of them missing from the test split is filled with 0.
X_train = pd.get_dummies(train_df.drop(columns=[target]), drop_first=True)
X_test = pd.get_dummies(test_df.drop(columns=[target]), drop_first=True)
X_train, X_test = X_train.align(X_test, join="left", axis=1, fill_value=0)

# Plain float arrays for the hand-written gradient descent below.
Xtr = X_train.to_numpy(dtype=float)
Xte = X_test.to_numpy(dtype=float)
y_train = train_df[target].to_numpy(dtype=float)
y_test = test_df[target].to_numpy(dtype=float)

# Standardise features using statistics from the training split only, so the
# test split is transformed with exactly the same mu/sigma. Feature scaling
# matters for gradient-descent stability.
mu = np.mean(Xtr, axis=0)
sigma = np.std(Xtr, axis=0)
# Constant columns have zero spread; divide by 1 instead to avoid 0/0.
sigma = np.where(sigma == 0, 1.0, sigma)
Xtr_s = (Xtr - mu) / sigma
Xte_s = (Xte - mu) / sigma

# Prepend a column of ones so theta[0] acts as the intercept (bias term).
Xtr_b = np.hstack([np.ones((Xtr_s.shape[0], 1)), Xtr_s])
Xte_b = np.hstack([np.ones((Xte_s.shape[0], 1)), Xte_s])

def gradient_descent(Xb, y, alpha, num_iters):
    """
    Fit linear-regression weights with full-batch gradient descent.

    Uses the lecture gradient of the mean squared error:
        grad = (2/N) * X^T (X theta - y)
    starting from theta = 0 and taking exactly ``num_iters`` steps. No
    convergence check is performed, so a step size that is too large for the
    data will diverge rather than stop.

    Parameters
    ----------
    Xb : array-like, shape (N, d)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like, shape (N,) or (N, 1)
        Target values; flattened to 1-D before use.
    alpha : float
        Learning rate (step size).
    num_iters : int
        Number of gradient steps; zero or negative leaves theta at zeros.

    Returns
    -------
    numpy.ndarray, shape (d,)
        The weight vector after ``num_iters`` updates.

    Raises
    ------
    ValueError
        If ``Xb`` is not 2-D, has no rows, or ``y`` does not contain exactly
        one target per row of ``Xb``.
    """
    Xb = np.asarray(Xb, dtype=float)
    # Flatten y: a column vector (N, 1) would otherwise broadcast against the
    # 1-D predictions into an (N, N) matrix and break the update.
    y = np.asarray(y, dtype=float).ravel()

    if Xb.ndim != 2:
        raise ValueError(f"Xb must be 2-D, got array with shape {Xb.shape}")
    N, d = Xb.shape
    if N == 0:
        raise ValueError("Xb must contain at least one sample")
    if y.shape[0] != N:
        raise ValueError(
            f"y has {y.shape[0]} values but Xb has {N} rows"
        )

    theta = np.zeros(d, dtype=float)
    scale = 2.0 / N  # loop-invariant factor of the gradient
    for _ in range(num_iters):
        residual = Xb @ theta - y
        grad = scale * (Xb.T @ residual)
        theta -= alpha * grad
    return theta

def evaluate(Xb, y, theta):
    """
    Score a linear model on one data split.

    Predictions are computed as ``Xb @ theta`` and compared against ``y``.

    Returns
    -------
    tuple
        ``(mse, r2)`` as produced by ``mean_squared_error`` and ``r2_score``.
    """
    y_hat = Xb @ theta
    mse = mean_squared_error(y, y_hat)
    r2 = r2_score(y, y_hat)
    return mse, r2

# Hyperparameter grid: every learning rate is paired with every iteration budget.
alphas = [0.01, 0.1, 0.5]
iters_list = [10, 50, 100]

metric_rows = []
theta_rows = []

for a in alphas:
    for it in iters_list:
        # Fit on the training split, then score both splits with the same theta.
        theta = gradient_descent(Xtr_b, y_train, a, it)
        tr_mse, tr_r2 = evaluate(Xtr_b, y_train, theta)
        te_mse, te_r2 = evaluate(Xte_b, y_test, theta)

        # Both tables are keyed by the same (alpha, iters) pair.
        run_id = {"alpha": a, "iters": it}

        metric_rows.append({
            **run_id,
            "Train MSE": tr_mse,
            "Train R^2": tr_r2,
            "Test MSE": te_mse,
            "Test R^2": te_r2,
        })

        # theta is too long to print in full, so report a short summary:
        # the intercept, the overall L2 norm, and the first five entries.
        theta_rows.append({
            **run_id,
            "theta0": theta[0],
            "theta_norm": np.linalg.norm(theta),
            "theta_first5": np.round(theta[:5], 4).tolist(),
        })

# Assemble the result tables, ordered by learning rate and then iteration count.
gd_metrics = pd.DataFrame(metric_rows)
gd_metrics = gd_metrics.sort_values(["alpha", "iters"]).reset_index(drop=True)
gd_theta = pd.DataFrame(theta_rows)
gd_theta = gd_theta.sort_values(["alpha", "iters"]).reset_index(drop=True)

print("=== Metrics table ===")
print(gd_metrics)

print("\n=== Theta summary ===")
print(gd_theta)


=== Metrics table ===
   alpha  iters      Train MSE      Train R^2       Test MSE       Test R^2
0   0.01     10   2.357503e+11  -1.047561e+00   2.829701e+11  -6.972070e-01
1   0.01     50   6.972838e+10   3.943887e-01   9.431447e+10   4.343178e-01
2   0.01    100   3.662532e+10   6.818984e-01   6.116358e+10   6.331512e-01
3   0.10     10   3.489608e+10   6.969173e-01   5.987407e+10   6.408854e-01
4   0.10     50   3.105631e+10   7.302669e-01   5.844631e+10   6.494489e-01
5   0.10    100   3.104372e+10   7.303762e-01   5.839468e+10   6.497586e-01
6   0.50     10   2.536034e+23  -2.202620e+12   2.800745e+23  -1.679840e+12
7   0.50     50   2.604548e+74  -2.262126e+63   2.876411e+74  -1.725223e+63
8   0.50    100  1.514273e+138 -1.315190e+127  1.672333e+138 -1.003037e+127

=== Theta summary ===
   alpha  iters        theta0    theta_norm  \
0   0.01     10  9.519802e+04  1.221649e+05   
1   0.01     50  3.308955e+05  3.633711e+05   
2   0.01    100  4.513976e+05  4.824453e+05   
3   0.1