In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

# Load the train/test splits; both files must contain the target column.
train_df = pd.read_csv("train.csv")
test_df  = pd.read_csv("test.csv")
target = "price"

# One-hot encode, using the TRAINING dummy columns as the feature schema
# (follows the Problem 2/3 preprocessing, with one correction for the test set).
#
# drop_first=True is applied to the training frame only. Calling it on the
# test frame as well would drop the first level *present in the test split*,
# which is a different level whenever the training reference level is absent
# from test; those rows would then be silently encoded as the baseline.
# Instead, the test frame is encoded with every level kept and then reindexed
# onto the training columns: levels unseen in training are discarded, and
# training columns missing from test are filled with 0.
X_train = pd.get_dummies(train_df.drop(columns=[target]), drop_first=True)
X_test = pd.get_dummies(test_df.drop(columns=[target])).reindex(
    columns=X_train.columns, fill_value=0
)

# Targets as float vectors.
y_train = train_df[target].to_numpy(dtype=float)
y_test  = test_df[target].to_numpy(dtype=float)

# Dense float matrices (dummy columns become 0.0 / 1.0).
Xtr = X_train.to_numpy(dtype=float)
Xte = X_test.to_numpy(dtype=float)

# Standardise each feature with statistics taken from the training matrix
# only, and apply the same transform to the test matrix. Scaling matters
# here because it keeps the gradient-descent updates stable.
mu = np.mean(Xtr, axis=0)
sigma = np.std(Xtr, axis=0)
# Constant columns have zero spread; divide them by 1 instead of 0.
sigma = np.where(sigma == 0, 1.0, sigma)
Xtr_s = (Xtr - mu) / sigma
Xte_s = (Xte - mu) / sigma

# Prepend a column of ones so theta[0] acts as the intercept.
Xtr_b = np.hstack([np.ones((Xtr_s.shape[0], 1)), Xtr_s])
Xte_b = np.hstack([np.ones((Xte_s.shape[0], 1)), Xte_s])

def gradient_descent(Xb, y, alpha, num_iters):
    """
    Fit linear-regression weights by batch gradient descent on the MSE.

    Uses the lecture gradient:
    grad = (2/N) * X^T (X theta - y)

    Parameters
    ----------
    Xb : array-like, shape (N, d)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like, shape (N,) or (N, 1)
        Targets. A column vector is flattened so that ``preds - y`` stays
        one-dimensional instead of broadcasting to an (N, N) matrix.
    alpha : float
        Step size applied to every update.
    num_iters : int
        Number of full-batch updates. With 0 iterations the all-zero
        starting vector is returned.

    Returns
    -------
    numpy.ndarray, shape (d,)
        The weight vector after ``num_iters`` updates, starting from zeros.

    Raises
    ------
    ValueError
        If ``Xb`` is not two-dimensional, or ``y`` does not hold exactly
        one value per row of ``Xb``.
    """
    Xb = np.asarray(Xb, dtype=float)
    # Flatten so (N, 1) targets behave exactly like (N,) targets.
    y = np.asarray(y, dtype=float).reshape(-1)

    if Xb.ndim != 2:
        raise ValueError(f"Xb must be 2-D, got {Xb.ndim} dimension(s)")
    N, d = Xb.shape
    if y.shape[0] != N:
        raise ValueError(
            f"y has {y.shape[0]} value(s) but Xb has {N} row(s)"
        )

    theta = np.zeros(d, dtype=float)
    for _ in range(num_iters):
        residuals = Xb @ theta - y
        grad = (2.0 / N) * (Xb.T @ residuals)
        theta -= alpha * grad
    return theta

def evaluate(Xb, y, theta):
    """Score the linear model ``Xb @ theta`` against the targets ``y``.

    Returns a ``(mse, r2)`` tuple computed with ``mean_squared_error`` and
    ``r2_score``.
    """
    y_hat = Xb @ theta
    mse = mean_squared_error(y, y_hat)
    r2 = r2_score(y, y_hat)
    return mse, r2

# Hyper-parameter grid: every step size is paired with every iteration budget.
alphas = [0.01, 0.1, 0.5]
iters_list = [10, 50, 100]

metric_rows = []
theta_rows = []

grid = [(a, it) for a in alphas for it in iters_list]
for a, it in grid:
    # Fit on the training matrix, then score on both splits.
    theta = gradient_descent(Xtr_b, y_train, a, it)
    tr_mse, tr_r2 = evaluate(Xtr_b, y_train, theta)
    te_mse, te_r2 = evaluate(Xte_b, y_test, theta)

    run_id = {"alpha": a, "iters": it}

    metric_rows.append({
        **run_id,
        "Train MSE": tr_mse,
        "Train R^2": tr_r2,
        "Test MSE": te_mse,
        "Test R^2": te_r2,
    })

    # The full weight vector is too long to print, so report the
    # intercept, the overall norm and the first five entries.
    theta_rows.append({
        **run_id,
        "theta0": theta[0],
        "theta_norm": np.linalg.norm(theta),
        "theta_first5": np.round(theta[:5], 4).tolist(),
    })

sort_keys = ["alpha", "iters"]
gd_metrics = pd.DataFrame(metric_rows).sort_values(sort_keys).reset_index(drop=True)
gd_theta   = pd.DataFrame(theta_rows).sort_values(sort_keys).reset_index(drop=True)

print("=== Metrics table ===")
print(gd_metrics)

print("\n=== Theta summary ===")
print(gd_theta)


=== Metrics table ===
   alpha  iters      Train MSE      Train R^2       Test MSE       Test R^2
0   0.01     10   2.357311e+05  -1.047393e+00   2.828668e+05  -6.965872e-01
1   0.01     50   6.969578e+04   3.946717e-01   9.432003e+04   4.342845e-01
2   0.01    100   3.676495e+04   6.806857e-01   6.127904e+04   6.324587e-01
3   0.10     10   3.504793e+04   6.955985e-01   6.000379e+04   6.401074e-01
4   0.10     50   3.142706e+04   7.270468e-01   5.889054e+04   6.467845e-01
5   0.10    100   3.141602e+04   7.271427e-01   5.883993e+04   6.470881e-01
6   0.50     10   1.464434e+17  -1.271904e+12   1.632452e+17  -9.791172e+11
7   0.50     50   1.293867e+67  -1.123761e+62   1.442316e+67  -8.650767e+61
8   0.50    100  3.504812e+129 -3.044031e+124  3.906928e+129 -2.343309e+124

=== Theta summary ===
   alpha  iters        theta0    theta_norm  \
0   0.01     10  9.519802e+01  1.221860e+02   
1   0.01     50  3.308955e+02  3.633916e+02   
2   0.01    100  4.513976e+02  4.823524e+02   
3   0.1