In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

# Load data
train_df = pd.read_csv("train.csv")
test_df  = pd.read_csv("test.csv")

X_train = train_df[["sqft_living"]].to_numpy()
y_train = train_df["price"].to_numpy()

X_test = test_df[["sqft_living"]].to_numpy()
y_test = test_df["price"].to_numpy()

# Create polynomial features
def polynomial_features(X, p):
    return np.hstack([X**i for i in range(1, p+1)])

# Closed-form fit
def fit_closed_form(X, y):
    X_b = np.c_[np.ones((X.shape[0],1)), X]
    theta = np.linalg.inv(X_b.T @ X_b) @ (X_b.T @ y)
    return theta

# Predict
def predict(X, theta):
    X_b = np.c_[np.ones((X.shape[0],1)), X]
    return X_b @ theta

results = []

for p in [1,2,3,5]:
    Xtr_p = polynomial_features(X_train, p)
    Xte_p = polynomial_features(X_test, p)
    
    theta_p = fit_closed_form(Xtr_p, y_train)
    
    ytr_pred = predict(Xtr_p, theta_p)
    yte_pred = predict(Xte_p, theta_p)
    
    results.append({
        "Degree (p)": p,
        "Train MSE": mean_squared_error(y_train, ytr_pred),
        "Train R^2": r2_score(y_train, ytr_pred),
        "Test MSE": mean_squared_error(y_test, yte_pred),
        "Test R^2": r2_score(y_test, yte_pred)
    })

pd.DataFrame(results)


Unnamed: 0,Degree (p),Train MSE,Train R^2,Test MSE,Test R^2
0,1,57947530000.0,0.496709,88575980000.0,0.468736
1,2,54822670000.0,0.523849,71791680000.0,0.569406
2,3,53785190000.0,0.53286,99833480000.0,0.401216
3,5,52626110000.0,0.542927,28657280000000.0,-170.881541
