In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
def add_intercept(X):
    """Return a copy of ``X`` with a leading column of ones.

    The extra column lets a linear model learn a bias term as an ordinary
    coefficient (the first entry of the parameter vector).

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Feature matrix with one row per sample.

    Returns
    -------
    numpy.ndarray
        Array with one more column than ``X``; column 0 is all ones.
    """
    n_rows = X.shape[0]
    bias_column = np.ones(n_rows)
    return np.column_stack((bias_column, X))

In [3]:
def ridge_gradient_descent(X, y, alpha, num_iters, lam):
    """Fit ridge-regularized linear regression with batch gradient descent.

    Starting from ``theta = 0``, takes ``num_iters`` full-batch steps on

        (1/n) * ||X @ theta - y||^2  +  (lam/n) * ||theta[1:]||^2

    Row 0 of ``theta`` is never penalized, so column 0 of ``X`` is treated
    as the intercept column.

    Parameters
    ----------
    X : numpy.ndarray of shape (n, d)
        Design matrix, intercept column first.
    y : array-like of shape (n,), (n, 1) or (n, k)
        Targets. A flat ``(n,)`` vector is treated as a single column.
    alpha : float
        Step size (learning rate).
    num_iters : int
        Number of gradient steps to take.
    lam : float
        Ridge penalty strength. It is divided by ``n`` in the update, so
        its effect shrinks as the number of samples grows.

    Returns
    -------
    numpy.ndarray of shape (d, k)
        Fitted coefficients; ``(d, 1)`` for a single target.

    Raises
    ------
    ValueError
        If ``y`` does not have exactly one row per row of ``X``.
    """
    n, d = X.shape

    y = np.asarray(y)
    if y.ndim < 2:
        # A flat target would broadcast against the (n, 1) predictions into
        # an (n, n) error matrix and silently produce a (d, n) theta.
        y = y.reshape(-1, 1)
    if y.shape[0] != n:
        raise ValueError(
            f"y has {y.shape[0]} rows but X has {n}; they must match"
        )

    theta = np.zeros((d, 1))

    # Per-row weight of the ridge gradient; it is loop-invariant, so build
    # it once. Row 0 stays at zero so the intercept is not penalized.
    penalty = np.full((d, 1), 2 * lam / n)
    penalty[0] = 0

    for _ in range(num_iters):
        error = X @ theta - y

        # gradient of (1/n) * ||X theta - y||^2
        grad = (2 / n) * (X.T @ error)

        theta = theta - alpha * (grad + penalty * theta)

    return theta

In [4]:
def predict(X, theta):
    """Return the linear model's predictions ``X @ theta``.

    Parameters
    ----------
    X : array of shape (n, d)
        Design matrix, laid out the same way as the one used for fitting.
    theta : array of shape (d, k)
        Model coefficients.

    Returns
    -------
    The matrix product of ``X`` and ``theta``, one row per sample.
    """
    predictions = X @ theta
    return predictions

In [5]:
# Load the housing data, drop the columns that are not used as features,
# and express the price in thousands (divide by 1000).
df = (
    pd.read_csv("kc_house_data.csv")
    .drop(columns=["id", "date", "zipcode"])
    .assign(price=lambda frame: frame["price"] / 1000)
)

# Every remaining column except the target becomes a feature.
feature_frame = df.drop(columns=["price"])
X = feature_frame.values

# Keep the target as an (n, 1) column vector.
y = df["price"].values.reshape(-1, 1)

In [6]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

# Prepend the intercept column after scaling so it stays a column of ones.
X_train_b, X_test_b = (
    add_intercept(scaled) for scaled in (X_train_scaled, X_test_scaled)
)

In [8]:
# Gradient-descent settings shared by every run below.
alpha = 0.1
num_iters = 100

# Penalty strengths to compare; 0 switches the ridge term off.
lambdas = [0, 1, 10, 100]

# Flatten the targets once instead of on every metric call.
y_train_flat = y_train.ravel()
y_test_flat = y_test.ravel()

for lam in lambdas:
    theta = ridge_gradient_descent(X_train_b, y_train, alpha, num_iters, lam)

    y_pred_train = predict(X_train_b, theta).ravel()
    y_pred_test = predict(X_test_b, theta).ravel()

    mse_train, r2_train = (
        mean_squared_error(y_train_flat, y_pred_train),
        r2_score(y_train_flat, y_pred_train),
    )
    mse_test, r2_test = (
        mean_squared_error(y_test_flat, y_pred_test),
        r2_score(y_test_flat, y_pred_test),
    )

    # One report per lambda: train metrics, test metrics, then a divider.
    print(
        f"lambda = {lam}",
        f"  Train MSE: {mse_train:.4f}, Train R^2: {r2_train:.4f}",
        f"  Test  MSE: {mse_test:.4f}, Test  R^2: {r2_test:.4f}",
        "-" * 50,
        sep="\n",
    )

lambda = 0
  Train MSE: 39834.3874, Train R^2: 0.6951
  Test  MSE: 45996.0060, Test  R^2: 0.6957
--------------------------------------------------
lambda = 1
  Train MSE: 39834.3937, Train R^2: 0.6951
  Test  MSE: 45996.1914, Test  R^2: 0.6957
--------------------------------------------------
lambda = 10
  Train MSE: 39834.4660, Train R^2: 0.6951
  Test  MSE: 45997.8752, Test  R^2: 0.6957
--------------------------------------------------
lambda = 100
  Train MSE: 39836.6887, Train R^2: 0.6951
  Test  MSE: 46016.2029, Test  R^2: 0.6956
--------------------------------------------------
