In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Fetch the California housing data as pandas objects
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

# 2. Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. Standardize the features (gradient descent is sensitive to feature scale).
#    The scaler is fitted on the training split only and then reused on the
#    test split, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Replace the pandas target Series with plain NumPy arrays
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()


In [45]:
from linear_reg_from_scratch import LinearRegression

# Build the from-scratch regressor.
# NOTE(review): lr / n_iter / patience are presumably the learning rate, the
# iteration cap and the early-stopping patience -- confirm against
# linear_reg_from_scratch.LinearRegression.
model = LinearRegression(
    lr=0.005,
    n_iter=10_000,
    patience=5,
)
# A bare trailing expression makes the notebook display the object's repr.
model

<linear_reg_from_scratch.LinearRegression at 0x277f11c6000>

In [46]:
# Train the model on the standardized training features and the NumPy target array.
model.fit(X_train_scaled, y_train)

100%|██████████| 10000/10000 [00:00<00:00, 12623.50it/s]


In [47]:
# 5. Score the held-out split
# Predictions use the test features scaled with the training-set statistics.
y_pred = model.predict(X_test_scaled)

# Root-mean-squared error: square the residuals, average them, take the root.
rmse = np.sqrt(np.square(y_test - y_pred).mean())
print(f"\nTest RMSE: {rmse:.4f}")


Test RMSE: 0.7457
