In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load the California housing dataset and pull out features and target.
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Seed shared by the randomized estimators below so repeated runs match.
SEED = 42

# Candidate regressors keyed by the display name used when reporting scores.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(alpha=1.0),
    "Random Forest": RandomForestRegressor(
        n_estimators=200,
        random_state=SEED,
    ),
    "Gradient Boosting": GradientBoostingRegressor(random_state=SEED),
}

# Maps each model's display name to its (RMSE, R²) pair on the test split.
results = {}

for name, model in models.items():
    # scikit-learn estimators return themselves from fit(), so training and
    # prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(y_test, y_pred)
    rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)

    results[name] = (rmse, r2)

# Print one small report per model: a header line, both metrics, and a
# blank separator line.
for model_name, metrics in results.items():
    rmse, r2 = metrics
    print(f"--- {model_name} ---")
    print("RMSE:", rmse)
    # end="\n\n" emits the blank separator line after the last metric.
    print("R²:", r2, end="\n\n")


--- Linear Regression ---
RMSE: 0.7455813830127763
R²: 0.575787706032451

--- Ridge Regression ---
RMSE: 0.7455567442814782
R²: 0.575815742891368

--- Random Forest ---
RMSE: 0.5038019900730704
R²: 0.8063074586513359

--- Gradient Boosting ---
RMSE: 0.5422167577867202
R²: 0.7756433164710084

