In [5]:
!pip install ISLP

Collecting ISLP
  Using cached ISLP-0.4.0-py3-none-any.whl.metadata (7.0 kB)
Collecting lifelines (from ISLP)
  Using cached lifelines-0.30.0-py3-none-any.whl.metadata (3.2 kB)
Collecting pygam (from ISLP)
  Using cached pygam-0.10.1-py3-none-any.whl.metadata (9.7 kB)
Collecting pytorch-lightning (from ISLP)
  Using cached pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics (from ISLP)
  Using cached torchmetrics-1.8.0-py3-none-any.whl.metadata (21 kB)
Collecting autograd-gamma>=0.3 (from lifelines->ISLP)
  Using cached autograd_gamma-0.5.0-py3-none-any.whl
Collecting formulaic>=0.2.2 (from lifelines->ISLP)
  Using cached formulaic-1.2.0-py3-none-any.whl.metadata (7.0 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->ISLP)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->ISLP)
  Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.w

In [6]:

# Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score



In [7]:
# Step 2: Load Dataset
# Keep the loader's return object in `data`, wrap the feature matrix in a
# DataFrame labelled with the dataset's feature names, and take the target as-is.
data = fetch_california_housing()
X, y = pd.DataFrame(data=data.data, columns=data.feature_names), data.target


In [8]:
# Step 3: Train-Test Split
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [9]:
# Step 4: Linear Regression (baseline)
# Default-configured linear model fitted on the training split; `fit` returns
# the estimator itself, so construction and fitting are chained.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)



In [10]:
# Step 5: Ridge Regression
# Ridge model with regularisation strength alpha=1.0, fitted on the training
# split and then used to predict the held-out rows.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)



In [11]:
# Step 6: Lasso Regression
# Lasso model with regularisation strength alpha=0.1, fitted on the training
# split and then used to predict the held-out rows.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)



In [12]:
# Step 7: Random Forest with Grid Search
# Exhaustively try every (n_estimators, max_depth) pair with 3-fold
# cross-validation, then predict the test split with the fitted search object.
rf = RandomForestRegressor(random_state=42)
param_grid_rf = dict(
    n_estimators=[50, 100],
    max_depth=[None, 10, 20],
)
grid_rf = GridSearchCV(estimator=rf, param_grid=param_grid_rf, cv=3).fit(X_train, y_train)
y_pred_rf = grid_rf.predict(X_test)



In [14]:
# Step 8: Gradient Boosting with Randomized Search
# NOTE(review): the next cell repeats these two definitions verbatim before
# running the search, so this cell is redundant on a top-to-bottom run.
gb = GradientBoostingRegressor(random_state=42)
param_dist_gb = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)

In [15]:
# Step 8: Gradient Boosting with Randomized Search
# Sample 5 candidate settings from the lists below (seeded, 3-fold CV), then
# predict the test split with the fitted search object.
gb = GradientBoostingRegressor(random_state=42)
param_dist_gb = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)
rand_gb = RandomizedSearchCV(
    estimator=gb,
    param_distributions=param_dist_gb,
    n_iter=5,
    cv=3,
    random_state=42,
).fit(X_train, y_train)
y_pred_gb = rand_gb.predict(X_test)


In [16]:
# Step 9: Evaluation
def evaluate(model_name, y_true, y_pred):
    """Print RMSE and R² (four decimal places) for one model's predictions.

    Args:
        model_name: Label printed as the heading of the report.
        y_true: Ground-truth target values.
        y_pred: Predicted target values to score against ``y_true``.

    Returns:
        None. The report is written to standard output.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print(f"{model_name}:\n  RMSE: {rmse:.4f}\n  R²: {r2:.4f}\n")

# Score every model's test-set predictions, in the order the models were fitted.
for label, predictions in [
    ("Linear Regression", y_pred_lr),
    ("Ridge Regression", y_pred_ridge),
    ("Lasso Regression", y_pred_lasso),
    ("Random Forest (Grid Search)", y_pred_rf),
    ("Gradient Boosting (Random Search)", y_pred_gb),
]:
    evaluate(label, y_test, predictions)

Linear Regression:
  RMSE: 0.7456
  R²: 0.5758

Ridge Regression:
  RMSE: 0.7455
  R²: 0.5759

Lasso Regression:
  RMSE: 0.7833
  R²: 0.5318

Random Forest (Grid Search):
  RMSE: 0.5058
  R²: 0.8047

Gradient Boosting (Random Search):
  RMSE: 0.4976
  R²: 0.8110

