In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split    
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Ensure xgboost is installed
%pip install xgboost
import xgboost as xgb

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
# One-off environment setup: install xgboost for the running kernel.
# NOTE(review): the import cell runs this same install, and the recorded output
# below resolved xgboost 2.1.3 — consider keeping a single, version-pinned install.
%pip install xgboost

Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Using cached xgboost-2.1.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.3-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.8/124.9 MB 3.0 MB/s eta 0:00:41
   ---------------------------------------- 1.3/124.9 MB 2.9 MB/s eta 0:00:43
    --------------------------------------- 1.8/124.9 MB 2.6 MB/s eta 0:00:47
    --------------------------------------- 2.1/124.9 MB 2.4 MB/s eta 0:00:52
    --------------------------------------- 2.4/124.9 MB 2.2 MB/s eta 0:00:57
    --------------------------------------- 2.6/124.9 MB 1.9 MB/s eta 0:01:05
    --------------------------------------- 2.6/124.9 MB 1.9 MB/s eta 0:01:05
    --------------------------------------- 2.9/124.9 MB 1.7 MB/s eta 0:01:14
   - --

In [4]:
# Fetch the California housing dataset, then pull the feature matrix and the
# regression target out of the returned bundle.
housing = fetch_california_housing()
X = housing.data
y = housing.target

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics enter the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Candidate regressors keyed by the display name used when reporting results.
# Dict order is the order in which the models are fitted and printed.
models = {
    # --- linear baselines ---
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1.0),
    # NOTE(review): in the recorded run this model scores R2 of about 0 —
    # presumably alpha=1.0 is too strong a penalty for this data; confirm and tune.
    'Lasso Regression': Lasso(alpha=1.0),
    # --- tree ensembles (seeded for repeatable fits) ---
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'XGBoost': xgb.XGBRegressor(
        n_estimators=100, learning_rate=0.1, max_depth=3,
        reg_alpha=1, reg_lambda=1,
        random_state=42,
    ),
}

In [8]:
# Accumulator for per-model metrics: model name -> {'MSE': ..., 'R2': ...}.
results = dict()

In [9]:
# Fit every candidate on the standardized training split, predict on the
# standardized test split, and record MSE and R2 under the model's name.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    results[name] = dict(MSE=mse, R2=r2)

In [10]:
# Print one block per model: its name, then MSE and R2 to two decimals,
# followed by a blank separator line.
for name, metrics in results.items():
    print(
        f'{name}:',
        f'MSE: {metrics["MSE"]:.2f}',
        f'R2: {metrics["R2"]:.2f}',
        '',
        sep='\n',
    )

Linear Regression:
MSE: 0.56
R2: 0.58

Ridge Regression:
MSE: 0.56
R2: 0.58

Lasso Regression:
MSE: 1.31
R2: -0.00

Random Forest:
MSE: 0.26
R2: 0.81

Gradient Boosting:
MSE: 0.29
R2: 0.78

XGBoost:
MSE: 0.29
R2: 0.78

