In [1]:

import pandas as pd
import numpy as np
# --- Our ML tools ---
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
# We don't necessarily need StandardScaler, but it doesn't hurt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor # <-- The star!
# --- Our Evaluation tools ---
from sklearn.metrics import mean_squared_error, r2_score

# --- Load the Data ---
# fetch_california_housing() returns a container object; keep it around as
# `housing` and pull the feature matrix and regression target out of it.
housing = fetch_california_housing()
X, y = housing.data, housing.target



In [2]:

# --- 2. Prepare Data ---
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. Tree-based boosting splits on per-feature
# thresholds, so this step is not expected to change the fit in any
# meaningful way; it is kept so the pipeline matches scale-sensitive models.
# The scaler learns its statistics from the training split only and is then
# applied to both splits, so no test-set information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [3]:

# --- 3. Create & Train ---
# Instantiate the regressor with the library defaults (per the scikit-learn
# docs: n_estimators=100, learning_rate=0.1, max_depth=3). Only the seed is
# pinned, so repeated runs are reproducible.
model = GradientBoostingRegressor(random_state=42)

# Fit on the standardized training features. Boosting adds trees one after
# another, each one fitted to the errors left by the ensemble built so far.
print("\n...Training the Gradient Boosting model...")
model.fit(X=X_train_scaled, y=y_train)
print("...Model Trained!...")




...Training the Gradient Boosting model...
...Model Trained!...


In [4]:

# --- 4. Predict & Evaluate ---
# Score the held-out test split, using the same scaled features the model
# was trained on.
y_pred = model.predict(X_test_scaled)

print("\n--- Model Evaluation (Gradient Boosting) ---")
# RMSE is the square root of the mean squared error, which puts it back in
# the units of the target; R² is reported alongside it.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-Squared (R²): {r2:.2f}")

# NOTE(review): the scores below are hard-coded, not computed in this
# notebook; presumably they come from earlier experiments -- confirm they
# used the same train/test split before comparing.
# Plain string literals: these lines have no placeholders, so no f-prefix.
print("\n--- For Comparison ---")
print("Linear Regression R² was: 0.58")
print("KNN Regression R² was:    0.69")
print("SVR R² was:               0.73")




--- Model Evaluation (Gradient Boosting) ---
Root Mean Squared Error (RMSE): 0.54
R-Squared (R²): 0.78

--- For Comparison ---
Linear Regression R² was: 0.58
KNN Regression R² was:    0.69
SVR R² was:               0.73
