In [1]:
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

In [2]:
# --- Load dataset ---
# as_frame=True returns the features and target as pandas objects.
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [3]:
# --- Hyperparameter Grid ---

# Candidate values for each DecisionTreeRegressor hyperparameter.
# 4 criteria x 4 depths x 3 split sizes x 3 leaf sizes = 144 combinations.
param_grid = dict(
    criterion=["squared_error", "friedman_mse", "absolute_error", "poisson"],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [None]:
# --- GridSearchCV ---
# Exhaustive search over `param_grid`, scoring every candidate by
# 5-fold cross-validated R² on the training split only.
dtr = DecisionTreeRegressor(random_state=42)
grid = GridSearchCV(
    estimator=dtr,
    param_grid=param_grid,
    scoring="r2",
    cv=5,
    n_jobs=-1,  # run fits in parallel on all available cores
    verbose=2,
)

grid.fit(X_train, y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits


In [None]:
# --- Best Model ---
# Take the winning estimator from the search and score it on the
# held-out test features.
best_dtr = grid.best_estimator_
y_pred = best_dtr.predict(X=X_test)

In [None]:
# --- Results ---
# Compute the metrics first, then report them. A large gap between train
# and test R² would indicate overfitting.
train_r2 = r2_score(y_train, best_dtr.predict(X_train))
test_r2 = r2_score(y_test, y_pred)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("Best Parameters:", grid.best_params_)
print("Train R²:", train_r2)
print("Test R²:", test_r2)
print("Test RMSE:", test_rmse)

In [None]:
# --- Plot Tree ---
# Draw onto an explicit Axes rather than relying on pyplot's implicit
# "current axes" state.
fig, ax = plt.subplots(figsize=(18, 10))
plot_tree(
    best_dtr,
    max_depth=3,  # limit depth for readability
    feature_names=data.feature_names,
    filled=True,
    ax=ax,
)
plt.show()

In [None]:
# Rebuild a standalone tree from the winning hyperparameters.
# random_state=42 mirrors the estimator used inside the grid search; without
# it the tree is seeded differently on every run, so the scores below would
# not be reproducible and could differ from those of `best_dtr`.
dt = DecisionTreeRegressor(**grid.best_params_, random_state=42)
dt.fit(X_train, y_train)

In [None]:
# `dt` was already fitted in the previous cell, so fitting it again here
# would only repeat the work; just predict on the held-out test features.
# NOTE: this overwrites the `y_pred` computed earlier from `best_dtr`.
y_pred = dt.predict(X_test)

In [None]:
# Test-set R² for the current `y_pred` (shown as the cell's output).
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Test-set mean squared error for the current `y_pred`.
# This is MSE, not the RMSE printed in the results cell.
mean_squared_error(y_true=y_test, y_pred=y_pred)