In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib

# Load the California housing regression data as (features, target) arrays.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the standardization statistics from the training rows only, then
# apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Baseline comparison: random forest vs. gradient boosting with default
# hyperparameters. Both estimators are seeded (same value as the train/test
# split) so the scores printed below are reproducible from run to run.
rf = RandomForestRegressor(random_state=42)
gb = GradientBoostingRegressor(random_state=42)

# 5-fold cross-validated R^2 computed on the training split only.
rf_scores = cross_val_score(rf, X_train, y_train, cv=5, scoring="r2")
gb_scores = cross_val_score(gb, X_train, y_train, cv=5, scoring="r2")

# Report the cross-validation results instead of discarding them.
print(f"RF CV R2: {rf_scores.mean():.4f} +/- {rf_scores.std():.4f}")
print(f"GB CV R2: {gb_scores.mean():.4f} +/- {gb_scores.std():.4f}")

# cross_val_score works on clones, so rf and gb are still unfitted here;
# fit them on the full training split before scoring the held-out rows.
rf.fit(X_train, y_train)
gb.fit(X_train, y_train)

rf_pred = rf.predict(X_test)
gb_pred = gb.predict(X_test)

print("RF RMSE:", np.sqrt(mean_squared_error(y_test, rf_pred)))
print("RF R2:", r2_score(y_test, rf_pred))

print("GB RMSE:", np.sqrt(mean_squared_error(y_test, gb_pred)))
print("GB R2:", r2_score(y_test, gb_pred))

# Hyperparameter search for the random forest: 2 x 3 = 6 candidates, each
# scored by 3-fold cross-validated R^2 on the training split.
params = {
    "n_estimators": [100, 150],
    "max_depth": [10, 20, None],
}

grid = GridSearchCV(
    # Seeded so that differences between candidates come from the
    # hyperparameters rather than from run-to-run forest randomness, and so
    # that best_params_ is reproducible.
    RandomForestRegressor(random_state=42),
    params,
    cv=3,
    scoring="r2",
    n_jobs=-1,
)

grid.fit(X_train, y_train)

# With GridSearchCV's default refit=True, best_estimator_ has been refit on
# the whole training split using the winning parameters.
best = grid.best_estimator_
pred = best.predict(X_test)

print("Best RMSE:", np.sqrt(mean_squared_error(y_test, pred)))
print("Best R2:", r2_score(y_test, pred))
print("Best Params:", grid.best_params_)

# The model was trained on standardized features, so it is only usable
# together with the fitted scaler: persist the scaler as well. Apply
# scaler.transform to raw features before calling predict on the loaded model.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(best, "best_model.pkl")


RF RMSE: 0.5070553641536689
RF R2: 0.8037977863505261
GB RMSE: 0.5422918560475437
GB R2: 0.7755811643398038
Best RMSE: 0.5074963808472075
Best R2: 0.8034563400818927
Best Params: {'max_depth': 20, 'n_estimators': 150}


['best_model.pkl']