In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score

In [2]:
# Fetch the California housing dataset as pandas objects and unpack the
# feature frame and the target in one step.
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

In [3]:
# Every column of X is routed through the numeric preprocessing branch.
numeric_features = list(X.columns)

In [4]:
# Numeric branch: fill missing values with the column mean, then standardize.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

In [5]:
# Apply the numeric branch to the columns listed in numeric_features.
preprocessor = ColumnTransformer(
    transformers=[('num', numeric_transformer, numeric_features)],
)

In [6]:
# Full model: preprocessing followed by a seeded random-forest regressor.
pipe = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(random_state=42)),
    ]
)

In [7]:
# Search space for the grid search; each key uses the `regressor__` prefix
# followed by the name of the parameter being varied.
param_grid = dict(
    regressor__n_estimators=[100, 200],
    regressor__max_depth=[None, 10],
    regressor__min_samples_split=[2, 5],
)

In [8]:
# Hold out 20% of the rows for final evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Exhaustive search over param_grid with 5-fold cross-validation scored by R²,
# fitted on the training split only.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [10]:
# Report the winning hyperparameters and their score.
# best_score_ is the mean cross-validated R² of the best candidate, measured on
# the held-out folds of the cross-validation — it is not a score computed on
# the training data itself, so the label says "Mean CV" rather than "Training".
print("✅ Best Parameters:", grid_search.best_params_)
print("✅ Best Mean CV R² Score:", grid_search.best_score_)

✅ Best Parameters: {'regressor__max_depth': None, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 200}
✅ Best R² Score on Training: 0.804819125494558


In [11]:
# Evaluate the best pipeline found by the search on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_r2 = r2_score(y_test, y_pred)
print("✅ R² Score on Test Set:", test_r2)

✅ R² Score on Test Set: 0.8063074586513359
