# Random Forest Regressor
* A Random Forest Regressor is an ensemble of decision trees that improves on the performance of a single tree, reduces overfitting, and provides more robust predictions. Each tree in a random forest is built using the following steps:
    1. A random subset of training data (bootstrapped sample) is selected
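    2. A random subset of features is selected as split candidates (this subset is re-drawn at every split; in scikit-learn its size is set by `max_features`)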
    3. A decision tree is built using the selected data and features using some measure of impurity

In [1]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Fetch the California housing dataset, then unpack the feature matrix
# and the regression target from the returned object
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

In [3]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the
# split identical across re-runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Base estimator: seeded for reproducibility; n_jobs=-1 asks scikit-learn
# to build the trees in parallel
model = RandomForestRegressor(n_jobs=-1, random_state=42)

# Candidate values per hyperparameter. The search tries every combination:
# 3 * 3 * 5 * 3 = 135 candidates
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10, 15, 20],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold cross-validated search scored by negated MSE (so that
# higher is better); verbose=3 logs one line per individual fit
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=3,
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Keep the winning hyperparameter combination
best_params = grid_search.best_params_

Fitting 5 folds for each of 135 candidates, totalling 675 fits
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=-0.261 total time=   4.2s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=-0.268 total time=   1.5s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=-0.258 total time=   1.5s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=-0.255 total time=   1.6s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=-0.266 total time=   1.5s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=-0.263 total time=   3.1s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=-0.266 total time=   3.0s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, s

[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=200;, score=-0.267 total time=   2.1s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=200;, score=-0.262 total time=   1.9s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=200;, score=-0.274 total time=   2.1s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=300;, score=-0.273 total time=   3.2s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=300;, score=-0.273 total time=   3.1s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=300;, score=-0.267 total time=   2.9s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=300;, score=-0.261 total time=   3.1s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=300;, score=-0.274 total time=   2.9s
[CV 1/5] END max_depth=N

[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=100;, score=-0.271 total time=   0.9s
[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=100;, score=-0.272 total time=   0.9s
[CV 3/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=100;, score=-0.270 total time=   0.9s
[CV 4/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=100;, score=-0.262 total time=   0.9s
[CV 5/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=100;, score=-0.274 total time=   0.9s
[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=200;, score=-0.274 total time=   1.8s
[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=200;, score=-0.272 total time=   1.9s
[CV 3/5] END max_depth=None, min_samples_leaf=2, min_samples_split=20, n_estimators=200;, score=-0.269 total time=   1.9s
[CV 4/5] END max_depth=N

[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=200;, score=-0.259 total time=   1.9s
[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=200;, score=-0.273 total time=   1.9s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=300;, score=-0.272 total time=   3.0s
[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=300;, score=-0.270 total time=   2.8s
[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=300;, score=-0.267 total time=   2.7s
[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=300;, score=-0.258 total time=   2.6s
[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=15, n_estimators=300;, score=-0.273 total time=   2.8s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=20, n_estimators=100;, score=-0.274 total time=   0.9s
[CV 2/5] END max_depth=N

[CV 3/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=100;, score=-0.290 total time=   0.7s
[CV 4/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=100;, score=-0.284 total time=   0.7s
[CV 5/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=100;, score=-0.299 total time=   0.7s
[CV 1/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=200;, score=-0.303 total time=   1.4s
[CV 2/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=200;, score=-0.293 total time=   1.4s
[CV 3/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=200;, score=-0.289 total time=   1.6s
[CV 4/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=200;, score=-0.282 total time=   1.6s
[CV 5/5] END max_depth=10, min_samples_leaf=1, min_samples_split=15, n_estimators=200;, score=-0.299 total time=   1.5s
[CV 1/5] END max_depth=10, min_samples_l

[CV 2/5] END max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=300;, score=-0.290 total time=   2.3s
[CV 3/5] END max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=300;, score=-0.287 total time=   2.3s
[CV 4/5] END max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=300;, score=-0.278 total time=   2.2s
[CV 5/5] END max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=300;, score=-0.295 total time=   2.3s
[CV 1/5] END max_depth=10, min_samples_leaf=2, min_samples_split=15, n_estimators=100;, score=-0.299 total time=   0.7s
[CV 2/5] END max_depth=10, min_samples_leaf=2, min_samples_split=15, n_estimators=100;, score=-0.291 total time=   0.7s
[CV 3/5] END max_depth=10, min_samples_leaf=2, min_samples_split=15, n_estimators=100;, score=-0.290 total time=   0.7s
[CV 4/5] END max_depth=10, min_samples_leaf=2, min_samples_split=15, n_estimators=100;, score=-0.282 total time=   0.7s
[CV 5/5] END max_depth=10, min_samples_l

[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=-0.300 total time=   1.5s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=-0.289 total time=   1.4s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=-0.288 total time=   1.4s
[CV 4/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=-0.278 total time=   1.4s
[CV 5/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=-0.295 total time=   1.4s
[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300;, score=-0.299 total time=   2.2s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300;, score=-0.288 total time=   2.2s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300;, score=-0.288 total time=   2.1s
[CV 4/5] END max_depth=10, min_samples_l

[CV 5/5] END max_depth=20, min_samples_leaf=1, min_samples_split=5, n_estimators=300;, score=-0.264 total time=   4.0s
[CV 1/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=-0.263 total time=   1.0s
[CV 2/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=-0.268 total time=   1.1s
[CV 3/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=-0.262 total time=   1.2s
[CV 4/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=-0.256 total time=   1.3s
[CV 5/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=-0.268 total time=   1.2s
[CV 1/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=200;, score=-0.266 total time=   2.1s
[CV 2/5] END max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=200;, score=-0.268 total time=   2.0s
[CV 3/5] END max_depth=20, min_samples_le

[CV 4/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=200;, score=-0.252 total time=   2.4s
[CV 5/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=200;, score=-0.264 total time=   2.6s
[CV 1/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=300;, score=-0.263 total time=   3.6s
[CV 2/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=300;, score=-0.265 total time=   4.1s
[CV 3/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=300;, score=-0.257 total time=   3.4s
[CV 4/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=300;, score=-0.251 total time=   3.4s
[CV 5/5] END max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=300;, score=-0.263 total time=   3.6s
[CV 1/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=100;, score=-0.263 total time=   1.1s
[CV 2/5] END max_depth=20, min_samples_leaf=2, 

[CV 3/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=100;, score=-0.265 total time=   1.1s
[CV 4/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=100;, score=-0.257 total time=   1.0s
[CV 5/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=100;, score=-0.270 total time=   1.0s
[CV 1/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=-0.268 total time=   2.2s
[CV 2/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=-0.267 total time=   2.0s
[CV 3/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=-0.263 total time=   2.1s
[CV 4/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=-0.255 total time=   2.3s
[CV 5/5] END max_depth=20, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=-0.270 total time=   2.1s
[CV 1/5] END max_depth=20, min_samples_leaf=4, m

In [5]:
# Reuse the estimator that GridSearchCV already refit on the whole training
# set with the best parameters (refit=True is the default). Fitting a second
# forest from best_params with the same random_state would reproduce the same
# model while paying the full training cost again.
best_model = grid_search.best_estimator_

print(f"Best Parameters: {best_params}")

# Predict on the held-out test data
y_pred = best_model.predict(X_test)

# For a regressor, .score() returns the coefficient of determination (R^2)
print(f"Training R-Squared: {best_model.score(X_train, y_train)}")
print(f"Testing R-Squared: {best_model.score(X_test, y_test)}")
# MSE below is computed on the test split
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 300}
Training R-Squared: 0.9590968859650107
Testing R-Squared: 0.8061927273680218
Mean Squared Error: 0.253966790105326
