In [1]:
# Load the California housing data as pandas objects:
# `data` holds the features, `target` the value to predict.
from sklearn.datasets import fetch_california_housing

data, target = fetch_california_housing(return_X_y=True, as_frame=True)

# Rescale the target by a factor of 100.
# NOTE(review): scikit-learn documents this target in units of $100,000,
# so the result is presumably in thousands of dollars -- confirm.
target = target * 100


In [2]:
# Baseline: cross-validate a single decision tree with default
# hyperparameters (only the seed is fixed, for reproducibility).
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_validate

tree = DecisionTreeRegressor(random_state=0)

# No `cv` or `scoring` is given, so scikit-learn's defaults apply;
# the two folds-in-parallel setting comes from n_jobs=2.
cv_results = cross_validate(estimator=tree, X=data, y=target, n_jobs=2)

# One test score per fold; report mean and spread across folds.
scores = cv_results["test_score"]
print(f"R2 score: {scores.mean():.2f} +/- {scores.std():.2f}")

R2 score: 0.35 +/- 0.09


In [3]:
%time
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

param_grid = {'max_depth': [5, 8, None],
              'min_samples_split': [2, 10, 30, 50],
              'min_samples_leaf': [0.01, 0.05, 0.1, 1]}
cv=3

tree = GridSearchCV(DecisionTreeRegressor(random_state=0),
                    param_grid=param_grid, cv=cv, n_jobs=2)
cv_results = cross_validate(tree, data, target, n_jobs=2, return_estimator=True)
scores = cv_results['test_score']
print(f"R2 score: {scores.mean():.2f} +/- {scores.std():.2f}")

CPU times: total: 0 ns
Wall time: 0 ns
R2 score: 0.52 +/- 0.11


In [4]:
%%time
from sklearn.ensemble import BaggingRegressor

estimator = DecisionTreeRegressor(random_state=0)
bagging_regressor = BaggingRegressor(estimator=estimator, n_estimators=20, random_state=0, n_jobs=2)
cv_results = cross_validate(bagging_regressor, data, target, n_jobs=2)
scores = cv_results['test_score']

print(f"R2 score: {scores.mean():.2f} +/- {scores.std():.2f}")

R2 score: 0.64 +/- 0.08
CPU times: total: 0 ns
Wall time: 3.03 s
