In [1]:
import numpy as np

from sklearn.datasets import fetch_california_housing   

In [2]:
# Load the California housing regression dataset through scikit-learn's fetcher.
housing = fetch_california_housing()

In [3]:
# Pull the feature matrix and the regression target out of the loaded dataset.
X, y = housing["data"], housing["target"]

In [4]:
# Sanity check: show the dimensions of the features and of the target, one per line.
print(X.shape, y.shape, sep="\n")

(20640, 8)
(20640,)


In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import r2_score

In [7]:
# 80/20 train/test partition; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=123,
)

In [8]:
# Baseline regressors, otherwise left at their default hyper-parameters.
lr = LinearRegression()
# Seed the tree: scikit-learn randomly permutes candidate features at each split,
# so an unseeded tree (and its score) can differ from run to run when splits tie.
# The seed matches the one used for the train/test split.
dt = DecisionTreeRegressor(random_state=123)
knn = KNeighborsRegressor()

In [9]:
# Train each baseline regressor on the training partition.
for model in (lr, dt, knn):
    model.fit(X_train, y_train)
# Final expression: the fitted KNN estimator remains the cell's displayed value.
knn

In [10]:
# Held-out predictions, in order: linear regression, decision tree, KNN.
y_pred1, y_pred2, y_pred3 = (
    fitted.predict(X_test) for fitted in (lr, dt, knn)
)

In [11]:
# Report the held-out R^2 of every baseline, one line per model.
# NOTE(review): the recorded KNN score is far below the other two. KNN is
# distance-based and the features are not scaled in this notebook - presumably
# the cause; worth confirming with a scaled variant.
for label, predictions in (
    ("Linear Regression", y_pred1),
    ("Decision Tree Regressor", y_pred2),
    ("KNN", y_pred3),
):
    print(f"r2 score for {label}: {r2_score(y_test, predictions)}")

r2 score for Linear Regression: 0.6104546894797869
r2 score for Decision Tree Regressor: 0.6079484930223118
r2 score for KNN: 0.16261917827057237


In [12]:
from sklearn.ensemble import BaggingRegressor

In [13]:
# Bagging ensemble with default settings apart from a fixed random_state,
# so repeated runs build the same ensemble.
bag = BaggingRegressor(random_state=1)

In [14]:
# Train the bagging ensemble on the training partition.
bag.fit(X_train, y_train)

In [15]:
# Predictions of the bagged model on the held-out rows.
# NOTE(review): y_preds is not referenced again in the visible cells; the scores
# printed afterwards are computed through bag.score instead.
y_preds = bag.predict(X_test)

In [17]:
# R^2 of the bagged model on the data it was fit on versus the held-out data;
# a large gap between the two points to overfitting.
train_r2 = bag.score(X_train, y_train)
test_r2 = bag.score(X_test, y_test)
print(f"Training coefficient of R2 {train_r2}")
print(f"Testing coefficient of R2 {test_r2}")


Training coefficient of R2 0.9628315477846598
Testing coefficient of R2 0.792038823749125


In [24]:
%%time

n_samples = X.shape[0]
n_features = X.shape[1]

params = {'base_estimator': [None, LinearRegression(), KNeighborsRegressor()],
          'n_estimators': [20,50,100],
          'max_samples': [0.5,1.0],
          'max_features': [0.5,1.0],
          'bootstrap': [True, False],
          'bootstrap_features': [True, False]}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid =params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(X_train, y_train)

print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_train, y_train))
print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_test, y_test))
print('Best R^2 Score Through Grid Search : %.3f'%bagging_regressor_grid.best_score_)
print('Best Parameters : ',bagging_regressor_grid.best_params_)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


KeyboardInterrupt: 