In [1]:
from sklearn.datasets import load_boston

In [2]:
boston = load_boston()

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [4]:
lr = LinearRegression()
knn = KNeighborsRegressor()
dt = DecisionTreeRegressor()

In [5]:
from sklearn.model_selection import train_test_split
X_boston,Y_boston = boston.data,boston.target
X_train, X_test, Y_train, Y_test = train_test_split(X_boston, Y_boston , train_size=0.80, test_size=0.20, random_state=123)

In [6]:
lr.fit(X_train,Y_train)
knn.fit(X_train,Y_train)
dt.fit(X_train,Y_train)

DecisionTreeRegressor()

In [7]:
pred_lr = lr.predict(X_test)
pred_knn = knn.predict(X_test)
pred_dt = dt.predict(X_test)

In [8]:
print('Linear Regression R2 Score : ',r2_score(pred_lr,Y_test))
print('KNN R2 Score : ',r2_score(pred_knn,Y_test))
print('Decision Tree R2 Score : ',r2_score(pred_dt,Y_test))

Linear Regression R2 Score :  0.5594269612728853
KNN R2 Score :  0.2634634191207139
Decision Tree R2 Score :  0.5481135766414477


### Bagging Regressor

In [9]:
from sklearn.ensemble import BaggingRegressor

bag_regressor = BaggingRegressor(random_state=1)
bag_regressor.fit(X_train, Y_train)

BaggingRegressor(random_state=1)

In [10]:
Y_preds = bag_regressor.predict(X_test)

print('Training Coefficient of R^2 : %.3f'%bag_regressor.score(X_train, Y_train))
print('Test Coefficient of R^2 : %.3f'%bag_regressor.score(X_test, Y_test))

Training Coefficient of R^2 : 0.980
Test Coefficient of R^2 : 0.818


### Hyperparameter Tuning Using GridSearchCV

In [13]:
n_samples = boston.data.shape[0]
n_features = boston.data.shape[1]

params = {'base_estimator':[None,LinearRegression(),KNeighborsRegressor()],
          'n_estimators':[10,50,100],
          'max_samples':[0.25,0.50,1.0],
          'max_features':[0.25,0.50,1.0],
          'bootstrap':[True,False],
          'bootstrap_features':[True,False]}

In [15]:
Bagging_Regressor_grid = GridSearchCV(BaggingRegressor(),param_grid=params,cv=3,n_jobs=-1,verbose=1)
Bagging_Regressor_grid.fit(X_train,Y_train)

Fitting 3 folds for each of 324 candidates, totalling 972 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   11.7s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   20.1s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:   29.9s
[Parallel(n_jobs=-1)]: Done 972 out of 972 | elapsed:   36.4s finished


GridSearchCV(cv=3, estimator=BaggingRegressor(), n_jobs=-1,
             param_grid={'base_estimator': [None, LinearRegression(),
                                            KNeighborsRegressor()],
                         'bootstrap': [True, False],
                         'bootstrap_features': [True, False],
                         'max_features': [0.25, 0.5, 1.0],
                         'max_samples': [0.25, 0.5, 1.0],
                         'n_estimators': [10, 50, 100]},
             verbose=1)

In [16]:
Bagging_Regressor_grid.best_params_

{'base_estimator': None,
 'bootstrap': True,
 'bootstrap_features': True,
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 50}

In [17]:
Bagging_Regressor_grid.best_score_

0.8615187887156961