In [9]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

import warnings
# Blanket filter: no category or module is given, so every warning raised for
# the rest of the session is silenced -- including any deprecation notices.
warnings.filterwarnings('ignore')

In [7]:
from sklearn import datasets

# Load the bundled Boston housing data and assemble predictors and response
# into a single frame; the response goes in last as the `target` column.
boston = datasets.load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(
    target=boston.target
)

In [8]:
# Display the assembled frame (feature columns followed by `target`) as the cell output.
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [15]:
# Separate the response from the predictors, then hold out 20% of the rows
# for testing. The split is seeded so the same rows land in each partition
# on every run.
y = df['target']
X = df.drop(columns='target')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=1
)

In [16]:
# Sanity check on the split: (rows, columns) of the training predictors.
X_train.shape

(404, 13)

### Baseline models: linear regression, decision tree and k-nearest neighbours

In [17]:
# Baseline regressors with default hyper-parameters.
# The decision tree is seeded with the same value used for the train/test
# split: scikit-learn permutes the candidate features at every split, so an
# unseeded tree can produce a different fit (and R2) on each run.
lr=LinearRegression()
dt=DecisionTreeRegressor(random_state=1)
knn=KNeighborsRegressor()

# Fit each model on the training partition only; the test rows stay unseen.
lr.fit(X_train,y_train)
dt.fit(X_train,y_train)
# Kept as the last expression so the cell still displays the fitted estimator.
knn.fit(X_train,y_train)

KNeighborsRegressor()

In [18]:
# Predict the held-out rows with each fitted baseline, in the order
# linear regression, decision tree, k-nearest neighbours.
y_pred_lr, y_pred_dt, y_pred_knn = (
    fitted.predict(X_test) for fitted in (lr, dt, knn)
)

In [19]:
# One R2 line per baseline, printed in the order:
# linear regression, decision tree, k-nearest neighbours.
for y_pred in (y_pred_lr, y_pred_dt, y_pred_knn):
    print("R2 score", r2_score(y_test, y_pred))

R2 score 0.7634174432138501
R2 score 0.6716219534531098
R2 score 0.5401612153026705


### Bagging Regressor 

In [20]:
from sklearn.ensemble import BaggingRegressor

# Default bagging ensemble, seeded for repeatable bootstrap draws.
# `fit` returns the estimator itself, so construction and fitting are chained.
bag_regressor = BaggingRegressor(random_state=1).fit(X_train, y_train)
# Last expression: display the fitted estimator as the cell output.
bag_regressor

BaggingRegressor(random_state=1)

In [22]:
# Predictions on the held-out rows.
y_predict = bag_regressor.predict(X_test)

# `score` is the same call the grid-search cell reports as "R^2 Score";
# comparing train and test values exposes how much the ensemble overfits.
train_score = bag_regressor.score(X_train, y_train)
test_score = bag_regressor.score(X_test, y_test)
print("Training Co-efficients : %.3f" % train_score)
print("Testing Co-efficients : %.3f" % test_score)

Training Co-efficients : 0.980
Testing Co-efficients : 0.895


### Tuning the Bagging Regressor with GridSearchCV

In [45]:
%%time

n_samples=boston.data.shape[0]
n_features=boston.data.shape[1]

params={
    'base_estimators':[None,LinearRegression(),KNeighborsRegressor()],
    'n_estimators':[20,50,100],
    'max_samples':[0.5,1.0],
    'max_features':[0.5,1.0],
    'bootstrap':[True,False],
    'bootstrap_features':[True,False]}

bag_regressor_grid=GridSearchCV(BaggingRegressor(random_state=1,n_jobs=-1),
                               param_grid=params,
                               cv=3,n_jobs=-1,
                                verbose=1
                               )
bagging_regressor_grid.fit(X_train, y_train)


Fitting 3 folds for each of 144 candidates, totalling 432 fits
Wall time: 9.53 s


GridSearchCV(cv=3, estimator=BaggingRegressor(n_jobs=-1, random_state=1),
             n_jobs=-1,
             param_grid={'base_estimator': [None, LinearRegression(),
                                            KNeighborsRegressor()],
                         'bootstrap': [True, False],
                         'bootstrap_features': [True, False],
                         'max_features': [0.5, 1.0], 'max_samples': [0.5, 1.0],
                         'n_estimators': [20, 50, 100]},
             verbose=1)

In [46]:
# Summarise the search: train/test R^2 of the best estimator found by the
# search, the best mean cross-validated score, and the winning parameters.
best_bagger = bagging_regressor_grid.best_estimator_
train_r2 = best_bagger.score(X_train, y_train)
test_r2 = best_bagger.score(X_test, y_test)

print('Train R^2 Score : %.3f' % train_r2)
print('Test R^2 Score : %.3f' % test_r2)
print('Best R^2 Score Through Grid Search : %.3f' % bagging_regressor_grid.best_score_)
print('Best Parameters : ', bagging_regressor_grid.best_params_)

Train R^2 Score : 0.985
Test R^2 Score : 0.911
Best R^2 Score Through Grid Search : 0.869
Best Parameters :  {'base_estimator': None, 'bootstrap': True, 'bootstrap_features': False, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 100}


### THE END