In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split,GridSearchCV

In [4]:
# Load the Boston housing data set used by every cell below.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older releases — confirm the pinned version.
dataset = load_boston()

In [5]:
# Feature matrix and regression target taken from the loaded dataset object.
X = dataset.data
y = dataset.target

In [6]:
# Quick sanity check of what was loaded: column names and array shapes.
print(f"Boaston Features : {str(dataset.feature_names)}")
print(f"Boaston data shape : {str(X.shape)}")
print(f"Boaston target shape : {str(y.shape)}")

Boaston Features : ['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
Boaston data shape : (506, 13)
Boaston target shape : (506,)


In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Display (train shape, test shape) to confirm the split sizes.
tuple(part.shape for part in (X_train, X_test))

((404, 13), (102, 13))

In [11]:
# Create one instance each of the linear, decision-tree and KNN regressors.
lr = LinearRegression()
# Seed the tree: ties between equally good splits are broken at random, so an
# unseeded tree gives a different R2 on every Restart & Run All.
dr = DecisionTreeRegressor(random_state=42)
kr = KNeighborsRegressor()

In [12]:
# Fit each baseline regressor on the training split.
# The KNN fit stays last so the cell still displays its repr as the output.
lr.fit(X=X_train, y=y_train)
dr.fit(X=X_train, y=y_train)
kr.fit(X=X_train, y=y_train)

KNeighborsRegressor()

In [13]:
# Predict on the held-out split with each fitted model, in the order
# linear regression, decision tree, KNN.
y_pred1, y_pred2, y_pred3 = (
    model.predict(X_test) for model in (lr, dr, kr)
)

In [14]:
# Held-out R2 for each of the three baseline regressors.
print("R2 score for Linear Regression : ", r2_score(y_true=y_test, y_pred=y_pred1))
print("R2 score for Decision Tree Regression : ", r2_score(y_true=y_test, y_pred=y_pred2))
print("R2 score for KNN Regression : ", r2_score(y_true=y_test, y_pred=y_pred3))

R2 score for Linear Regression :  0.6687594935356278
R2 score for Decision Tree Regression :  0.6825166533265996
R2 score for KNN Regression :  0.6473640882039258


- Average score from every model

### Bagging Regressor

In [15]:
# Bagging ensemble with library-default settings; only the seed is fixed so
# the bootstrap draws are repeatable.
bag = BaggingRegressor(
    random_state=1,
)

In [16]:
# Train the bagging ensemble on the training split (the fitted estimator is displayed).
bag.fit(X=X_train, y=y_train)

BaggingRegressor(random_state=1)

In [17]:
# Compare R2 on the data the ensemble was fitted on against the held-out split.
print("R2 Score for Train model is : %.3f" % (bag.score(X_train, y_train),))
print("R2 Score for Test model is : %.3f" % (bag.score(X_test, y_test),))

R2 Score for Train model is : 0.972
R2 Score for Test model is : 0.838


- Observation:
    1. High variance and low bias: the train R2 (0.972) is well above the test R2 (0.838), so the model overfits.

## Using Grid Search CV for Best Parameters

In [33]:
%%time
param = {"base_estimator" : [None, LinearRegression(), KNeighborsRegressor()],
        "n_estimators" : [100,300,500],
        "max_samples" : [0.3,0.5,1],
        "max_features" : [0.5, 1],
        "bootstrap" : [False, True], 
        "bootstrap_features" : [False, True]
        }

bag_reg_grid = GridSearchCV(BaggingRegressor(),param, cv=5, verbose=True, n_jobs=-1)
bag_reg_grid.fit(X_train,y_train)

print("R2 Score for Train model is : %.3f"%bag_reg_grid.best_estimator_.score(X_train,y_train))
print("R2 Score for Test model is : %.3f"%bag_reg_grid.best_estimator_.score(X_test,y_test))
print("Best R2 score by Grid Search best Model : %.3f"%bag_reg_grid.best_score_)
print("Best Parameters are : ", bag_reg_grid.best_params_)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


 -0.01923843 -0.01869436 -0.01035365  0.4512659   0.47472433  0.45884596
  0.46343486  0.47539143  0.47394872 -0.02290432 -0.02854205 -0.010113
  0.76053679  0.77328891  0.76691625  0.77490158  0.78263296  0.78105897
 -0.03172738 -0.00889601 -0.00745193  0.46606327  0.44881171  0.45750038
  0.44781335  0.46106764  0.47422216 -0.01438451 -0.01305956 -0.01307703
  0.77615817  0.78561662  0.7854029   0.79911705  0.80031789  0.80359474
 -0.01710667 -0.01595818 -0.00200575  0.45869759  0.45445392  0.45482738
  0.4779398   0.4842703   0.474764   -0.012679   -0.0051702  -0.01552821
  0.76095632  0.76268475  0.76712402  0.78411182  0.77734689  0.7785285
 -0.05461595 -0.00879112 -0.00298958  0.44507949  0.45238149  0.45452897
  0.47706908  0.45373331  0.46576467 -0.05244966 -0.00596456 -0.01221
  0.66557373  0.6712102   0.66740749  0.66654472  0.66627924  0.66921611
 -0.01337498 -0.01780763 -0.01246359  0.31968155  0.3238048   0.32167329
  0.33670897  0.31639601  0.31581907 -0.03391577 -0.00566

R2 Score for Train model is : 0.954
R2 Score for Test model is : 0.812
Best R2 score by Grid Search best Model : 0.806
Best Parameters are :  {'base_estimator': None, 'bootstrap': False, 'bootstrap_features': False, 'max_features': 0.5, 'max_samples': 0.5, 'n_estimators': 500}
Wall time: 5min 9s
