In [16]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split,GridSearchCV,KFold
from sklearn.ensemble import BaggingRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor

In [4]:
concrete=pd.read_csv("Concrete_Data.csv")
concrete.head()

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
X=concrete.drop('Strength',axis=1)
y=concrete['Strength']

In [7]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)

In [8]:
lr=LinearRegression()
lr.fit(X_train,y_train)

In [9]:
y_pred=lr.predict(X_test)

In [10]:
lr_r2=r2_score(y_test,y_pred)
print("Alone LR:",lr_r2)

Alone LR: 0.6312960386440598


In [11]:
bagg=BaggingRegressor(lr,n_estimators=15)
bagg.fit(X_train,y_train)

In [12]:
y_pred=bagg.predict(X_test)

In [13]:
bagg_r2=r2_score(y_test,y_pred)
print("Bagging: ",bagg_r2)

Bagging:  0.6311580091985591


## Using DTR

In [17]:
dtr=DecisionTreeRegressor()
dtr.fit(X_train,y_train)

In [18]:
y_pred=dtr.predict(X_test)

In [19]:
dtr_r2=r2_score(y_test,y_pred)
print("Alone DTR: ",dtr_r2)

Alone DTR:  0.8122181582637211


In [20]:
bagg=BaggingRegressor(dtr,n_estimators=15)
bagg.fit(X_train,y_train)

In [21]:
y_pred=bagg.predict(X_test)

In [22]:
bagg_r2=r2_score(y_test,y_pred)
print("Bagging: ",bagg_r2)

Bagging:  0.8842482626991144


## USing GCV

In [25]:
dtr=DecisionTreeRegressor(random_state=23)
bagg=BaggingRegressor(dtr,random_state=23)
bagg.get_params()

{'base_estimator': 'deprecated',
 'bootstrap': True,
 'bootstrap_features': False,
 'estimator__ccp_alpha': 0.0,
 'estimator__criterion': 'squared_error',
 'estimator__max_depth': None,
 'estimator__max_features': None,
 'estimator__max_leaf_nodes': None,
 'estimator__min_impurity_decrease': 0.0,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'estimator__min_weight_fraction_leaf': 0.0,
 'estimator__random_state': 23,
 'estimator__splitter': 'best',
 'estimator': DecisionTreeRegressor(random_state=23),
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 10,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 23,
 'verbose': 0,
 'warm_start': False}

In [26]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'estimator__max_depth':[2,3,5,10,None],
        'estimator__min_samples_leaf':[1,5,10],
       'estimator__min_samples_split':[2,5,10],
       'n_estimators':[10,30,50]}
gcv=GridSearchCV(bagg,param_grid=params,cv=kfold,verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 135 candidates, totalling 675 fits
[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.573 total time=   0.1s
[CV 2/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.528 total time=   0.0s
[CV 3/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.580 total time=   0.0s
[CV 4/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.581 total time=   0.0s
[CV 5/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.519 total time=   0.0s
[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=30;, score=0.557 total time=   0.1s
[CV 2/5] END estimator__max_depth=2, estima

[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.562 total time=   0.1s
[CV 2/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.550 total time=   0.1s
[CV 3/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.585 total time=   0.1s
[CV 4/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.576 total time=   0.2s
[CV 5/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.547 total time=   0.1s
[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=10;, score=0.573 total time=   0.0s
[CV 2/5] END estimator__max_depth=2, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_esti

[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.557 total time=   0.1s
[CV 2/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.558 total time=   0.1s
[CV 3/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.595 total time=   0.1s
[CV 4/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.575 total time=   0.1s
[CV 5/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.535 total time=   0.1s
[CV 1/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=50;, score=0.562 total time=   0.1s
[CV 2/5] END estimator__max_depth=2, estimator__min_samples_leaf=10, estimator__min_samples_split=5,

[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.674 total time=   0.1s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.687 total time=   0.1s
[CV 3/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.743 total time=   0.1s
[CV 4/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.682 total time=   0.1s
[CV 5/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.671 total time=   0.1s
[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=50;, score=0.676 total time=   0.2s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10,

[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.674 total time=   0.1s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.684 total time=   0.1s
[CV 3/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.740 total time=   0.1s
[CV 4/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.683 total time=   0.1s
[CV 5/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.671 total time=   0.1s
[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=50;, score=0.676 total time=   0.1s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2,

[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.836 total time=   0.0s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=30;, score=0.815 total time=   0.1s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=30;, score=0.828 total time=   0.1s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=30;, score=0.876 total time=   0.1s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=30;, score=0.830 total time=   0.1s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=30;, score=0.829 total time=   0.1s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_esti

[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.826 total time=   0.2s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.813 total time=   0.0s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.813 total time=   0.1s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.859 total time=   0.0s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.819 total time=   0.0s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.834 total time=   0.0s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, 

[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.802 total time=   0.2s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.808 total time=   0.2s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.852 total time=   0.2s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.823 total time=   0.2s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.817 total time=   0.2s
[CV 1/5] END estimator__max_depth=10, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.893 total time=   0.1s
[CV 2/5] END estimator__max_depth=10, estimator__min_samples_leaf=1, estimator__min_samples_spl

[CV 2/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=30;, score=0.866 total time=   0.2s
[CV 3/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=30;, score=0.907 total time=   0.1s
[CV 4/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=30;, score=0.886 total time=   0.1s
[CV 5/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=30;, score=0.868 total time=   0.2s
[CV 1/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.865 total time=   0.2s
[CV 2/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.865 total time=   0.2s
[CV 3/5] END estimator__max_depth=10, estimator__min_samples_leaf=5, estimator__min_samples_split=2,

[CV 5/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=10;, score=0.845 total time=   0.1s
[CV 1/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.833 total time=   0.1s
[CV 2/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.838 total time=   0.1s
[CV 3/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.874 total time=   0.1s
[CV 4/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.860 total time=   0.1s
[CV 5/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.847 total time=   0.1s
[CV 1/5] END estimator__max_depth=10, estimator__min_samples_leaf=10, estimator__min_samples_s

[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=50;, score=0.909 total time=   0.3s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=50;, score=0.891 total time=   0.3s
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=10;, score=0.887 total time=   0.0s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=10;, score=0.882 total time=   0.0s
[CV 3/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=10;, score=0.908 total time=   0.0s
[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=10;, score=0.896 total time=   0.1s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__mi

[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=30;, score=0.886 total time=   0.1s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=30;, score=0.868 total time=   0.1s
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=50;, score=0.866 total time=   0.3s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=50;, score=0.865 total time=   0.3s
[CV 3/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=50;, score=0.906 total time=   0.2s
[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=50;, score=0.887 total time=   0.2s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__

In [27]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'estimator__max_depth': None, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 2, 'n_estimators': 50}
best score : 0.9092434907551853
