In [7]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [8]:
# Build a synthetic classification dataset: 10,000 rows x 10 features, of which
# 3 are informative. random_state pins the generated data so that the split and
# every score computed from it are reproducible on Restart & Run All.
x, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)

In [9]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Baseline: one decision tree with default hyperparameters, used as the
# reference point for the ensemble variants.
dt = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)  # fit() returns the estimator
y_pred = dt.predict(x_test)
print(
    "Decision Tree Accuracy: ",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Decision Tree Accuracy:  0.916


**Bagging**

In [11]:
# Bagging: 500 decision trees, each trained on a random 50% of the training
# rows drawn WITH replacement (bootstrap=True).
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),  # base learner (comma after this argument was missing -> SyntaxError)
    n_estimators=500,                    # number of trees in the ensemble
    max_samples=0.5,                     # fraction of training rows given to each tree
    bootstrap=True,                      # sample rows with replacement
    random_state=42,                     # reproducible row sampling
)

In [12]:
# Train the bagging ensemble on the training split; as the last expression of
# the cell, the fitted estimator is also what gets displayed.
bag.fit(x_train,y_train)

In [13]:
# Predict labels for the held-out test rows with the fitted ensemble.
y_pred = bag.predict(x_test)

In [14]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_test,y_pred)

0.954

In [15]:
# estimators_samples_[0] holds the row indices drawn for the first base
# estimator; its shape shows how many training rows that estimator was given.
bag.estimators_samples_[0].shape

(4000,)

In [16]:
# estimators_features_[0] holds the column indices used by the first base
# estimator; its shape shows how many features that estimator was given.
bag.estimators_features_[0].shape

(10,)

**Bagging using SVM**

In [17]:
# Bagging with a support-vector classifier as the base learner: 500 SVCs,
# each fitted on a bootstrap sample holding 25% of the training rows.
bag = BaggingClassifier(
    estimator=SVC(),
    random_state=42,
    bootstrap=True,      # rows drawn with replacement
    max_samples=0.25,    # fraction of training rows per estimator
    n_estimators=500,
)

In [18]:
# Fit the SVC ensemble and score it on the held-out rows.
# fit() returns the estimator itself, so training and prediction can be chained.
y_pred = bag.fit(x_train, y_train).predict(x_test)
print(
    "Bagging using SVM",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

**Pasting**

In [19]:
# Pasting: same ensemble shape as bagging, but each tree's 25% row sample is
# drawn WITHOUT replacement (bootstrap=False).
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,    # fraction of training rows per tree
    bootstrap=False,     # no replacement -> pasting
    n_jobs=-1,           # use all available CPU cores
    verbose=1,           # emit joblib progress messages
    random_state=42,
)

In [20]:
# Train the pasting ensemble, then measure accuracy on the test split.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred = bag.fit(x_train, y_train).predict(x_test)
print(
    "Accuracy",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:   18.3s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Accuracy 0.9535


[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:    0.4s finished


**Random Subspaces**

In [22]:
# Random Subspaces: every tree sees all training rows but only a random
# subset of the feature columns.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),   # base model: a decision tree
    n_estimators=500,                     # 500 trees in the ensemble
    # --- row sampling: disabled, each tree gets the full training set ---
    max_samples=1.0,
    bootstrap=False,
    # --- feature sampling: half of the columns per tree ---
    max_features=0.5,                     # fraction of features given to each tree
    bootstrap_features=True,              # draw those features with replacement
    random_state=42,
)

In [23]:
# Train the random-subspaces ensemble and score it on the held-out rows.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred = bag.fit(x_train, y_train).predict(x_test)
print(
    "Random subspaces classifier",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Random subspaces 0.953


In [24]:
# Row indices used by the first base estimator; the shape shows how many
# training rows it received (max_samples=1.0 -> the whole training split).
bag.estimators_samples_[0].shape

(8000,)

In [25]:
# Column indices used by the first base estimator; the shape shows how many
# features it received (max_features=0.5 of the 10 columns).
bag.estimators_features_[0].shape

(5,)

**OOB Score**

In [26]:
# Bagging with out-of-bag evaluation: rows left out of a tree's bootstrap
# sample are used to score the ensemble, giving a validation estimate
# without touching the test split.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,      # OOB scoring requires sampling with replacement
    max_samples=0.25,    # fraction of training rows per tree
    oob_score=True,      # compute oob_score_ during fit
    random_state=42,
)


In [27]:
# Train the ensemble; with oob_score=True this also populates bag.oob_score_.
bag.fit(x_train,y_train)

In [28]:
# Out-of-bag score: the ensemble's score on the training set, estimated from
# rows that were left out of the estimators' bootstrap samples.
bag.oob_score_

0.946125

In [29]:
# Compare the OOB estimate against accuracy on the truly held-out test split.
y_pred = bag.predict(x_test)
print(
    "Accuracy",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy 0.952


**Applying GridSearchCV**

In [30]:
from sklearn.model_selection import GridSearchCV

In [31]:
# Hyperparameter grid for BaggingClassifier: 3 x 4 x 2 x 4 = 96 combinations.
parameters = dict(
    n_estimators=[50, 100, 500],          # ensemble size
    max_samples=[0.1, 0.4, 0.7, 1.0],     # fraction of rows per estimator
    bootstrap=[True, False],              # bagging vs. pasting
    max_features=[0.1, 0.4, 0.7, 1.0],    # fraction of columns per estimator
)

In [35]:
# Exhaustive 5-fold cross-validated search over `parameters`.
# - random_state on the estimator makes best_params_/best_score_ reproducible
#   (the row/feature sampling inside BaggingClassifier is otherwise unseeded).
# - n_jobs=-1 spreads the candidate fits over all CPU cores.
# NOTE: re-running this cell creates a fresh, unfitted search object, so the
# fit cell must be run again before best_params_ / best_score_ exist.
search = GridSearchCV(
    BaggingClassifier(random_state=42),
    parameters,
    cv=5,
    n_jobs=-1,
)

In [34]:
# Run the grid search on the training split. This must execute AFTER the cell
# that constructs `search`; only a fitted search exposes best_params_/best_score_.
search.fit(x_train,y_train)

In [37]:
# Show the winning hyperparameters together with their mean cross-validated
# score. Only the last expression of a cell is displayed, so both values are
# returned as one tuple instead of two separate bare expressions.
# Requires `search` to have been fitted after it was (re)created.
search.best_params_, search.best_score_

AttributeError: 'GridSearchCV' object has no attribute 'best_params_'