In [1]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [2]:
X,y=make_classification(n_samples=10000,n_features=10,n_informative=3)

In [3]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.2,random_state=42)

In [4]:
dt=DecisionTreeClassifier(random_state=42)
dt.fit(X_train,y_train)
y_pred=dt.predict(X_test)

In [5]:
accuracy_score(y_test,y_pred)

0.9045

# bagging

In [6]:
bag=BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=.5,
    bootstrap=True,
    random_state=42

)

In [7]:
bag.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=500, random_state=42)

In [8]:
y_pred=bag.predict(X_test)

In [9]:
accuracy_score(y_test,y_pred)

0.9335

In [10]:
bag.estimators_samples_

[array([2523, 3113, 7114, ..., 5668, 7066, 3677]),
 array([4782,  663, 7155, ..., 3122, 2141, 5379]),
 array([5462, 6574, 4896, ..., 6686, 4871, 6424]),
 array([2848, 2629, 1591, ...,  595, 4333, 1671]),
 array([3821, 6494, 1606, ..., 1645, 3358,  388]),
 array([2261, 7922, 3649, ..., 7532, 6138, 1483]),
 array([ 652, 1676, 2291, ..., 2618,  113, 7628]),
 array([2478, 4107, 1958, ..., 5175, 7270, 3055]),
 array([5800, 3548, 6540, ..., 5313, 2336, 6608]),
 array([5256, 7181, 3409, ..., 3860, 3417, 5180]),
 array([2675, 2834, 3817, ...,  270, 2233,  103]),
 array([3236, 7607, 4600, ..., 5180, 5002, 3192]),
 array([4563, 4137, 6298, ..., 5296, 5676,  860]),
 array([2816, 5343, 5817, ..., 5489, 4283,  456]),
 array([2448, 2733, 5480, ..., 7046, 5681, 1372]),
 array([4248, 3828, 4630, ...,  767, 3976, 5795]),
 array([4815, 1867,  503, ..., 3767, 7526, 2513]),
 array([1688, 4132, 5218, ..., 5248, 3524, 6444]),
 array([4167, 2900, 3602, ..., 2322, 2670, 6725]),
 array([2920, 7645, 5452, ..., 

In [11]:
bag.estimators_samples_[0].shape

(4000,)

In [12]:
bag.estimators_features_[0].shape

(10,)

# Bagging using svm

In [13]:
bag=BaggingClassifier(
    base_estimator=SVC(),
    n_estimators=500,
    max_samples=.25,
    bootstrap=True,
    random_state=42
)

In [14]:
bag.fit(X_train,y_train)
y_pred=bag.predict(X_test)
accuracy_score(y_test,y_pred)

0.9085

# pasting

In [15]:
bag=BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=.25,
    bootstrap=False,
    random_state=42,
    verbose=1,
    n_jobs=1

)

In [16]:
bag.fit(X_train,y_train)
y_pred=bag.predict(X_test)
accuracy_score(y_test,y_pred)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s finished


0.933

# random subspaces

In [17]:
bag=BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    max_features=.50,
    bootstrap_features=True,
    random_state=42

)

In [18]:
bag.fit(X_train,y_train)
y_pred=bag.predict(X_test)
accuracy_score(y_test,y_pred)

0.9135

In [19]:
bag.estimators_samples_[0].shape

(8000,)

In [20]:
bag.estimators_features_[0].shape

(5,)

# random_patches

In [21]:
bag=BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=.25,
    bootstrap=True,
    max_features=.5,
    bootstrap_features=True,
    random_state=42
)

In [22]:
bag.fit(X_train,y_train)
y_pred=bag.predict(X_test)
accuracy_score(y_test,y_pred)

0.9135

# oob score

In [23]:
bag=BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=.25,
    bootstrap=True,
    oob_score=True,
    random_state=42
)

In [24]:
bag.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.25,
                  n_estimators=500, oob_score=True, random_state=42)

In [25]:
bag.oob_score_

0.944625

In [26]:
y_pred=bag.predict(X_test)
accuracy_score(y_test,y_pred)

0.9315

# Applying GridSearchCV

In [27]:
from sklearn.model_selection import GridSearchCV

In [28]:
parameters={
    "n_estimators":[50,100,500],
    "max_samples":[.1,.4,.7,1],
    "bootstrap":[True,False],
    "max_features":[.1,.4,.7,1]
    
    
}

In [29]:
search=GridSearchCV(BaggingClassifier(),parameters,cv=5)

In [30]:
search.fit(X_train,y_train)

GridSearchCV(cv=5, estimator=BaggingClassifier(),
             param_grid={'bootstrap': [True, False],
                         'max_features': [0.1, 0.4, 0.7, 1],
                         'max_samples': [0.1, 0.4, 0.7, 1],
                         'n_estimators': [50, 100, 500]})

In [31]:
search.best_params_

{'bootstrap': False,
 'max_features': 0.7,
 'max_samples': 0.7,
 'n_estimators': 50}

In [32]:
search.best_score_

0.9515

In [33]:
search.best_params_

{'bootstrap': False,
 'max_features': 0.7,
 'max_samples': 0.7,
 'n_estimators': 50}