# Bagging

In [1]:
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
# Synthetic classification data: 100 samples with 4 features, 2 of them
# informative and none redundant, generated with a fixed seed.
X, y = make_classification(n_samples=100, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, so `base_estimator=SVC()` raises TypeError on current
# releases. The first positional parameter is the base learner on both sides
# of the rename.
clf = BaggingClassifier(SVC(),
                        n_estimators=10, random_state=0).fit(X, y)
# Class predicted by the bagged ensemble for the all-zeros sample.
clf.predict([[0, 0, 0, 0]])

array([1])

# Random Forest

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
# Synthetic classification data: 1000 samples with 4 features, 2 of them
# informative and none redundant, generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=0,
    shuffle=False,
)

# Forest of depth-2 trees; `fit` returns the estimator, so the fitted model
# is what ends up bound to `clf`.
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)

# Class predicted for the all-zeros sample.
origin = [[0, 0, 0, 0]]
print(clf.predict(origin))

[1]


# Stacking Classifiers

In [1]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)

# Stratified train/test split with a fixed seed so the score is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

# Base learners of the stack: a linear SVM on the raw features and a k-NN
# classifier that standardises its inputs first.
estimators = [
    ('svm', SVC(kernel='linear', C=10**4)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsClassifier())),
]

# Meta-learner: logistic regression applied to the standardised outputs of
# the base learners.
meta_learner = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=42),
)
clf = StackingClassifier(estimators=estimators, final_estimator=meta_learner)

# Fit on the training split, then report mean accuracy on the held-out split.
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9473684210526315

In [4]:
# Show the fitted base estimators of the stacking ensemble. This relies on
# `clf` still being the StackingClassifier fitted above; cells further down
# rebind `clf`, so run this one before them.
clf.estimators_

[SVC(C=10000, kernel='linear'),
 Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('kneighborsclassifier', KNeighborsClassifier())])]

In [2]:
# Baseline: the linear SVM on its own, scored on the same train/test split
# as the stacked model. Note that this rebinds `clf`.
clf = SVC(C=10**4, kernel='linear')
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.8947368421052632

In [3]:
# Baseline: the standardise-then-k-NN pipeline on its own, scored on the same
# train/test split as the stacked model. Note that this rebinds `clf`.
clf = make_pipeline(StandardScaler(), KNeighborsClassifier())
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9210526315789473