### Ensemble methods

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold

In [2]:
# Seed passed as `random_state` to the estimators built in the cells below,
# so that their internal randomness is repeatable between runs.
RANDOM_STATE = 1

# Stratified 5-fold splitter reused by every evaluation loop in this notebook.
# `shuffle` is left at its default, so no seed is supplied here.
skf = StratifiedKFold(n_splits=5)

In [3]:
# Load scikit-learn's bundled wine dataset and keep the feature matrix and
# the class labels under the short names used by the evaluation cells.
wine = datasets.load_wine()
X = wine.data
y = wine.target


In [4]:
# Baseline: a single depth-1 decision tree (a "stump"), scored with the
# shared stratified cross-validation splitter.
clf = DecisionTreeClassifier(
    min_samples_leaf=3,
    max_depth=1,
    random_state=RANDOM_STATE,
)
val = []  # accuracy obtained on each held-out fold

for train_data, test_data in skf.split(X, y):
    # Index arrays select the rows belonging to this fold's train/test parts.
    X_train, y_train = X[train_data], y[train_data]
    X_test, y_test = X[test_data], y[test_data]

    # Refit from scratch on the training part, then predict the held-out part.
    pred = clf.fit(X_train, y_train).predict(X_test)
    # Accuracy = fraction of held-out samples whose label was predicted correctly.
    test = np.mean(pred == y_test)
    val.append(test)

# Unweighted mean of the per-fold accuracies.
avg = np.mean(val)
print('Decision tree average score:', avg)

Decision tree average score: 0.6463492063492063


In [5]:
# Bagging: 50 decision stumps, each fit on a bootstrap sample of the training
# fold, combined by voting.
#
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (and the old name
# removed in 1.4); the positional form works on both old and new releases.
#
# `random_state` is set on the ensemble itself: seeding only the base tree does
# not fix the bootstrap sampling, so without it the score changes on every run.
# Scores recorded from an earlier unseeded run may therefore differ from the
# value printed after re-running this cell.
clf = BaggingClassifier(
    DecisionTreeClassifier(min_samples_leaf=3, max_depth=1, random_state=RANDOM_STATE),
    n_estimators=50,
    random_state=RANDOM_STATE,
)
val = []  # accuracy obtained on each held-out fold

for train_data, test_data in skf.split(X, y):
    X_train, X_test = X[train_data], X[test_data]
    y_train, y_test = y[train_data], y[test_data]

    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Accuracy = fraction of held-out samples whose label was predicted correctly.
    test = np.mean(pred == y_test)
    val.append(test)

# Unweighted mean of the per-fold accuracies.
avg = np.mean(val)
print('Bagging average score:', avg)

Bagging average score: 0.8266666666666665


In [6]:
# AdaBoost (SAMME variant): 50 decision stumps fit sequentially, each one
# concentrating on the samples the previous ones misclassified.
#
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (and the old name
# removed in 1.4); the positional form works on both old and new releases.
#
# `random_state` is set on the ensemble itself, since that is the seed the
# ensemble hands to each stump it fits; this keeps the score repeatable.
# Scores recorded from an earlier unseeded run may therefore differ from the
# value printed after re-running this cell.
clf = AdaBoostClassifier(
    DecisionTreeClassifier(min_samples_leaf=3, max_depth=1, random_state=RANDOM_STATE),
    algorithm='SAMME',
    n_estimators=50,
    random_state=RANDOM_STATE,
)
val = []  # accuracy obtained on each held-out fold

for train_data, test_data in skf.split(X, y):
    X_train, X_test = X[train_data], X[test_data]
    y_train, y_test = y[train_data], y[test_data]

    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Accuracy = fraction of held-out samples whose label was predicted correctly.
    test = np.mean(pred == y_test)
    val.append(test)

# Unweighted mean of the per-fold accuracies.
avg = np.mean(val)
print('AdaBoost average score:', avg)

AdaBoost average score: 0.9333333333333332


In [7]:
# Gradient boosting: 50 boosting stages with no shrinkage (learning_rate=1);
# subsample=0.5 fits each stage on a random half of the training rows, and
# random_state keeps that sampling repeatable.
clf = GradientBoostingClassifier(
    random_state=RANDOM_STATE,
    learning_rate=1,
    subsample=0.5,
    n_estimators=50,
    min_samples_leaf=3,
)
val = []  # accuracy obtained on each held-out fold

for train_data, test_data in skf.split(X, y):
    # Index arrays select the rows belonging to this fold's train/test parts.
    X_train, y_train = X[train_data], y[train_data]
    X_test, y_test = X[test_data], y[test_data]

    # Refit from scratch on the training part, then predict the held-out part.
    pred = clf.fit(X_train, y_train).predict(X_test)
    # Accuracy = fraction of held-out samples whose label was predicted correctly.
    test = np.mean(pred == y_test)
    val.append(test)

# Unweighted mean of the per-fold accuracies.
avg = np.mean(val)
print('Gradient boosting average score:', avg)

Gradient boosting average score: 0.9609523809523809
