In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics

def bootstrap_sample(X, y, rng=None):
    """Draw a bootstrap resample (same size, with replacement) of a dataset.

    Parameters
    ----------
    X : ndarray
        Samples, indexed along the first axis.
    y : ndarray
        Targets aligned with ``X``; must have as many entries as ``X`` has rows.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) keeps the historical
        behaviour of drawing from NumPy's global RNG, so ``np.random.seed``
        still controls the result. An int seed or a ``Generator`` gives a
        resample that is reproducible without touching global state.

    Returns
    -------
    (X_resampled, y_resampled) : tuple of ndarray
        Rows of ``X`` and the matching entries of ``y``, selected by the same
        randomly drawn indices so every sample keeps its own target.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    n_samples = X.shape[0]
    if len(y) != n_samples:
        # Without this check a longer y would be silently truncated to the
        # first n_samples entries and a shorter one would fail with an
        # unhelpful IndexError further down.
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {len(y)}"
        )

    if rng is None:
        # Legacy path: global RNG, identical to the original implementation.
        idxs = np.random.choice(n_samples, n_samples, replace=True)
    else:
        # default_rng accepts a seed or passes an existing Generator through.
        idxs = np.random.default_rng(rng).choice(n_samples, n_samples, replace=True)
    return X[idxs], y[idxs]

def bagging_ensemble(X_train, y_train, X_test, y_test, n_clf = 10):
    """Train a bagged ensemble of decision trees and return its test accuracy.

    Each of the ``n_clf`` trees is a default ``DecisionTreeClassifier`` fitted
    on its own bootstrap resample of the training data. The ensemble
    prediction for a test sample is the label predicted by the most trees.

    Parameters
    ----------
    X_train, y_train : ndarray
        Training samples and their labels.
    X_test, y_test : ndarray
        Held-out samples and their true labels, used only for scoring.
    n_clf : int, default 10
        Number of trees in the ensemble; must be at least 1.

    Returns
    -------
    float
        Accuracy of the majority-vote prediction on ``X_test``.

    Raises
    ------
    ValueError
        If ``n_clf`` is smaller than 1.
    """
    if n_clf < 1:
        # With no classifiers there is nothing to vote on; fail with a clear
        # message instead of an obscure error from the voting step.
        raise ValueError(f"n_clf must be at least 1, got {n_clf}")

    pred_list = []
    for _ in range(n_clf):
        model = DecisionTreeClassifier()
        X_boot, y_boot = bootstrap_sample(X_train, y_train)
        model.fit(X_boot, y_boot)
        pred_list.append(model.predict(X_test))

    # Majority vote. np.bincount only accepts non-negative integers, so vote
    # on the integer codes produced by np.unique and map the winner back to
    # the original label. This also works for string, float or negative labels.
    votes = np.asarray(pred_list)  # one row per classifier, one column per test sample
    classes, encoded = np.unique(votes, return_inverse=True)
    # The shape of the inverse for N-d input differs across NumPy versions.
    encoded = encoded.reshape(votes.shape)
    counts = np.apply_along_axis(np.bincount, 0, encoded, minlength=classes.size)
    # argmax returns the first maximum, so ties go to the smallest label,
    # the same tie-break the plain bincount/argmax vote had.
    y_pred = classes[np.argmax(counts, axis=0)]

    return metrics.accuracy_score(y_test, y_pred)


In [2]:
# Load digits dataset
SEED = 42  # one fixed seed so a fresh Restart & Run All reproduces the accuracy below

# bootstrap_sample draws from NumPy's global RNG, and DecisionTreeClassifier()
# created without a random_state falls back to that same global RNG, so
# seeding it once here makes the whole ensemble repeatable.
np.random.seed(SEED)

digits = load_digits(n_class = 4)
X, y = digits.data, digits.target
# random_state pins the train/test partition as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = SEED)
accuracy = bagging_ensemble(X_train, y_train, X_test, y_test)
accuracy


0.9722222222222222