In [119]:
import numpy as np

# Seed NumPy's legacy global RNG so that code drawing from it is repeatable
# when the notebook is run top to bottom.
SEED = 42
np.random.seed(SEED)

In [120]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Generate the noisy two-moons dataset. The seed is passed explicitly so the
# data does not depend on the state of the global NumPy RNG (i.e. on which
# cells happened to run before this one), consistent with the explicitly
# seeded train_test_split call below.
X, y = make_moons(n_samples=10_000, noise=0.4, random_state=42)

# Hold out 30% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [121]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Hyperparameter combinations to evaluate for the decision tree.
param_grid = {
    'max_depth': [1, 2],
    'max_leaf_nodes': [2, 4, 6, 8],
}

# Base estimator and the grid search wrapping it (cv=5 cross-validation folds).
tree_clf = DecisionTreeClassifier()
clf = GridSearchCV(estimator=tree_clf, param_grid=param_grid, cv=5)

In [122]:
# Run the grid search on the training split; the fitted GridSearchCV object is
# the cell's displayed value.
clf.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [1, 2], 'max_leaf_nodes': [2, 4, 6, 8]})

In [123]:
# Best cross-validation score found by the search, shown next to the best
# estimator's score on the held-out test set.
cv_score = clf.best_score_
test_score = clf.best_estimator_.score(X_test, y_test)
cv_score, test_score

(0.8538571428571429, 0.856)

In [124]:
from sklearn.model_selection import ShuffleSplit

# 1,000 random draws of 100 training-set row indices each (seeded for repeatability).
ss = ShuffleSplit(n_splits=1_000, train_size=100, random_state=42)

# Fit one shallow tree per draw. Only the first index array of each split is
# used; fit() returns the estimator itself, so it can be collected directly.
trees = [
    DecisionTreeClassifier(max_depth=2, max_leaf_nodes=4).fit(X_train[subset], y_train[subset])
    for subset, _ in ss.split(X_train)
]

In [125]:
# Collect every tree's predictions for the test set into one 2-D array with
# one row per tree and one column per test sample.
# Building the array once from a list avoids re-copying the whole accumulated
# matrix on every iteration, which repeated np.vstack calls in a loop would do.
predictions = np.array([tree.predict(X_test) for tree in trees])

In [126]:
from sklearn.metrics import accuracy_score

# Majority vote across trees: each column of `predictions` holds all trees'
# predicted labels for one test sample, so iterate over the transpose.
# np.bincount counts occurrences of each label (it requires non-negative
# integer labels) and argmax picks the most frequent one; ties resolve to the
# smallest label. Building the result in one pass keeps the labels as integers
# and avoids re-copying the array for every sample.
y_accumulated = np.array([np.bincount(votes).argmax() for votes in predictions.T])

# Accuracy of the ensemble's majority vote on the held-out test set.
accuracy_score(y_test, y_accumulated)

0.8606666666666667