In [None]:

from sklearn.datasets import fetch_openml


In [None]:
# Download MNIST (OpenML dataset 'mnist_784', version 1) as a (features, labels) pair.
# as_frame=False pins the return type to NumPy arrays so the positional slicing used
# for the train/validation/test split does not depend on the installed scikit-learn
# version's default container type.
X_mnist, y_mnist = fetch_openml(
    'mnist_784', version=1, return_X_y=True, as_frame=False
)

In [None]:
# Positional split: rows [0, 50k) -> training, [50k, 60k) -> validation,
# everything from row 60k onwards -> test.
X_train, x_val, X_test = X_mnist[:50_000], X_mnist[50_000:60_000], X_mnist[60_000:]
y_train, y_val, y_test = y_mnist[:50_000], y_mnist[50_000:60_000], y_mnist[60_000:]

In [None]:
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [None]:
# Four base classifiers, all seeded with the same random_state for reproducibility.
random_forest_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
extra_tress_clf = ExtraTreesClassifier(
    n_estimators=100,
    random_state=42,
)
# NOTE(review): low iteration cap and very loose tolerance — presumably chosen to
# keep training fast rather than to maximise accuracy; confirm.
svm_clf = LinearSVC(
    max_iter=100,
    tol=20,
    dual=True,
    random_state=42,
)
mlp_clf = MLPClassifier(random_state=42)

In [None]:
# Collect the base models and fit each one on the training split,
# announcing every model before its (potentially slow) fit starts.
estimators = [random_forest_clf, extra_tress_clf, svm_clf, mlp_clf]
for clf in estimators:
    print("Training the", clf)
    clf.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the LinearSVC(dual=True, max_iter=100, random_state=42, tol=20)
Training the MLPClassifier(random_state=42)


In [None]:
# Validation score of each individually trained base model, in `estimators` order.
[clf.score(x_val, y_val) for clf in estimators]

[0.9736, 0.9743, 0.8662, 0.9613]

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# (name, estimator) pairs in the form expected by the ensemble constructors below.
named_estimators = list(
    {
        "random_forest_clf": random_forest_clf,
        "extra_tress_clf": extra_tress_clf,
        "svm_clf": svm_clf,
        "mlp_clf": mlp_clf,
    }.items()
)

In [None]:
# Voting ensemble over the four named base models (voting mode left at its default).
voting_clf = VotingClassifier(estimators=named_estimators)

In [None]:
# Fit the voting ensemble on the training split.
voting_clf.fit(X_train, y_train)

In [None]:
# Score the full ensemble on the validation split.
voting_clf.score(x_val, y_val)

0.975

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encode the validation labels as integer class indices.
# NOTE(review): `y_val_encoded` is reassigned by the `astype(np.int64)` cell that
# follows, so this result is only kept until that cell runs.
encoder = LabelEncoder()
y_val_encoded = encoder.fit_transform(y_val)

In [None]:
import numpy as np


In [None]:
# Cast the validation labels directly to int64. This overwrites the value of
# `y_val_encoded` produced by the LabelEncoder cell above.
y_val_encoded = y_val.astype(np.int64)

In [None]:
# Score each fitted sub-estimator held by the ensemble against the integer labels.
# NOTE(review): integer labels are presumably needed because the ensemble trains
# its sub-estimators on encoded class indices — confirm against the
# VotingClassifier documentation.
[clf.score(x_val, y_val_encoded) for clf in voting_clf.estimators_]

[0.9736, 0.9743, 0.8662, 0.9613]

In [None]:
# Mark the SVM entry as "drop" in the ensemble's parameters. The fitted
# attributes are not changed by this call: the next two cells still list the
# LinearSVC.
voting_clf.set_params(svm_clf="drop")

In [None]:
# Inspect the fitted sub-estimators by name (is "svm_clf" still there?).
voting_clf.named_estimators_


{'random_forest_clf': RandomForestClassifier(random_state=42),
 'extra_tress_clf': ExtraTreesClassifier(random_state=42),
 'svm_clf': LinearSVC(dual=True, max_iter=100, random_state=42, tol=20),
 'mlp_clf': MLPClassifier(random_state=42)}

In [None]:
# Inspect the list of fitted sub-estimators.
voting_clf.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 LinearSVC(dual=True, max_iter=100, random_state=42, tol=20),
 MLPClassifier(random_state=42)]

In [None]:
# Remove the fitted SVM by hand from both fitted containers so the ensemble
# stops using it without being refit.
# NOTE(review): not idempotent — re-running this cell would presumably raise
# KeyError, since "svm_clf" has already been popped.
svm_clf_trained = voting_clf.named_estimators_.pop("svm_clf")
voting_clf.estimators_.remove(svm_clf_trained)

In [None]:
# Re-score the ensemble on the validation split now that the SVM has been removed.
voting_clf.score(x_val, y_val)

0.9761

In [None]:
# Switch the already-fitted ensemble to soft voting by setting the attribute in place.
voting_clf.voting = "soft"

In [None]:
# Validation score with soft voting, for comparison with the hard-voting score above.
voting_clf.score(x_val, y_val)

0.9703

In [None]:
# Hard voting scored higher on validation (0.9761 vs 0.9703), so switch back to
# it and evaluate the ensemble on the test split.
voting_clf.voting = "hard"
voting_clf.score(X_test, y_test)

0.9733

In [None]:
# Test score of each remaining fitted sub-estimator, using integer-cast labels.
[
    clf.score(X_test, y_test.astype(np.int64))
    for clf in voting_clf.estimators_
]

[0.968, 0.9703, 0.9618]

In [None]:
# Prediction matrix for the blender: one row per validation sample, one column
# per base estimator. dtype=object because the predicted labels are strings.
x_val_pred = np.empty(shape=(len(x_val), len(estimators)), dtype=object)

for column, clf in enumerate(estimators):
    x_val_pred[:, column] = clf.predict(x_val)

In [None]:
# Display the prediction matrix (rows: validation samples, columns: base estimators).
x_val_pred

array([['3', '3', '3', '3'],
       ['8', '8', '8', '8'],
       ['6', '6', '6', '6'],
       ...,
       ['5', '5', '5', '5'],
       ['6', '6', '6', '6'],
       ['8', '8', '8', '8']], dtype=object)

In [None]:
# Blender: a random forest trained on the base models' validation predictions.
# oob_score=True makes the out-of-bag score available after fitting.
rnd_forest_blender = RandomForestClassifier(
    n_estimators=200,
    oob_score=True,
    random_state=42,
)
rnd_forest_blender.fit(x_val_pred, y_val)

In [None]:
# Out-of-bag score of the blender (available because oob_score=True was set).
rnd_forest_blender.oob_score_

0.9738

In [None]:
# Same prediction-matrix layout as for validation, built for the test split:
# one row per test sample, one column per base estimator.
X_test_pred = np.empty(shape=(len(X_test), len(estimators)), dtype=object)

for column, clf in enumerate(estimators):
    X_test_pred[:, column] = clf.predict(X_test)

In [None]:
# Let the blender turn the base models' test predictions into final labels.
y_pred = rnd_forest_blender.predict(X_test_pred)

In [None]:
from sklearn.metrics import accuracy_score
# Test accuracy of the hand-built blending pipeline.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9688

In [None]:
# First 60,000 rows: the training and validation ranges used earlier, combined.
x_train_full = X_mnist[:60_000]
y_train_full = y_mnist[:60_000]

In [None]:
from sklearn.ensemble import StackingClassifier

In [None]:
# Stacking ensemble: the named base models feed a random-forest final estimator.
# It is fitted on the combined train + validation rows.
stack_clf = StackingClassifier(
    estimators=named_estimators,
    final_estimator=rnd_forest_blender,
)
stack_clf.fit(x_train_full, y_train_full)

In [None]:
# Score the stacking ensemble on the test split.
stack_clf.score(X_test, y_test)

0.9795