In [2]:
from sklearn.datasets import fetch_openml

X_mnist,y_mnist = fetch_openml('mnist_784',return_X_y=True,as_frame=False,parser='auto')

In [14]:
X_train,y_train = X_mnist[:50_000],y_mnist[:50_000]
X_valid, y_valid = X_mnist[50_000:60_000], y_mnist[50_000:60_000]
X_test,y_test = X_mnist[60_000:],y_mnist[60_000:]

In [15]:
X_valid.shape  # (10000, 784)
y_valid.shape  # (10000,)


(10000,)

In [16]:
from sklearn.ensemble import ExtraTreesClassifier,RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [17]:
random_forest_clf = RandomForestClassifier(n_estimators=100,random_state=42)
extra_tree_clf = ExtraTreesClassifier(n_estimators=100,random_state=42)
svc_clf = LinearSVC(max_iter=100,tol=20,random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [18]:
estimators = [random_forest_clf,extra_tree_clf,svc_clf,mlp_clf]
for estimator in estimators:
    print(f"Training the {estimator}")
    estimator.fit(X_train,y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the LinearSVC(max_iter=100, random_state=42, tol=20)
Training the MLPClassifier(random_state=42)


In [19]:
[estimator.score(X_valid, y_valid) for estimator in estimators]

[0.9736, 0.9743, 0.0991, 0.9669]

In [20]:
from sklearn.ensemble import VotingClassifier

In [21]:
named_estimator = [
    ['random_forest_clf',random_forest_clf],
    ['extra_trees_clf',extra_tree_clf],
    ['svm_clf',svc_clf],
    ['mlp_clf',mlp_clf]
]

In [22]:
voting_clf = VotingClassifier(named_estimator)

In [24]:
voting_clf.fit(X_train,y_train)

0,1,2
,estimators,"[['random_forest_clf', RandomForestC...ndom_state=42)], ['extra_trees_clf', ExtraTreesCla...ndom_state=42)], ...]"
,voting,'hard'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False

0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,20
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0

0,1,2
,hidden_layer_sizes,"(100,)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,200
,shuffle,True


In [26]:
voting_clf.score(X_valid,y_valid)

0.9754

In [28]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y_valid_encoded = encoder.fit_transform(y_valid)

In [31]:
import numpy as np
y_valid_encoded = y_valid.astype(np.int64)

In [33]:
[estimator.score(X_valid,y_valid_encoded)
 for estimator in voting_clf.estimators_]

[0.9736, 0.9743, 0.0991, 0.9669]

In [34]:
voting_clf.set_params(svm_clf='drop')

0,1,2
,estimators,"[['random_forest_clf', RandomForestC...ndom_state=42)], ['extra_trees_clf', ExtraTreesCla...ndom_state=42)], ...]"
,voting,'hard'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False

0,1,2
,hidden_layer_sizes,"(100,)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,200
,shuffle,True


In [37]:
voting_clf.estimators

[['random_forest_clf', RandomForestClassifier(random_state=42)],
 ['extra_trees_clf', ExtraTreesClassifier(random_state=42)],
 ('svm_clf', 'drop'),
 ['mlp_clf', MLPClassifier(random_state=42)]]

In [38]:
voting_clf.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 LinearSVC(max_iter=100, random_state=42, tol=20),
 MLPClassifier(random_state=42)]

In [42]:
voting_clf.named_estimators_

{'random_forest_clf': RandomForestClassifier(random_state=42),
 'extra_trees_clf': ExtraTreesClassifier(random_state=42),
 'svm_clf': LinearSVC(max_iter=100, random_state=42, tol=20),
 'mlp_clf': MLPClassifier(random_state=42)}

In [43]:
svm_clf_trained = voting_clf.named_estimators_.pop('svm_clf')
voting_clf.estimators_.remove(svm_clf_trained)

In [45]:
voting_clf.score(X_valid,y_valid)

0.9768

In [46]:
voting_clf.voting = 'soft'

In [48]:
voting_clf.score(X_valid,y_valid)

0.9727

In [49]:
voting_clf.voting = 'hard'
voting_clf.score(X_test,y_test)

0.9722

In [51]:
[estimator.score(X_test,y_test.astype(np.int64))
 for estimator in voting_clf.estimators_]

[0.968, 0.9703, 0.9638]

In [56]:
X_valid_predictions = np.empty((len(X_valid),len(estimators)),dtype=object)
for index,estimator in enumerate(estimators):
    X_valid_predictions[:,index] = estimator.predict(X_valid)

In [55]:
X_valid_predictions

array([['3', '3', '0', '3'],
       ['8', '8', '0', '8'],
       ['6', '6', '0', '6'],
       ...,
       ['5', '5', '0', '5'],
       ['6', '6', '0', '6'],
       ['8', '8', '0', '8']], shape=(10000, 4), dtype=object)

In [57]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True,
                                            random_state=42)
rnd_forest_blender.fit(X_valid_predictions,y_valid)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [58]:
rnd_forest_blender.oob_score_

0.9733

In [59]:
X_test_predictions = np.empty((len(X_test),len(estimators)),dtype=object)

for index,estimator in enumerate(estimators):
    X_test_predictions[:,index] = estimator.predict(X_test)

In [60]:
X_test_predictions

array([['7', '7', '0', '7'],
       ['2', '2', '0', '2'],
       ['1', '1', '0', '1'],
       ...,
       ['4', '4', '0', '4'],
       ['5', '5', '0', '5'],
       ['6', '6', '0', '6']], shape=(10000, 4), dtype=object)

In [61]:
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [65]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.9701

In [66]:
X_train_full,y_train_full = X_mnist[:60_000],y_mnist[:60_000]

In [70]:
from sklearn.ensemble import StackingClassifier
stack_clf = StackingClassifier(named_estimator,
                               final_estimator=rnd_forest_blender)
stack_clf.fit(X_train_full,y_train_full)

0,1,2
,estimators,"[['random_forest_clf', RandomForestC...ndom_state=42)], ['extra_trees_clf', ExtraTreesCla...ndom_state=42)], ...]"
,final_estimator,RandomForestC...ndom_state=42)
,cv,
,stack_method,'auto'
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False

0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,20
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0

0,1,2
,hidden_layer_sizes,"(100,)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,200
,shuffle,True

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [71]:
stack_clf.score(X_test,y_test)

0.9793