# Voting Vs. Stacking ensembles

In [1]:
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
mnist = fetch_mldata( "MNIST original" )
XTV, testX, yTV, testY = train_test_split( mnist.data, mnist.target, test_size = 10000,
                                           random_state = 123 )
trainX, valX, trainY, valY = train_test_split( XTV, yTV, test_size = 10000,
                                              random_state = 123 )

## Voting ensemble

In [3]:
forest = RandomForestClassifier()
forest.fit( trainX, trainY )

print("Trained random forest.")

Trained random forest.


In [4]:
knn = KNeighborsClassifier()
knn.fit( trainX, trainY )

print("Trained KNN.")

Trained KNN.


In [5]:
logReg = LogisticRegression()
logReg.fit( trainX, trainY )

print("Trained logistic regression.")

Trained logistic regression.


In [6]:
#gaussian = GaussianNB()
#gaussian.fit( trainX, trainY ) This seems bad, see below

#print("Trained naive Bayes classifier")

In [7]:
mlp = MLPClassifier()
mlp.fit( trainX, trainY )

print("Trained MLP.")

Trained MLP.


In [8]:
pred1 = forest.predict( valX )
acc1 = accuracy_score( valY, pred1 )

In [9]:
pred2 = knn.predict( valX )
acc2 = accuracy_score( valY, pred2 )

In [10]:
pred3 = logReg.predict( valX )
acc3 = accuracy_score( valY, pred3 )

In [11]:
#pred3 = gaussian.predict( valX )
#acc3 = accuracy_score( valY, pred3 ) only 55%

In [12]:
pred4 = mlp.predict( valX )
acc4 = accuracy_score( valY, pred4 )

print( f"Random forest: {acc1}" + "\n"
       f"KNN: {acc2}" + "\n"
       f"Logistic regression: {acc3}" + "\n"
       f"MLP: {acc4}" )

Random forest: 0.944
KNN: 0.9695
Logistic regression: 0.915
MLP: 0.9512


In [13]:
hard = VotingClassifier( estimators = [ ("forest", forest),
                                        ("knn", knn),
                                        ("logreg", logReg),
                                        ("mlp", mlp) ],
                         voting = "hard" )
hard.fit( trainX, trainY )

print("Trained hard voter")

Trained hard voter


In [14]:
soft = VotingClassifier( estimators = [ ("forest", forest),
                                        ("knn", knn),
                                        ("logreg", logReg),
                                        ("mlp", mlp) ],
                         voting = "soft" )
soft.fit( trainX, trainY )

print("Trained soft voter")

Trained soft voter


In [15]:
predHard = hard.predict( valX )
accHard  = accuracy_score( valY, predHard )

In [16]:
predSoft = soft.predict( valX )
accSoft  = accuracy_score( valY, predSoft )

print( f"Hard vote: {accHard}\nSoft Vote: {accSoft}" )

Hard vote: 0.9667
Soft Vote: 0.9698


## Stacking ensemble

In [17]:
l2Data = np.array( [pred1, pred2, pred3, pred4] )
l2Data = l2Data.T

In [22]:
forest2 = RandomForestClassifier()
forest2.fit( l2Data, valY )

mlp2 = MLPClassifier()
mlp2.fit( l2Data, valY )

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [19]:
l2Test = np.array( [ forest.predict( testX ), knn.predict( testX ),
                     logReg.predict( testX ), mlp.predict( testX ) ] )
l2Test = l2Test.T

In [25]:
predStack = forest2.predict( l2Test )
#predStack = mlp2.predict( l2Test ) #This one is slightly worse. no tunning was done...
accStack = accuracy_score( testY, predStack )

In [26]:
print(f"Voting: {accSoft}\nStacking: {accStack}")

Voting: 0.9698
Stacking: 0.9677
