<a href="https://colab.research.google.com/github/maxxies/mnist_ensemble_learning/blob/main/MNIST_ensemble_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
import numpy as np


In [2]:
# Fetching dataset
mnist = fetch_openml('mnist_784', version=1)

# Splitting dataset into data and target_value
data = mnist['data']
target = mnist['target']
target = target.astype(np.uint8)  # Casting target to int

In [6]:
# Splitting data into training set, testing set, validation set
train_X, test_X, validation_X, train_y , test_y, validation_y = data[:50000], data[50000:60000], data[60000:], \
                                                               target[:50000], target[50000:60000], target[60000:]


In [4]:
# Training voting classifier with hard voting
rf_clf = RandomForestClassifier()
svm_clf = SVC()
extraTree_clf = ExtraTreesClassifier()
voting_clf = VotingClassifier(estimators=[('rf', rf_clf), ('svm', svm_clf), ('et', extraTree_clf)], voting='hard')


In [7]:
# Accuracy scores of the predictors on train set
print("Train set")
for clf in (extraTree_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(train_X, train_y)
    y_pred = clf.predict(test_X)
    print(clf.__class__.__name__, accuracy_score(test_y, y_pred))


Train set
ExtraTreesClassifier 0.9744
RandomForestClassifier 0.9736
SVC 0.9802
VotingClassifier 0.9774


In [8]:
# Accuracy scores of the predictors on validation set
print("Validation set")
for clf in (extraTree_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(validation_X, validation_y)
    y_pred = clf.predict(test_X)
    print(clf.__class__.__name__, accuracy_score(test_y, y_pred))

Validation set
ExtraTreesClassifier 0.9558
RandomForestClassifier 0.9528
SVC 0.964
VotingClassifier 0.9611


In [9]:
# Training voting classifier with soft voting
rf_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)
extraTree_clf = ExtraTreesClassifier()
voting_clf = VotingClassifier(estimators=[('rf', rf_clf), ('svm', svm_clf), ('et', extraTree_clf)], voting='soft')

In [10]:
# Accuracy scores of the predictors on train set
print("Train set")
for clf in (extraTree_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(train_X, train_y)
    y_pred = clf.predict(test_X)
    print(clf.__class__.__name__, accuracy_score(test_y, y_pred))

Train set
ExtraTreesClassifier 0.9753
RandomForestClassifier 0.9712
SVC 0.9802
VotingClassifier 0.9807


In [11]:
# Accuracy scores of the predictors on validation set
print("Validation set")
for clf in (extraTree_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(validation_X, validation_y)
    y_pred = clf.predict(test_X)
    print(clf.__class__.__name__, accuracy_score(test_y, y_pred))


Validation set
ExtraTreesClassifier 0.9573
RandomForestClassifier 0.9518
SVC 0.964
VotingClassifier 0.965
