# Import Necessary Libraries

In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# read MNIST dataset

In [2]:
data = pd.read_csv('mnist.csv')

In [3]:
df_x = data.iloc[:,1:]
df_y = data.iloc[:,0]

In [4]:
data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=4)

# Decision Tree Classifier


In [6]:
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)

DecisionTreeClassifier()

In [7]:
dt.score(x_test,y_test)

0.8661666666666666

# Random Forest 

In [8]:
rf = RandomForestClassifier(n_estimators=20)
rf.fit(x_train,y_train)

RandomForestClassifier(n_estimators=20)

In [9]:
rf.score(x_test,y_test)

0.9565

# Bagging 

In [10]:
bg = BaggingClassifier(DecisionTreeClassifier(), max_samples= 0.5, max_features = 1.0, n_estimators = 20)
bg.fit(x_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=20)

In [11]:
bg.score(x_test,y_test)

0.9435

# Boosting - Ada Boost

In [12]:
adb = AdaBoostClassifier(DecisionTreeClassifier(),n_estimators = 5, learning_rate = 1)
adb.fit(x_train,y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=1,
                   n_estimators=5)

In [13]:
adb.score(x_test,y_test)

0.8653333333333333

# SVM


In [14]:
svm = SVC(kernel='poly', degree=2)
svm.fit(x_train,y_train)

SVC(degree=2, kernel='poly')

In [15]:
svm.score(x_test, y_test)

0.9751666666666666

# KNN


In [16]:
knn = KNeighborsClassifier(n_neighbors = 6, p = 2, metric='minkowski')
knn.fit(x_train,y_train)

KNeighborsClassifier(n_neighbors=6)

In [17]:
knn.score(x_test, y_test)

0.9703333333333334

# kmeans

In [18]:
kmeans = KMeans(n_clusters=5)
kmeans.fit(df_x)

KMeans(n_clusters=5)

In [19]:
y_pred = kmeans.predict(df_x)

In [20]:
import sklearn.metrics as sm
sm.accuracy_score(y_pred, df_y)

0.18806666666666666

# MLP

In [21]:
clf = MLPClassifier()

In [22]:
clf.fit(x_train, y_train)

MLPClassifier()

In [23]:
clf.score(x_test, y_test)

0.9633333333333334

# Majority Voting

In [24]:
final_model = VotingClassifier(estimators=[('knn', knn),('dt', dt), ('svm', svm),('clf',clf)], voting='hard')

In [25]:
final_model.fit(x_train, y_train)

VotingClassifier(estimators=[('knn', KNeighborsClassifier(n_neighbors=6)),
                             ('dt', DecisionTreeClassifier()),
                             ('svm', SVC(degree=2, kernel='poly')),
                             ('clf', MLPClassifier())])

In [26]:
y_pred = final_model.predict(x_test)

In [27]:
acc = accuracy_score(y_test, y_pred)
print("EL Model Accuracy:", acc)

EL Model Accuracy: 0.9756666666666667


In [28]:
cm = confusion_matrix(y_test, y_pred)
print("EL Model Confusion Matrix:\n", cm)

EL Model Confusion Matrix:
 [[1192    0    1    0    2    1    1    0    0    0]
 [   0 1331    1    2    1    0    0    7    0    0]
 [   9    3 1214    1    3    0    1    9    2    3]
 [   0    1    9 1198    0    5    0    5    5    2]
 [   0    3    3    0 1106    0    5    4    2    3]
 [   1    2    1   17    0 1052    8    0    6    1]
 [   7    0    0    0    1    6 1155    0    3    0]
 [   2    9    8    1    5    0    0 1249    2    4]
 [   5   10    6   10    3    7    6    0 1115    6]
 [   2    7    1    8   13    1    1   22    6 1096]]


In [29]:
cr = classification_report(y_test, y_pred)
print("EL Model Classification Report:\n", cr)

EL Model Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1197
           1       0.97      0.99      0.98      1342
           2       0.98      0.98      0.98      1245
           3       0.97      0.98      0.97      1225
           4       0.98      0.98      0.98      1126
           5       0.98      0.97      0.97      1088
           6       0.98      0.99      0.98      1172
           7       0.96      0.98      0.97      1280
           8       0.98      0.95      0.97      1168
           9       0.98      0.95      0.96      1157

    accuracy                           0.98     12000
   macro avg       0.98      0.98      0.98     12000
weighted avg       0.98      0.98      0.98     12000

