![image.png](attachment:image.png)

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from keras.datasets import mnist

from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit

from sklearn.tree import DecisionTreeClassifier

from sklearn.pipeline import Pipeline

![image-2.png](attachment:image-2.png)

In [2]:
# Load MNIST through keras' `mnist.load_data()`, which returns a
# (features, labels) pair for the training split and another for the test split.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

![image.png](attachment:image.png)

In [3]:
# Sanity-check the array shapes of both splits before any preprocessing.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

![image.png](attachment:image.png)

In [4]:
# Flatten each input image into a single feature vector (length 784 for the
# 28x28 MNIST images). Using -1 lets NumPy infer the vector length, and makes
# the reshape a no-op if this cell is re-run on already-flattened data.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizing: scale pixel intensities from [0, 255] down to [0, 1].
# The arrays are reassigned in place, so without a guard a second execution of
# this cell would divide by 255 again. Scaling only while the data still has an
# integer dtype keeps the cell idempotent.
# NOTE(review): relies on mnist.load_data() returning integer (uint8) pixels,
# as documented by Keras -- confirm if the data source ever changes.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255

In [5]:
# Confirm that every image is now a flat 784-element row.
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

![image-2.png](attachment:image-2.png)

In [6]:
# Shared cross-validation splitter: 10 random shuffles, each holding out 20%
# of the samples it is given; the fixed seed keeps the splits repeatable.
cv = ShuffleSplit(
    n_splits=10,
    test_size=0.2,
    random_state=42,
)

![image.png](attachment:image.png)

In [7]:
def train_classifiers(estimator, X_train, y_train, cv, name):
    """Fit ``estimator`` and print its cross-validated macro-F1 summary.

    Parameters
    ----------
    estimator
        Object exposing ``fit``; it is fitted in place on the full
        ``X_train`` / ``y_train`` so the caller can use it afterwards.
    X_train, y_train
        Training features and labels.
    cv
        Cross-validation strategy forwarded to ``cross_val_score``.
    name
        Label for the model, interpolated into the printed summary.

    Returns
    -------
    None
        The mean and standard deviation of the scores are only printed.
    """
    # Fit on all of the training data first; the evaluation cells later call
    # predict() on this same estimator object.
    estimator.fit(X_train, y_train)

    # Macro-averaged F1 for each split produced by `cv`.
    f1_scores = cross_val_score(
        estimator, X_train, y_train, scoring='f1_macro', cv=cv
    )
    mean_f1 = f1_scores.mean()
    std_f1 = f1_scores.std()

    summary = (
        f'On an average, {name} makes f1 score of '
        f'{mean_f1:.3f} +/- {std_f1:.3f} on the training set.'
    )
    print(summary)

![image.png](attachment:image.png)

In [8]:
# NOTE: this name shadows Python's built-in eval(); it is kept unchanged
# because the evaluation cells in this notebook call it by this name.
def eval(estimator, X_test, y_test):
    """Print a classification report and plot a confusion matrix.

    Parameters
    ----------
    estimator
        Object exposing ``predict``; expected to be fitted already.
    X_test
        Features to predict on.
    y_test
        True labels that the predictions are compared against.

    Returns
    -------
    None
        The report is printed and the confusion matrix is shown as a figure.
    """
    predictions = estimator.predict(X_test)

    print('# Classification report-------------------------------------------------')
    report = classification_report(y_test, predictions)
    print(report)

    print('# Confusion matrix------------------------------------------------------')
    matrix = confusion_matrix(y_test, predictions)
    ConfusionMatrixDisplay(confusion_matrix=matrix).plot()
    plt.title('Confusion matrix')
    plt.show()

![image-2.png](attachment:image-2.png)

In [None]:
# Baseline model: a single decision tree wrapped in a one-step pipeline.
# random_state is fixed (same seed as the `cv` splitter) so that the fitted
# tree and its scores are reproducible across Restart & Run All.
decision_tree_pipeline = Pipeline([
    ('classifier', DecisionTreeClassifier(random_state=42)),
])
# y_train is already 1-D (shape (60000,) above), so no ravel() is needed.
train_classifiers(decision_tree_pipeline, X_train, y_train, cv, '*Decision Tree*')

![image.png](attachment:image.png)

In [None]:
# Classification report and confusion matrix for the decision tree on the test split.
eval(decision_tree_pipeline, X_test, y_test)

![image.png](attachment:image.png)

In [None]:
# Bagging ensemble with scikit-learn's default settings, wrapped in a one-step
# pipeline. random_state is fixed (same seed as the `cv` splitter) so that the
# ensemble and its scores are reproducible across Restart & Run All.
bagging_pipeline = Pipeline([
    ('classifier', BaggingClassifier(random_state=42)),
])
# y_train is already 1-D (shape (60000,) above), so no ravel() is needed.
train_classifiers(bagging_pipeline, X_train, y_train, cv, '*Bagging*')

![image-2.png](attachment:image-2.png)

In [None]:
# Classification report and confusion matrix for the bagging ensemble on the test split.
eval(bagging_pipeline, X_test, y_test)

![image-2.png](attachment:image-2.png)

In [None]:
# Random forest with scikit-learn's default settings, wrapped in a one-step
# pipeline. random_state is fixed (same seed as the `cv` splitter) so that the
# forest and its scores are reproducible across Restart & Run All.
random_forest_pipeline = Pipeline([
    ('classifier', RandomForestClassifier(random_state=42)),
])
# y_train is already 1-D (shape (60000,) above), so no ravel() is needed.
train_classifiers(random_forest_pipeline, X_train, y_train, cv, '*Random Forest*')

![image-2.png](attachment:image-2.png)

In [None]:
# Classification report and confusion matrix for the random forest on the test split.
eval(random_forest_pipeline, X_test, y_test)

![image.png](attachment:image.png)