In this colab, we will implement a multiclass MNIST digit-recognition classifier using three boosting methods:
- AdaBoost
- GradientBoosting
- XGBoost

### Imports

In [2]:
# plotting utility
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml

# boosting classifiers from scikit-learn: AdaBoostClassifier and
# GradientBoostingClassifier (XGBClassifier is imported later from xgboost).
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

# model selection utilities for training and test split and cross validation
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score

# use shuffle-split cross validation for this exercise.
from sklearn.model_selection import ShuffleSplit

# make use of confusion matrix and classification report to evaluate performance
# on the test set.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report

# the model is defined through pipeline utility.
from sklearn.pipeline import Pipeline

## Loading MNIST Dataset

In [3]:
# Download the MNIST dataset (OpenML "mnist_784", version 1) as a
# (features, labels) pair.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True)

# Hold out 10% of the examples as the test set; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

print(X.shape)

(70000, 784)


In [4]:
# Convert the data frames (one 784-pixel row per image) into numpy arrays
# holding one 28x28 image per example, i.e. shape (n_samples, 28, 28).
X = X.to_numpy().reshape((-1, 28, 28))
X_train = X_train.to_numpy().reshape((-1, 28, 28))
X_test = X_test.to_numpy().reshape((-1, 28, 28))

In [5]:
# Sanity-check the split sizes: image arrays first, then the label vectors.
print(
    X_train.shape,
    X_test.shape,
    y_train.shape,
    y_test.shape,
    sep="\n",
)

(63000, 28, 28)
(7000, 28, 28)
(63000,)
(7000,)


Before using the dataset for training and evaluation, we need to flatten each image into a vector. After flattening, every training and test example is
represented by a vector of 784 features, where each feature records the intensity of one of the 28x28 pixels.

We normalize the pixel intensities by dividing them by the maximum value, i.e. 255, so that each feature value lies in the range 0 to 1.

In [6]:
# Flatten every 28x28 image into a 784-long feature vector and divide by 255
# (the maximum pixel intensity) so each feature lies in [0, 1].
X_train = X_train.reshape(len(X_train), 28 * 28) / 255
X_test = X_test.reshape(len(X_test), 28 * 28) / 255

In [7]:
# Confirm that both splits are now 2-D arrays of flattened images.
print(
    'Shape of training data after flattening',
    X_train.shape,
)
print(
    'Shape of testing data after flattening',
    X_test.shape,
)

Shape of training data after flattening (63000, 784)
Shape of testing data after flattening (7000, 784)


In [8]:
# Shuffle-split cross validation: 10 random re-splits of the training data,
# each holding out 20% for validation; seeded for reproducibility.
cv = ShuffleSplit(
    n_splits=10,
    test_size=0.2,
    random_state=42,
)

In [9]:
def train_classifiers(estimator, X_train, y_train, cv, name, n_jobs=None):
    """Fit ``estimator`` and print its cross-validated macro-F1 score.

    The estimator is first fitted on the complete training data, so callers
    can use it for prediction after this function returns. It is then scored
    with ``cross_val_score`` (``scoring="f1_macro"``) on the splits produced
    by ``cv``, and the mean and standard deviation of the per-split scores
    are printed.

    Parameters
    ----------
    estimator : estimator object
        Model (or pipeline) exposing ``fit``; it is fitted in place.
    X_train : array-like
        Training features.
    y_train : array-like
        Training labels.
    cv : cross-validation splitter
        Passed unchanged as the ``cv`` argument of ``cross_val_score``.
    name : str
        Human-readable model name used in the printed summary.
    n_jobs : int or None, default=None
        Forwarded to ``cross_val_score`` so the splits can be evaluated in
        parallel (e.g. ``n_jobs=-1``). The default keeps the original
        behaviour of not requesting any parallelism.

    Returns
    -------
    None
        The result is reported on stdout only.
    """
    estimator.fit(X_train, y_train)
    cv_train_score = cross_val_score(
        estimator, X_train, y_train, cv=cv, scoring="f1_macro", n_jobs=n_jobs
    )
    print(
        f"On an average, {name} model has f1 score of "
        f"{cv_train_score.mean():.3f} +/- {cv_train_score.std():.3f} on the training set."
    )

In [10]:
def eval(estimator, X_test, y_test):
    """Print a classification report and plot the confusion matrix.

    Predictions come from ``estimator.predict(X_test)`` and are compared
    against ``y_test``; the estimator is expected to be fitted already.

    NOTE(review): this function shadows Python's built-in ``eval`` for the
    rest of the notebook. The name is kept because later cells call it.

    Parameters
    ----------
    estimator : fitted estimator exposing ``predict``.
    X_test : array-like of test features.
    y_test : array-like of true test labels.
    """
    predictions = estimator.predict(X_test)

    print("# Classification report")
    report = classification_report(y_test, predictions)
    print(report)

    print("# Confusion matrix")
    matrix = confusion_matrix(y_test, predictions)
    ConfusionMatrixDisplay(confusion_matrix=matrix).plot()
    plt.title('Confusion matrix')
    plt.show()

# AdaBoost for MNIST multiclass classification

- We instantiate an AdaBoost classifier with default parameters and train it. The `train_classifiers` function prints the mean and standard deviation
of the cross-validated macro-F1 score of the classifier on the training set.

In [11]:
# NOTE: too much computing time required -- the recorded run of this cell
# was interrupted before it finished.
abc_pipeline = Pipeline([("classifier", AdaBoostClassifier())])
# y_train is a pandas Series; Series.ravel() is deprecated, so convert it to
# a 1-D numpy array with to_numpy() instead.
train_classifiers(abc_pipeline, X_train, y_train.to_numpy(), cv, "AdaBoostClassifier")

  train_classifiers(abc_pipeline, X_train, y_train.ravel(), cv, "AdaBoostClassifier")


KeyboardInterrupt: 

In [None]:
# Report held-out test performance of the AdaBoost pipeline.
eval(estimator=abc_pipeline, X_test=X_test, y_test=y_test)

## MNIST classification with GradientBoostingClassifier

Let's instantiate a gradient boosting classifier object with `n_estimators=10` (all other parameters at their defaults) and train it.

Observe the mean f1_score and its standard deviation obtained by the classifier over the 10 shuffle-split cross-validation iterations on the training set.

In [None]:
# Gradient boosting restricted to 10 boosting stages.
gbc_pipeline = Pipeline([("classifier", GradientBoostingClassifier(n_estimators=10))])
# y_train is a pandas Series; Series.ravel() is deprecated, so convert it to
# a 1-D numpy array with to_numpy() instead.
train_classifiers(
    gbc_pipeline, X_train, y_train.to_numpy(), cv, "GradientBoostingClassifier"
)

In [None]:
# Report held-out test performance of the gradient boosting pipeline.
eval(estimator=gbc_pipeline, X_test=X_test, y_test=y_test)

## MNIST classification with XGBoost classifier

In [13]:
from xgboost import XGBClassifier

In [21]:
# NOTE: too much computing time required -- the recorded run of this cell
# was interrupted before it finished.
xbc_pipeline = Pipeline([("classifier", XGBClassifier())])
# The labels are cast to integers before training, then converted to a 1-D
# numpy array with to_numpy() (Series.ravel() is deprecated). The summary is
# labelled with the XGBoost model's own name rather than the gradient
# boosting label copied from the previous section.
train_classifiers(
    xbc_pipeline, X_train, y_train.astype(int).to_numpy(), cv, "XGBClassifier"
)

  xbc_pipeline, X_train, y_train.astype(int).ravel(), cv, "GradientBoostingClassifier"


KeyboardInterrupt: 

In [None]:
# The XGBoost pipeline was trained on integer labels, so it predicts integers.
# Cast the true test labels the same way; otherwise the metrics would compare
# integer predictions against the original (non-integer) labels.
eval(xbc_pipeline, X_test, y_test.astype(int))