In [None]:
!pip install ipython-autotime
%load_ext autotime

In [None]:
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split,RepeatedStratifiedKFold
from sklearn.metrics import classification_report
import keras
from sklearn.model_selection import GridSearchCV
from keras.datasets import mnist
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error,classification_report
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
def inspect_performance(model, train_images, train_labels, test_images, test_labels, pred):
    """Print training accuracy, test accuracy and a per-class test report.

    Parameters
    ----------
    model : fitted estimator exposing ``score(X, y)``.
    train_images, train_labels : data the model was fitted on.
    test_images, test_labels : held-out data and its ground-truth labels.
    pred : predictions of ``model`` for ``test_images``.

    Returns
    -------
    None. Everything is written to stdout.
    """
    print("Training accuracy: ", model.score(train_images, train_labels))
    print("Test accuracy: ", model.score(test_images, test_labels))
    print("Test report: ")
    # classification_report expects (y_true, y_pred); swapping them silently
    # exchanges precision and recall in the printed table.
    print(classification_report(test_labels, pred))

def plot_confusion_matrix(target, pred):
    """Plot a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    target : ground-truth labels.
    pred : predicted labels, same length as ``target``.

    The matrix is transposed before plotting so that the x axis carries the
    true label and the y axis the predicted label, matching the axis titles.
    """
    plt.figure(figsize=[9, 6])
    conf_mat = confusion_matrix(target, pred)
    # The original referenced an undefined name (`mat`) here, which raised
    # NameError on every call.
    sns.heatmap(conf_mat.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

def plot_error(y_pred, images=None, labels=None, nrows=3, ncols=3):  # reference link at the end of notebook
    """Show the most confidently wrong predictions as a grid of images.

    For every misclassified sample the "confidence gap" is computed as
    (probability of the predicted class) - (probability of the true class).
    The ``nrows * ncols`` samples with the largest gap are drawn, in
    ascending order of gap, each titled with its predicted and true label.

    Parameters
    ----------
    y_pred : array-like, shape (n_samples, n_classes)
        Class probabilities, e.g. the output of ``predict_proba``.
    images : array-like, optional
        Flattened 28x28 images (784 values per row). Defaults to the
        notebook-level ``test_X``.
    labels : array-like of int, optional
        True class indices. Defaults to the notebook-level ``test_Y``.
    nrows, ncols : int
        Grid dimensions; at most ``nrows * ncols`` images are shown.

    Returns
    -------
    None. The figure is drawn through matplotlib.
    """
    # Fall back to the notebook globals so existing `plot_error(proba)` calls keep working.
    if images is None:
        images = test_X
    if labels is None:
        labels = test_Y
    y_pred = np.asarray(y_pred)
    images = np.asarray(images)
    labels = np.asarray(labels)

    y_pred_classes = np.argmax(y_pred, axis=1)
    errors = y_pred_classes != labels

    pred_classes_err = y_pred_classes[errors]
    pred_probs_err = y_pred[errors]
    true_labels_err = labels[errors]
    images_err = images[errors]

    # Probability assigned to the (wrong) predicted class vs. the true class.
    top_prob = np.max(pred_probs_err, axis=1)
    # Row-wise pick of the true-class probability; avoids materialising the
    # (n_errors x n_errors) matrix that np.take(...) + diagonal would build.
    true_prob = pred_probs_err[np.arange(len(true_labels_err)), true_labels_err]
    gap = top_prob - true_prob

    # Never ask for more panels than there are errors (the original indexed
    # past the end when fewer than 9 samples were misclassified).
    n_show = min(nrows * ncols, len(gap))
    if n_show == 0:
        print("No misclassified samples to display.")
        return

    order = np.argsort(gap)
    # Keep exactly the n_show largest gaps. The original sliced the last 10
    # but drew only 9, so the single worst error was never displayed.
    worst = order[len(order) - n_show:]

    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True, squeeze=False)
    for slot, axis in enumerate(ax.ravel()):
        if slot >= n_show:
            axis.axis('off')  # leave unused panels blank
            continue
        idx = worst[slot]
        axis.imshow(images_err[idx].reshape((28, 28)))
        axis.set_title("Pred :{} True :{}".format(pred_classes_err[idx], true_labels_err[idx]))
    fig.tight_layout(pad=1)

In [None]:
# Load MNIST through keras.datasets; the call yields a (images, labels) pair
# for the training split and another for the test split.
(train_X,train_Y), (test_X,test_Y) = mnist.load_data()

In [None]:
# Sanity check: array shapes of images and labels for both splits.
print('Training data shape : ', train_X.shape, train_Y.shape)

print('Testing data shape : ', test_X.shape, test_Y.shape)

In [None]:
# Distinct label values present in the training labels and how many there are.
classes = np.unique(train_Y)
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

In [None]:
# Display the shape of the training label array.
train_Y.shape

In [None]:
# Class balance of the training labels. Pass the labels as `x=` explicitly:
# newer seaborn releases make every argument after `data` keyword-only, so a
# bare positional array is interpreted as `data` instead of the variable to count.
sns.countplot(x=train_Y)
plt.show()

In [None]:
# Flatten every image to a 784-long feature vector for both splits.
train_X, test_X = (split.reshape(-1, 784) for split in (train_X, test_X))

In [None]:
# Cast to float32 and divide by 255 for both splits.
# NOTE: this cell rebinds train_X / test_X, so running it twice divides twice.
train_X, test_X = (split.astype('float32') / 255. for split in (train_X, test_X))

In [None]:
# Hold out 20% of the training data as a validation set (fixed seed 13).
# NOTE: `train_X` is rebound to the reduced training split, so re-running this
# cell without re-running the cells above splits the already-reduced data again.
train_X,valid_X,train_label,valid_label = train_test_split(train_X, train_Y, test_size=0.2, random_state=13)

# AdaBoost

In [None]:
# `algorithm` is left at the library default: explicitly requesting 'SAMME.R'
# is rejected by recent scikit-learn releases, while on older releases the
# default already is 'SAMME.R'. A fixed random_state makes the fit repeatable.
adb = AdaBoostClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
# fit() returns the estimator itself, so `ada` and `adb` refer to the same fitted model.
ada = adb.fit(train_X, train_label)

In [None]:
# Predict test labels with AdaBoost, then print train/test accuracy and the test report.
y_pred_ada = ada.predict(test_X)
inspect_performance(ada,train_X,train_label, test_X, test_Y, y_pred_ada)

In [None]:
# plot_error needs per-class probabilities (it takes the argmax per row itself).
y_pred_ada_pro = ada.predict_proba(test_X)
plot_error(y_pred_ada_pro)

In [None]:
# Confusion-matrix heatmap for the AdaBoost test predictions.
plot_confusion_matrix(test_Y,y_pred_ada)

### GridSearch CV (Ada)

In [None]:
# Search space: 3 ensemble sizes x 3 learning rates = 9 candidates.
grid = {
    'n_estimators': [50, 100, 500],
    'learning_rate': [0.01, 0.1, 0.5],
}
# 10-fold stratified CV repeated 3 times -> 30 fits per candidate.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# Seed the base estimator so that the search result is reproducible between runs.
ada_cv = GridSearchCV(
    estimator=AdaBoostClassifier(random_state=42),
    param_grid=grid,
    n_jobs=-1,
    cv=cv,
    scoring='accuracy',
)

In [None]:
# Run the grid search on the first 4000 training samples only.
ada_cv.fit(train_X[:4000],train_label[:4000])

In [None]:
# Predict the test set through the fitted grid search and display test accuracy.
y_pred_ada_cv = ada_cv.predict(test_X)

accuracy_score(test_Y,y_pred_ada_cv)

In [None]:
# Parameter combination selected by the AdaBoost grid search.
ada_cv.best_params_

# Random Forest

In [None]:
# Small forest (10 trees) using all cores; a fixed random_state makes the
# bootstrap samples and feature draws, and therefore the scores, repeatable.
rfc = RandomForestClassifier(n_jobs=-1, n_estimators=10, random_state=42)
rfc.fit(train_X, train_label)

In [None]:
# Predict test labels with the random forest, then print accuracies and the test report.
y_pred_rfc = rfc.predict(test_X)
inspect_performance(rfc,train_X,train_label, test_X, test_Y, y_pred_rfc)

In [None]:
# Confusion-matrix heatmap for the random-forest test predictions.
plot_confusion_matrix(test_Y,y_pred_rfc)

In [None]:
# Class probabilities from the random forest, passed to plot_error to plot misclassified digits.
y_pred_rfc_pro = rfc.predict_proba(test_X)
plot_error(y_pred_rfc_pro)

### GridSearch CV (RF)

In [None]:
# Search space: 4 depths x 2 leaf sizes x 2 split sizes x 2 ensemble sizes = 32 candidates.
param_grid = {
    'max_depth': [80, 90, 100, 110],
    'min_samples_leaf': [1, 3],
    'min_samples_split': [2, 5],
    'n_estimators': [50, 100],
}
# Seed the base estimator so that candidate scores are comparable and the
# selected parameters are reproducible between runs.
rfc_cv = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    verbose=5,
    n_jobs=-1,
)

In [None]:
# Run the random-forest grid search on the full training split.
rfc_cv.fit(train_X,train_label)

In [None]:
# Predict the test set through the fitted grid search and display test accuracy.
y_pred_rfc_cv = rfc_cv.predict(test_X)

accuracy_score(test_Y,y_pred_rfc_cv)

In [None]:
# Parameter combination selected by the random-forest grid search.
rfc_cv.best_params_

# XGBoost

In [None]:
# Multi-class XGBoost model that outputs per-class probabilities.
# `n_jobs` / `random_state` are the scikit-learn-style parameter names (the
# grid-search cell below already uses `random_state`); they replace the
# legacy `nthread` / `seed` aliases with the same values.
xgb = XGBClassifier(
    objective='multi:softprob',
    n_jobs=4,
    random_state=42,
    num_class=10,
)
# Evaluation sets monitored during fitting: the training split, then the validation split.
eval_set = [(train_X, train_label), (valid_X, valid_label)]

In [None]:
# Current XGBoost releases take `early_stopping_rounds` and `eval_metric` as
# estimator parameters; passing them to fit() is no longer accepted there.
# Setting them on the estimator first keeps the same configuration.
xgb.set_params(early_stopping_rounds=3, eval_metric=["merror", "mlogloss"])
# Early stopping is driven by the last metric on the last eval_set entry,
# i.e. mlogloss on the validation split.
xgb.fit(train_X, train_label, eval_set=eval_set, verbose=True)

In [None]:
# Predict test labels with XGBoost, then print accuracies and the test report.
y_pred_xgb = xgb.predict(test_X)
inspect_performance(xgb,train_X,train_label, test_X, test_Y, y_pred_xgb)

In [None]:
# Confusion-matrix heatmap for the XGBoost test predictions.
plot_confusion_matrix(test_Y,y_pred_xgb)

In [None]:
# Class probabilities from XGBoost, passed to plot_error to plot misclassified digits.
y_pred_xgb_pro = xgb.predict_proba(test_X)
plot_error(y_pred_xgb_pro)

### GridSearch CV (XGB)

In [None]:
# Search space: 3 depths x 2 learning rates x 3 column-subsampling ratios
# x 3 ensemble sizes = 54 candidates, each scored with 5-fold CV on accuracy.
param_grid = [
    {
        'max_depth': [5, 10, 15],
        'learning_rate': [0.3, 0.5],
        'colsample_bytree': [0.6, 0.8, 1],
        'n_estimators': [50, 100, 500],
    }
]
xgb_cv = GridSearchCV(
    estimator=XGBClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=-1,
    cv=5,
)

In [None]:
# Run the XGBoost grid search on the first 2000 training samples only.
xgb_cv.fit(train_X[:2000],train_label[:2000])

In [None]:
# Predict the test set through the fitted grid search and display test accuracy.
y_pred_xgb_cv = xgb_cv.predict(test_X)

accuracy_score(test_Y,y_pred_xgb_cv)

In [None]:
# Parameter combination selected by the XGBoost grid search.
xgb_cv.best_params_

# CatBoost

In [None]:
# CatBoost multi-class model: 50 trees of depth 6, training log silenced.
cat = CatBoostClassifier(n_estimators=50, verbose=False, max_depth=6,loss_function='MultiClass')
# Rebinds the notebook-level `eval_set` (same content as in the XGBoost section):
# the training split followed by the validation split.
eval_set = [(train_X,train_label),(valid_X,valid_label)]
cat.fit(train_X,train_label,eval_set = eval_set)

In [None]:
# Predict test labels with CatBoost, then print accuracies and the test report.
y_pred_cat = cat.predict(test_X)
inspect_performance(cat,train_X,train_label, test_X, test_Y, y_pred_cat)

In [None]:
# Confusion-matrix heatmap for the CatBoost test predictions.
plot_confusion_matrix(test_Y,y_pred_cat)

In [None]:
# Class probabilities from CatBoost, passed to plot_error to plot misclassified digits.
y_pred_cat_pro = cat.predict_proba(test_X)
plot_error(y_pred_cat_pro)

### GridSearch CV (Cat)

In [None]:
# Search space: 3 ensemble sizes x 2 learning rates x 2 `rsm` values
# = 12 candidates, each scored with 5-fold CV on accuracy.
param_grid = [
    {
        'n_estimators': [10, 50, 100],
        'learning_rate': [0.05, 0.1],
        'rsm': [0.5, 0.8],
    }
]
cat_cv = GridSearchCV(
    estimator=CatBoostClassifier(
        random_state=42,
        silent=True,
        bootstrap_type='Bernoulli',
    ),
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=-1,
    cv=5,
)

In [None]:
# Run the CatBoost grid search on the full training split.
cat_cv.fit(train_X,train_label)

In [None]:
# Predict the test set through the fitted grid search and display test accuracy.
y_pred_cat_cv = cat_cv.predict(test_X)

accuracy_score(test_Y,y_pred_cat_cv)

In [None]:
# Parameter combination selected by the CatBoost grid search.
cat_cv.best_params_

#### Reference for the `plot_error` function
https://www.kaggle.com/jsrshivam/mnist-digit-recognition-nn