In [None]:
!pip install ipython-autotime
%load_ext autotime


In [None]:
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import keras
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LearningRateScheduler
from keras.models import Sequential,Input,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D,BatchNormalization
from tensorflow.keras.models import Sequential
from sklearn.model_selection import GridSearchCV
from keras.layers.advanced_activations import LeakyReLU
from keras.datasets import mnist
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error,classification_report
# import sklearn.metrics as metrics
from xgboost import XGBClassifier
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import pprint
from scipy import stats
from IPython.display import display

In [None]:
def inspect_performance(model, train_images, train_labels, test_images, test_labels, ypred):
    """Print training accuracy, test accuracy and a per-class classification report.

    Parameters
    ----------
    model : fitted estimator exposing ``score(X, y)``.
    train_images, train_labels : inputs and targets passed to ``model.score`` for the training figure.
    test_images, test_labels : inputs and targets passed to ``model.score`` for the test figure.
    ypred : predictions that are compared against ``test_labels`` in the report.

    Returns
    -------
    None; everything is printed.
    """
    print("Training accuracy: ", model.score(train_images,train_labels))
    print("Test accuracy: ", model.score(test_images,test_labels))
    print("Test report: ")
    # classification_report takes (y_true, y_pred). Passing them reversed swaps
    # precision with recall and counts support per predicted class.
    print(classification_report(test_labels, ypred))

def plot_confusion_matrix(labels, predictions):
    """Plot a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    labels : true class labels.
    predictions : predicted class labels, aligned with ``labels``.

    Draws on a new 9x6 figure; returns None.
    """
    plt.figure(figsize=[9,6])
    mat = confusion_matrix(labels, predictions)
    # The matrix is transposed before plotting so that the x axis carries the
    # true label and the y axis the predicted label, matching the axis titles.
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

def plot_error(y_pred_mod, images=None, labels=None): # reference link at the end of notebook
    """Display the misclassified samples the model was most confidently wrong about.

    Every misclassified sample is ranked by
    ``score of the predicted class - score of the true class``; the highest
    ranked ones are drawn in a 3x3 grid, each titled with its predicted and
    true label.

    Parameters
    ----------
    y_pred_mod : array-like, shape (n_samples, n_classes)
        Per-class scores, one row per sample.
    images : array-like, optional
        Samples aligned with ``y_pred_mod``; each is reshaped to 28x28 for
        display. Defaults to the notebook-level ``test_X``.
    labels : array-like of int, optional
        True class indices aligned with ``y_pred_mod``. Defaults to the
        notebook-level ``test_Y``.

    Returns
    -------
    None; the figure is drawn as a side effect.
    """
    nrows, ncols = 3, 3

    # Fall back to the notebook-level test split so one-argument calls keep working.
    images = np.asarray(test_X if images is None else images)
    labels = np.asarray(test_Y if labels is None else labels)
    y_pred_mod = np.asarray(y_pred_mod)

    y_pred_classes = np.argmax(y_pred_mod, axis=1)
    errors = y_pred_classes != labels
    Y_pred_classes_errors = y_pred_classes[errors]
    Y_pred_errors = y_pred_mod[errors]
    Y_true_errors = labels[errors]
    X_val_errors = images[errors]

    def display_errors(errors_index, img_errors, pred_errors, obs_errors):
        """Show up to nrows*ncols images with their predicted and real labels."""
        fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
        for n, panel in enumerate(ax.flat):
            if n >= len(errors_index):
                # Fewer errors than panels: blank the unused panels instead of
                # indexing past the end of errors_index.
                panel.axis("off")
                continue
            error = errors_index[n]
            panel.imshow((img_errors[error]).reshape((28,28)))
            panel.set_title("Pred :{} True :{}".format(pred_errors[error],obs_errors[error]))
        fig.tight_layout(pad = 1)

    # Score assigned to the (wrong) predicted class of each error.
    Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)
    # Score assigned to the true class of each error, picked row by row;
    # this avoids materialising an n_errors x n_errors intermediate matrix.
    true_prob_errors = Y_pred_errors[np.arange(len(Y_true_errors)), Y_true_errors]
    delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors
    sorted_delta_errors = np.argsort(delta_pred_true_errors)
    # Keep exactly as many errors as there are panels, so the single worst
    # error (last after the ascending sort) is not dropped from the grid.
    most_important_errors = sorted_delta_errors[-(nrows * ncols):]
    display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)

In [None]:
# Load the MNIST dataset shipped with Keras as (images, labels) pairs for the train and test splits.
(train_X,train_Y), (test_X,test_Y) = mnist.load_data()

In [None]:
# Sanity check: print the array shapes of images and labels for both splits.
print('Training data shape : ', train_X.shape, train_Y.shape)

print('Testing data shape : ', test_X.shape, test_Y.shape)

In [None]:
# Distinct label values present in the training labels, and how many there are.
classes = np.unique(train_Y)
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

In [None]:
# Side-by-side preview of the first sample of each split, titled with its label.
plt.figure(figsize=[5,5])

# Left panel: first training image.
plt.subplot(1, 2, 1)
plt.imshow(train_X[0], cmap='gray')
plt.title("Ground Truth : {}".format(train_Y[0]))

# Right panel: first test image.
plt.subplot(1, 2, 2)
plt.imshow(test_X[0], cmap='gray')
plt.title("Ground Truth : {}".format(test_Y[0]))


In [None]:
# Give every image a trailing channel axis (28x28x1), the input shape declared by the first Conv2D layer.
train_X, test_X = (images.reshape(-1, 28, 28, 1) for images in (train_X, test_X))
train_X.shape, test_X.shape

In [None]:
# Convert to float32 and divide by 255 in one step per split.
# NOTE: re-running this cell divides again, so run it once per kernel session.
train_X = train_X.astype('float32') / 255.
test_X = test_X.astype('float32') / 255.

In [None]:
# Hold out 20% of the training data as a validation set; random_state fixes the shuffle.
# Note that train_X is rebound to the remaining 80%, so re-running this cell shrinks it again.
train_X,valid_X,train_label,valid_label = train_test_split(train_X, train_Y, test_size=0.2, random_state=13)

In [None]:
# Keep references to the integer labels (used later when fitting XGBoost)
# before the names are rebound to their one-hot encodings.
train_label2, valid_label2 = train_label, valid_label
# One-hot encode the labels of all three splits for the categorical cross-entropy loss.
train_label, valid_label, test_Y_one_hot = map(
    to_categorical, (train_label, valid_label, test_Y)
)

In [None]:
# Display the shapes of the train/validation inputs and their one-hot labels.
train_X.shape,valid_X.shape,train_label.shape,valid_label.shape

In [None]:
# Training hyper-parameters read by the model.fit cell.
batch_size = 64
epochs = 10
# NOTE(review): num_classes is not referenced by the visible cells; the output layer hard-codes 10.
num_classes = 10
# NOTE(review): `inputs` is not referenced by the visible cells, and its flat shape (784,)
# differs from the (28,28,1) input_shape the model declares — confirm it can be removed.
inputs = keras.Input(shape=(784,), name="digits")

In [None]:
# CNN built in one Sequential([...]) call. Layers are listed in stacking order:
# four Conv -> BatchNorm -> MaxPool -> Dropout stages, then a dense classifier head.
model = Sequential([
    # Stage 1: 32 filters, 3x3 kernels; this layer declares the 28x28x1 input.
    Conv2D(32, (3, 3), activation='relu', input_shape=(28,28,1), padding='same'),
    BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters, 3x3 kernels.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3: 128 filters, 5x5 kernels.
    Conv2D(128, kernel_size=5, activation='relu', padding='same'),
    BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 4: 256 filters, 5x5 kernels.
    Conv2D(256, kernel_size=5, activation='relu', padding='same'),
    BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),

    # Classifier head. 'my_dense' is looked up by name later to extract features.
    Dense(256, activation='relu', name='my_dense'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [None]:
# Build a second model that shares the CNN's input but stops at the named dense layer,
# so its predictions are that layer's activations (used later as features for XGBoost).
layer_name='my_dense'
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)

intermediate_layer_model.summary()

In [None]:
# Stop training once validation loss has failed to improve for 3 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)

# Categorical cross-entropy matches the one-hot labels; accuracy is tracked as the metric.
model.compile(
    optimizer="adam",
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train the CNN; the returned History object is kept for the learning-curve plots.
model_train = model.fit(
    train_X,
    train_label,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(valid_X, valid_label),
    callbacks=[es],
)

In [None]:
# Evaluate the trained CNN on the test images against their one-hot labels (no progress output).
test_eval = model.evaluate(test_X, test_Y_one_hot, verbose=0)

In [None]:
# test_eval holds the loss first, followed by the compiled 'accuracy' metric.
print('Test loss:', test_eval[0])
print('Test accuracy:', test_eval[1])

In [None]:
# Per-epoch metrics recorded by model.fit.
accuracy = model_train.history['accuracy']
val_accuracy = model_train.history['val_accuracy']
loss = model_train.history['loss']
val_loss = model_train.history['val_loss']

# The x axis gets its own name: rebinding `epochs` here would overwrite the integer
# epoch count that the model.fit cell reads, breaking any re-run of training.
epoch_range = range(len(accuracy))

# Accuracy curves.
plt.plot(epoch_range, accuracy, 'bo', label='Training accuracy')
plt.plot(epoch_range, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

# Loss curves on a separate figure.
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
# Activations of the 'my_dense' layer for the training images, wrapped in a DataFrame.
intermediate_output = pd.DataFrame(data=intermediate_layer_model.predict(train_X))

In [None]:
# Activations of the 'my_dense' layer for the validation images, wrapped in a DataFrame.
intermediate_valid_output = pd.DataFrame(data=intermediate_layer_model.predict(valid_X))

In [None]:
# Activations of the 'my_dense' layer for the test images, wrapped in a DataFrame.
intermediate_test_output = pd.DataFrame(data=intermediate_layer_model.predict(test_X))

In [None]:
# XGBoost classifier that will be trained on the CNN's intermediate features.
# NOTE(review): `nthread` and `seed` look like legacy spellings of n_jobs / random_state —
# confirm against the installed xgboost version.
xgb_cnn = XGBClassifier(
    objective= 'multi:softprob',
    nthread=4,
    seed=42,
)

In [None]:
# Validation features paired with their integer labels, monitored while fitting XGBoost.
eval_set = [(intermediate_valid_output,valid_label2)]

In [None]:
# Fit on the CNN features with integer labels, evaluating mlogloss on eval_set
# with early_stopping_rounds=3.
# NOTE(review): newer xgboost releases expect early_stopping_rounds / eval_metric on the
# estimator constructor rather than on fit() — confirm against the installed version.
xgb_cnn.fit(intermediate_output,train_label2,early_stopping_rounds=3, eval_metric="mlogloss", eval_set=eval_set, verbose=True)

In [None]:
# Score the fitted XGBoost model on the test-set CNN features.
xgb_cnn.score(intermediate_test_output,test_Y)

In [None]:
# Class predictions for the test-set features, kept for the report and confusion matrix below.
y_pred_xgb_cnn = xgb_cnn.predict(intermediate_test_output)

accuracy_score(test_Y,y_pred_xgb_cnn)

In [None]:
# Print train/test scores and the classification report for the CNN-feature XGBoost model.
inspect_performance(xgb_cnn, intermediate_output,train_label2, intermediate_test_output, test_Y, y_pred_xgb_cnn)

In [None]:
# Heatmap of true test labels versus XGBoost predictions.
plot_confusion_matrix(test_Y,y_pred_xgb_cnn)

In [None]:
# Per-class probabilities for the test-set features; plot_error takes these as input.
y_pred_xgb_cnn_pro = xgb_cnn.predict_proba(intermediate_test_output)

In [None]:
# Show a grid of misclassified test images ranked by plot_error.
plot_error(y_pred_xgb_cnn_pro)

In [None]:
# Hyper-parameter grid: 3 values for each of 4 parameters = 81 combinations,
# each cross-validated with cv=5 (405 fits) and scored by accuracy.
param_grid=[{'max_depth':[5,10,15],
           'learning_rate':[0.05,0.3,0.5],
           'colsample_bytree':[0.6,0.8,1],
            'n_estimators':[50,100,500]}]
xgb_cv = GridSearchCV(XGBClassifier(random_state=42), param_grid, scoring='accuracy', n_jobs=-1, cv=5)

In [None]:
# Run the grid search on only the first 2000 rows of CNN features and their integer labels.
xgb_cv.fit(intermediate_output[:2000],train_label2[:2000])

In [None]:
# The grid-searched model was fitted on the CNN's intermediate features, so it must be
# given the same representation of the test set rather than the raw 28x28x1 images.
y_pred_xgb_cv = xgb_cv.predict(intermediate_test_output)

accuracy_score(test_Y,y_pred_xgb_cv)

In [None]:
# Parameter combination selected by the grid search.
xgb_cv.best_params_

#### Reference for the `plot_error` function
https://www.kaggle.com/jsrshivam/mnist-digit-recognition-nn