In [1]:
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from tensorflow.keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np
%matplotlib inline

np.random.seed(2)

import itertools

sns.set(style='white', context='notebook', palette='deep')

## 2. Data Preparation

### 2.1 Load Data

In [2]:
# Read the digit-recognizer training and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/digit-recognizer/train.csv',
                     "../input/digit-recognizer/test.csv")
)

In [3]:
# Separate the target column from the pixel features.
y_train = train['label']
X_train = train.drop(labels=['label'], axis=1)

# The combined frame is no longer needed; drop the reference to free memory.
del train

# Class-balance check. The Series is passed explicitly as `x=` because newer
# seaborn releases treat the first positional argument as `data`, not `x`.
g = sns.countplot(x=y_train)

# Exact number of samples per digit (displayed as the cell output).
y_train.value_counts()

### 2.2 Check for null and missing values

In [4]:
# Per-column "has any missing value" flags, summarised with describe():
# a single unique value of False means no column contains a missing entry.
X_train.isna().any().describe()

In [5]:
# Same missing-value summary for the test frame.
test.isna().any().describe()

### 2.3 Normalization

In [6]:
# Divide every pixel value by 255.0 in both sets
# (presumably 8-bit intensities, giving a [0, 1] range -- TODO confirm).
X_train, test = X_train / 255.0, test / 255.0

### 2.4 Reshape

In [7]:
# Turn each flat row of pixel values into a 28x28 single-channel image:
# (n_samples, 784) -> (n_samples, 28, 28, 1). The -1 lets NumPy infer n_samples.
X_train, test = (
    frame.values.reshape(-1, 28, 28, 1)
    for frame in (X_train, test)
)

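Each grayscale image is stored as a 1D vector of 784 pixel values, which is reshaped here into a 28x28x1 array (height x width x 1 channel).
RGB images would have 3 channels and would be reshaped into 28x28x3 arrays instead.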

### 2.5 Label Encoding

In [8]:
# One-hot encode the labels into 10 columns (second argument is num_classes).
y_train = to_categorical(y_train, 10)

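Labels are the 10 digits from 0 to 9. `to_categorical` converts each label into a one-hot vector of length 10 (e.g. 2 -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]).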

### 2.6 Split training and validation set

In [9]:
# Seed passed to train_test_split so the train/validation partition is reproducible.
random_seed: int = 2

In [10]:
# Hold out 10% of the labelled data for validation; the fixed seed keeps the
# partition identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=random_seed,
)

In [11]:
# Show the first training image (channel 0 of sample 0) as a sanity check.
g = plt.imshow(X_train[0, :, :, 0])

## 3. CNN

In [12]:
# CNN: two convolutional stages, each followed by max-pooling and dropout,
# then a dense classifier head with a 10-way softmax output.
# Only the first layer declares `input_shape`; later layers infer their input
# from the previous layer, so repeating it there was redundant.
model = Sequential([
    # Stage 1: two 5x5 convolutions with 32 filters each.
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu', input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions with 64 filters each.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classifier head.
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

### 3.2 Optimizer and annealer

In [13]:
# RMSprop with explicit hyperparameters. The legacy `decay=0.0` argument is
# omitted: it was a no-op (no decay) and newer tf.keras optimizers reject it.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

In [14]:
# Multi-class setup: categorical cross-entropy loss on one-hot labels,
# tracking accuracy during training and validation.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [15]:
# Learning-rate annealer: halve the learning rate when validation accuracy has
# not improved for 3 consecutive epochs, never going below 1e-5.
# The monitored key must match the logged metric name. The model is compiled
# with metrics=['accuracy'], so the validation metric is 'val_accuracy'
# (the same key read from history.history when plotting the curves).
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

In [16]:
# Training length and mini-batch size used by both fit calls below.
epochs, batch_size = 25, 96

### 3.3 Data Augmentation

In [17]:
# Baseline run: train on the raw (non-augmented) images and keep the
# per-epoch metrics in `history` for the learning-curve plots.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    verbose=2,
)

In [20]:
# Augmentation pipeline: small random rotations, zooms and shifts.
# Flips and all normalisation/whitening options are explicitly disabled.
datagen = ImageDataGenerator(
    # geometric augmentations that are enabled
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # flips disabled
    horizontal_flip=False,
    vertical_flip=False,
    # no centering, std-normalisation or whitening
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): per the Keras docs, fit() is only needed when a featurewise or
# ZCA option is enabled; all are False here, so this call looks redundant --
# confirm before removing.
datagen.fit(X_train)

In [21]:
# Train on augmented batches produced on the fly by `datagen`.
# Model.fit accepts the iterator directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
# steps_per_epoch is set explicitly because the iterator loops forever; one
# "epoch" is roughly one pass over the training set.
# NOTE(review): `model` is the same object trained in the non-augmented cell,
# so if that cell ran first this continues from already-trained weights.
history_aug = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    epochs=epochs,
    steps_per_epoch=X_train.shape[0] // batch_size,
    validation_data=(X_val, y_val),
    callbacks=[learning_rate_reduction],
    verbose=2,
)

## 4. Evaluate the Model

### 4.1 Train and val curves

In [25]:
# Learning curves for the run without augmentation:
# loss on the top panel, accuracy on the bottom panel.
fig, ax = plt.subplots(2, 1)

# The lines are drawn through ax[0].plot, so no extra `axes=` kwarg is needed.
ax[0].plot(history.history['loss'], color='b', label='Training loss')
ax[0].plot(history.history['val_loss'], color='r', label='validation loss')
ax[0].set_title('Without augmentation')
ax[0].set_ylabel('Loss')
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['accuracy'], color='b', label='Training accuracy')
ax[1].plot(history.history['val_accuracy'], color='r', label='Validation accuracy')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
legend = ax[1].legend(loc='best', shadow=True)

In [26]:
# Learning curves for the run with augmentation:
# loss on the top panel, accuracy on the bottom panel.
fig, ax = plt.subplots(2, 1)

# The lines are drawn through ax[0].plot, so no extra `axes=` kwarg is needed.
ax[0].plot(history_aug.history['loss'], color='b', label='Training loss')
ax[0].plot(history_aug.history['val_loss'], color='r', label='validation loss')
ax[0].set_title('With augmentation')
ax[0].set_ylabel('Loss')
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history_aug.history['accuracy'], color='b', label='Training accuracy')
ax[1].plot(history_aug.history['val_accuracy'], color='r', label='Validation accuracy')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
legend = ax[1].legend(loc='best', shadow=True)

### 4.2 Confusion Matrix

In [27]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat-map on the current figure.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are true labels, columns are predicted labels.
    classes : sequence
        Tick labels, one per class, in matrix order.
    normalize : bool, default False
        If True, each row is divided by its sum so cells show the fraction of
        the true class. Rows that sum to zero are left as zeros.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat-map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the colours, the colour bar and the text
    # threshold all refer to the same values as the cell annotations.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Use white text on dark cells and black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are shortened to two decimals; raw counts are shown as-is.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else "black")

    plt.ylabel("True Label")
    plt.xlabel("Predicted Label")
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

In [29]:
# Class probabilities for every validation image.
y_pred = model.predict(X_val)

# Collapse probabilities / one-hot vectors to integer class indices.
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_val.argmax(axis=1)

# Rows are true digits, columns are predicted digits.
confusion_mtx = confusion_matrix(y_true, y_pred_classes)
plot_confusion_matrix(confusion_mtx, classes=range(10))

In [40]:
# Score the model against the ground-truth validation labels. Scoring against
# `y_pred` (the model's own output) would not measure real performance.
# evaluate() returns [loss, accuracy] given the metrics passed to compile().
accuracy_score = model.evaluate(X_val, y_val, verbose=2)

In [34]:
## Display wrong result
# Boolean mask over the validation set: True where prediction and truth differ.
errors = y_pred_classes != y_true

# Keep only the misclassified samples in each parallel array.
X_val_errors = X_val[errors]
y_true_errors = y_true[errors]
y_pred_errors = y_pred[errors]
y_pred_classes_errors = y_pred_classes[errors]

def display_errors(errors_index, img_errors, pred_errors, obs_errors, nrows=2, ncols=3):
    """Show a grid of misclassified images with their predicted and true labels.

    Parameters
    ----------
    errors_index : sequence of int
        Positions (into the three ``*_errors`` arrays) of the samples to draw,
        in display order. May hold fewer entries than the grid has panels;
        the remaining panels are left blank.
    img_errors : array-like
        Images; each selected one is reshaped to 28x28 before plotting.
    pred_errors : array-like
        Predicted label for each image.
    obs_errors : array-like
        True (observed) label for each image.
    nrows, ncols : int, default 2 and 3
        Grid dimensions.
    """
    # squeeze=False keeps `ax` two-dimensional for any grid size.
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True, squeeze=False)

    for n, panel in enumerate(ax.flat):
        if n >= len(errors_index):
            # Fewer errors than panels: hide the unused panel instead of
            # indexing past the end of errors_index.
            panel.axis('off')
            continue
        error = errors_index[n]
        panel.imshow((img_errors[error]).reshape((28, 28)))
        panel.set_title("Predicted label: {}\nTrue label: {}".format(pred_errors[error], obs_errors[error]))
            
# Probability the model gave to its (wrong) predicted class, per error.
y_pred_errors_prob = np.max(y_pred_errors, axis=1)

# Probability the model gave to the true class, per error. Paired indexing
# picks element [i, y_true_errors[i]] from each row directly, instead of
# building an N x N matrix with np.take and then reading its diagonal.
true_prob_errors = y_pred_errors[np.arange(len(y_true_errors)), y_true_errors]

# Larger gap = more confidently wrong.
delta_pred_true_errors = y_pred_errors_prob - true_prob_errors

# argsort is ascending, so the last six entries are the six largest gaps.
sorted_delta_errors = np.argsort(delta_pred_true_errors)
most_important_errors = sorted_delta_errors[-6:]

# show 6 top errors
display_errors(most_important_errors, X_val_errors,
               y_pred_classes_errors, y_true_errors)

In [35]:
# Predict class probabilities for the test images, reduce each row to the
# most likely digit, and wrap the result as a Series named 'Label'.
results = pd.Series(
    np.argmax(model.predict(test), axis=1),
    name='Label',
)

In [41]:
# Build the submission table: 1-based ImageId next to the predicted Label.
# The id range is derived from the number of predictions rather than a fixed
# 28000, so the columns stay aligned for any test-set size.
submission = pd.concat(
    [pd.Series(range(1, len(results) + 1), name='ImageId'), results],
    axis=1,
)

submission.to_csv("cnn_mnist_prdictions.csv", index=False)