In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import 
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Dropout,Conv2D,MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, Callback
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics

import random
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Report which GPUs TensorFlow can see and whether this build was compiled with CUDA.
visible_gpus = tf.config.list_physical_devices('GPU')
built_with_cuda = tf.test.is_built_with_cuda()
print(visible_gpus, '//', built_with_cuda)

In [None]:
# Load the labelled training CSV and the unlabelled competition test CSV.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
    )
)

# Preview the first rows of the training frame.
train.head()

In [None]:
# Per-column summary statistics of the training frame.
train.describe()

In [None]:
# Report the total number of missing cells in the train and test sets.
# isnull().sum() gives the NaN count per column and the second sum() totals
# them; .any().sum() would only count how many columns contain a NaN.
print('Nº of missing values in train set: ', train.isnull().sum().sum())
print()
print('Nº of missing values in test set: ', test.isnull().sum().sum())

In [None]:
# Features: every column except 'label', divided by 255 and reshaped to
# (n_samples, 28, 28, 1) so each row becomes a single-channel 28x28 image.
pixel_frame = train.drop('label', axis=1)
X = (np.array(pixel_frame) / 255.).reshape((-1, 28, 28, 1))

# Targets: the 'label' column as a NumPy array.
y = np.array(train['label'])

In [None]:
# Hold out 20% of the labelled data for evaluation.
# random_state pins the shuffle so the split (and every metric computed on it)
# is the same on each Restart & Run All; 7 matches the seed value this notebook
# uses for its other random number generators.
# stratify=y keeps the per-digit class proportions equal in both parts.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=7, stratify=y
)

print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

In [None]:
# Pick one training sample and drop the channel axis so imshow gets a 2-D array.
sample = 5
image = train_X[sample].reshape(28,28)

# Plot the sample.
# plt.figure must be *called*: without the parentheses `fig` would be bound to
# the function object and no new figure would be created.
fig = plt.figure()
plt.imshow(image, cmap='gray')
plt.show()

In [None]:
# Grid layout for the preview; num must equal num_row * num_col,
# otherwise some cells of the grid stay blank.
num = 10
num_row = 2
num_col = 5

# Draw the first `num` training images, each titled with its label.
fig, axes = plt.subplots(num_row, num_col, figsize=(1.5*num_col,2*num_row))
for i in range(num):
    row, col = divmod(i, num_col)  # row-major position of image i in the grid
    ax = axes[row, col]
    ax.imshow(train_X[i].reshape(28,28), cmap='gray')
    ax.set_title('Label: {}'.format(train_y[i]))
plt.tight_layout()
plt.show()

# Data augmentation with ImageDataGenerator

In [None]:
# Model / training hyper-parameters.
batch_size = 128
epochs = 110                      # upper bound; callbacks may stop training earlier
epochs_to_wait_to_improve = 10    # patience handed to EarlyStopping
num_classes = max(pd.unique(train['label'])) +1 # 10 classes

# Fix random seeds for reproducibility.
# Seeding only Python's `random` is not enough: NumPy and TensorFlow keep their
# own generators, so each of them is seeded as well.
seed = 7
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Augmentation settings for the image generator: small random rotations,
# shifts, shear and zoom, with flips explicitly disabled.
augmentation_settings = dict(
    rotation_range=12,
    width_shift_range=0.11,
    height_shift_range=0.11,
    shear_range=0.15,
    zoom_range=0.09,
    validation_split=0.3,
    horizontal_flip=False,
    vertical_flip=False,
)

# Build the data generator from the settings above.
datagen = ImageDataGenerator(**augmentation_settings)

In [None]:
# Training generator: augmented batches drawn from the whole training split.
# The data was already split with train_test_split, so no `subset` is passed;
# asking for subset='training' would silently drop the validation_split
# fraction of train_X.
train_generator = datagen.flow(train_X,
                               train_y,
                               batch_size=batch_size,
                               shuffle=True)

# Validation generator: the whole held-out split, served without augmentation
# and in a fixed order so validation metrics are comparable across epochs.
# A plain ImageDataGenerator() applies no random transforms; using
# subset='validation' on `datagen` would keep only the validation_split
# fraction of test_X and augment it.
val_generator = ImageDataGenerator().flow(test_X,
                                          test_y,
                                          batch_size=batch_size,
                                          shuffle=False)

# Prepare and train the model

In [None]:
# Define the CNN model: a convolutional feature extractor followed by a
# fully-connected classifier head. Layers are added in the order listed.

model = Sequential()  # start from an empty sequential model

# Convolution blocks with BatchNormalization and MaxPooling2D;
# Dropout is used to limit overfitting.
conv_layers = [
    Conv2D(32, kernel_size=(3, 3), input_shape=(28, 28, 1), padding='Same', activation='relu'),
    Conv2D(64, kernel_size=(3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=1, padding='valid'),
    Dropout(0.25),
    Conv2D(64, kernel_size=(3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=1, padding='valid'),
    Dropout(0.2),
]

# Flatten the feature maps into a 1-D vector, then classify with dense layers
# ending in a softmax over num_classes outputs.
dense_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.20),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.35),
    Dense(num_classes, activation='softmax'),
]

for layer in conv_layers + dense_layers:
    model.add(layer)


In [None]:
# Custom callback that stops training as soon as the training accuracy
# exceeds 99.9%.

class myCallback(Callback):
    """Stop training once the epoch's training accuracy exceeds 99.9%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the 'accuracy' metric reported for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Dict of metric values for the epoch; may be None or may
                not contain 'accuracy', in which case nothing happens.
        """
        # `logs=None` avoids a shared mutable default argument, and the
        # explicit None check avoids comparing None with a float when the
        # 'accuracy' metric is absent.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.999:
            print("\nReached 99.9% accuracy so cancelling training!")
            self.model.stop_training = True

# Instantiate callback
mycallback = myCallback()


# Stop when val_loss has not improved for `epochs_to_wait_to_improve` epochs
# and roll the model back to the best weights seen.
early_stopping_callback = EarlyStopping(monitor='val_loss',
                                        patience=epochs_to_wait_to_improve,
                                        verbose = 2,
                                        restore_best_weights=True)

# Define the optimizer.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = Adam(learning_rate=0.001, beta_1=0.9)

# Compile the model. Labels are integer class ids (not one-hot), hence the
# sparse categorical cross-entropy loss.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Train on the augmented generator and validate on the validation generator.
# Both callbacks can end training before `epochs` is reached.
training_callbacks = [mycallback, early_stopping_callback]
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=training_callbacks,
)

# Visualize training results



In [None]:
# Plot the per-epoch training and validation curves recorded by model.fit.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs_range = range(len(acc))

# One panel per metric:
# (train curve, train label, val curve, val label, legend position, title)
panels = (
    (acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, (train_curve, train_label, val_curve, val_label, legend_loc, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

# Evaluate model

Compute the accuracy and loss on the held-out test split, then plot the confusion matrix of ground-truth labels against predicted labels.

In [None]:
# Evaluate the model on the held-out split produced by train_test_split.
# verbose=0 runs silently; the documented choices are 0, 1, 2 or "auto".
test_loss, test_acc = model.evaluate(test_X, test_y, verbose=0)

# Report both metrics returned by evaluate().
print('\nTest accuracy:', test_acc)
print('Test loss:', test_loss)


In [None]:
# Confusion matrix on the held-out split (test_X / test_y).
class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

Y_pred = model.predict(test_X)
val_preds = np.argmax(Y_pred, axis=1)  # index of the highest-scoring class per sample

val_trues = test_y
# Pin the label set so the matrix always has one row/column per entry of
# class_names, even if some class never occurs in this split; otherwise the
# fixed tick labels below would not line up with the matrix.
cm = metrics.confusion_matrix(val_trues, val_preds,
                              labels=list(range(len(class_names))))

# Plot the confusion matrix as an annotated heatmap.
fig = plt.figure(figsize=(16, 14))
ax = plt.subplot()
sns.heatmap(cm, annot=True, ax=ax, fmt='g');  # annot=True writes the count in each cell

# Labels, title and ticks.
ax.set_xlabel('Predicted', fontsize=20)
ax.xaxis.set_label_position('bottom')
plt.xticks(rotation=90)
ax.xaxis.set_ticklabels(class_names, fontsize=10)
ax.xaxis.tick_bottom()

ax.set_ylabel('True', fontsize=20)
ax.yaxis.set_ticklabels(class_names, fontsize=10)
plt.yticks(rotation=0)

plt.title('Confusion Matrix', fontsize=20)

plt.show()

In [None]:
# classification_report returns one multi-line string; print it so the table
# is rendered with real line breaks instead of an escaped string repr.
print(metrics.classification_report(val_trues, val_preds))

In [None]:
# Show the first 4 held-out samples, each titled with its predicted digit.
test_X_reshaped = test_X
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
samples_with_predictions = zip(test_X_reshaped, val_preds)
for ax, (image, prediction) in zip(axes, samples_with_predictions):
    # Drop the trailing channel axis so imshow receives a 2-D array.
    if image.ndim == 3:
        image = image.reshape(28,28)
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    ax.set_title(f'Prediction: {prediction}')

# Prediction submission

In [None]:
# Preprocess the competition test set the same way as the training features:
# divide by 255 and reshape to (n_samples, 28, 28, 1).
test_pred = np.array(test/255.)
test_pred = test_pred.reshape((-1,28,28,1))

# Sequential.predict_classes no longer exists in current TensorFlow/Keras;
# take the argmax over the softmax outputs to get the predicted class ids.
test_predictions = np.argmax(model.predict(test_pred), axis=1)

In [None]:
# Build the submission for test.csv: ImageId is a 1-based row number and
# Label is the predicted digit.
sub_df = {'ImageId':list(range(1, len(test_predictions) + 1)),'Label': test_predictions}
submission = pd.DataFrame(sub_df).astype('int')

submission.to_csv('submission.csv', index=False)

# Preview the submission; as the cell's last expression it is actually
# displayed (a mid-cell .head() has its result discarded).
submission.head()