In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import pickle

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.utils import to_categorical

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the digit-recognizer training CSV and preview its first rows.
# A later cell reads the `label` column as the target and treats every other
# column as pixel features.
df = pd.read_csv("../input/digit-recognizer/train.csv")
df.head()

In [None]:
# Display (n_rows, n_columns) of the loaded training frame.
df.shape

In [None]:
# Separate the pixel columns (features) from the `label` column (target).
# `columns=` replaces the positional axis argument of `df.drop('label', 1)`:
# that positional form was deprecated in pandas 1.3 and removed in pandas 2.0,
# where it raises a TypeError.
X, y = df.drop(columns='label').values, df.label.values

# Hold out 10% of the rows for validation. Stratifying on `y` keeps the class
# proportions equal in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1, stratify=y
)

# Turn every flat pixel row into a 28x28 image.
X_train = X_train.reshape(-1, 28, 28)
X_test = X_test.reshape(-1, 28, 28)

In [None]:
# Preview the first ten training images in a 2x5 grid, without axis decorations.
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(10, 5))
axs = np.ravel(axs)

# zip stops after the ten axes, so only the first ten images are drawn.
for ax, digit_image in zip(axs, X_train):
    ax.imshow(digit_image, cmap='gray')
    ax.axis('off')

plt.tight_layout()
plt.show()

## Extending the training data with cv2 morphological closing

In [None]:
%%time

X_train = X_train.astype('uint8')
X_test = X_test.astype('uint8')


kernel = np.ones(shape=(3,3), dtype=np.uint8)

for i, image in enumerate(X_train):
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1)
    image = np.expand_dims(image, axis=0)
    X_train = np.concatenate((X_train, image), axis=0)
    y_train = np.concatenate((y_train, np.array([y_train[i]])), axis=0)
    

In [None]:
# Append a trailing channel axis to the image arrays: (n, 28, 28) -> (n, 28, 28, 1).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Show the resulting feature and label shapes.
X_train.shape, y_train.shape

### Load QMNIST Data

In [None]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object the file deserialises to. Byte strings pickled by
        Python 2 are kept as ``bytes`` (``encoding='bytes'``).

    Notes
    -----
    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file.
    Only call this on files from a source you trust.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [None]:
# Read the pickled QMNIST archive and pull out its image and label entries.
qmnist = unpickle("../input/qmnist-the-extended-mnist-dataset-120k-images/MNIST-120k")
data, labels = qmnist['data'], qmnist['labels']

# Show both shapes before merging with the competition data.
data.shape, labels.shape

In [None]:
# Add a trailing channel axis to the QMNIST images and drop any size-1 axes
# from the labels.
# NOTE(review): presumably this makes both line up with X_train / y_train for
# the concatenation in the next cell -- confirm against the printed shapes.
X_qmnist, y_qmnist = np.expand_dims(data, -1), np.squeeze(labels)

In [None]:
# Append the QMNIST samples after the (augmented) competition samples along
# the sample axis (axis 0 is np.concatenate's default).
X_train_full = np.concatenate([X_train, X_qmnist])
y_train_full = np.concatenate([y_train, y_qmnist])

In [None]:
# Scale pixel values by 1/255. Casting to float32 first avoids materialising
# float64 copies of the full image arrays (twice the memory); Keras computes
# in float32 by default, so the extra precision would be discarded anyway.
X_train_scaled = X_train_full.astype('float32') / 255.0
X_test_scaled = X_test.astype('float32') / 255.0

# One-hot encode the ten digit classes for categorical cross-entropy.
y_train = to_categorical(y_train_full, num_classes=10)

# `y_test` is overwritten in place, so encode it only while it still holds
# integer labels. Without this guard, re-running the cell would one-hot encode
# the already one-hot matrix and yield an (n, 10, 10) array.
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes=10)

In [None]:
BATCH_SIZE = 256
AUTOTUNE = tf.data.AUTOTUNE

# The training array is three sources concatenated one after another (original
# images, their morphologically closed copies, then QMNIST). A shuffle buffer
# smaller than the dataset only mixes samples that are close together, so each
# epoch would begin with batches from one source and end with another. A buffer
# covering the whole dataset gives a uniform shuffle, redrawn every epoch.
train_ds = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
train_ds = (
    train_ds
    .shuffle(buffer_size=len(X_train_scaled))
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# The held-out split is only evaluated, so it is batched in its stored order.
test_ds = (
    tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# CNN Model

In [None]:
# Two conv -> batch-norm -> max-pool stages followed by a dense classifier
# head that ends in a 10-way softmax.
cnn_model = Sequential()

# Feature extractor; the input shape is one sample's shape (batch axis dropped).
cnn_model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu',
                     input_shape=X_train_scaled.shape[1:]))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPool2D())
cnn_model.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPool2D())

# Classifier head.
cnn_model.add(Flatten())
cnn_model.add(Dense(units=512, activation='relu'))
cnn_model.add(Dense(units=256, activation='relu'))
cnn_model.add(Dense(units=10, activation='softmax'))

cnn_model.summary()

In [None]:
%%time

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=30, restore_best_weights=True)

cnn_history = cnn_model.fit(train_ds, epochs=100,
                            validation_data=test_ds,
                            callbacks=[early_stopping],
                            verbose=2)

In [None]:
# Load the digit-recognizer test CSV (the rows to predict for the submission)
# and display its (n_rows, n_columns).
test_df = pd.read_csv("../input/digit-recognizer/test.csv")
test_df.shape

In [None]:
# Turn every flat pixel row into a 28x28 single-channel image.
test_data = test_df.to_numpy().reshape(-1, 28, 28, 1)
test_data.shape

In [None]:
# Scale pixel values by 1/255, the same scaling applied to the training images.
# NOTE(review): this rebinds `test_data`, so running the cell a second time
# divides by 255 again -- re-run the reshape cell first if repeating this step.
test_data = test_data / 255.0

In [None]:
# The predicted digit is the class with the highest softmax output.
predictions = np.argmax(cnn_model.predict(test_data), axis=1)

# ImageId values are 1-based and follow the row order of the test data.
submission_df = pd.DataFrame({
    'ImageId': np.arange(len(predictions)) + 1,
    'Label': predictions,
})
submission_df.to_csv('submission.csv', index=False)