In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from itertools import cycle


In [None]:
# Labelled training set: one row per image, read from the Kaggle input folder.
train_csv = r'../input/digit-recognizer/train.csv'
TrainData = pd.read_csv(train_csv)
TrainData


In [None]:
# Competition test set, read from the same Kaggle input folder.
test_csv = r'../input/digit-recognizer/test.csv'
TestData = pd.read_csv(test_csv)
TestData


In [None]:
# True if any cell of the training frame is NaN.
np.isnan(TrainData.to_numpy()).any()

In [None]:
# True if any cell of the test frame is NaN.
np.isnan(TestData.to_numpy()).any()

In [None]:
# Split the target column off the pixel columns. `pop` mutates TrainData in
# place, so guard it: re-running this cell after the column has already been
# removed must not raise KeyError (Label then keeps its previous value).
if 'label' in TrainData.columns:
    Label = TrainData.pop('label')

# Class balance of the ten digits.
sns.countplot(x=Label)


In [None]:
# Show 25 random training digits in a 5x5 grid, each titled with its label.
samples = TrainData.sample(25)
sample_images = samples.to_numpy().reshape(-1, 28, 28)
sample_labels = Label.loc[samples.index].to_numpy()

# Only one figure is created; a preceding bare plt.figure() call would be
# rendered as an extra, empty figure.
f, ax = plt.subplots(5, 5, figsize=(10, 10))
for panel, image, digit in zip(ax.flat, sample_images, sample_labels):
    panel.imshow(image)
    panel.axis('off')
    panel.set_title(digit, fontsize=12)


In [None]:
def _to_image_tensor(pixels):
    """Reshape flat pixel rows to (-1, 28, 28, 1) and scale them by 1/255."""
    return tf.reshape(tf.constant(pixels), (-1, 28, 28, 1))/255


# Hold out 20% for validation. A fixed seed makes the split (and therefore the
# validation metrics) reproducible across runs; stratifying keeps the digit
# proportions the same in both parts.
DS1, DS2, L1, L2 = train_test_split(
    TrainData, Label, test_size=0.2, random_state=42, stratify=Label)

DS1 = _to_image_tensor(DS1)
DS2 = _to_image_tensor(DS2)
L1 = tf.one_hot(L1, 10)
L2 = tf.one_hot(L2, 10)

# Full training set in the same format, used later for the final fit.
DS = _to_image_tensor(TrainData)
L = tf.one_hot(Label, 10)

In [None]:
class MyCallBack(tf.keras.callbacks.Callback):
    """Stop training once a monitored metric rises above a threshold.

    Args:
        threshold: Value the metric must exceed to stop training.
        monitor: Key looked up in the per-epoch logs dict.
    """

    def __init__(self, threshold=0.9995, monitor='accuracy'):
        super().__init__()
        self.threshold = threshold
        self.monitor = monitor

    def on_epoch_end(self, epoch, log=None):
        """Set `model.stop_training` when the monitored metric beats the threshold.

        A missing logs dict or a missing metric key is ignored rather than
        raising, so the callback never aborts training with an exception.
        """
        value = (log or {}).get(self.monitor)
        if value is not None and value > self.threshold:
            self.model.stop_training = True


# Multiply the learning rate by 0.2 when the training accuracy plateaus
# (patience of 3 epochs).
ReduceLR = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='accuracy',
    factor=0.2,
    patience=3,
)

# Early-stop hook based on training accuracy.
Stop = MyCallBack()



In [None]:

# Augmentation settings: random rotation, zoom, and horizontal/vertical shifts.
augmentation_settings = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
ImageModifier = ImageDataGenerator(**augmentation_settings)


# Two conv blocks (Conv -> LeakyReLU -> BatchNorm -> MaxPool) followed by a
# dense head with a 10-way softmax. Layers followed by BatchNormalization are
# built with use_bias=False.
Model = Sequential([
    InputLayer((28, 28, 1)),
    Dropout(0.1),

    Conv2D(128, 3, use_bias=False),
    LeakyReLU(0.05),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(64, 3, use_bias=False),
    LeakyReLU(0.05),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, use_bias=False),
    LeakyReLU(0.05),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])

# `metrics` is documented as a list; pass one instead of a bare string so the
# call does not depend on a Keras version that tolerates the string form.
Model.compile(optimizer='adam',
              loss='categorical_crossentropy', metrics=['accuracy'])

Model.summary()


In [None]:
# Train on augmented batches of the training split and score every epoch on
# the un-augmented held-out split. verbose=0 silences the progress output.
Epochs = 50
History = Model.fit(
    ImageModifier.flow(DS1, L1),
    validation_data=(DS2, L2),
    epochs=Epochs,
    callbacks=[ReduceLR, Stop],
    verbose=0,
)

In [None]:
# Per-epoch curves recorded by the fit call.
Acc, ValAcc = (History.history[key] for key in ('accuracy', 'val_accuracy'))
Loss, ValLoss = (History.history[key] for key in ('loss', 'val_loss'))

# Training vs validation accuracy over the epochs.
fig = px.line(
    x=History.epoch,
    y=[Acc, ValAcc],
    title='Accuracy',
    labels={"x": "Epoch", "value": "Accuracy"},
)
fig.update_layout(plot_bgcolor="pink")

# Rename the traces in order so the legend shows the metric names.
names = cycle(['accuracy', 'val_accuracy'])
for trace in fig.data:
    trace.update(name=next(names))
fig


In [None]:
# Training vs validation loss over the epochs.
fig = px.line(
    x=History.epoch,
    y=[Loss, ValLoss],
    title='Loss',
    labels={"x": "Epoch", "value": "Loss"},
)
fig.update_layout(plot_bgcolor="pink")

# Rename the traces in order so the legend shows the metric names.
names = cycle(['loss', 'val_loss'])
for trace in fig.data:
    trace.update(name=next(names))
fig


In [None]:
# Class probabilities for the held-out split.
L2Predict = Model.predict(DS2)

# Boolean mask: True where the predicted digit differs from the true digit.
error = np.argmax(L2Predict, axis=1) != np.argmax(L2, axis=1)


In [None]:
# Rows are the true digits, columns the predicted digits. num_classes pins the
# matrix to 10x10 even if some digit never appears among labels/predictions.
ConfusionMatrix = tf.math.confusion_matrix(
    np.argmax(L2, axis=1), np.argmax(L2Predict, axis=1), num_classes=10)

# Label the axes so the orientation of the matrix is unambiguous.
heatmap_ax = sns.heatmap(ConfusionMatrix, annot=True, fmt='g')
heatmap_ax.set(xlabel='Predicted digit', ylabel='True digit',
               title='Validation confusion matrix')


In [None]:

# Held-out images the model got wrong, with their predicted and true digits.
samples = DS2[error]
wrong_predicted = np.argmax(L2Predict, axis=1)[error]
wrong_actual = np.argmax(L2, axis=1)[error]

# Always lay out five panels but draw only as many as there are mistakes, so
# the cell does not fail when the model makes fewer than five errors.
f, ax = plt.subplots(1, 5, figsize=(10, 10))
for i, panel in enumerate(ax):
    panel.axis('off')
    if i >= len(wrong_predicted):
        continue
    panel.imshow(np.array(samples[i]).reshape(28, 28))
    panel.set_title(str(wrong_predicted[i]) +
                    ' in fact '+str(wrong_actual[i]), fontsize=12)


In [None]:
# Continue training the same model on the complete labelled set (augmented).
# No validation data is passed, so the History assigned here carries no
# val_* entries and replaces the one from the earlier fit.
full_data_flow = ImageModifier.flow(DS, L)
History = Model.fit(
    full_data_flow,
    epochs=Epochs,
    callbacks=[ReduceLR, Stop],
    verbose=0,
)

# Persist the trained model next to the notebook.
Model.save(r'./model.h5')


In [None]:
# Build the model input under a new name instead of overwriting TestData:
# reassigning TestData to its scaled tensor would make a re-run of this cell
# divide by 255 a second time and silently corrupt the predictions.
TestImages = tf.reshape(tf.constant(TestData), (-1, 28, 28, 1))/255

# Predicted digit = index of the highest class probability.
result = np.argmax(Model.predict(TestImages), axis=1)
result


In [None]:
# Start from the sample submission file and replace its Label column.
Submission = pd.read_csv(r'../input/digit-recognizer/sample_submission.csv')

assert len(Submission) == len(result), "prediction count does not match submission rows"

# Item assignment always targets a column; attribute-style assignment would
# only set a plain attribute if the column did not already exist.
Submission['Label'] = result

Submission.to_csv(r'./sample_submission.csv', index=False)
