In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory lazily and print the full path of every file found
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# import libraries

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the competition's training CSV into a DataFrame
train_csv_path = '../input/digit-recognizer/train.csv'
data = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the DataFrame (head() shows 5 by default) to sanity-check the load

data.head()

In [None]:
# View data dimensions as (rows, columns); the 'label' column is still part of the frame here

data.shape

In [None]:
# Remove the 'label' column from the frame (pop mutates `data` in place)
# and keep its values as a NumPy array
label_series = data.pop('label')
labels = label_series.to_numpy()

In [None]:
# Display the NumPy array of labels popped from the 'label' column
labels

In [None]:
# Check the dimensions again: pop() removed the 'label' column in place,
# so the frame now has one column fewer than before

data.shape

In [None]:
# Turn every flat row of pixel values into a 28x28 image: result is (n_samples, 28, 28)
pixel_matrix = data.to_numpy()
data = pixel_matrix.reshape((-1, 28, 28))

In [None]:
# Draw the first 25 images in a 5x5 grid, ticks hidden, with each label under its image
fig = plt.figure(figsize=(10, 10))

for idx in range(25):
    ax = fig.add_subplot(5, 5, idx + 1)
    ax.imshow(data[idx])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel(labels[idx])

In [None]:
# Append a trailing axis of length 1 to both arrays
# (the images gain a channel axis; the labels become a single column)
data = data[..., np.newaxis]
labels = labels[..., np.newaxis]

In [None]:
# Confirm that np.expand_dims added one more axis (of size 1) at the end

data.shape

In [None]:
# The labels gained a trailing axis of size 1 as well
labels.shape

In [None]:
# Zero-pad 2 pixels on each side of both spatial axes;
# the first (sample) and last (channel) axes are left untouched
paddings = [[0, 0], [2, 2], [2, 2], [0, 0]]
data = tf.pad(data, paddings)

In [None]:
# You can see the shape change: each padded axis grew by 4 (2 on each side), i.e. 28 -> 32

data.shape

In [None]:
# Split the samples roughly 80% / 10% / 10% into train / validation / test.
# tf.split requires the sizes to add up to the full length of axis 0, so the
# training split absorbs any remainder instead of assuming the sample count
# is an exact multiple of 10 (for such counts this equals num*8 as before).
total = data.shape[0]
assert labels.shape[0] == total, 'data and labels must contain the same number of samples'

num = total // 10
split_sizes = [total - 2 * num, num, num]

train_data, val_data, test_data = tf.split(data, split_sizes)
train_label, val_label, test_label = tf.split(labels, split_sizes)

In [None]:
# Report the image and label shapes of every split, one line per split
split_report = (
    ('train:', train_data, train_label),
    ('validition:', val_data, val_label),
    ('test:', test_data, test_label),
)
for tag, split_images, split_labels in split_report:
    print(tag, split_images.shape, '-- label:', split_labels.shape)

In [None]:
# Show the first 25 training images with their labels to confirm that
# images and labels still line up after padding and splitting

fig = plt.figure(figsize=(10, 10))

for idx in range(25):
    ax = fig.add_subplot(5, 5, idx + 1)
    ax.imshow(train_data[idx])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel(train_label[idx][0].numpy())

In [None]:
# Build the tf.data input pipelines, all batched with the same batch size.
batch_size = 128

# Only the training set is shuffled. The buffer covers the whole training split
# (derived from the data rather than hard-coded) so the shuffle is a full one
# even if the split size changes.
train_ds = (
    tf.data.Dataset.from_tensor_slices((train_data, train_label))
    .shuffle(train_data.shape[0], seed=42)
    .batch(batch_size)
)

# Validation and test sets are only used for evaluation, where sample order
# does not affect the aggregated loss/accuracy, so they are not shuffled.
val_ds = tf.data.Dataset.from_tensor_slices((val_data, val_label)).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((test_data, test_label)).batch(batch_size)

In [None]:
# Use 'take' to pull a single batch from the training pipeline
# and show its first 25 images with their labels

fig = plt.figure(figsize=(10, 10))

for image, label in train_ds.take(1):
    for idx in range(25):
        ax = fig.add_subplot(5, 5, idx + 1)
        ax.imshow(image[idx])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel(label[idx][0].numpy())

# Build model

In [None]:
# Input shape expected by the model: 28 pixels plus 2 of padding on each side
# gives 32 per spatial axis, with a single channel
padded_side = 28 + 2 * 2
image_shape = (padded_side, padded_side, 1)

In [None]:
# Monte-Carlo Dropout, used here to reduce overfitting

class MCDropout(layers.Dropout):
    """Dropout layer whose masking is always switched on.

    `call` forwards to the parent `Dropout.call` with `training=True`
    hard-coded, so units are dropped not only while fitting but also
    when the model is evaluated or used for prediction.
    """

    def call(self, inputs):
        # Pin the training flag on, whatever mode the layer is invoked in.
        dropped = super().call(inputs, training=True)
        return dropped


# LeNet-style network with some adjusted parameters: three 5x5 convolutions
# (max-pooling after the first two) followed by a small dense head.

inputs = layers.Input(shape=image_shape)

# Layers applied in sequence between the input and the output layer
hidden_stack = [
    layers.experimental.preprocessing.Rescaling(1./255),  # normalized: pixel values scaled by 1/255
    layers.Conv2D(128, 5, strides=1, activation='relu'),
    layers.MaxPooling2D(2, strides=2),
    layers.Conv2D(128, 5, strides=1, activation='relu'),
    layers.MaxPooling2D(2, strides=2),
    layers.Conv2D(128, 5, strides=1, activation='relu'),
    layers.Flatten(),
    layers.Dense(84, activation='relu'),
    MCDropout(0.1),
]

x = inputs
for hidden_layer in hidden_stack:
    x = hidden_layer(x)

# Ten output units, one per digit class; no activation, so these are raw logits
outputs = layers.Dense(10)(x)

model = keras.Model(inputs, outputs)

# Compile for integer class labels. The model's last layer has no activation,
# so the loss is told to expect raw logits (from_logits=True).
# `learning_rate` is the documented argument name of Adam; the former `lr`
# alias is deprecated, so it is no longer used here.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [None]:
# View network architecture: prints the model's layers with their output shapes and parameter counts
model.summary()

In [None]:
# EarlyStopping: end training once val_accuracy has not improved for 10 consecutive
# epochs, and restore the weights from the best epoch
early_stopping = keras.callbacks.EarlyStopping(monitor='val_accuracy',mode='max',
                                    patience=10,restore_best_weights=True)

# ReduceLROnPlateau: halve the learning rate whenever val_accuracy has not improved
# for 5 consecutive epochs
lr_scheduler = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',mode='max',factor=0.5,patience=5)

# training
# train_ds is a tf.data.Dataset that is already batched where it is built, so no
# batch_size is passed to fit(): Keras documents that batch_size must not be
# specified when the input is a dataset, because the dataset generates the batches.
history = model.fit(train_ds,epochs=100,validation_data=val_ds,
                    callbacks=[early_stopping,lr_scheduler])

In [None]:
# Per-epoch curves recorded by fit()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

fig = plt.figure(figsize=(8, 8))

# Top panel: accuracy, with the y-axis capped at 1
ax_acc = fig.add_subplot(2, 1, 1)
ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

# Bottom panel: loss, with the y-axis fixed to [0, 1]
ax_loss = fig.add_subplot(2, 1, 2)
ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_ylim([0, 1.0])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')

plt.show()

In [None]:
# test model on the held-out test split; returns the loss followed by the compiled 'accuracy' metric
# NOTE: MCDropout forces training=True, so dropout is also active here and the numbers can vary between runs
model.evaluate(test_ds,verbose=2)

# Submission

In [None]:
# Load the competition test set and the submission template
test_csv_path = '../input/digit-recognizer/test.csv'
submission_template_path = '../input/digit-recognizer/sample_submission.csv'

test = pd.read_csv(test_csv_path)
sample_submission = pd.read_csv(submission_template_path)

In [None]:
# Apply the same preprocessing as for the training images:
# reshape to 28x28, append a channel axis, then zero-pad 2 pixels per side

test_images = test.to_numpy().reshape(-1, 28, 28)[..., np.newaxis]
test = tf.pad(test_images, [[0, 0], [2, 2], [2, 2], [0, 0]])

test.shape

In [None]:
# predict
# The model's final Dense layer has no activation, so `result` holds raw logits.
# NOTE(review): MCDropout keeps dropout active during predict(), so repeated
# calls may return slightly different values — confirm this is intended.
result =  model.predict(test)

# Expected to be (number of test images, 10): one score per digit class
result.shape

In [None]:
# For every image, pick the index of the largest score along the last axis
predict_label = result.argmax(axis=-1)

predict_label.shape

In [None]:
# Show the first 25 test images (channel 0) with the predicted label under each

fig = plt.figure(figsize=(10, 10))

for idx in range(25):
    ax = fig.add_subplot(5, 5, idx + 1)
    ax.imshow(test[idx, ..., 0])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel(predict_label[idx])

In [None]:
# Put the predictions into the 'Label' column and write the submission file without the index
sample_submission = sample_submission.assign(Label=predict_label)
sample_submission.to_csv('submission.csv', index=False)