In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Load the Dataset**

In [None]:
# Read the training CSV into a DataFrame and preview its first rows.
TRAIN_CSV = '/kaggle/input/digit-recognizer/train.csv'
X = pd.read_csv(TRAIN_CSV)
X.head()

In [None]:
# (rows, columns) of the training frame as loaded, i.e. still including 'label'.
X.shape

In [None]:
# Read the test CSV (the images to predict) and preview its first rows.
TEST_CSV = '/kaggle/input/digit-recognizer/test.csv'
X_test = pd.read_csv(TEST_CSV)
X_test.head()

In [None]:
# (rows, columns) of the test frame.
X_test.shape

In [None]:
# Pull the target column out of the training frame before it is removed from X.
y = X.loc[:, 'label']
y.head()

In [None]:
# Remove the target column so X holds only the pixel features.
# X is rebound instead of mutated in place, and errors='ignore' makes the cell
# idempotent: running it a second time finds no 'label' column and leaves X
# unchanged rather than raising KeyError.
X = X.drop(columns=['label'], errors='ignore')
X.head()

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

**Check a sample image**

In [None]:
# Take the first training row, lay its pixels out as a 28x28 grid and draw it.
some_digit = X.iloc[[0]].to_numpy()
some_digit_image = some_digit.reshape(28, 28)

# Binary colormap with nearest-neighbour rendering; axes hidden so only the
# digit is shown.
fig, ax = plt.subplots()
ax.imshow(some_digit_image, cmap=mpl.cm.binary, interpolation="nearest")
ax.axis("off")
plt.show()

**Data Augmentation**

*Generate more training data by shifting the given images 1 pixel in up/down/left/right directions to generate 4 more training samples for each input image.*

In [None]:
from scipy.ndimage import interpolation

*Shift 1 pixel down*

In [None]:
# Imported here so the cell calls scipy.ndimage.shift directly instead of going
# through the deprecated scipy.ndimage.interpolation namespace.
from scipy import ndimage

# View the flat pixel rows as a stack of 28x28 images; -1 lets NumPy infer the
# number of samples instead of hard-coding it.
# Shift vector is (sample, row, column): +1 on the row axis moves every image
# one pixel down, and the vacated row is filled with cval=0.
# order=0 (nearest neighbour) is exact for whole-pixel shifts and skips the
# cubic-spline prefilter that the default order=3 would run over the whole stack.
X_aug_down = ndimage.shift(np.array(X).reshape(-1, 28, 28), [0, 1, 0], order=0, cval=0)
X_aug_down.shape

*Shift 1 pixel up.*

In [None]:
# Imported here so the cell calls scipy.ndimage.shift directly instead of going
# through the deprecated scipy.ndimage.interpolation namespace.
from scipy import ndimage

# View the flat pixel rows as a stack of 28x28 images; -1 lets NumPy infer the
# number of samples instead of hard-coding it.
# Shift vector is (sample, row, column): -1 on the row axis moves every image
# one pixel up, and the vacated row is filled with cval=0.
# order=0 (nearest neighbour) is exact for whole-pixel shifts and skips the
# cubic-spline prefilter that the default order=3 would run over the whole stack.
X_aug_up = ndimage.shift(np.array(X).reshape(-1, 28, 28), [0, -1, 0], order=0, cval=0)
X_aug_up.shape

*Shift 1 pixel right.*

In [None]:
# Imported here so the cell calls scipy.ndimage.shift directly instead of going
# through the deprecated scipy.ndimage.interpolation namespace.
from scipy import ndimage

# View the flat pixel rows as a stack of 28x28 images; -1 lets NumPy infer the
# number of samples instead of hard-coding it.
# Shift vector is (sample, row, column): +1 on the column axis moves every
# image one pixel right, and the vacated column is filled with cval=0.
# order=0 (nearest neighbour) is exact for whole-pixel shifts and skips the
# cubic-spline prefilter that the default order=3 would run over the whole stack.
X_aug_right = ndimage.shift(np.array(X).reshape(-1, 28, 28), [0, 0, 1], order=0, cval=0)
X_aug_right.shape

*Shift 1 pixel left.*

In [None]:
# Imported here so the cell calls scipy.ndimage.shift directly instead of going
# through the deprecated scipy.ndimage.interpolation namespace.
from scipy import ndimage

# View the flat pixel rows as a stack of 28x28 images; -1 lets NumPy infer the
# number of samples instead of hard-coding it.
# Shift vector is (sample, row, column): -1 on the column axis moves every
# image one pixel left, and the vacated column is filled with cval=0.
# order=0 (nearest neighbour) is exact for whole-pixel shifts and skips the
# cubic-spline prefilter that the default order=3 would run over the whole stack.
X_aug_left = ndimage.shift(np.array(X).reshape(-1, 28, 28), [0, 0, -1], order=0, cval=0)
X_aug_left.shape

In [None]:
# Stack the four shifted image sets along the sample axis, in the order
# down, up, right, left.
shifted_sets = [X_aug_down, X_aug_up, X_aug_right, X_aug_left]
X_temp = np.concatenate(shifted_sets, axis=0)
X_temp.shape

*Augmented training dataset generated and stored in X_aug and y_aug.*

In [None]:
# Flatten each shifted image back to one row of pixels and append those rows
# below the original training rows. The row count is taken from X_temp itself
# and the pixel count is inferred with -1, so nothing depends on a hard-coded
# dataset size.
X_aug = np.concatenate((X, X_temp.reshape(len(X_temp), -1)))
X_aug.shape

In [None]:
# Repeat the label vector five times back to back: once for the original images
# and once for each of the four shifted copies.
y_aug = np.tile(y.to_numpy(), 5)
y_aug.shape

**Split into Training and Validation Sets**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split by *source image* instead of by augmented row.
# X_aug is the original images followed by their shifted copies in the same
# order, so rows i, i + n, i + 2n, ... (n = len(y)) all come from one image.
# A plain row-wise random split would put near-duplicates of validation images
# into the training set and inflate the validation score.
SPLIT_SEED = 42  # fixed so the split is reproducible across runs
n_original = len(y)
assert len(X_aug) == len(y_aug), "features and labels must have the same length"
assert len(X_aug) % n_original == 0, "X_aug must hold whole copies of the original set"

# Source-image id of every augmented row.
source_id = np.arange(len(X_aug)) % n_original

# Pick 20% of the source images for validation, keeping the class balance.
_, val_sources = train_test_split(
    np.arange(n_original), test_size=0.2, random_state=SPLIT_SEED, stratify=y)
val_mask = np.isin(source_id, val_sources)

# Shuffle the training rows (seeded) so they are not grouped by shift direction.
train_idx = np.random.default_rng(SPLIT_SEED).permutation(np.flatnonzero(~val_mask))
val_idx = np.flatnonzero(val_mask)

X_train, X_val = X_aug[train_idx], X_aug[val_idx]
y_train, y_val = y_aug[train_idx], y_aug[val_idx]

**Training ResNet-34**

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from functools import partial

*The ***ImageNet Large Scale Visual Recognition Challenge (ILSVRC)*** 2015 challenge was won using a ***Residual Network (or ResNet)***. The key to being able to train such a deep network is to use ***skip connections (also called shortcut connections)***: the signal feeding into a layer is also added to the output of a layer located a bit higher up the stack. If we add the input x to the output of the network (i.e., we add a skip connection), then the network will be forced to model ***f(x) = h(x) - x rather than h(x)***. This is called residual learning.*

*When we initialize a regular neural network, its weights are close to zero, so the network just outputs values close to zero. If we add a skip connection, the resulting network just outputs a copy of its inputs. If the target function is fairly close to the identity function, this will speed up the training considerably. Moreover, if we add many skip connections, the network can start making progress even if several layers have not started learning yet. Thanks to skip connections, the signal can easily make its way across the whole network.*

***ResNet-34*** *is the ResNet with 34 layers (only counting convolutional layers and the fully connected layer) containing 3 residual units that output 64 feature maps, 4 residual units with 128 maps, 6 residual units with 256 maps, and 3 residual units with 512 maps.*

In [None]:
# Conv2D factory with the defaults shared by the network's convolutions:
# 3x3 kernel, stride 1, "SAME" padding and no bias term. Any of these can be
# overridden per call by passing the keyword again.
DefaultConv2D = partial(
    keras.layers.Conv2D,
    kernel_size=3,
    strides=1,
    padding="SAME",
    use_bias=False,
)

In [None]:
class ResidualUnit(keras.layers.Layer):
    """Residual unit: two 3x3 conv/batch-norm stages plus a skip connection.

    Main path:  conv(filters, strides) -> BN -> activation -> conv(filters) -> BN
    Skip path:  identity when ``strides == 1``; otherwise a 1x1 conv with the
                same ``filters``/``strides`` followed by BN, so its output can
                be added to the main path.
    Output:     ``activation(main + skip)``.

    Args:
        filters: Number of feature maps produced by each convolution.
        strides: Stride of the first main-path convolution (and of the 1x1
            skip convolution when greater than 1).
        activation: Activation identifier accepted by ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer``.

    Note:
        The skip path is only projected when ``strides > 1``. With
        ``strides == 1`` the input is added unchanged, so the input must
        already have ``filters`` channels.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are retained so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            keras.layers.BatchNormalization()]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                keras.layers.BatchNormalization()
            ]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Previously this returned ``None``, which breaks anything that
        serializes the layer (saving or cloning a model that contains it).
        """
        config = super().get_config()
        config.update({
            "filters": self.filters,
            "strides": self.strides,
            "activation": keras.activations.serialize(self.activation),
        })
        return config

    def call(self, inputs):
        """Run the main and skip paths on ``inputs`` and activate their sum."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

In [None]:
# Stem: 7x7 stride-2 convolution on the 28x28x1 input, batch norm, ReLU, then a
# 3x3 stride-2 max pool.
stem_layers = [
    DefaultConv2D(64, kernel_size=7, strides=2, input_shape=[28, 28, 1]),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("relu"),
    keras.layers.MaxPool2D(pool_size=3, strides=2, padding="SAME"),
]

# Residual stack: 3 units of 64 maps, 4 of 128, 6 of 256 and 3 of 512.
# A unit uses stride 2 exactly when its width differs from the previous unit's.
unit_widths = [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3
residual_layers = []
prev_filters = 64
for filters in unit_widths:
    strides = 2 if filters != prev_filters else 1
    residual_layers.append(ResidualUnit(filters, strides=strides))
    prev_filters = filters

# Head: global average pooling, flatten, and a 10-way softmax classifier.
head_layers = [
    keras.layers.GlobalAvgPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation="softmax"),
]

model = keras.models.Sequential(stem_layers + residual_layers + head_layers)

In [None]:
# Adam with learning rate 0.01; sparse categorical cross-entropy as the loss,
# with accuracy reported as the metric.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Stop training after 3 epochs without improvement and roll the model back to
# the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
)

In [None]:
# Print the layer-by-layer summary of the assembled network.
model.summary()

In [None]:
# Flat pixel rows are reshaped to (samples, 28, 28, 1) to match the model's
# input; training runs for at most 10 epochs with early stopping on the
# validation set.
IMAGE_SHAPE = (-1, 28, 28, 1)
model.fit(
    X_train.reshape(IMAGE_SHAPE),
    y_train,
    validation_data=(X_val.reshape(IMAGE_SHAPE), y_val),
    callbacks=[early_stopping_cb],
    epochs=10,
)

**Prediction on the Test Set**

In [None]:
# Score the test images and keep the highest-probability class for each one.
test_images = X_test.values.reshape(-1, 28, 28, 1)
class_scores = model.predict(test_images)
predictions = class_scores.argmax(axis=-1)

# Submission file: 1-based ImageId alongside the predicted Label.
output = pd.DataFrame({
    'ImageId': range(1, len(X_test) + 1),
    'Label': predictions,
})
output.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")