In [None]:
# PyTorch was consuming too much RAM, so I chose TensorFlow instead

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as ts
import tensorflow.keras as ks
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Let pandas display up to 784 columns when a frame is shown;
# presumably 784 = 28 * 28 pixel columns per image (TODO confirm against the CSV schema).
pd.set_option('display.max_columns', 784)

#### Normalizing and reshaping the dataset

In [None]:
# Directory holding the competition CSV files, and the shape every
# flattened pixel row is reshaped to before it is fed to the network.
df_path  = '../input/digit-recognizer/'
im_shape = (28, 28, 1)

# Submission template, indexed by ImageId so predictions can be written into it later.
out_df = pd.read_csv(df_path + 'sample_submission.csv').set_index('ImageId')

def prepare_data(df: pd.DataFrame, train = False):
    """Turn a frame of flattened images into a scaled image array.

    Args:
        df: frame with one image per row. When ``train`` is true it must also
            contain a ``label`` column.
        train: when true, the ``label`` column is split off and one-hot encoded
            into 10 classes; otherwise every column is treated as pixel data.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is the pixel data divided by 255
        and reshaped to ``(-1, *im_shape)``. ``labels`` is the one-hot label
        matrix, or ``None`` when ``train`` is false.
    """
    labels = None
    pixels = df

    if train:
        # Separate the target column from the pixel columns.
        labels = to_categorical(df['label'], num_classes = 10)
        pixels = df.drop(columns = 'label')

    # Dividing by 255 only rescales the values (presumably from 0..255 to 0..1),
    # which keeps the convolution kernels working with small numbers.
    # The flat rows are then reshaped into (rows, *im_shape).
    images = (pixels / 255).values.reshape(-1, *im_shape)

    return images, labels

# Load the competition CSVs: the unlabeled test set and the labeled training set.
x_test, _        = prepare_data(pd.read_csv(df_path + 'test.csv'))
x_train, y_train = prepare_data(pd.read_csv(df_path + 'train.csv'),  train=True)

# Hold out 20% of the labeled data for validation. The split is seeded so the
# same rows end up in the validation set on every Restart & Run All, which keeps
# validation metrics comparable between runs.
split_seed = 42
x_train_splitted, x_test_splitted, y_train_splitted, y_test_splitted = train_test_split(
    x_train, y_train, test_size = 0.2, random_state = split_seed
)

#### Creating a CNN model using the Keras API

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation setup: only small random rotations and shifts are applied.
#
# ZCA whitening is deliberately disabled. It relies on statistics computed by
# `mnist_datagen.fit(...)`, which this notebook never calls, so enabling it only
# produced warnings while the whitening itself was skipped. The validation and
# test arrays are also passed to the model without going through this generator,
# so whitening the training batches alone would make the inputs inconsistent.
mnist_datagen = ImageDataGenerator(
        samplewise_center              = False,
        featurewise_std_normalization  = False,
        samplewise_std_normalization   = False,
        zca_whitening                  = False,
        horizontal_flip                = False,
        vertical_flip                  = False,

        rotation_range                 = 10,
        width_shift_range              = 0.1,
        height_shift_range             = 0.1,
)

In [None]:
# learning options
epoch_count = 150
prefered_batch_size  = 256

model_options = {
    'optimizer': 'adam',
    'metrics':   ['accuracy'],                # Keras documents `metrics` as a list, not a bare string
    'loss':      'categorical_crossentropy'   # fits the one-hot labels and the softmax output layer
}

# LeakyReLU helper; not used by the layers below, which rely on plain 'relu'.
lrelu = lambda x: ks.layers.LeakyReLU(alpha = 0.01)(x)

# Settings shared by every convolution layer
conv_setup = {
    'kernel_size': 3,               # 3x3 kernels
    'strides':     1,               # moved across the image with step 1
    'activation': 'relu'
}

lr_model = ks.Sequential()

# -----------------------------------------------------------------  # C1 (64 filters), C2 (128 filters)
# Each stage is Conv -> BatchNorm -> Conv -> BatchNorm -> MaxPool -> Dropout.
for stage_index, stage_filters in enumerate((64, 128)):
    # Only the very first layer of the model declares the input shape.
    first_layer_kwargs = {'input_shape': im_shape} if stage_index == 0 else {}

    lr_model.add(ks.layers.Conv2D(
        filters = stage_filters,
        **first_layer_kwargs,
        **conv_setup
    ))
    lr_model.add(ks.layers.BatchNormalization())

    lr_model.add(ks.layers.Conv2D(
        filters = stage_filters,
        **conv_setup
    ))
    lr_model.add(ks.layers.BatchNormalization())

    lr_model.add(ks.layers.MaxPool2D()) # pool size is 2x2 by default
    lr_model.add(ks.layers.Dropout(0.4))

# -----------------------------------------------------------------  # Classifier part
# Flatten the feature maps into a vector. For a 28x28 input with the default
# 'valid' padding this is 4 * 4 * 128 = 2048 units.
lr_model.add(ks.layers.Flatten())
lr_model.add(ks.layers.Dense(
    1024, activation = 'swish'
))
lr_model.add(ks.layers.Dropout(0.4))
lr_model.add(ks.layers.BatchNormalization())

# -----------------------------------------------------------------  # Output layer
lr_model.add(ks.layers.Dense(
    10, activation='softmax'        # 10 classes
))

# Assemble the model and print its layer summary
lr_model.compile(**model_options)
lr_model.summary()

In [None]:
# Train the model on augmented batches; the held-out split is used for validation
# as-is (it does not pass through the augmentation generator).
# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
# The number of epochs comes from `epoch_count` in the learning options instead
# of a second, hard-coded value.
lr_model.fit(
    mnist_datagen.flow(x_train_splitted, y_train_splitted, batch_size = prefered_batch_size),
    verbose = 1,
    epochs = epoch_count,
    steps_per_epoch = x_train_splitted.shape[0] // prefered_batch_size,
    validation_data = (x_test_splitted, y_test_splitted),
)

In [None]:
# Softmax outputs for the competition test set, one row of 10 scores per image
y_preds = lr_model.predict(x_test)

# The predicted digit is the index of the highest score in each row
y_preds_classified = y_preds.argmax(axis = 1)

# Preview the first ten predictions
y_preds_classified[:10]

In [None]:
# Save the results: write the predicted digits into the 'Label' column of the
# submission frame and store it as a local CSV file. Nothing is uploaded here.
out_df['Label'] = y_preds_classified
out_df.to_csv('outer.csv')