In [None]:
# Import all the relevant libraries and classes
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from kaggle_datasets import KaggleDatasets
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Shell out to nvidia-smi to show which NVIDIA GPU(s), if any, this session can see
!nvidia-smi

In [None]:
# Load the Kaggle "digit-recognizer" CSV files
train = pd.read_csv('../input/digit-recognizer/train.csv')
test = pd.read_csv('../input/digit-recognizer/test.csv')

# Separate the pixel columns from the 'label' column; the test file is used as-is
X_train = train.drop(['label'], axis = 1)
y_train = train['label']

X_test = test

# Convert the labels to a tensor (no train/validation split happens in this cell)
y_train = tf.constant(y_train)

# Convert both pixel tables to tensors with the SAME dtype.
# float32 is Keras' default float type; building X_train as float64 only doubled
# its memory footprint and forced an implicit cast on every fit/evaluate call.
X_train = tf.constant(np.array(X_train), dtype = tf.float32)
X_test = tf.constant(np.array(X_test), dtype = tf.float32)

In [None]:
# Sanity check: shapes of the training and test pixel tensors
X_train.shape, X_test.shape

In [None]:
# Sanity check: container type of X_test after the tf.constant conversion
type(X_test)

In [None]:
# Preview the first 25 test images on a 5 x 5 grid
GRID_SIDE = 5
plt.figure(figsize = (10, 10))
for cell in range(GRID_SIDE * GRID_SIDE):
    # subplot positions are 1-based, image rows are 0-based
    plt.subplot(GRID_SIDE, GRID_SIDE, cell + 1)
    # each row of X_test is viewed as a 28 x 28 image
    plt.imshow(tf.constant(X_test[cell, :], shape = [28, 28]), cmap = 'gray')

In [None]:
# Centre and scale the pixel values: x -> (x - 127.5) / 127.5
# (maps onto [-1, 1] assuming raw intensities lie in [0, 255] — TODO confirm)
PIXEL_MIDPOINT = 127.5
X_train_s, X_test_s = (
    (pixels - PIXEL_MIDPOINT) / PIXEL_MIDPOINT
    for pixels in (X_train, X_test)
)

In [None]:
# Sanity check: shapes of the scaled training and test tensors
X_train_s.numpy().shape, X_test_s.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
#  * random_state pins the shuffle, so Restart & Run All reproduces the same split
#    (and therefore comparable validation scores).
#  * stratify keeps the digit-class proportions equal in both partitions.
# NOTE: this cell rebinds X_train / y_train, so it must be run exactly once after
# the scaling cell; re-running it alone fails on y_train.numpy().
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_s.numpy(),
    y_train.numpy(),
    test_size = 0.2,
    random_state = 42,
    stratify = y_train.numpy(),
)
X_train.shape, X_valid.shape

In [None]:
# Reshape every flat pixel row to a single-channel 28 x 28 image: (N, 28, 28, 1)
X_train = tf.reshape(X_train, shape = [-1, 28, 28, 1])
X_valid = tf.reshape(X_valid, shape = [-1, 28, 28, 1])
X_test = tf.reshape(X_test_s, shape = [-1, 28, 28, 1])

# One-hot encode the labels.
# num_classes is pinned to 10 because to_categorical otherwise infers the width
# from the largest label present, which would yield fewer columns than the model's
# Dense(10) output if a partition happened to contain no 9s.
y_train = keras.utils.to_categorical(y_train, num_classes = 10)
y_valid = keras.utils.to_categorical(y_valid, num_classes = 10)

In [None]:
# Sanity check: image tensor shapes after the reshape to [-1, 28, 28, 1]
X_train.shape, X_test.shape, X_valid.shape

In [None]:
# Sanity check: shapes of the one-hot encoded label arrays
y_train.shape, y_valid.shape

In [None]:
# Build, compile, summarise and draw the CNN classifier
def _conv_stage(filters, alpha, padding = 'valid', pool = False):
    """Return the layers of one 3x3 Conv2D -> LeakyReLU -> BatchNormalization stage.

    filters: number of convolution filters.
    alpha:   negative slope of the LeakyReLU (alpha = 0 behaves like a plain ReLU).
    padding: Conv2D padding mode ('valid' is the Conv2D default).
    pool:    when True, a default MaxPool2D layer is appended to the stage.
    """
    stage = [
        keras.layers.Conv2D(filters = filters, kernel_size = (3, 3), padding = padding),
        keras.layers.LeakyReLU(alpha = alpha),
        keras.layers.BatchNormalization(),
    ]
    if pool:
        stage.append(keras.layers.MaxPool2D())
    return stage


# Spatial sizes in the trailing comments follow from the 28 x 28 input
layer_stack = [keras.layers.InputLayer(input_shape = X_train.shape[1:])]
layer_stack += _conv_stage(32, 0.25)                                  # (26, 26)
layer_stack += _conv_stage(48, 0, pool = True)                        # (24, 24) -> (12, 12)
layer_stack += _conv_stage(64, 0.25)                                  # (10, 10)
layer_stack += _conv_stage(96, 0, padding = 'same', pool = True)      # (10, 10) -> (5, 5)
layer_stack += _conv_stage(128, 0.25)                                 # (3, 3)
layer_stack += _conv_stage(256, 0)                                    # (1, 1)
layer_stack += [
    keras.layers.Flatten(),                                           # 256
    # Fully connected head
    keras.layers.Dense(64),
    keras.layers.LeakyReLU(alpha = 0.25),
    keras.layers.BatchNormalization(),
    # Ten logits, batch-normalised and then turned into class probabilities
    keras.layers.Dense(10),
    keras.layers.BatchNormalization(),
    keras.layers.Activation(keras.activations.softmax),
]
mod = keras.models.Sequential(layer_stack)

# Compile the model
# NOTE(review): learning_rate = 0.1 is far above Adam's documented default of 0.001;
# presumably chosen to be decayed by the ReduceLROnPlateau callback — confirm.
mod.compile(
    optimizer = keras.optimizers.Adam(learning_rate = 0.1),
    loss = keras.losses.categorical_crossentropy,
    metrics = ['accuracy'],
)

# Print the layer-by-layer summary
mod.summary()

# Draw the model graph; as the cell's last expression it is displayed as the output
keras.utils.plot_model(mod, show_shapes = True)

In [None]:
# Initialize callbacks
# The model is compiled with metrics = ['accuracy'], so TF2 Keras logs the validation
# metric under the key 'val_accuracy'. Monitoring 'val_acc' matched no logged key,
# which made both callbacks emit a warning every epoch and never trigger.
# mode = 'max' states explicitly that a higher accuracy is an improvement.
cb1 = EarlyStopping(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 3,
    restore_best_weights = True,   # roll back to the best epoch when stopping
    verbose = 1,
)
cb2 = ReduceLROnPlateau(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 3,
    factor = 0.15,                 # new_lr = lr * 0.15 on a plateau
    min_lr = 0.00001,
    verbose = 1,
)

### Data Augmentation

In [None]:
# Random geometric augmentation, applied to the training images only
augmentation_args = {
    'rotation_range': 12,
    'width_shift_range': 5,
    'height_shift_range': 5,
    'zoom_range': 0.17,
}
data_train = ImageDataGenerator(**augmentation_args)

# The validation generator is built with no arguments, i.e. with default settings
data_valid = ImageDataGenerator()

# Batch iterators consumed by mod.fit
train_gen = data_train.flow(X_train, y_train, batch_size = 280, shuffle = True)
valid_gen = data_valid.flow(X_valid, y_valid, batch_size = 168, shuffle = True)

In [None]:
# Train in rounds of up to 25 epochs each, until the validation accuracy reaches
# 0.9955 or 15 rounds have been run.
# mod.evaluate returns [loss, accuracy] (one loss plus the single compiled metric),
# so the accuracy is element 1. The previous check on element 0 compared the LOSS
# against the accuracy target and therefore never reflected accuracy at all.
acc = [0, 0]   # placeholder [loss, accuracy] so the first round always runs
count = 0
while acc[1] < 0.9955 and count <= 14:
    count += 1
    # history is rebound every round; only the last round's curves are kept
    history = mod.fit(train_gen, epochs = 25, validation_data = valid_gen, callbacks = [cb1, cb2])
    acc = mod.evaluate(X_valid, y_valid)

In [None]:
# Learning curves of the most recent fit() call, one row per epoch
curves = pd.DataFrame(history.history)

# Keep only the epochs whose validation loss is below 1; the surviving rows keep
# their original epoch numbers as the index, which is labelled 'index'
df = curves[curves['val_loss'] < 1]
df.index.name = 'index'

df.plot(figsize = (12, 10));

In [None]:
# Score the trained model on the training partition, then on the validation partition
train_scores = mod.evaluate(X_train, y_train)
valid_scores = mod.evaluate(X_valid, y_valid)
train_scores, valid_scores

In [None]:
# Run the model on the test images (prediction only; nothing is scored in this cell)
pred = mod.predict(X_test)
# Show the model output for the fourth test image
pred[3]

In [None]:
# Collapse each output row to the index of its largest entry, i.e. the predicted digit.
# NOTE: this rebinds `pred`, so re-running the cell without re-running the predict
# cell fails (the array then has no axis 1).
pred = np.argmax(pred, axis = 1)
pred.shape

In [None]:
# Fill the sample submission's 'Label' column with the predicted digits and save it
SAMPLE_SUBMISSION_CSV = '../input/digit-recognizer/sample_submission.csv'
SUBMISSION_CSV = 'submission.csv'

sub = pd.read_csv(SAMPLE_SUBMISSION_CSV)
sub['Label'] = pred
sub.to_csv(SUBMISSION_CSV, index = False)

# Preview the first rows of the submission
sub.head()