# MNIST using TensorFlow. 0.996
My first submission to Kaggle. I achieved a validation accuracy (`val_accuracy`) between 0.9955 and 0.9972 with 30 epochs.

In [None]:
import tensorflow as tf
tf.random.set_seed(42)
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import load_model
from time import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import pandas as pd
from sklearn.model_selection import train_test_split

# SUPPORT FUNCTIONS
Functions that can be reused.

In [None]:
def plot_loss_acc(history, firstepoch=0):
    """Plot training/validation accuracy and loss from a fit history.

    Skipping the first epochs is useful to see the slope of the curves in
    the last epochs.

    Args:
        history: The object returned by the model's ``fit``. Its ``history``
            dict must contain the keys 'accuracy', 'val_accuracy', 'loss'
            and 'val_loss'.
        firstepoch: Index of the first epoch to show in the plots.
    """
    metrics = history.history
    total_epochs = len(metrics['accuracy'])

    acc = metrics['accuracy'][firstepoch:]
    val_acc = metrics['val_accuracy'][firstepoch:]
    loss = metrics['loss'][firstepoch:]
    val_loss = metrics['val_loss'][firstepoch:]

    # Label the x-axis with the real epoch indices rather than restarting
    # at 0. Deriving the start from the slice length keeps this correct for
    # negative or out-of-range values of firstepoch as well.
    epochs = range(total_epochs - len(acc), total_epochs)

    plt.plot(epochs, acc, 'bo-', label='Training accuracy')
    plt.plot(epochs, val_acc, 'go-', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epoch')
    plt.legend()

    # The loss curves go in a separate figure.
    plt.figure()

    plt.plot(epochs, loss, 'bo-', label='Training Loss')
    plt.plot(epochs, val_loss, 'go-', label='Validation Loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epoch')
    plt.legend()

    plt.show()

In [None]:
def show_img_dataset(X, y=None, nrows=4, ncols=4, firstimg=100, numimg=4):
    """Display consecutive images from X in a grid of subplots.

    Args:
        X: Features; ``X[i]`` is the image drawn in each subplot.
        y: Optional labels; when given, ``y[i]`` is used as the subplot title.
        nrows: Number of rows in the subplot grid.
        ncols: Number of columns in the subplot grid.
        firstimg: Index of the first image to display.
        numimg: Number of images to display.
    """
    has_labels = y is not None
    # Subplot positions are 1-based, image indices start at firstimg.
    for slot, idx in enumerate(range(firstimg, firstimg + numimg), start=1):
        axes = plt.subplot(nrows, ncols, slot)
        axes.axis('Off')
        plt.imshow(X[idx], cmap="Greys")
        if has_labels:
            plt.title(y[idx])
    plt.show()

In [None]:
def get_predictions(model, X, y):
    """Run the model on X and pair the predicted classes with the actual ones.

    Args:
        model: Object exposing ``predict(X)`` that returns one row of
            per-class scores per sample.
        X: Features, passed unchanged to ``model.predict``.
        y: Labels with one row per sample; the class is taken as the argmax
            along axis 1 (e.g. one-hot encoded labels).

    Returns:
        A tuple ``(predictions, results)``: the raw output of
        ``model.predict`` and a DataFrame with the columns 'Predictions'
        and 'Actuals' holding the class indices.
    """
    predictions = model.predict(X)
    results = pd.DataFrame(data={
        'Predictions': np.argmax(predictions, axis=1),
        'Actuals': np.argmax(y, axis=1),
    })
    return predictions, results

# Data

In [None]:
# Load the Kaggle digit-recognizer train and test CSV files into DataFrames.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    )
)

In [None]:
# Peek at the first five rows of the training data.
train_df.head(n=5)

In [None]:
# Separate the 'label' column (target) from the remaining columns (features).
y = train_df['label']
X = train_df.drop(columns=['label'])

X.shape, y.shape

In [None]:
# Reshape every flat row of 28*28 pixel values into a single-channel image,
# giving arrays of shape (N, 28, 28, 1). One reshape is enough: an
# intermediate (N, 28, 28) step would be overwritten immediately.
X = np.array(X).reshape((-1, 28, 28, 1))
X_test = np.array(test_df).reshape((-1, 28, 28, 1))
X.shape, X_test.shape

In [None]:
# Show eight labelled training images in a 2x4 grid, starting at index 780.
show_img_dataset(X, y=y, nrows=2, ncols=4, firstimg=780, numimg=8)

In [None]:
# Show eight unlabelled test images in a 2x4 grid, starting at index 780.
show_img_dataset(X_test, nrows=2, ncols=4, firstimg=780, numimg=8)

In [None]:
# Normalization: divide the pixel values by 255
# (assumes 8-bit intensities in the 0-255 range).
X, X_test = X / 255, X_test / 255

# One-hot encoding of the labels into 10 classes.
y = keras.utils.to_categorical(train_df['label'], num_classes=10)
X.shape, y.shape

In [None]:
# Hold out 20% of X and y as validation data; the fixed random_state keeps
# the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Free a little memory; this is not strictly necessary.
del X, y
del train_df, test_df

In [None]:
# Shapes of the training/validation features and labels.
tuple(arr.shape for arr in (X_train, X_val, y_train, y_val))

## Data Augmentation
Just a little bit of data augmentation.

Vertical or horizontal flips are counterproductive, but
a bit of shift and zoom helps with the final accuracy.

In [None]:
# Just a little bit of data augmentation.
# Vertical or horizontal flips are counterproductive, but
# a bit of rotation, zoom and shift helps.
# Imported from tensorflow.keras (not the standalone keras package) so the
# generator comes from the same Keras as the tf.keras model it feeds.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

x_trainr = X_train.reshape(-1, 28, 28, 1)
x_valr = X_val.reshape(-1, 28, 28, 1)

datagen = ImageDataGenerator(
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
)

# datagen.fit() is deliberately not called: it only computes statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening, and
# none of those options is enabled above.
# Per-pixel mean of the training images; kept so any code that still reads
# X_mean keeps working.
X_mean = X_train.mean(axis=0)

train_gen = datagen.flow(x_trainr, y_train, batch_size=128)
# NOTE: this generator also applies the random augmentation to the
# validation images.
test_gen = datagen.flow(x_valr, y_val, batch_size=128)

## The Model
I used two callbacks. 

**ModelCheckpoint**: saves the best model so it can be loaded after the fit. The result of the last epoch is rarely the best one. With the **monitor** parameter you can indicate which value you want to watch; the default is `val_loss`, but I prefer to track `val_accuracy`. I am not sure whether we would get a better score on the test dataset by keeping the model with the best loss value instead of the one with the best accuracy.

**ReduceLROnPlateau**: reduces the **learning rate** when the **monitor** metric has not improved for the number of **epochs** given by **patience**. We are going to train for just 30 epochs, so I used a really short patience of 3 **epochs**.

It may be worth mentioning that I replaced the Dropout layer with a SpatialDropout one. It is similar, but it drops an entire channel instead of individual values.

**Dropout** 
[[1, 1, 1, 1]
[1, 1, 1, 1]
[1, 1, 1, 1]]

Transforms to: 

[[0, 1, 1, 1]
[1, 1, 0, 1]
[1, 0, 1, 1]]


**Spatial Dropout**

[[1, 1, 1, 1]
[1, 1, 1, 1]
[1, 1, 1, 1]]

Transforms to: 

[[1, 1, 0, 1]
[1, 1, 0, 1]
[1, 1, 0, 1]]


In [None]:
# Reset the global Keras state before building a new model.
keras.backend.clear_session()

# Keep only the weights with the best validation accuracy seen so far.
cpDA4 = keras.callbacks.ModelCheckpoint(
    filepath='modelDA4.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 3 epochs without improvement in
# validation accuracy, never going below 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='auto',
    factor=0.1,
    patience=3,
    min_lr=0.0001,
    verbose=1,
)

modelDA4 = keras.models.Sequential([
    # First convolutional stage: two 5x5 convolutions with 64 filters.
    keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu',
                        input_shape=(28, 28, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2, strides=2),
    keras.layers.SpatialDropout2D(rate=0.4),

    # Second convolutional stage: two 3x3 convolutions with 128 filters.
    keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2, strides=2),
    keras.layers.SpatialDropout2D(rate=0.4),

    # Classifier head: one hidden dense layer and a 10-way softmax output.
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])
modelDA4.summary()

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output.
modelDA4.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the augmented batches from train_gen and validate on the
# validation arrays (X_val, y_val).
historyDA4 = modelDA4.fit(
    x=train_gen,
    epochs=30,
    steps_per_epoch=len(train_gen),
    validation_data=(X_val, y_val),
    callbacks=[cpDA4, reduce_lr],
    verbose=1,
)

In [None]:
# Plot the curves from epoch index 5 onwards to see the late-epoch slope.
plot_loss_acc(historyDA4, firstepoch=5)

In [None]:
# Compare predicted and actual classes on the validation data.
prediction, results = get_predictions(model=modelDA4, X=X_val, y=y_val)
results

In [None]:
# Score the model as left by the last epoch.
validation_loss, validation_accuracy = modelDA4.evaluate(x=X_val, y=y_val)

# Score the model reloaded from the 'modelDA4.h5' checkpoint file.
modelDA4_loaded = load_model('modelDA4.h5')
validation_loss_loaded, validation_accuracy_loaded = modelDA4_loaded.evaluate(
    x=X_val,
    y=y_val,
)

# Report both results for comparison.
print('Validation loss: ', validation_loss)
print('Validation accuracy: ', validation_accuracy)
print('Validation loss loaded: ', validation_loss_loaded)
print('Validation accuracy loaded: ', validation_accuracy_loaded)

In [None]:

# Predict the test images with the model loaded from the checkpoint and keep
# the most likely class for each image.
y_pred = modelDA4_loaded.predict(X_test)
results = y_pred.argmax(axis=1)
results.shape, results

In [None]:
# Build the submission file: one row per prediction, with ImageId counting
# from 1. The id range is derived from the number of predictions instead of a
# hard-coded 28000, so the two columns always have the same length.
image_id = pd.Series(range(1, len(results) + 1), name='ImageId')
y_preds = pd.Series(results, name='Label')
pred = pd.concat([image_id, y_preds], axis=1)
pred.to_csv('submission.csv', index=False)