# 1. **Train MNIST Data**
`We will train the first model on the MNIST-dataset. The MNIST-dataset contains 70,000 images (28 x 28 pixels) of handwritten digits. `


In [None]:
# Install pinned versions of numpy, matplotlib and tensorflow.
!pip install numpy==1.18.5
!pip install matplotlib==3.2.2
!pip install tensorflow==2.3.0

In [None]:
# Importing useful libraries
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Conv2D, MaxPooling2D, Activation, BatchNormalization, MaxPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

# Training hyperparameters
BATCH_SIZE = 128    # samples per batch produced by the data generators
NUM_CLASSES = 10    # number of output classes
EPOCHS = 30         # maximum number of training epochs
LR = 1e-3           # learning rate

# Input image dimensions (pixels)
img_rows = 28
img_cols = 28

In [None]:
# Load MNIST, already split into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add a trailing channel axis, cast to float32 and divide the pixel values by 255
X_train = X_train.reshape(-1, img_rows, img_cols, 1).astype('float32') / 255
X_test = X_test.reshape(-1, img_rows, img_cols, 1).astype('float32') / 255

# One-hot encode the class labels (binary class matrices with NUM_CLASSES columns)
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

In [None]:
# Random augmentation (small rotations, shifts, shear and zoom) is applied
# to the training images only.
train_gen = ImageDataGenerator(rotation_range=8,
                               width_shift_range=0.08,
                               shear_range=0.3,
                               height_shift_range=0.08,
                               zoom_range=0.08)
# The test generator applies no transformation, so validation metrics are
# computed on the unmodified test images.
test_gen = ImageDataGenerator()

training_set = train_gen.flow(X_train, y_train, batch_size=BATCH_SIZE)
# Fix: the test set must flow through test_gen, not train_gen; otherwise the
# validation images are randomly augmented as well and test_gen is unused.
test_set = test_gen.flow(X_test, y_test, batch_size=BATCH_SIZE)

In [None]:
def DigitNet(height=28, width=28, depth=1, classes=10):  # best bs=64 & 128
    """Build the (uncompiled) convolutional digit classifier.

    The network is two convolutional stages followed by a dense head:
    two 5x5 convolutions with 32 filters, then two 3x3 convolutions with
    64 filters. Each stage ends with 2x2 max pooling and 25% dropout.
    The head is a 256-unit ReLU layer, 50% dropout and a softmax output.

    Args:
        height: Input image height.
        width: Input image width.
        depth: Number of input channels.
        classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Sequential`` model.
    """
    first_stage = [
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu', input_shape=(height, width, depth)),
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    second_stage = [
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),
    ]
    classifier_head = [
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(0.5),
        Dense(classes, activation="softmax"),
    ]
    return Sequential(first_stage + second_stage + classifier_head)

## Training the model
`We will use an earlystopper, which is a functionality provided by the Keras API. We can specify the earlystopper to monitor the validation loss. Once the validation loss stops improving (decreasing), the earlystopper stops the training session to mitigate the risk of overfitting.`

In [None]:
# Instantiate the network for single-channel MNIST images and NUM_CLASSES outputs
model = DigitNet(height=img_rows, width=img_cols, depth=1, classes=NUM_CLASSES)

In [None]:
# Callbacks
# Save the model whenever the validation loss improves, so that
# 'BestDigitNet.pb' always holds the model with the best validation loss.
checkpoint = ModelCheckpoint(
    'BestDigitNet.pb', monitor='val_loss', save_best_only=True, mode='auto')
# Fix: stop on the *validation* loss, as described in the notebook text and
# consistent with the checkpoint. Monitoring the training loss does not
# detect overfitting. Training stops after 3 epochs without improvement.
earlystopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Compiling
# `learning_rate` replaces the deprecated `lr` argument name.
adam = Adam(learning_rate=LR)
model.compile(loss="categorical_crossentropy",
              optimizer=adam, metrics=["accuracy"])

# Training
# The step counts use floor division, so a final partial batch is not
# counted in an epoch.
session = model.fit(training_set,
                    epochs=EPOCHS,
                    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
                    validation_data=test_set,
                    validation_steps=X_test.shape[0] // BATCH_SIZE,
                    callbacks=[checkpoint, earlystopper])

In [None]:
# Per-epoch metrics recorded by model.fit
acc = session.history['accuracy']
val_acc = session.history['val_accuracy']

loss = session.history['loss']
# Fix: read the validation loss; the original read 'val_accuracy' here, so
# the "Validation Loss" curve actually showed validation accuracy.
val_loss = session.history['val_loss']

# One x-axis point per completed epoch (fewer than EPOCHS if early stopping fired)
epochs_range = range(len(acc))

# Plot the accuracy
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Plot the loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

We can see that we get an amazing accuracy of roughly 99% on both the training and the validation set, with the validation set having slightly better accuracy. We can also see that the earlystopper kicked in, and we see no signs of overfitting in the two plots above. Lastly, we have to save the model so we can import it in the next Jupyter notebook, where we will use transfer learning to train our final model to be used for digit classification.


***Check the BestDigitNet.pb folder for our saved model.***

