<a href="https://colab.research.google.com/github/mehrotrasan16/Keras-Deep-Learning/blob/01-MNIST-CNN-99.29/01_keras_mnist_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# TensorFlow and tf.keras
import tensorflow as tf
import numpy as np
from tensorflow import keras

# This notebook expects TensorFlow 2.2 or higher; display the installed
# version so the reader can confirm the requirement is met.
tf.__version__

'2.3.0'

In [2]:
# Display the version of the Keras API imported from the tensorflow package.
keras.__version__

'2.4.0'

In [3]:
# `tf.test.is_gpu_available()` is deprecated (TensorFlow itself points to
# `tf.config.list_physical_devices('GPU')`), so list the GPU devices and
# report whether at least one is visible. Still displays True/False.
len(tf.config.list_physical_devices('GPU')) > 0

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
# Load the MNIST train and test splits through Keras' dataset helper.
# Each split is an (images, labels) pair.
(train_images, train_labels), (test_images, test_labels) = (
    keras.datasets.mnist.load_data()
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Scale the image data (not the model) by dividing every pixel by 255
# (presumably mapping 0-255 intensities into 0-1 -- confirm input range).
# Caveat: the arrays are rebound under the same names, so running this
# cell a second time divides by 255 again.
train_images, test_images = train_images / 255.0, test_images / 255.0

In [6]:
# Baseline network: flatten each 28x28 image and feed it directly into a
# 10-unit softmax output layer (no hidden layers).
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

In [7]:
# Configure training for the baseline: SGD optimizer, sparse categorical
# cross-entropy loss, and accuracy reported as the metric.
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Train the baseline for 5 epochs on the training split.
# The returned History object is left as the cell's displayed value.
model.fit(x=train_images, y=train_labels, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fc3fa6cfe10>

In [9]:
# Score the baseline on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print('\nTest accuracy:', test_acc)


Test accuracy: 0.909500002861023


In [10]:
# Run the baseline model on the first five test images only.
predictions = model.predict(x=test_images[:5])

In [11]:
# Display the prediction array's shape together with the raw per-class scores.
(predictions.shape, predictions)


((5, 10),
 array([[2.51472258e-04, 1.31420995e-06, 2.04264055e-04, 2.50326633e-03,
         4.28943786e-05, 8.97952705e-05, 3.56143642e-06, 9.93770659e-01,
         1.84985984e-04, 2.94780103e-03],
        [1.28500042e-02, 1.82364209e-04, 8.87212336e-01, 2.29414087e-02,
         6.12397969e-07, 2.05710232e-02, 4.86901551e-02, 2.75987020e-07,
         7.54873687e-03, 3.15013835e-06],
        [2.62597809e-04, 9.38776672e-01, 1.78683288e-02, 1.15421573e-02,
         9.73321672e-04, 2.69965711e-03, 5.66723850e-03, 5.75687690e-03,
         1.34065133e-02, 3.04657267e-03],
        [9.95994091e-01, 3.32653052e-08, 2.87498202e-04, 9.95635855e-05,
         7.78090225e-07, 1.93194614e-03, 9.76627809e-04, 4.13868693e-04,
         1.70248692e-04, 1.25428938e-04],
        [1.51319406e-03, 5.42931739e-05, 1.43261785e-02, 8.50892568e-04,
         8.77656400e-01, 1.49583165e-03, 7.44148670e-03, 1.41868470e-02,
         1.18237818e-02, 7.06511363e-02]], dtype=float32))

In [12]:
# Reduce each row of scores to the index of its largest entry, i.e. the
# predicted digit for each of the five images.
print(predictions.argmax(axis=1))

[7 2 1 0 4]


In [13]:
# Print the ground-truth labels of the same five test images so they can be
# compared by eye with the predicted labels.
print(test_labels[:5]) # expected: [7, 2, 1, 0, 4]

[7 2 1 0 4]


## Fully Connected Layers : Comparison Table

| Approach | Accuracy(%) |
|---|---|
| Base Model | 90.78 |
| Epoch inc to 15| 91.72 |
| ADAM Optimizer | 92.65 |
| 128 - Dense layer | 97.79 |
| 2 128 - Dense layers | 97.87   |
| Best of learning rate loop: 0.003 | 97.44  |
| with Learning Rate Decay | 97.86  |
| with Dropout layers | 97.86  |

**Conclusion**:
With fully connected DNNs, we seem to hit a cap of about 97.87% no matter what we do. To get this to 99.3% we must involve convolutional layers (Conv2D), i.e. CNNs, together with Dropout layers.

# Convolutional Neural Networks 

In [14]:
# Give every image an explicit trailing channel axis of size 1
# (N, 28, 28, 1) before it is fed to the Conv2D layers.
train_images = train_images.reshape((len(train_images), 28, 28, 1))
test_images = test_images.reshape((len(test_images), 28, 28, 1))

In [15]:
import math
def lr_decay(epoch, *, initial_lr=0.01, decay_rate=0.6):
    """Return an exponentially decayed learning rate for ``epoch``.

    Computes ``initial_lr * decay_rate ** epoch``. With the defaults this is
    0.01 for epoch 0 and is multiplied by 0.6 for every further epoch.

    Args:
        epoch: Epoch number, used as the exponent of ``decay_rate``.
        initial_lr: Learning rate returned for ``epoch == 0``.
        decay_rate: Multiplicative factor applied once per epoch.

    Returns:
        The learning rate as a float.
    """
    # The tuning knobs are keyword-only on purpose: Keras'
    # LearningRateScheduler first tries schedule(epoch, lr) and falls back to
    # schedule(epoch) on TypeError. A positional second parameter would
    # silently receive the current learning rate as `initial_lr`.
    return initial_lr * math.pow(decay_rate, epoch)

# Wrap lr_decay in a Keras LearningRateScheduler callback with verbose output enabled.
# NOTE(review): the cnnmodel4.fit(...) call in this notebook does not pass
# `callbacks=`, so this object appears to be unused there -- confirm intent.
lr_decay_callback = keras.callbacks.LearningRateScheduler(lr_decay,verbose=True)

In [40]:
# CNN architecture: three Conv2D -> BatchNorm -> ReLU stages, a Flatten,
# two Dense -> BatchNorm -> ReLU -> Dropout stages, and a 10-way softmax.
def _conv_stage(filters, kernel_size, strides=1):
    """Return the [Conv2D, BatchNormalization, ReLU] layers of one conv stage."""
    return [
        keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding='same',
            use_bias=False,
        ),
        keras.layers.BatchNormalization(center=True, scale=False),
        keras.layers.Activation('relu'),
    ]


def _dense_stage(units, dropout_rate=0.4):
    """Return the [Dense, BatchNormalization, ReLU, Dropout] layers of one dense stage."""
    return [
        keras.layers.Dense(units),
        keras.layers.BatchNormalization(center=True, scale=False),
        keras.layers.Activation('relu'),
        keras.layers.Dropout(dropout_rate),
    ]


cnnmodel4 = keras.Sequential([
    # Convolutional feature extractor; the last two stages use stride 2.
    *_conv_stage(filters=12, kernel_size=3),
    *_conv_stage(filters=24, kernel_size=6, strides=2),
    *_conv_stage(filters=32, kernel_size=6, strides=2),

    keras.layers.Flatten(),

    # Fully connected head with dropout after each hidden stage.
    *_dense_stage(128),
    *_dense_stage(200),

    keras.layers.Dense(10, activation=tf.nn.softmax),
])

In [41]:
# Configure training for the CNN: Adam optimizer, sparse categorical
# cross-entropy loss, and accuracy reported as the metric.
cnnmodel4.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [42]:
# Train the CNN for 10 epochs on the reshaped training images.
# The returned History object is left as the cell's displayed value.
cnnmodel4.fit(x=train_images, y=train_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc3f2779dd8>

In [43]:
# Score the CNN on the test split and report its accuracy.
test_loss, test_acc = cnnmodel4.evaluate(x=test_images, y=test_labels)
print('\nTest accuracy:', test_acc)


Test accuracy: 0.9941999912261963


In [44]:
# Run the CNN on the first five test images only.
predictions = cnnmodel4.predict(x=test_images[:5])



In [45]:
# Display the CNN's raw per-class scores for the five images
# (one row per image, one column per class).
predictions

array([[1.5782616e-09, 6.9415016e-08, 7.5213883e-08, 1.0835117e-07,
        1.7923166e-07, 6.9192410e-08, 1.9628452e-09, 9.9999928e-01,
        7.4512215e-08, 1.8919951e-07],
       [5.1273860e-11, 1.3580883e-11, 1.0000000e+00, 9.0957797e-13,
        1.6672678e-12, 1.7733803e-15, 1.4581414e-08, 8.7692418e-13,
        3.6139234e-11, 7.7182022e-14],
       [1.7695918e-08, 9.9999988e-01, 3.1735853e-10, 6.6930661e-10,
        9.5216821e-09, 2.5613005e-08, 1.1514817e-09, 9.4620702e-08,
        4.3913921e-08, 6.0226624e-09],
       [9.9999893e-01, 5.3397664e-10, 2.3388271e-09, 1.3756958e-10,
        1.0352182e-10, 2.0101329e-08, 9.9053659e-07, 1.3869458e-10,
        9.3291625e-08, 9.8224060e-09],
       [2.8228433e-09, 3.3895513e-09, 1.7659983e-09, 4.5588195e-12,
        9.9999523e-01, 1.6908334e-08, 1.6821964e-08, 4.8614748e-11,
        4.0396532e-08, 4.6738646e-06]], dtype=float32)

In [46]:
# Reduce each row of scores to the index of its largest entry, i.e. the
# CNN's predicted digit for each of the five images.
print(predictions.argmax(axis=1))

[7 2 1 0 4]


In [47]:
# Print the ground-truth labels of the same five test images so they can be
# compared by eye with the CNN's predicted labels.
print(test_labels[:5]) # expected: [7, 2, 1, 0, 4]

[7 2 1 0 4]


# Comparison Table


| Approach | Accuracy(%) |
|---|---|
| Base 3x Conv2D | 98.79 |
| 3 x conv with Max Pool and lr Decay | 98.48 |
| Extra Dense layer with Dropout | 98.94 |
| 3x Conv, 2xDense, with Dropout & BatchNorm | 99.41|

**Conclusion**:
Batch normalization made an enormous difference, allowing us to reach the required accuracy.