#### Load tensorflow

In [None]:
import tensorflow as tf

#### Collect Data

We will use the MNIST dataset for this exercise. It contains images of handwritten digits, each image being a black & white (grayscale) picture of size 28x28. We will download the data using the TensorFlow API. The dataset has 60,000 training examples and 10,000 test examples. Please note that the images have already been converted to NumPy arrays.

In [None]:
#Download dataset
#load_data() returns two (images, labels) tuples: the training split and the test split
(X_train, y_train),(X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
#Check number of training examples and size of each example
#The shape tuple reads as (number of examples, image height, image width)
X_train.shape

In [None]:
#Display the raw pixel array of the first training image
X_train[0]

In [None]:
#Check number of test examples and size of each example
#The shape tuple reads as (number of examples, image height, image width)
X_test.shape

Visualize data

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
#Pick a random index into the test set (upper bound is exclusive)
img_num = np.random.randint(len(X_test))

#Render the chosen test image in grayscale, titled with its label
plt.imshow(X_test[img_num], cmap='gray')
plt.suptitle(f'Number: {y_test[img_num]}')
plt.show()

#### Prepare Labels
Convert each output label into a one-hot vector (one value per class)

In [None]:
#Label for an example (shown here before one-hot encoding is applied)
y_test[0]

In [None]:
#One-hot encode the labels into 10 classes.
#Guard on ndim so the cell is idempotent: re-running it must not encode
#already one-hot labels a second time (which would give an (N, 10, 10) array).
if y_train.ndim == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
if y_test.ndim == 1:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

In [None]:
#Label for same example after one hot encoding
#The label is now a 10-element vector instead of a single class index
y_test[0]

#### Build the Model V1
Activation function 'tanh' --> Hyperbolic tangent

In [None]:
#Reset Keras global state, then start an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.models.Sequential()

#Flatten each 28x28 image into a 784-element vector
flatten_layer = tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28))
model.add(flatten_layer)

#Batch-normalize the flattened inputs before the first Dense layer
model.add(tf.keras.layers.BatchNormalization())

Add Hidden Layers

In [None]:
#1st hidden layer: 200 units, tanh activation
model.add(tf.keras.layers.Dense(units=200, activation='tanh'))

In [None]:
#2nd hidden layer: 100 units, tanh activation
model.add(tf.keras.layers.Dense(units=100, activation='tanh'))

In [None]:
#3rd hidden layer: 60 units, tanh activation
model.add(tf.keras.layers.Dense(units=60, activation='tanh'))

In [None]:
#4th hidden layer: 30 units, tanh activation
model.add(tf.keras.layers.Dense(units=30, activation='tanh'))

**Add Output Layer**
Please note that activation function for the output layer is independent of what we used in hidden layer. Here we need to get 10 probabilities  (10 classes in MNIST with one right answer) and hence using softmax.

In [None]:
#Output layer: one softmax unit per class (10 classes)
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

Compile the Model

In [None]:
#Configure training: default SGD, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

Review model

In [None]:
#Print the layer-by-layer summary of the model built above
model.summary()

#### Build the Model V2
With Activation function Relu

In [None]:
#Reset Keras global state, then start an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.models.Sequential()

#Flatten each 28x28 image into a 784-element vector
model.add(tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28)))

#Batch-normalize the flattened inputs before the first Dense layer
model.add(tf.keras.layers.BatchNormalization())

In the code from previous model, replace 'tanh' with 'relu'

In [None]:
#Add the four ReLU hidden layers, from widest (200 units) to narrowest (30 units)
for hidden_units in (200, 100, 60, 30):
    model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))

Output layer activation function **will not change**.

In [None]:
#Output layer: one softmax unit per class (10 classes)
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

In [None]:
#Configure training: default SGD, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

#### Build the model V3
With LeakyRelu activation function

In [None]:
#Reset Keras global state, then start an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.models.Sequential()

#Flatten each 28x28 image into a 784-element vector
model.add(tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28)))

#Batch-normalize the flattened inputs before the first Dense layer
model.add(tf.keras.layers.BatchNormalization())

LeakyRelu is available as a layer in Keras

In [None]:
#Add four hidden layers; each is a linear Dense layer (no built-in activation)
#followed by a separate LeakyReLU activation layer
for hidden_units in (200, 100, 60, 30):
    model.add(tf.keras.layers.Dense(hidden_units))
    model.add(tf.keras.layers.LeakyReLU())

Output layer will remain same.

In [None]:
#Output layer: one softmax unit per class (10 classes)
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

#Configure training: default SGD, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

#### Build the Model V4
Applying Dropout

In [None]:
#Reset Keras global state, then start an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.models.Sequential()

#Flatten each 28x28 image into a 784-element vector
model.add(tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28)))

#Batch-normalize the flattened inputs before the first Dense layer
model.add(tf.keras.layers.BatchNormalization())

**Dropout** can be applied after any trainable layer (e.g. Dense). The dropout rate is a hyper-parameter, usually between 0.1 (10%) and 0.5 (50%). In Keras, Dropout is a layer, and we specify the dropout rate when adding the layer.

In [None]:
#Each entry is (Dense units, dropout rate applied after that layer).
#None means no Dropout follows the layer; dropout is optional per layer.
hidden_spec = (
    (200, 0.4),   #40% dropout rate
    (100, 0.3),   #30% dropout rate
    (60, None),   #no dropout after the 3rd hidden layer
    (30, 0.25),   #25% dropout rate
)
for hidden_units, drop_rate in hidden_spec:
    model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))
    if drop_rate is not None:
        model.add(tf.keras.layers.Dropout(drop_rate))

Never use dropout **after** the output layer

In [None]:
#Output layer: one softmax unit per class (10 classes); no Dropout follows it
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

In [None]:
#Configure training: default SGD, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

#### Build the Model V5
Applying BatchNormalization

In [None]:
#Reset Keras global state, then start an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.models.Sequential()

#Flatten each 28x28 image into a 784-element vector
model.add(tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28)))

#Batch-normalize the flattened inputs before the first Dense layer
model.add(tf.keras.layers.BatchNormalization())

**BatchNormalization** can be applied **before** any trainable layer (e.g. Dense). We can use a combination of BatchNorm and Dropout layers. Both layers are used to reduce overfitting.

In [None]:
#1st hidden layer; the model's initial BatchNormalization layer already precedes it
model.add(tf.keras.layers.Dense(200, activation='relu'))

#2nd to 4th hidden layers, each preceded by its own BatchNormalization layer
for hidden_units in (100, 60, 30):
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))

We can use BatchNorm layer **before** the output layer

In [None]:
#Normalize the last hidden layer's activations, then add the 10-class softmax output layer
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

In [None]:
#Configure training: default SGD, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

#### Changing default learning rate

Learning rate can be changed in the optimizer. So when we do model.compile, we can use a customized optimizer object. Try different learning rates to see which helps to get better test accuracy (or lower test loss).

In [None]:
#Build an SGD optimizer with an explicit learning rate ('learning_rate' parameter)
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

#Recompile the model so that it trains with this optimizer object
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy'],
)

#### Learning rate decay

Learning rate decay is an approach to reduce learning rate over a period. We can set decay rate in the optimizer object using 'decay' parameter.

In [None]:
#Create a SGD Optimizer object whose learning rate decays over time.
#The legacy 'decay' keyword is no longer accepted by current Keras optimizers,
#so the same rule is expressed as a learning-rate schedule instead:
#    lr = 0.1 / (1 + 0.001 * step)
#which is the formula the old 'decay=0.001' argument applied per update step.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.1,
    decay_steps=1,
    decay_rate=0.001,
)
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

#Use the above object while compiling the model
model.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

#### Using different Optimizers

Applying Momentum

In [None]:
#Build an SGD optimizer with classical momentum (momentum coefficient 0.9)
sgd_optimizer = tf.keras.optimizers.SGD(momentum=0.9, learning_rate=0.1)

#Recompile the model so that it trains with this optimizer object
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy'],
)

Applying Nesterov Momentum

In [None]:
#Build an SGD optimizer with momentum 0.9 and Nesterov momentum enabled (nesterov=True)
sgd_optimizer = tf.keras.optimizers.SGD(
    nesterov=True,
    momentum=0.9,
    learning_rate=0.1,
)

#Recompile the model so that it trains with this optimizer object
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy'],
)

Adagrad

In [None]:
#Recompile with the Adagrad optimizer, selected by its string name (default settings)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adagrad',
    metrics=['accuracy'],
)

Adadelta

In [None]:
#Recompile with the Adadelta optimizer, selected by its string name (default settings)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

RMSProp

In [None]:
#Recompile with the RMSprop optimizer, selected by its string name (default settings)
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

RMSProp with a different learning rate

In [None]:
#Build an RMSprop optimizer with an explicit learning rate of 0.1
rmsprop_optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)

#Recompile the model so that it trains with this optimizer object
model.compile(
    loss='categorical_crossentropy',
    optimizer=rmsprop_optimizer,
    metrics=['accuracy'],
)

Using Adam

In [None]:
#Recompile with the Adam optimizer, selected by its string name (default settings)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

#### Train the model

In [None]:
#Checkpoint callback: writes 'mnist_v1.keras' only when validation accuracy
#improves on the best value seen so far (mode='max'), logging each save
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='mnist_v1.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
#Train for 10 epochs in mini-batches of 32; the test split is used as validation
#data, and the checkpoint callback runs during training
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[model_checkpoint],
)

In [None]:
#Save the current model to the file 'mnist_sigmoid_v1.h5' in the working directory
model.save('mnist_sigmoid_v1.h5')

In [None]:
#List the working directory to confirm the saved model files exist
!ls -l

Load a saved model

In [None]:
#Reload the model from the file written by model.save() above; this rebinds 'model'
model = tf.keras.models.load_model('mnist_sigmoid_v1.h5')

In [None]:
#Print the layer-by-layer summary of the reloaded model
model.summary()

### Model Prediction

In [None]:
import numpy as np

What input shape model is expecting?

Predicting on first example

In [None]:
#Shape of a single test image (no batch dimension)
X_test[0].shape

In [None]:
#Deliberately pass a single image WITHOUT a batch dimension to show the failure.
#The exception is caught and printed so that "Restart & Run All" continues past
#this cell. The catch is broad on purpose: the exact exception type may differ
#between TensorFlow/Keras versions.
try:
    model.predict(X_test[0])
except Exception as err:
    print(type(err).__name__ + ': ' + str(err))

Why are we getting the error?

In [None]:
#A single example has no leading batch dimension, which is why predict() failed.
#(The test images are stored in X_test; 'testX' is not defined in this notebook.)
X_test[0].shape

Building a Batch

In [None]:
#Add a leading batch axis so the single image becomes a batch of one.
#(The test images are stored in X_test; 'testX' is not defined in this notebook.)
input_data = np.expand_dims(X_test[0], axis=0)
input_data.shape

Model Prediction

In [None]:
#Run the model on the one-image batch and display the raw output
pred = model.predict(input_data)
pred

In [None]:
#Shape of the prediction output for the one-image batch
pred.shape

In [None]:
#Output vector for the first (and only) example in the batch
pred[0]

Predicted Class

In [None]:
#Index of the largest output value, i.e. the predicted class
np.argmax(pred[0])

In [None]:
#Largest output value, i.e. the softmax score of the predicted class
np.max(pred[0])

Actual Class

In [None]:
#Recover the true class index from the one-hot encoded label.
#(The test labels are stored in y_test; 'testY' is not defined in this notebook.)
np.argmax(y_test[0])

Visual confirmation

In [None]:
#Let's display the image as well, to confirm the prediction visually.
#(The test images are stored in X_test; 'testX' is not defined in this notebook.)
import matplotlib.pyplot as plt
plt.imshow(X_test[0],cmap='gray')
plt.show()