<a href="https://colab.research.google.com/github/kishore145/AI-ML-Foundations/blob/master/Neural%20Networks/03_Deep_Net_in_tensorflow_mnist_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [0]:
# Load the MNIST data set as two (images, labels) pairs.
# The second pair is used as the validation set throughout this notebook,
# hence the *_valid names.
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()

In [0]:
# Preprocess data - same steps as followed in the shallow network.
# NOTE: this cell rebinds X_* / y_* under the same names, so run it exactly once
# after the load cell; re-running it would divide the pixels by 255 again and
# one-hot encode labels that are already one-hot.

# Flatten every image into a single row vector. The sample count is taken from
# the array itself and -1 lets numpy infer the feature count, so no dataset
# sizes are hard-coded.
X_train = X_train.reshape(len(X_train), -1).astype('float32')
X_valid = X_valid.reshape(len(X_valid), -1).astype('float32')

# Scale pixel intensities into [0, 1] (MNIST pixels are 8-bit, i.e. 0-255)
X_train /= 255
X_valid /= 255

# One-hot encode the integer class labels
n_classes = 10
y_train = to_categorical(y_train, n_classes)
y_valid = to_categorical(y_valid, n_classes)

In [0]:
# Model design - general rule of thumb for a deep neural network:
# 1 i/p layer, 1 o/p layer and 3 or more hidden layers

# Create a sequential model
model = Sequential()

# Batch normalization re-normalizes the values passed from one hidden layer to the next.
# It is similar to the input scaling performed on input vector X.
# Key difference: the result is not pinned to mean 0 & std dev 1 like the i/p vectors;
# each layer has 2 learnable parameters, beta and gamma, which shift and rescale it.
# NOTE: in this model every BatchNormalization layer sits after a Dense layer that
# already applies relu, so it normalizes the activations a, not the
# pre-activation value z = w.x + b.

# Add first hidden layer with batch normalization
model.add(Dense(64, activation = 'relu', input_shape = (784,)))
model.add(BatchNormalization())

# Add second hidden layer with batch normalization
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())

# Add third hidden layer with batch normalization and 20% dropout
# Dropout helps prevent overfitting by randomly zeroing a fraction of the
# activations on each training step
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(.2))

# Add o/p softmax layer with one neuron per class (n_classes is set in the
# preprocessing cell)
model.add(Dense(n_classes, activation = 'softmax'))



In [0]:
# Configure training: Nadam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded) and accuracy reported as a metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy'],
)

In [6]:
# Review model summary: per-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
batch_normalization (BatchNo (None, 64)                256       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256       
_________________________________________________________________
dropout (Dropout)            (None, 64)                0

In [7]:
# Fit the model.
# The returned History object is kept so the per-epoch loss/accuracy values stay
# available in history.history, and so its bare repr is not echoed as the
# cell's output.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=20,
    verbose=1,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f282b6ebc50>

In [8]:
# Evaluate model performance on the validation split.
# With the compile settings used in this notebook (one loss plus the 'accuracy'
# metric) the returned list reads [loss, accuracy].
model.evaluate(X_valid, y_valid)



[0.08925644308328629, 0.9776999950408936]

In [9]:
# Performing a sample prediction
# predict expects a batch of inputs, so the single sample is reshaped to one
# row: shape (1, n_features)
X_valid_0 = X_valid[0].reshape(1, -1)
y_pred = model.predict(X_valid_0)

# Print results (y_pred is reused instead of running the model a second time)
print(f'Actual y_valid[0]                   : {y_valid[0]}')
print(f'Taking raw np.argmax (Index)        : {np.argmax(y_pred)}')
print(f'Taking np.argmax(predict, axis = -1): {np.argmax(y_pred, axis=-1)}')
# .tolist() converts to plain Python floats so the list prints as 0.0 / 1.0
print(f'Raw prediction using predict method : {np.round(y_pred[0]).tolist()}')
# model.predict_classes is deprecated; np.argmax(..., axis=-1) above replaces it

Actual y_valid[0]                   : [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Taking raw np.argmax (Index)        : 7
Taking np.argmax(predict, axis = -1): [7]
Raw prediction using predict method : [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]


In [0]:
# This concludes the basic deep neural network architecture for a classification problem, based on Jon Krohn's lectures.