In [1]:
# if you keras is not using tensorflow as backend set "KERAS_BACKEND=tensorflow" use this command
from keras.utils import np_utils 
from keras.datasets import mnist 
import seaborn as sns
from keras.initializers import RandomNormal

Using TensorFlow backend.


In [2]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import time
# https://gist.github.com/greydanus/f6eee59eaf1d90fcb3b534a25362cea4
# https://stackoverflow.com/a/14434334
# this function is used to update the plots for each epoch and error
def plt_dynamic(x, vy, ty, ax, colors=['b']):
    ax.plot(x, vy, 'b', label="Validation Loss")
    ax.plot(x, ty, 'r', label="Train Loss")
    plt.legend()
    plt.grid()
    fig.canvas.draw()

In [3]:
# the data, shuffled and split between train and test sets 
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [4]:
print("Number of training examples :", X_train.shape[0], "and each image is of shape (%d, %d)"%(X_train.shape[1], X_train.shape[2]))
print("Number of training examples :", X_test.shape[0], "and each image is of shape (%d, %d)"%(X_test.shape[1], X_test.shape[2]))

Number of training examples : 60000 and each image is of shape (28, 28)
Number of training examples : 10000 and each image is of shape (28, 28)


In [5]:
# if you observe the input shape its 2 dimensional vector
# for each image we have a (28*28) vector
# we will convert the (28*28) vector into single dimensional vector of 1 * 784 

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2]) 
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2]) 

In [6]:
# after converting the input images from 3d to 2d vectors

print("Number of training examples :", X_train.shape[0], "and each image is of shape (%d)"%(X_train.shape[1]))
print("Number of training examples :", X_test.shape[0], "and each image is of shape (%d)"%(X_test.shape[1]))

Number of training examples : 60000 and each image is of shape (784)
Number of training examples : 10000 and each image is of shape (784)


In [7]:
# if we observe the above matrix each cell is having a value between 0-255
# before we move to apply machine learning algorithms lets try to normalize the data
# X => (X - Xmin)/(Xmax-Xmin) = X/255

X_train = X_train/255
X_test = X_test/255

In [8]:
# here we are having a class number for each image
print("Class label of first image :", y_train[0])

# lets convert this into a 10 dimensional vector
# ex: consider an image is 5 convert it into 5 => [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
# this conversion needed for MLPs 

Y_train = np_utils.to_categorical(y_train, 10) 
Y_test = np_utils.to_categorical(y_test, 10)

print("After converting the output into a vector : ",Y_train[0])

Class label of first image : 5
After converting the output into a vector :  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


## 2 LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION WITH ACTIVATION RELU

In [9]:
from keras.models import Sequential 
from keras.layers import Dense, Activation

In [10]:
# some model parameters

output_dim = 10
input_dim = X_train.shape[1]

batch_size = 256 
nb_epoch = 20

In [11]:
from keras.layers.normalization import BatchNormalization
from keras.layers import Dropout


model_relu = Sequential()
model_relu.add(Dense(364, activation='relu', input_shape=(input_dim,)))
model_relu.add(Dropout(0.5))
model_relu.add(BatchNormalization())
model_relu.add(Dense(52, activation='relu'))
model_relu.add(Dropout(0.5))
model_relu.add(BatchNormalization())
model_relu.add(Dense(output_dim, activation='softmax'))

model_relu.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 364)               285740    
_________________________________________________________________
dropout_1 (Dropout)          (None, 364)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 364)               1456      
_________________________________________________________________
dense_2 (Dense)              (None, 52)                18980     
_________________________________________________________________
dropout_2 (Dropout)          (None, 52)                0         
_________________________________________________________________
batch_normalization_2

In [12]:
model_relu.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [13]:
score = model_relu.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.06967618248889922
Test accuracy: 0.9804


<IPython.core.display.Javascript object>

## 3  LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION WITH ACTIVATION RELU

In [14]:
model_relu_2 = Sequential()
model_relu_2.add(Dense(484, activation='relu', input_shape=(input_dim,)))
model_relu_2.add(Dropout(0.5))
model_relu_2.add(BatchNormalization())
model_relu_2.add(Dense(212, activation='relu'))
model_relu_2.add(Dropout(0.5))
model_relu_2.add(BatchNormalization())
model_relu_2.add(Dense(64, activation='relu'))
model_relu_2.add(Dropout(0.5))
model_relu_2.add(BatchNormalization())
model_relu_2.add(Dense(output_dim, activation='softmax'))

model_relu_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 484)               379940    
_________________________________________________________________
dropout_3 (Dropout)          (None, 484)               0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 484)               1936      
_________________________________________________________________
dense_5 (Dense)              (None, 212)               102820    
_________________________________________________________________
dropout_4 (Dropout)          (None, 212)               0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 212)               848       
_________________________________________________________________
dense_6 (Dense)              (None, 64)                13632     
__________

In [15]:
model_relu_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu_2.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
score = model_relu_2.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.06834373725345941
Test accuracy: 0.9819


<IPython.core.display.Javascript object>

## 5 LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION WITH ACTIVATION RELU

In [17]:
model_relu_3 = Sequential()
model_relu_3.add(Dense(564, activation='relu', input_shape=(input_dim,)))
model_relu_3.add(Dropout(0.5))
model_relu_3.add(BatchNormalization())
model_relu_3.add(Dense(412, activation='relu'))
model_relu_3.add(Dropout(0.5))
model_relu_3.add(BatchNormalization())
model_relu_3.add(Dense(256, activation='relu'))
model_relu_3.add(Dropout(0.5))
model_relu_3.add(BatchNormalization())
model_relu_3.add(Dense(128, activation='relu'))
model_relu_3.add(Dropout(0.5))
model_relu_3.add(BatchNormalization())
model_relu_3.add(Dense(64, activation='relu'))
model_relu_3.add(Dropout(0.5))
model_relu_3.add(BatchNormalization())
model_relu_3.add(Dense(output_dim, activation='softmax'))

model_relu_3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 564)               442740    
_________________________________________________________________
dropout_6 (Dropout)          (None, 564)               0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 564)               2256      
_________________________________________________________________
dense_9 (Dense)              (None, 412)               232780    
_________________________________________________________________
dropout_7 (Dropout)          (None, 412)               0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 412)               1648      
_________________________________________________________________
dense_10 (Dense)             (None, 256)               105728    
__________

In [18]:
model_relu_3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu_3.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
score = model_relu_3.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.07426590730494353
Test accuracy: 0.9811


<IPython.core.display.Javascript object>

## 2 LAYER ARCHITECTURE WITHOUT  DROPOUT AND BATCH NORMALIZATION

In [20]:
from keras.layers.normalization import BatchNormalization
from keras.layers import Dropout


model_relu_4 = Sequential()
model_relu_4.add(Dense(484, activation='relu', input_shape=(input_dim,)))
model_relu_4.add(Dense(64, activation='relu'))
model_relu_4.add(Dense(output_dim, activation='softmax'))

model_relu_4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 484)               379940    
_________________________________________________________________
dense_11 (Dense)             (None, 64)                31040     
_________________________________________________________________
dense_12 (Dense)             (None, 10)                650       
Total params: 411,630
Trainable params: 411,630
Non-trainable params: 0
_________________________________________________________________


In [21]:
model_relu_4.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu_4.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
score = model_relu_4.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.09367929859978971
Test accuracy: 0.9787


<IPython.core.display.Javascript object>

## 3 LAYER ARCHITECTURE WITHOUT DROPOUT AND BATCH NORMALIZATION

In [23]:
model_relu_5 = Sequential()
model_relu_5.add(Dense(484, activation='relu', input_shape=(input_dim,)))
model_relu_5.add(Dense(212, activation='relu'))
model_relu_5.add(BatchNormalization())
model_relu_5.add(Dense(64, activation='relu'))
model_relu_5.add(BatchNormalization())
model_relu_5.add(Dense(output_dim, activation='softmax'))

model_relu_5.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 484)               379940    
_________________________________________________________________
dense_14 (Dense)             (None, 212)               102820    
_________________________________________________________________
batch_normalization_6 (Batch (None, 212)               848       
_________________________________________________________________
dense_15 (Dense)             (None, 64)                13632     
_________________________________________________________________
batch_normalization_7 (Batch (None, 64)                256       
_________________________________________________________________
dense_16 (Dense)             (None, 10)                650       
Total params: 498,146
Trainable params: 497,594
Non-trainable params: 552
_________________________________________________________________


In [24]:
model_relu_5.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu_5.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
score = model_relu_5.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.08906617645227526
Test accuracy: 0.9775


<IPython.core.display.Javascript object>

## 5 LAYER ARCHITECTURE WITHOUT DROPOUT AND BATCH NORMALIZATION

In [26]:
model_relu_6 = Sequential()
model_relu_6.add(Dense(564, activation='relu', input_shape=(input_dim,)))
model_relu_6.add(Dense(412, activation='relu'))
model_relu_6.add(Dense(256, activation='relu'))
model_relu_6.add(Dense(128, activation='relu'))
model_relu_6.add(Dense(64, activation='relu'))
model_relu_6.add(Dense(output_dim, activation='softmax'))

model_relu_6.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 564)               442740    
_________________________________________________________________
dense_18 (Dense)             (None, 412)               232780    
_________________________________________________________________
dense_19 (Dense)             (None, 256)               105728    
_________________________________________________________________
dense_20 (Dense)             (None, 128)               32896     
_________________________________________________________________
dense_21 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_22 (Dense)             (None, 10)                650       
Total params: 823,050
Trainable params: 823,050
Non-trainable params: 0
_________________________________________________________________


In [27]:
model_relu_6.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu_6.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
score = model_relu_6.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.09870356189140862
Test accuracy: 0.9804


<IPython.core.display.Javascript object>

## 2 LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION AND ACTIVATION SIGMOID

In [45]:
from keras.layers.normalization import BatchNormalization
from keras.layers import Dropout


model_sigmoid = Sequential()
model_sigmoid.add(Dense(364, activation='sigmoid', input_shape=(input_dim,)))
model_sigmoid.add(Dropout(0.5))
model_sigmoid.add(BatchNormalization())
model_sigmoid.add(Dense(52, activation='sigmoid'))
model_sigmoid.add(Dropout(0.5))
model_sigmoid.add(BatchNormalization())
model_sigmoid.add(Dense(output_dim, activation='softmax'))

model_sigmoid.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_41 (Dense)             (None, 364)               285740    
_________________________________________________________________
dropout_19 (Dropout)         (None, 364)               0         
_________________________________________________________________
batch_normalization_20 (Batc (None, 364)               1456      
_________________________________________________________________
dense_42 (Dense)             (None, 52)                18980     
_________________________________________________________________
dropout_20 (Dropout)         (None, 52)                0         
_________________________________________________________________
batch_normalization_21 (Batc (None, 52)                208       
_________________________________________________________________
dense_43 (Dense)             (None, 10)                530       
Total para

In [46]:
model_sigmoid.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_sigmoid.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [47]:
score = model_sigmoid.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.07422105259281817
Test accuracy: 0.977


<IPython.core.display.Javascript object>

## 3 LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION AND ACTIVATION SIGMOID

In [48]:
model_sigmoid_2 = Sequential()
model_sigmoid_2.add(Dense(484, activation='sigmoid', input_shape=(input_dim,)))
model_sigmoid_2.add(Dropout(0.5))
model_sigmoid_2.add(BatchNormalization())
model_sigmoid_2.add(Dense(212, activation='sigmoid'))
model_sigmoid_2.add(Dropout(0.5))
model_sigmoid_2.add(BatchNormalization())
model_sigmoid_2.add(Dense(64, activation='sigmoid'))
model_sigmoid_2.add(Dropout(0.5))
model_sigmoid_2.add(BatchNormalization())
model_sigmoid_2.add(Dense(output_dim, activation='softmax'))

model_sigmoid_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_44 (Dense)             (None, 484)               379940    
_________________________________________________________________
dropout_21 (Dropout)         (None, 484)               0         
_________________________________________________________________
batch_normalization_22 (Batc (None, 484)               1936      
_________________________________________________________________
dense_45 (Dense)             (None, 212)               102820    
_________________________________________________________________
dropout_22 (Dropout)         (None, 212)               0         
_________________________________________________________________
batch_normalization_23 (Batc (None, 212)               848       
_________________________________________________________________
dense_46 (Dense)             (None, 64)                13632     
__________

In [49]:
model_sigmoid_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_sigmoid_2.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [50]:
score = model_sigmoid_2.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.06643162165306275
Test accuracy: 0.9817


<IPython.core.display.Javascript object>

## 5 LAYER ARCHITECTURE WITH DROPOUT(0.5) AND BATCH NORMALIZATION AND ACTIVATION SIGMOID

In [51]:
model_sigmoid_3 = Sequential()
model_sigmoid_3.add(Dense(564, activation='sigmoid', input_shape=(input_dim,)))
model_sigmoid_3.add(Dropout(0.5))
model_sigmoid_3.add(BatchNormalization())
model_sigmoid_3.add(Dense(412, activation='sigmoid'))
model_sigmoid_3.add(Dropout(0.5))
model_sigmoid_3.add(BatchNormalization())
model_sigmoid_3.add(Dense(256, activation='sigmoid'))
model_sigmoid_3.add(Dropout(0.5))
model_sigmoid_3.add(BatchNormalization())
model_sigmoid_3.add(Dense(128, activation='sigmoid'))
model_sigmoid_3.add(Dropout(0.5))
model_sigmoid_3.add(BatchNormalization())
model_sigmoid_3.add(Dense(64, activation='sigmoid'))
model_sigmoid_3.add(Dropout(0.5))
model_sigmoid_3.add(BatchNormalization())
model_sigmoid_3.add(Dense(output_dim, activation='softmax'))

model_sigmoid_3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_48 (Dense)             (None, 564)               442740    
_________________________________________________________________
dropout_24 (Dropout)         (None, 564)               0         
_________________________________________________________________
batch_normalization_25 (Batc (None, 564)               2256      
_________________________________________________________________
dense_49 (Dense)             (None, 412)               232780    
_________________________________________________________________
dropout_25 (Dropout)         (None, 412)               0         
_________________________________________________________________
batch_normalization_26 (Batc (None, 412)               1648      
_________________________________________________________________
dense_50 (Dense)             (None, 256)               105728    
__________

In [52]:
model_sigmoid_3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_sigmoid_3.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [53]:
score = model_sigmoid_3.evaluate(X_test, Y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,nb_epoch+1))

# print(history.history.keys())
# dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# we will get val_loss and val_acc only when you pass the paramter validation_data
# val_loss : validation loss
# val_acc : validation accuracy

# loss : training loss
# acc : train accuracy
# for each key in histrory.histrory we will have a list of length equal to number of epochs


vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.08784358917449135
Test accuracy: 0.9803


<IPython.core.display.Javascript object>

In [58]:
from prettytable import PrettyTable

pretty = PrettyTable()

pretty.field_names = ['S.No','MODEL','ACTIVATION','train_accuracy','test_accuracy','train_loss','test_loss']

pretty.add_row(['1','2-LAYER MLP WITH DROPOUT AND BN','RELU','0.9686','0.9804','0.1803','0.0697'])
pretty.add_row(['2','3-LAYER MLP WITH DROPOUT AND BN','RELU','0.9736','0.9819','0.0958','0.0683'])
pretty.add_row(['3','5-LAYER MLP WITH DROPOUT AND BN','RELU','0.9704','0.9811','0.1242','0.0743'])
pretty.add_row(['1','2-LAYER MLP WITHOUT DROPOUT AND BN','NO','0.9986','0.9787','0.0044','0.0937'])
pretty.add_row(['2','3-LAYER MLP WITHOUT DROPOUT AND BN','NO','0.9989','0.9775','0.0036','0.0697'])
pretty.add_row(['3','5-LAYER MLP WITHOUT DROPOUT AND BN','NO','0.9975','0.9804','0.0083','0.987'])
pretty.add_row(['1','2-LAYER MLP WITH DROPOUT AND BN','SIGMOID','0.9675','0.9770','0.1136','0.0742'])
pretty.add_row(['2','3-LAYER MLP WITH DROPOUT AND BN','SIGMOID','0.9730','0.9812','0.0986','0.0664'])
pretty.add_row(['3','5-LAYER MLP WITH DROPOUT AND BN','SIGMOID','0.9711','0.9803','0.1180','0.0878'])



print(pretty)

+------+------------------------------------+------------+----------------+---------------+------------+-----------+
| S.No |               MODEL                | ACTIVATION | train_accuracy | test_accuracy | train_loss | test_loss |
+------+------------------------------------+------------+----------------+---------------+------------+-----------+
|  1   |  2-LAYER MLP WITH DROPOUT AND BN   |    RELU    |     0.9686     |     0.9804    |   0.1803   |   0.0697  |
|  2   |  3-LAYER MLP WITH DROPOUT AND BN   |    RELU    |     0.9736     |     0.9819    |   0.0958   |   0.0683  |
|  3   |  5-LAYER MLP WITH DROPOUT AND BN   |    RELU    |     0.9704     |     0.9811    |   0.1242   |   0.0743  |
|  1   | 2-LAYER MLP WITHOUT DROPOUT AND BN |     NO     |     0.9986     |     0.9787    |   0.0044   |   0.0937  |
|  2   | 3-LAYER MLP WITHOUT DROPOUT AND BN |     NO     |     0.9989     |     0.9775    |   0.0036   |   0.0697  |
|  3   | 5-LAYER MLP WITHOUT DROPOUT AND BN |     NO     |     0

### PROCEDURE :
* STEP 1 => Constructing 2,3,5 layer architecture with dropout rate=0.5 and batch normalization and activation = relu
* STEP 2 => fit and validate each architecture
* STEP 3 => Constructing 2,3,5 layer architecture without dropout ,batch normalization and activation = relu
* STEP 4 => fit and validate each architecture
* STEP 5 => Constructing 2,3,5 layer architecture with dropout rate = 0.5 and batch normalization and activation = sigmoid
* STEP 6 => fit and validate each architecture

### OBSERVATION
* Models without dropout and batch normalization works badly.