<h2 style="color:blue" align="center">Deep Neural Network (DNN) on MNIST</h2>

#### Import the necessary libraries

In [110]:
import tensorflow as tf 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import time, math 

from tensorflow import keras 
from keras.models import Sequential
from keras.layers import Dense 
from keras.callbacks import Callback, CSVLogger 

#### Load and split the dataset into training and testing set

MNIST - Handwritten digits recognition

In [111]:
#(X_train,y_train),(X_test,y_test) = keras.datasets.mnist.load_data()

MNIST - Fashion dataset

In [112]:
(X_train,y_train),(X_test,y_test) = keras.datasets.fashion_mnist.load_data()

#### Flattening

In [113]:
X_train_flattened = X_train.reshape(len(X_train), 28 * 28)
X_test_flattened = X_test.reshape(len(X_test),  28 * 28) 

#### Normalize the input dataset

In [114]:
X_train_normalized = X_train_flattened / 255
X_test_normalized = X_test_flattened / 255

#### Define DNN model with two layers, optimizer, metrics, and loss function

In [115]:
np.random.seed(3)

def get_model(): 
    model = Sequential([
        # input layer 784 neurons to first hidden layer with 64 neurons
        Dense(64, input_shape = (784,), activation='relu'), 
        # first hidden layer to second hidden layer
        Dense(64, activation='relu'),  
        # Output layer with 10 neurons
        Dense(10, activation='softmax')
    ])
    
    # General SGD
    #opti = keras.optimizers.SGD(learning_rate=0.01)
    
    # SGD with momentum
    #opti = keras.optimizers.SGD(learning_rate=0.01, momentum=0.6)
    
    # SGD with Nesterov momentum 
    #opti = keras.optimizers.SGD(learning_rate=0.01, momentum=0.6, nesterov=True)
    
    # RMSprop 
    #opti = keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.6)
    
    # Adam
    opti = keras.optimizers.Adam(learning_rate=0.001) 
    
    # Adamax
    #opti = keras.optimizers.Adamax(learning_rate=0.001) 
    
    model.compile(
        optimizer = opti,
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy']
    )
    
    return model

#### Custom callbacks

For generic optimizer model

In [116]:
class CustomCallbackGeneric(Callback):  
    # Training stop criteria
    stop_at = 0.99
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('accuracy')> self.stop_at):  
            self.model.stop_training = True

For HM based optimizer model

In [117]:
class CustomCallbackHM(Callback):  
    # Stop the algorithm when the following accuracy reached 
    stop_at = 0.995  
    
    initial_weights = 0
    previous_weights = 0
    call_hm = 0 
     
    def on_train_begin(self, logs=None):
        self.initial_weights = model_hm.get_weights() 
        self.initial_weights = np.array(self.initial_weights,dtype=object)
        self.previous_weights = self.initial_weights
        # Harmonic mean based weights calculation
        self.call_hm = np.vectorize(self.apply_hm)  

    def on_epoch_end(self, epoch, logs={}):
        # Training stop criteria
        if(logs.get('accuracy')> self.stop_at):  
            self.model.stop_training = True
        
        counter = 0
        num_layers = len(model_hm.layers)  
        current_weights = model_hm.get_weights()
        current_weights = np.array(current_weights,dtype=object)        

        for i in range(num_layers):  
            # Harmonic mean based weights calculation
            current_weights[counter] = self.call_hm(self.previous_weights[counter], current_weights[counter])
            counter = counter + 2
            
        # Updating the model with new weights
        updated = current_weights.tolist()   
        model_hm.set_weights(updated)
        self.previous_weights = current_weights
        
    def apply_hm(self, v1, v2):     
        if v1==0 or v2==0:
            return v2
        elif v1>0 and v2>0:
            hm = 2*v1*v2/(v1+v2)
            min1 = min(v1,v2)
            diff = abs(hm-min1)
            if v2 > v1:
                return v2 + diff
            else:
                return v2 - diff
        elif v1<0 and v2<0:
            hm = 2*v1*v2/(v1+v2)
            max1 = max(v1,v2)
            diff = abs(hm-max1)
            if v2 > v1:
                return v2 + diff
            else:
                return v2 - diff
        else:
            return v2  

To record loss and accuracy in CSV file

In [118]:
logger_generic_model = CSVLogger('Generic_model_MNIST.csv', append=False, separator=',')
logger_hm_model = CSVLogger('HM_model_MNIST.csv', append=False, separator=',')

#### Training

Generic optimizer model

In [119]:
model_wihtout_hm = get_model() 
st = time.time()
model_wihtout_hm.fit(X_train_normalized, y_train, epochs = 135, verbose=1, callbacks=[CustomCallbackGeneric(), logger_generic_model], batch_size = X_train.shape[0]) 
et = time.time()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/135
Epoch 2/135
Epoch 3/135
Epoch 4/135
Epoch 5/135
Epoch 6/135
Epoch 7/135
Epoch 8/135
Epoch 9/135
Epoch 10/135
Epoch 11/135
Epoch 12/135
Epoch 13/135
Epoch 14/135
Epoch 15/135
Epoch 16/135
Epoch 17/135
Epoch 18/135
Epoch 19/135
Epoch 20/135
Epoch 21/135
Epoch 22/135
Epoch 23/135
Epoch 24/135
Epoch 25/135
Epoch 26/135
Epoch 27/135
Epoch 28/135
Epoch 29/135
Epoch 30/135
Epoch 31/135
Epoch 32/135
Epoch 33/135
Epoch 34/135
Epoch 35/135
Epoch 36/135
Epoch 37/135
Epoch 38/135
Epoch 39/135
Epoch 40/135
Epoch 41/135
Epoch 42/135
Epoch 43/135
Epoch 44/135
Epoch 45/135
Epoch 46/135
Epoch 47/135
Epoch 48/135
Epoch 49/135
Epoch 50/135
Epoch 51/135
Epoch 52/135
Epoch 53/135
Epoch 54/135
Epoch 55/135
Epoch 56/135
Epoch 57/135
Epoch 58/135
Epoch 59/135
Epoch 60/135
Epoch 61/135
Epoch 62/135
Epoch 63/135
Epoch 64/135
Epoch 65/135
Epoch 66/135
Epoch 67/135
Epoch 68/135
Epoch 69/135
Epoch 70/135
Epoch 71/135
Epoch 72/135
Epoch 73/135
Epoch 74/135
Epoch 75/135
Epoch 76/135
Epoch 77/135
Epoch 78

Epoch 84/135
Epoch 85/135
Epoch 86/135
Epoch 87/135
Epoch 88/135
Epoch 89/135
Epoch 90/135
Epoch 91/135
Epoch 92/135
Epoch 93/135
Epoch 94/135
Epoch 95/135
Epoch 96/135
Epoch 97/135
Epoch 98/135
Epoch 99/135
Epoch 100/135
Epoch 101/135
Epoch 102/135
Epoch 103/135
Epoch 104/135
Epoch 105/135
Epoch 106/135
Epoch 107/135
Epoch 108/135
Epoch 109/135
Epoch 110/135
Epoch 111/135
Epoch 112/135
Epoch 113/135
Epoch 114/135
Epoch 115/135
Epoch 116/135
Epoch 117/135
Epoch 118/135
Epoch 119/135
Epoch 120/135
Epoch 121/135
Epoch 122/135
Epoch 123/135
Epoch 124/135
Epoch 125/135
Epoch 126/135
Epoch 127/135
Epoch 128/135
Epoch 129/135
Epoch 130/135
Epoch 131/135
Epoch 132/135
Epoch 133/135
Epoch 134/135
Epoch 135/135
Execution time: 21.914703130722046 seconds


HM based optimizer model

In [121]:
model_hm = get_model() 
st = time.time()
model_hm.fit(X_train_normalized, y_train, epochs = 94, verbose=1, callbacks=[CustomCallbackHM(),logger_hm_model], batch_size = X_train.shape[0]) 
et = time.time()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/94
Epoch 2/94
Epoch 3/94
Epoch 4/94
Epoch 5/94
Epoch 6/94
Epoch 7/94
Epoch 8/94
Epoch 9/94
Epoch 10/94
Epoch 11/94
Epoch 12/94
Epoch 13/94
Epoch 14/94
Epoch 15/94
Epoch 16/94
Epoch 17/94
Epoch 18/94
Epoch 19/94
Epoch 20/94
Epoch 21/94
Epoch 22/94
Epoch 23/94
Epoch 24/94
Epoch 25/94
Epoch 26/94
Epoch 27/94
Epoch 28/94
Epoch 29/94
Epoch 30/94
Epoch 31/94
Epoch 32/94
Epoch 33/94
Epoch 34/94
Epoch 35/94
Epoch 36/94
Epoch 37/94
Epoch 38/94
Epoch 39/94
Epoch 40/94
Epoch 41/94
Epoch 42/94
Epoch 43/94
Epoch 44/94
Epoch 45/94
Epoch 46/94
Epoch 47/94
Epoch 48/94
Epoch 49/94
Epoch 50/94
Epoch 51/94
Epoch 52/94
Epoch 53/94
Epoch 54/94
Epoch 55/94
Epoch 56/94
Epoch 57/94
Epoch 58/94
Epoch 59/94
Epoch 60/94
Epoch 61/94
Epoch 62/94
Epoch 63/94
Epoch 64/94
Epoch 65/94
Epoch 66/94
Epoch 67/94
Epoch 68/94
Epoch 69/94
Epoch 70/94
Epoch 71/94
Epoch 72/94
Epoch 73/94
Epoch 74/94
Epoch 75/94
Epoch 76/94
Epoch 77/94
Epoch 78/94
Epoch 79/94
Epoch 80/94
Epoch 81/94
Epoch 82/94
Epoch 83/94
Epoch 84/94
E

Model summary

In [122]:
model_hm.summary()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_69 (Dense)            (None, 64)                50240     
                                                                 
 dense_70 (Dense)            (None, 64)                4160      
                                                                 
 dense_71 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


###### Testing the model

Generic opimizer model

In [123]:
model_wihtout_hm.evaluate(X_test_normalized, y_test)



[0.4487186670303345, 0.8421000242233276]

HM based optimizer model

In [124]:
model_hm.evaluate(X_test_normalized, y_test)



[0.4433857798576355, 0.8449000120162964]