<h2 style="color:blue" align="center">Deep Neural Network (DNN) on MNIST</h2>

#### Import the necessary libraries

In [103]:
import tensorflow as tf 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import time, math 

from tensorflow import keras 
from keras.models import Sequential
from keras.layers import Dense 
from keras.callbacks import Callback, CSVLogger 

#### Load and split the dataset into training and testing set

MNIST - Handwritten digits recognition

In [104]:
# Load the MNIST handwritten-digit data; load_data() returns a
# (train, test) pair, each of which is an (images, labels) pair.
train_split, test_split = keras.datasets.mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Fashion-MNIST dataset (optional alternative: uncomment the line below to use it instead of the digits data)

In [105]:
# Alternative dataset: uncomment the line below to load Fashion-MNIST into the
# same variables (X_train, y_train, X_test, y_test) instead of the digits data.
#(X_train,y_train),(X_test,y_test) = keras.datasets.fashion_mnist.load_data()

#### Flattening

In [106]:
# Collapse every 28x28 image into one row of 784 values so it can feed
# the first Dense layer.
N_PIXELS = 28 * 28
X_train_flattened = X_train.reshape((X_train.shape[0], N_PIXELS))
X_test_flattened = X_test.reshape((X_test.shape[0], N_PIXELS))

#### Normalize the input dataset

In [107]:
# Scale the pixel intensities by 1/255 (assumes raw values lie in 0..255,
# which maps them into the range 0..1).
PIXEL_MAX = 255
X_train_normalized, X_test_normalized = (
    split / PIXEL_MAX for split in (X_train_flattened, X_test_flattened)
)

#### Define the DNN model (two hidden layers plus an output layer), optimizer, metrics, and loss function

In [108]:
import random as python_random

# Fixed seeds for the three random number sources used in this notebook
# (Python's `random`, NumPy, TensorFlow) so that runs are repeatable.
PYTHON_SEED, NUMPY_SEED, TF_SEED = 3, 7, 13

python_random.seed(PYTHON_SEED)
np.random.seed(NUMPY_SEED)
tf.random.set_seed(TF_SEED)

def get_model(optimizer=None):
    """Build and compile the 784-64-64-10 fully connected classifier.

    Architecture: two hidden Dense layers of 64 ReLU units followed by a
    10-unit softmax output layer. The model is compiled with the
    'sparse_categorical_crossentropy' loss and the 'accuracy' metric.

    Parameters
    ----------
    optimizer : optional
        Optimizer passed to ``model.compile``. When omitted, Adam with
        ``learning_rate=0.001`` is used (the original configuration).
        Pass a freshly created optimizer for every call rather than sharing
        one instance between models. Alternatives explored in this
        experiment:

        * General SGD:
          ``keras.optimizers.SGD(learning_rate=0.01)``
        * SGD with momentum:
          ``keras.optimizers.SGD(learning_rate=0.01, momentum=0.6)``
        * SGD with Nesterov momentum:
          ``keras.optimizers.SGD(learning_rate=0.01, momentum=0.6, nesterov=True)``
        * RMSprop:
          ``keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.6)``
        * Adamax:
          ``keras.optimizers.Adamax(learning_rate=0.001)``

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential([
        # input layer 784 neurons to first hidden layer with 64 neurons
        Dense(64, input_shape = (784,), activation='relu'),
        # first hidden layer to second hidden layer
        Dense(64, activation='relu'),
        # Output layer with 10 neurons
        Dense(10, activation='softmax')
    ])

    if optimizer is None:
        # Default used for the reported runs: Adam
        optimizer = keras.optimizers.Adam(learning_rate=0.001)

    model.compile(
        optimizer = optimizer,
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy']
    )

    return model

#### Custom callbacks

For generic optimizer model

In [109]:
class CustomCallbackGeneric(Callback):
    """Stop training as soon as the training accuracy exceeds ``stop_at``.

    The check runs at the end of every epoch and reads the 'accuracy' entry
    of the epoch logs.
    """
    # Training stop criteria
    stop_at = 0.99

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the logged 'accuracy' is above ``stop_at``.

        Parameters
        ----------
        epoch : index of the epoch that just finished (unused).
        logs : dict of metric values for the epoch, or None.
        """
        # logs may be None, and 'accuracy' is missing if the model was not
        # compiled with that metric; comparing None with a float would raise.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > self.stop_at:
            self.model.stop_training = True

For the HM (harmonic mean) based optimizer model

In [110]:
class CustomCallbackHM(Callback):
    """Early stopping plus a harmonic-mean (HM) adjustment of the weights.

    At the end of every epoch the callback:

    1. requests a stop if the logged training 'accuracy' exceeds ``stop_at``;
    2. adjusts every weight array at an even index of
       ``self.model.get_weights()`` using ``apply_hm`` with the values stored
       at the end of the previous epoch (the initial weights for the first
       epoch), and writes the result back into the model.

    The even-index layout assumes each layer contributes exactly a
    [kernel, bias] pair, as the Dense layers of this notebook do, so only the
    kernels are adjusted.

    The adjustment is also applied on the epoch that triggers the stop, so the
    final weights are the adjusted ones, not the ones that produced the
    logged accuracy.

    The callback operates on ``self.model`` (the model whose ``fit`` it was
    passed to), not on a notebook-level variable.
    """
    # Stop the algorithm when the following accuracy reached
    stop_at = 0.995

    # Placeholders; the real values are assigned in on_train_begin.
    initial_weights = 0
    previous_weights = 0
    call_hm = 0

    def on_train_begin(self, logs=None):
        """Snapshot the starting weights and bind the HM function."""
        self.initial_weights = self.model.get_weights()
        self.previous_weights = self.initial_weights
        # Harmonic mean based weights calculation (operates on whole arrays)
        self.call_hm = self.apply_hm

    def on_epoch_end(self, epoch, logs=None):
        """Apply the stop criterion, then the HM adjustment.

        Parameters
        ----------
        epoch : index of the epoch that just finished (unused).
        logs : dict of metric values for the epoch, or None.
        """
        # Training stop criteria. logs may be None and 'accuracy' may be
        # missing, in which case no stop is requested.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > self.stop_at:
            self.model.stop_training = True

        num_layers = len(self.model.layers)
        current_weights = self.model.get_weights()

        for i in range(num_layers):
            # Harmonic mean based weights calculation on index 2*i
            # (the kernel of layer i under the [kernel, bias] layout).
            current_weights[i*2] = self.call_hm(
                self.previous_weights[i*2], current_weights[i*2]
            )

        # Updating the model with new weights
        self.model.set_weights(current_weights)
        self.previous_weights = current_weights

    def apply_hm(self, v1, v2):
        """Return ``v2`` adjusted by the harmonic-mean rule, element-wise.

        For every pair of elements (a from ``v1``, b from ``v2``):

        * if a and b are both positive or both negative:
          ``hm = 2*a*b / (a + b)``, ``ref`` is whichever of a, b is closer to
          zero (min for positives, max for negatives),
          ``diff = |hm - ref|``, and the result is ``b + diff`` when
          ``b > a``, otherwise ``b - diff``;
        * in every other case (either value is zero, the signs differ, or a
          value is NaN) the result is b unchanged.

        Parameters
        ----------
        v1 : scalar or array of previous values.
        v2 : scalar or array of current values (broadcastable with ``v1``).

        Returns
        -------
        NumPy scalar for scalar inputs, otherwise an array. The arithmetic is
        done in float64 and the result is cast to the common floating dtype of
        the inputs (float64 if the inputs are not floating point).
        """
        prev_in = np.asarray(v1)
        curr_in = np.asarray(v2)
        out_dtype = np.result_type(prev_in, curr_in)
        if not np.issubdtype(out_dtype, np.floating):
            out_dtype = np.float64

        prev = prev_in.astype(np.float64)
        curr = curr_in.astype(np.float64)

        same_sign = ((prev > 0) & (curr > 0)) | ((prev < 0) & (curr < 0))
        # Where the rule does not apply, use a dummy denominator so no
        # division by zero happens; those results are discarded below.
        denom = np.where(same_sign, prev + curr, 1.0)
        hm = 2 * prev * curr / denom
        closest_to_zero = np.where(
            prev > 0, np.minimum(prev, curr), np.maximum(prev, curr)
        )
        diff = np.abs(hm - closest_to_zero)
        adjusted = np.where(curr > prev, curr + diff, curr - diff)

        result = np.where(same_sign, adjusted, curr).astype(out_dtype)
        # Indexing with () turns a 0-d array into a scalar and leaves
        # higher-dimensional arrays as arrays.
        return result[()]

Record the per-epoch loss and accuracy of each model to a CSV file

In [111]:
# One CSV log per model. Both loggers share the same options; per the Keras
# CSVLogger documentation, append=False overwrites an existing file.
csv_options = {'append': False, 'separator': ','}
logger_generic_model = CSVLogger('Generic_model_MNIST.csv', **csv_options)
logger_hm_model = CSVLogger('HM_model_MNIST.csv', **csv_options)

#### Training

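Create a reference model and copy its initial weights into both the model trained without HM and the model trained with HM, so that the two start from identical weights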

In [112]:
# Reference model: its freshly initialised weights are the common starting
# point for both experiments.
model = get_model()
weights = model.get_weights()

# Build the two models under comparison and give each the reference weights.
model_wihtout_hm, model_hm = get_model(), get_model()
for twin in (model_wihtout_hm, model_hm):
    twin.set_weights(weights)

Generic optimizer model

In [116]:
# Train the baseline model (no HM adjustment). batch_size equals the number of
# training samples, so every epoch processes the whole training set as a
# single batch.
# time.perf_counter() is the clock intended for measuring elapsed time;
# time.time() is wall-clock time and can jump if the system clock is changed.
st = time.perf_counter()
model_wihtout_hm.fit(
    X_train_normalized,
    y_train,
    epochs=200,
    verbose=1,
    callbacks=[CustomCallbackGeneric(), logger_generic_model],
    batch_size=X_train.shape[0],
)
et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 15

HM based optimizer model

In [117]:
# Train the HM model: same data and full-batch setting as the baseline, with
# the HM callback adjusting the weights after every epoch.
# time.perf_counter() is the clock intended for measuring elapsed time;
# time.time() is wall-clock time and can jump if the system clock is changed.
st = time.perf_counter()
model_hm.fit(
    X_train_normalized,
    y_train,
    epochs=175,
    verbose=1,
    callbacks=[CustomCallbackHM(), logger_hm_model],
    batch_size=X_train.shape[0],
)
et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/175
Epoch 2/175
Epoch 3/175
Epoch 4/175
Epoch 5/175
Epoch 6/175
Epoch 7/175
Epoch 8/175
Epoch 9/175
Epoch 10/175
Epoch 11/175
Epoch 12/175
Epoch 13/175
Epoch 14/175
Epoch 15/175
Epoch 16/175
Epoch 17/175
Epoch 18/175
Epoch 19/175
Epoch 20/175
Epoch 21/175
Epoch 22/175
Epoch 23/175
Epoch 24/175
Epoch 25/175
Epoch 26/175
Epoch 27/175
Epoch 28/175
Epoch 29/175
Epoch 30/175
Epoch 31/175
Epoch 32/175
Epoch 33/175
Epoch 34/175
Epoch 35/175
Epoch 36/175
Epoch 37/175
Epoch 38/175
Epoch 39/175
Epoch 40/175
Epoch 41/175
Epoch 42/175
Epoch 43/175
Epoch 44/175
Epoch 45/175
Epoch 46/175
Epoch 47/175
Epoch 48/175
Epoch 49/175
Epoch 50/175
Epoch 51/175
Epoch 52/175
Epoch 53/175
Epoch 54/175
Epoch 55/175
Epoch 56/175
Epoch 57/175
Epoch 58/175
Epoch 59/175
Epoch 60/175
Epoch 61/175
Epoch 62/175
Epoch 63/175
Epoch 64/175
Epoch 65/175
Epoch 66/175
Epoch 67/175
Epoch 68/175
Epoch 69/175
Epoch 70/175
Epoch 71/175
Epoch 72/175
Epoch 73/175
Epoch 74/175
Epoch 75/175
Epoch 76/175
Epoch 77/175
Epoch 78

Epoch 84/175
Epoch 85/175
Epoch 86/175
Epoch 87/175
Epoch 88/175
Epoch 89/175
Epoch 90/175
Epoch 91/175
Epoch 92/175
Epoch 93/175
Epoch 94/175
Epoch 95/175
Epoch 96/175
Epoch 97/175
Epoch 98/175
Epoch 99/175
Epoch 100/175
Epoch 101/175
Epoch 102/175
Epoch 103/175
Epoch 104/175
Epoch 105/175
Epoch 106/175
Epoch 107/175
Epoch 108/175
Epoch 109/175
Epoch 110/175
Epoch 111/175
Epoch 112/175
Epoch 113/175
Epoch 114/175
Epoch 115/175
Epoch 116/175
Epoch 117/175
Epoch 118/175
Epoch 119/175
Epoch 120/175
Epoch 121/175
Epoch 122/175
Epoch 123/175
Epoch 124/175
Epoch 125/175
Epoch 126/175
Epoch 127/175
Epoch 128/175
Epoch 129/175
Epoch 130/175
Epoch 131/175
Epoch 132/175
Epoch 133/175
Epoch 134/175
Epoch 135/175
Epoch 136/175
Epoch 137/175
Epoch 138/175
Epoch 139/175
Epoch 140/175
Epoch 141/175
Epoch 142/175
Epoch 143/175
Epoch 144/175
Epoch 145/175
Epoch 146/175
Epoch 147/175
Epoch 148/175
Epoch 149/175
Epoch 150/175
Epoch 151/175
Epoch 152/175
Epoch 153/175
Epoch 154/175
Epoch 155/175
Epoch 15

Model summary

In [118]:
# Print the layer-by-layer architecture and parameter counts of the HM model.
model_hm.summary()

Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_90 (Dense)            (None, 64)                50240     
                                                                 
 dense_91 (Dense)            (None, 64)                4160      
                                                                 
 dense_92 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


#### Testing the model

Generic optimizer model

In [119]:
# Evaluate the baseline model on the test set. The displayed list is
# [loss, accuracy]: the loss and the single metric configured in compile().
model_wihtout_hm.evaluate(X_test_normalized, y_test)



[0.3995527923107147, 0.858299970626831]

HM based optimizer model

In [120]:
# Evaluate the HM model on the test set. The displayed list is
# [loss, accuracy]: the loss and the single metric configured in compile().
model_hm.evaluate(X_test_normalized, y_test)



[0.3985791504383087, 0.8615999817848206]