<h2 style="color:blue" align="center">Plain Deep Neural Network (DNN)</h2>

#### Import the necessary libraries

In [98]:
import tensorflow as tf 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import time, math 

from tensorflow import keras 
from keras.models import Sequential
from keras.layers import Dense 
from keras.callbacks import Callback, CSVLogger 

#### Load and split the dataset into training and testing set

MNIST - Handwritten digits recognition

In [99]:
#(X_train,y_train),(X_test,y_test) = keras.datasets.mnist.load_data()

<img src="Figures/MNIST-Handwritten digits.png" height=450 width=450/>

MNIST - Fashion dataset

In [100]:
(X_train,y_train),(X_test,y_test) = keras.datasets.fashion_mnist.load_data()

<img src="Figures/MNIST-fashion.png" height=400 width=400/>

#### Flattening

In [101]:
# For MNIST handwritten digits and Fashion dataset
X_train_flattened = X_train.reshape(len(X_train), 28 * 28)
X_test_flattened = X_test.reshape(len(X_test),  28 * 28) 

# For CIFAR-10 and CIFAR-100
#X_train_flattened = X_train.reshape(len(X_train), 32 * 32)
#X_test_flattened = X_test.reshape(len(X_test),  32 * 32) 

#### Normalize the input dataset

In [102]:
X_train_normalized = X_train_flattened / 255
X_test_normalized = X_test_flattened / 255

#### Define DNN model with two layers, optimizer, metrics, and loss function

In [103]:
np.random.seed(3)

def get_model(): 
    model = Sequential([
        # input layer 784 neurons to first hidden layer with 64 neurons
        Dense(64, input_shape = (784,), activation='relu'), 
        # first hidden layer to second hidden layer
        Dense(64, activation='relu'),  
        # Output layer with 10 neurons
        Dense(10, activation='softmax')
    ])
    
    # General SGD
    #opti = keras.optimizers.SGD(learning_rate=0.01)
    
    # SGD with momentum
    #opti = keras.optimizers.SGD(learning_rate=0.01, momentum=0.6)
    
    # SGD with Nesterov momentum 
    #opti = keras.optimizers.SGD(learning_rate=0.01, momentum=0.6, nesterov=True)
    
    # RMSprop 
    #opti = keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.6)
    
    # Adam
    opti = keras.optimizers.Adam(learning_rate=0.001) 
    
    # Adamax
    #opti = keras.optimizers.Adamax(learning_rate=0.001) 
    
    model.compile(
        optimizer = opti,
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy']
    )
    
    return model

#### Custom callbacks

For generic optimizer model

In [104]:
class CustomCallbackGeneric(Callback):  
    # Training stop criteria
    stop_at = 0.99
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('accuracy')> self.stop_at):  
            self.model.stop_training = True

For HM based optimizer model

In [105]:
class CustomCallbackHM(Callback):  
    # Stop the algorithm when the following accuracy reached 
    stop_at = 0.99
    
    # to update weights for batch gradient desecent
    batch_size = 60000    
    iteration = 0    
    training_set_size = 0
    update_every_fold = 0  
    
    initial_weights = 0
    previous_weights = 0
    call_hm = 0 
     
    def on_train_begin(self, logs=None):
        self.initial_weights = model_hm.get_weights() 
        self.initial_weights = np.array(self.initial_weights,dtype=object)
        self.previous_weights = self.initial_weights
        # Harmonic mean based weights calculation
        self.call_hm = np.vectorize(self.apply_hm) 
        #Determining number of updates for mini-batch gradient every epoch
        self.training_set_size = X_train_normalized.shape[0]
        self.update_every_fold = self.fold_calc(self.batch_size, self.training_set_size)
             
    def on_epoch_begin(self, epoch,  logs=None): 
        self.iteration = 1   
        
    def on_train_batch_end(self, batch, logs=None): 
        if self.iteration%self.update_every_fold == 0:  
            counter = 0
            num_layers = len(model_hm.layers)  
            current_weights = model_hm.get_weights()
            current_weights = np.array(current_weights,dtype=object)        

            for i in range(num_layers):  
                # Harmonic mean based weights calculation
                current_weights[counter] = self.call_hm(self.previous_weights[counter], current_weights[counter])
                counter = counter + 2
            
            # Updating the model with new weights
            updated = current_weights.tolist()   
            model_hm.set_weights(updated)
            self.previous_weights = current_weights
        self.iteration = self.iteration + 1  
    
    def on_epoch_end(self, epoch, logs={}):
        # Training stop criteria
        if(logs.get('accuracy')> self.stop_at):  
            self.model.stop_training = True

    def apply_hm(self, v1, v2):     
        if v1==0 or v2==0:
            return v2
        elif v1>0 and v2>0:
            hm = 2*v1*v2/(v1+v2)
            min1 = min(v1,v2)
            diff = abs(hm-min1)
            if v2 > v1:
                return v2 + diff
            else:
                return v2 - diff
        elif v1<0 and v2<0:
            hm = 2*v1*v2/(v1+v2)
            max1 = max(v1,v2)
            diff = abs(hm-max1)
            if v2 > v1:
                return v2 + diff
            else:
                return v2 - diff
        else:
            return v2  
        
    def fold_calc(self, batch_size, training_set_size): 
        total_fold = int(math.ceil(math.log2(training_set_size)))
        #print("total_fold   :", total_fold) 
        num_batch = math.ceil(training_set_size / batch_size)
        #print("num_batch   :", num_batch)
        batch_fold = int(math.floor(math.log2(batch_size)))
        #print("batch_fold   :", batch_fold)
        update_every_fold = (total_fold-batch_fold)    
        #print("update_every_fold   :", update_every_fold)
        num_updates = num_batch/update_every_fold
        #print("num_updates   :", num_updates)
        return update_every_fold

To record loss and accuracy in CSV file

In [106]:
logger_generic_model = CSVLogger('Generic_model_MNIST.csv', append=False, separator=',')
logger_hm_model = CSVLogger('HM_model_MNIST.csv', append=False, separator=',')

#### Training

Generic opimizer model

In [107]:
model_wihtout_hm = get_model() 
st = time.time()
model_wihtout_hm.fit(X_train_normalized, y_train, epochs = 100, verbose=1, callbacks=[CustomCallbackGeneric(), logger_generic_model], batch_size=60000) 
et = time.time()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Execution time: 17.981179237365723 seconds


HM based optimizer model

In [108]:
model_hm = get_model() 
st = time.time()
model_hm.fit(X_train_normalized, y_train, epochs = 100, verbose=1, callbacks=[CustomCallbackHM(),logger_hm_model], batch_size=60000) 
et = time.time()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Execution time: 21.89090919494629 seconds


Model summary

In [109]:
model_hm.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_51 (Dense)            (None, 64)                50240     
                                                                 
 dense_52 (Dense)            (None, 64)                4160      
                                                                 
 dense_53 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


###### Testing the model

Generic opimizer model

In [110]:
model_wihtout_hm.evaluate(X_test_normalized, y_test)



[0.46369126439094543, 0.838100016117096]

HM based optimizer model

In [111]:
model_hm.evaluate(X_test_normalized, y_test)



[0.4384474754333496, 0.8482000231742859]