<h2 style="color:blue" align="center">Plain Deep Neural Network (DNN) on CIFAR</h2>

#### Import the necessary libraries

In [1]:
import tensorflow as tf 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import random as python_random
import time, math 

from tensorflow import keras 
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.callbacks import Callback, CSVLogger 

#### Load and split the dataset into training and testing set

CIFAR 10

In [2]:
# Load CIFAR-10 through the Keras datasets API and unpack the train and test splits.
(X_train,y_train),(X_test,y_test) = keras.datasets.cifar10.load_data()

CIFAR 100

In [3]:
#(X_train,y_train),(X_test,y_test) = keras.datasets.cifar100.load_data()

Here we see there are 50000 training images and 10000 test images.

#### Normalize the input dataset

In [4]:
# Divide every pixel value by 255 to rescale the image arrays.
# The raw arrays are left untouched; the scaled copies get new names.
X_train_normalized = np.true_divide(X_train, 255.0)
X_test_normalized = np.true_divide(X_test, 255.0)

In [5]:
# Collapse the label arrays to one dimension (one class index per sample).
y_train = np.reshape(y_train, (-1,))
y_test = np.reshape(y_test, (-1,))

#### Define DNN model

In [6]:
# Label of the optimizer currently in use. get_model() overwrites it and the
# plotting cells read it for their legends.
opti_name = ''

# Seed each random number generator used by the notebook:
# Python's `random`, NumPy and TensorFlow.
python_random.seed(3)
np.random.seed(7)
tf.random.set_seed(13)

def get_model(optimizer='Adam', num_classes=10):
    """Build and compile the fully connected CIFAR classifier.

    The network flattens a 32x32x3 input and feeds it through two hidden
    ReLU layers (3000 and 1000 units) followed by a softmax output layer.
    It is compiled with sparse categorical cross-entropy loss and the
    'accuracy' metric.

    Args:
        optimizer: Label of the optimizer configuration to use. One of
            'SGD', 'SGD with momentum', 'SGD with Nesterov momentum',
            'RMSprop', 'Adam' (default) or 'Adamax'.
        num_classes: Width of the softmax output layer: 10 for CIFAR-10
            (default), 100 for CIFAR-100.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: If `optimizer` is not one of the supported labels.

    Side effects:
        Stores the chosen label in the module-level `opti_name`.
    """
    global opti_name

    # Factories (not instances) so that only the selected optimizer is created.
    optimizer_factories = {
        'SGD': lambda: keras.optimizers.SGD(learning_rate=0.01),
        'SGD with momentum': lambda: keras.optimizers.SGD(learning_rate=0.01, momentum=0.6),
        'SGD with Nesterov momentum': lambda: keras.optimizers.SGD(
            learning_rate=0.01, momentum=0.6, nesterov=True
        ),
        'RMSprop': lambda: keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.6),
        'Adam': lambda: keras.optimizers.Adam(learning_rate=0.001),
        'Adamax': lambda: keras.optimizers.Adamax(learning_rate=0.001),
    }
    if optimizer not in optimizer_factories:
        raise ValueError(
            'Unknown optimizer %r; expected one of %s'
            % (optimizer, sorted(optimizer_factories))
        )

    model = Sequential([
          Flatten(input_shape=(32,32,3)),
          Dense(3000, activation = 'relu'),
          Dense(1000, activation = 'relu'),
          Dense(num_classes, activation = 'softmax')
    ])

    opti = optimizer_factories[optimizer]()
    opti_name = optimizer

    model.compile(
        optimizer = opti,
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy']
    )

    return model

#### Custom callbacks

For generic optimizer model

In [15]:
# Best training accuracy reached by the baseline model. The HM-based callback
# reads this value as its stopping threshold.
generic_best = 0

class CustomCallbackGeneric(Callback):
    """Track the best training accuracy of the baseline run and stop early.

    After every epoch the epoch's 'accuracy' (rounded to 4 decimals) is
    compared with the module-level `generic_best`. The first epoch resets
    `generic_best`; later epochs only raise it. Training is stopped as soon
    as the accuracy exceeds `stop_at`.
    """
    # Training stop criteria
    stop_at = 0.99

    def on_epoch_end(self, epoch, logs=None):
        """Update `generic_best` and request a stop above `stop_at`.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metric dictionary supplied by Keras; must contain 'accuracy'.

        Raises:
            KeyError: If 'accuracy' is missing from `logs`.
        """
        global generic_best

        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            # Fail with an explicit message instead of a TypeError from round(None).
            raise KeyError(
                "'accuracy' not found in logs; compile the model with metrics=['accuracy']"
            )
        acc = round(accuracy, 4)

        # Epoch 0 starts a fresh run, so it always overwrites the stored best.
        if epoch == 0 or acc > generic_best:
            generic_best = acc

        if acc > self.stop_at:
            self.model.stop_training = True

For HM based optimizer model

In [10]:
class CustomCallbackHM(Callback):
    """Adjust weights with a harmonic-mean (HM) rule after every epoch.

    At the end of each epoch, every second entry of the model's weight list
    (indices 0, 2, 4, ...) is combined with the value it had after the
    previous epoch using `apply_hm`, and the result is written back to the
    model. Training stops once the epoch accuracy, rounded to 4 decimals,
    exceeds the module-level `generic_best`.

    The callback operates on `self.model`, i.e. whichever model it is passed
    to through `fit(callbacks=...)`.
    """
    initial_weights = 0
    previous_weights = 0
    call_hm = 0
    r = 1
    # r=0 no HM based, r=1 HM based

    def on_train_begin(self, logs=None):
        """Snapshot the starting weights and select the array HM routine."""
        self.initial_weights = self.model.get_weights()
        self.previous_weights = self.initial_weights
        # Harmonic mean based weights calculation (element-wise over arrays)
        self.call_hm = self._apply_hm_array

    def on_epoch_end(self, epoch, logs=None):
        """Apply the HM rule to the weights and check the stopping criterion.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metric dictionary supplied by Keras; must contain 'accuracy'.

        Raises:
            KeyError: If 'accuracy' is missing from `logs`.
        """
        # NOTE(review): `len(layers) - 1` matches the notebook's model, where
        # one layer (Flatten) has no weights and every other layer contributes
        # a (kernel, bias) pair - confirm before reusing with other models.
        num_layers = len(self.model.layers) - 1
        current_weights = self.model.get_weights()

        for i in range(num_layers):
            kernel_index = i * 2
            # Harmonic mean based weights calculation
            current_weights[kernel_index] = self.call_hm(
                self.previous_weights[kernel_index],
                current_weights[kernel_index],
            )

        # Updating the model with new weights
        self.model.set_weights(current_weights)
        self.previous_weights = current_weights

        # Stopping criterion: exceed the best accuracy of the baseline model.
        # Round to 4 decimals, as the baseline callback does when it records
        # `generic_best`; rounding to an integer would only ever give 0 or 1.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            raise KeyError(
                "'accuracy' not found in logs; compile the model with metrics=['accuracy']"
            )
        if round(accuracy, 4) > generic_best:
            self.model.stop_training = True

    def _apply_hm_array(self, v1, v2):
        """Element-wise array form of `apply_hm`.

        Args:
            v1: Array of previous weights.
            v2: Array of current weights, same shape as `v1`.

        Returns:
            Array with the shape and dtype of `v2` holding the adjusted weights.
        """
        out_dtype = np.asarray(v2).dtype
        # Compute in float64 and cast back to the weights' dtype at the end.
        prev = np.asarray(v1, dtype=np.float64)
        curr = np.asarray(v2, dtype=np.float64)

        both_pos = (prev > 0) & (curr > 0)
        both_neg = (prev < 0) & (curr < 0)
        same_sign = both_pos | both_neg

        # Where the rule does not apply the result is discarded, so use a
        # dummy denominator there to avoid dividing by zero.
        denom = np.where(same_sign, prev + curr, 1.0)
        hm = 2 * prev * curr / denom
        # Reference value: the smaller of the pair when both are positive,
        # the larger of the pair when both are negative.
        anchor = np.where(both_pos, np.minimum(prev, curr), np.maximum(prev, curr))
        diff = np.abs(hm - anchor) * self.r

        adjusted = np.where(curr > prev, curr + diff, curr - diff)
        return np.where(same_sign, adjusted, curr).astype(out_dtype)

    def apply_hm(self, v1, v2):
        """Scalar HM rule.

        If `v1` and `v2` are both positive or both negative, `v2` is pushed
        further in the direction it moved relative to `v1` by
        `r * |HM(v1, v2) - anchor|`, where the anchor is min(v1, v2) for
        positive values and max(v1, v2) for negative values. In every other
        case (a zero or mixed signs) `v2` is returned unchanged.

        Args:
            v1: Previous value of the weight.
            v2: Current value of the weight.

        Returns:
            The adjusted value of the weight.
        """
        if v1 > 0 and v2 > 0:
            anchor = min(v1, v2)
        elif v1 < 0 and v2 < 0:
            anchor = max(v1, v2)
        else:
            return v2

        hm = 2*v1*v2/(v1+v2)
        diff = abs(hm - anchor) * self.r
        if v2 > v1:
            return v2 + diff
        return v2 - diff

To record loss and accuracy in CSV file

In [11]:
# One CSVLogger per training run, each writing to its own comma-separated file.
# append=False: do not append to a file left over from an earlier run.
logger_generic_model = CSVLogger(
    filename='3.Generic_model_CIFAR.csv',
    separator=',',
    append=False,
)
logger_hm_model = CSVLogger(
    filename='3.HM_model_CIFAR.csv',
    separator=',',
    append=False,
)

#### Training

Create a reference model and keep its initial weights, so that the models with and without HM start from identical weights.

In [12]:
# Epoch budget shared by both training runs.
num_epochs = 400

# Reference model: only its initial weights are used, as the common
# starting point for the baseline and the HM-based model.
model = get_model()
weights = model.get_weights()

Generic optimizer model

In [13]:
# Baseline run: same architecture and initial weights, no HM adjustment.
model_wihtout_hm = get_model()
model_wihtout_hm.set_weights(weights)

st = time.time()
model_wihtout_hm.fit(
    X_train_normalized,
    y_train,
    epochs = num_epochs,
    verbose = 1,
    callbacks = [CustomCallbackGeneric(), logger_generic_model],
    # Batch size equals the number of training samples: one batch per epoch.
    batch_size = X_train.shape[0],
)
et = time.time()

elapsed_time = round(et - st, 4)
print('Execution time:', elapsed_time, 'seconds')
# `generic_best` is the module-level variable maintained by CustomCallbackGeneric.
print('\nGeneric optimizer best Accuracy is :', generic_best)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Execution time: 49.85312223434448 seconds

Generic optimizer best Accuracy is : 0.15


HM based optimizer model

In [14]:
# HM run: identical setup to the baseline, but with the HM callback attached.
model_hm = get_model()
model_hm.set_weights(weights)

st = time.time()
model_hm.fit(
    X_train_normalized,
    y_train,
    epochs = num_epochs,
    verbose = 1,
    callbacks = [CustomCallbackHM(), logger_hm_model],
    # Batch size equals the number of training samples: one batch per epoch.
    batch_size = X_train.shape[0],
)
et = time.time()

elapsed_time = round(et - st, 4)
print('Execution time:', elapsed_time, 'seconds')

Epoch 1/5
Execution time: 21.878639459609985 seconds


Model summary

In [13]:
# Print the layer-by-layer architecture and parameter counts of the HM model.
model_hm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 3072)              0         
                                                                 
 dense_6 (Dense)             (None, 3000)              9219000   
                                                                 
 dense_7 (Dense)             (None, 1000)              3001000   
                                                                 
 dense_8 (Dense)             (None, 10)                10010     
                                                                 
Total params: 12,230,010
Trainable params: 12,230,010
Non-trainable params: 0
_________________________________________________________________


#### Generic optimizer vs HM-based optimizer

In [None]:
# Load the per-epoch logs written by the two CSVLogger callbacks of this
# notebook (the CIFAR files, not the logs of the MNIST experiment).
df1 = pd.read_csv("3.Generic_model_CIFAR.csv")
df2 = pd.read_csv("3.HM_model_CIFAR.csv")

Accuracy

In [None]:
# Training accuracy per epoch: baseline optimizer (df1) vs. HM-based variant (df2).
y1, y2 = df1['accuracy'], df2['accuracy']
x1, x2 = range(len(df1)), range(len(df2))

plt.figure(figsize=(3, 2), dpi=200)
plt.plot(x1, y1, "r-", label=opti_name, linewidth=0.8, alpha=0.7)
plt.plot(x2, y2, "k:", label='HM-based ' + opti_name, linewidth=1, alpha=0.9)

# Axis labels and tick labels share the same typeface.
plt.xlabel('Epoch', fontdict={'fontname':'Times New Roman', 'fontsize':8})
plt.ylabel('Accuracy', fontdict={'fontname':'Times New Roman', 'fontsize':8})
plt.xticks(fontsize=7, fontname='Times New Roman')
plt.yticks(fontsize=7, fontname='Times New Roman')

plt.tight_layout()
plt.legend(prop={'size': 5})
# To export the figure: plt.savefig("graph.png", bbox_inches='tight', dpi=300)
plt.show()

Loss

In [None]:
# Training loss per epoch: baseline optimizer (df1) vs. HM-based variant (df2).
y1, y2 = df1['loss'], df2['loss']
x1, x2 = range(len(df1)), range(len(df2))

plt.figure(figsize=(3, 2), dpi=200)
plt.plot(x1, y1, "r-", label=opti_name, linewidth=0.8, alpha=0.7)
plt.plot(x2, y2, "k:", label='HM-based ' + opti_name, linewidth=1, alpha=0.9)

# Axis labels and tick labels share the same typeface.
plt.xlabel('Epoch', fontdict={'fontname':'Times New Roman', 'fontsize':8})
plt.ylabel('Loss', fontdict={'fontname':'Times New Roman', 'fontsize':8})
plt.xticks(fontsize=7, fontname='Times New Roman')
plt.yticks(fontsize=7, fontname='Times New Roman')

plt.tight_layout()
plt.legend(prop={'size': 5})
# To export the figure: plt.savefig("graph.png", bbox_inches='tight', dpi=300)
plt.show()

###### Testing the model

Generic optimizer model

In [14]:
# Evaluate the baseline model on the normalized test set; the result is [loss, accuracy].
model_wihtout_hm.evaluate(X_test_normalized, y_test)



[4.715517997741699, 0.10029999911785126]

HM based optimizer model

In [17]:
# Evaluate the HM-based model on the normalized test set; the result is [loss, accuracy].
model_hm.evaluate(X_test_normalized, y_test)



[6.004293441772461, 0.10000000149011612]