In [None]:
# List the GPU(s) visible to this runtime (shell command run through IPython's `!` escape).
! nvidia-smi -L

GPU 0: A100-SXM4-40GB (UUID: GPU-3d203e26-4a3e-3592-e586-3080f1d5b7dc)


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from keras.optimizers import RMSprop, SGD
import datetime

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')


In [None]:
# Load Dataset - Data Augmentation
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Randomly Rotate/Shift/Flip Images To Prevent Overfitting
# NOTE(review): none of the training cells visible in this notebook pass `datagen`
# to fit(), so these augmentations are not applied there - confirm this is intended.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    )
datagen.fit(x_train)

# Reshape Data and Convert To Floating Point
img_rows, img_cols = 32, 32
input_shape = (img_rows, img_cols, 3)
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3).astype('float32')
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3).astype('float32')

# Normalise Data
# The statistics come from the training set only and are computed once, so both
# splits are scaled with exactly the same values regardless of statement order.
train_mean = np.mean(x_train)
train_std = np.std(x_train)
x_test = (x_test - train_mean) / train_std
x_train = (x_train - train_mean) / train_std

# One-Hot Encoding Of Output Categories
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


#### LeNet-5 Implementation For CIFAR-10
ReLU Timing: 2 min 23.19s

ReLU Accuracy: 0.6398

ELU Timing: 2 min 23.56s

ELU Accuracy: 0.6556




In [None]:
class LeNet:
  """LeNet-style convolutional classifier assembled as a Keras Sequential model.

  Parameters
  ----------
  inputShape : tuple
      Shape of a single input image; passed to the first convolution.
  activationFunction : str
      Keras activation name applied after each of the three convolutions.
  """

  def __init__(self, inputShape=(32,32,3), activationFunction='elu'):
    self.inputShape = inputShape
    self.activationFunction = activationFunction

  def buildModel(self, classes=10):
    """Build the (uncompiled) network.

    Parameters
    ----------
    classes : int
        Number of output units of the final softmax layer.

    Returns
    -------
    The Sequential model, ready for compile().
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    # The input shape comes from the constructor argument, not from a notebook-level
    # global, so LeNet(inputShape=...) is actually honoured.
    model.add(layers.Conv2D(6, kernel_size=(5,5), strides=(1,1), padding='same',
                            input_shape=self.inputShape, use_bias=True,
                            kernel_initializer='glorot_uniform', bias_initializer='zeros'))
    model.add(layers.Activation(self.activationFunction))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(layers.Conv2D(16, kernel_size=(5,5), padding='valid'))
    model.add(layers.Activation(self.activationFunction))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(layers.Conv2D(120, kernel_size=(5,5), padding='valid'))
    model.add(layers.Activation(self.activationFunction))

    model.add(layers.Flatten())
    # NOTE(review): this Dense(84) has no activation layer after it, so it feeds the
    # output layer linearly. Left unchanged to keep the reported results comparable -
    # confirm whether an activation was intended here.
    model.add(layers.Dense(84))

    model.add(layers.Dense(classes))
    model.add(layers.Activation('softmax'))
    return model

# Build and compile the ReLU variant.
LeNetInstance = LeNet(activationFunction='relu')
LeNetReluModel = LeNetInstance.buildModel()
LeNetReluModel.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

# Build and compile the ELU variant (LeNetInstance is rebound to the ELU builder).
LeNetInstance = LeNet(activationFunction='elu')
LeNetEluModel = LeNetInstance.buildModel()
LeNetEluModel.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

# Hyper-parameters shared by both runs.
batch_size, num_classes, epochs = 64, 10, 30

# Timed run: ReLU. The clock starts before the banner is printed.
startTime = datetime.datetime.now()
print("Training Of LeNet Using ReLU Activation Function")
LeNetReluModelLog = LeNetReluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For LeNet Using ReLU: %s' % elapsed)

# Timed run: ELU.
startTime = datetime.datetime.now()
print("Training Of LeNet Using ELU Activation Function")
LeNetEluModelLog = LeNetEluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For LeNet Using ELU: %s' % elapsed)

Training Of LeNet Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For LeNet Using ReLU: 0:02:23.192448
Training Of LeNet Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For LeNet Using ELU: 0:02:23.562284


#### ResNet-50 Implementation For CIFAR-10 (Using Colab Pro)
ReLU Timing: 36 min 43.61s

ReLU Accuracy: 0.8298

ELU Timing: 37 min 32.61s

ELU Accuracy: 0.8176

In [None]:
class ResNet50:
  """ResNet-50 style classifier (bottleneck residual blocks), built with the Keras functional API.

  Inputs are resized to 224x224 inside the model before the convolutional stem.

  Parameters
  ----------
  activationFunction : str
      Keras activation name applied after batch normalisation throughout the network.
  inputShape : tuple
      Shape of a single input image. Defaults to (32, 32, 3), so existing callers
      that only pass `activationFunction` build the same network as before.
  """

  def __init__(self, activationFunction='relu', inputShape=(32, 32, 3)):
    self.activationFunction = activationFunction
    self.inputShape = inputShape

  def convolutionalBlock(self, X, f, filters, s = 2):
    """Bottleneck block whose shortcut is projected by a strided 1x1 convolution.

    Parameters
    ----------
    X : input tensor.
    f : kernel size of the middle convolution.
    filters : sequence of three filter counts (F1, F2, F3) for the three convolutions.
    s : stride used by the first convolution and by the shortcut convolution.

    Returns
    -------
    The activated sum of the main path and the projected shortcut.
    """
    F1, F2, F3 = filters
    X_shortcut = X
    # Main path: 1x1 (strided) -> f x f -> 1x1, each followed by batch normalisation.
    X = tf.keras.layers.Conv2D(F1, (1, 1), strides = (s,s), kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    # Shortcut path: project to F3 channels with the same stride so both branches can be added.
    X_shortcut = tf.keras.layers.Conv2D(filters = F3, kernel_size = (1, 1), strides = (s,s), padding = 'valid', kernel_initializer = "glorot_uniform")(X_shortcut)
    X_shortcut = tf.keras.layers.BatchNormalization(axis = 3)(X_shortcut)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def identityBlock(self, X, f, filters):
    """Bottleneck block whose shortcut is the unmodified input.

    Parameters
    ----------
    X : input tensor.
    f : kernel size of the middle convolution.
    filters : sequence of three filter counts (F1, F2, F3).

    Returns
    -------
    The activated sum of the main path and the input.
    """
    F1, F2, F3 = filters
    X_shortcut = X
    X = tf.keras.layers.Conv2D(filters = F1, kernel_size = (1, 1), strides = (1,1), padding = 'valid', kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def buildModel(self, classes=10):
    """Build the (uncompiled) functional model ending in a `classes`-way softmax."""
    # The input shape comes from the constructor instead of a notebook-level global.
    X_input = tf.keras.layers.Input(shape=self.inputShape)
    X = tf.keras.layers.Resizing(224, 224)(X_input)

    # Stem: padded 7x7 strided convolution followed by max pooling.
    X = tf.keras.layers.ZeroPadding2D((3, 3))(X)
    X = tf.keras.layers.Conv2D(64, (7, 7), strides = (2, 2), kernel_initializer = "glorot_uniform")(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(X)

    # (filters, number of blocks in the stage, stride of the stage's first block).
    # Each stage is one convolutionalBlock followed by (blocks - 1) identityBlocks.
    stages = (
        ([64, 64, 256], 3, 1),
        ([128, 128, 512], 4, 2),
        ([256, 256, 1024], 6, 2),
        ([512, 512, 2048], 3, 2),
    )
    for filters, blocks, stride in stages:
      X = self.convolutionalBlock(X, f = 3, filters = filters, s = stride)
      for _ in range(blocks - 1):
        X = self.identityBlock(X, 3, filters)

    # Classifier head.
    X = tf.keras.layers.AveragePooling2D(pool_size=(2, 2))(X)
    X = tf.keras.layers.Flatten()(X)
    X = tf.keras.layers.Dense(classes, activation='softmax', kernel_initializer = "glorot_uniform")(X)
    model = tf.keras.Model(inputs = X_input, outputs = X)
    return model

# Build and compile the ReLU and ELU variants of ResNet-50.
ResInstance = ResNet50(activationFunction = 'relu')
ResnetReluModel = ResInstance.buildModel()
ResnetReluModel.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

ResInstance = ResNet50(activationFunction = 'elu')
ResnetEluModel = ResInstance.buildModel()
ResnetEluModel.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

batch_size = 64
num_classes = 10
epochs = 30

startTime = datetime.datetime.now()
print("Training Of ResNet-50 Using ReLU Activation Function")
# Each run keeps its history under its own name. Both runs used to write to
# `model_log`, so the ReLU history was lost as soon as the ELU run finished.
ResNet50ReluModelLog = ResnetReluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
model_log = ResNet50ReluModelLog
print('Training Time For ResNet-50 Using ReLU: %s' % (datetime.datetime.now() - startTime))

startTime = datetime.datetime.now()
print("Training Of ResNet-50 Using ELU Activation Function")
ResNet50EluModelLog = ResnetEluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
# `model_log` is kept as an alias so code that reads it still sees the most recent (ELU) history.
model_log = ResNet50EluModelLog

print('Training Time For ResNet-50 Using ELU: %s' % (datetime.datetime.now() - startTime))

Training Of ResNet-50 Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-50 Using ReLU: 0:36:43.612650
Training Of ResNet-50 Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-50 Using ELU: 0:37:32.616402


#### ResNet-34 For CIFAR-10 (Using Colab Pro)
ReLU Timing: 22 min 52.96s

ReLU Accuracy: 0.8270

ELU Timing: 23 min 01.17s

ELU Accuracy: 0.8232

In [None]:
class ResNetImproved:
  """Residual network with two-convolution blocks, built with the Keras functional API.

  Inputs are resized to 224x224 inside the model before the convolutional stem.

  Parameters
  ----------
  activationFunction : str
      Keras activation name applied after batch normalisation throughout the network.
  inputShape : tuple
      Shape of a single input image. Defaults to (32, 32, 3).
  stageDepths : sequence of four ints
      Number of residual blocks in each of the four stages (64, 128, 256, 512 filters).
      The default (3, 4, 6, 3) reproduces the block layout this class had before the
      parameter existed; other depths can be requested without copying the class.

  Raises
  ------
  ValueError
      If `stageDepths` does not contain exactly four entries that are all >= 1.
  """

  def __init__(self, activationFunction='relu', inputShape=(32, 32, 3), stageDepths=(3, 4, 6, 3)):
    stageDepths = tuple(stageDepths)
    if len(stageDepths) != 4 or any(depth < 1 for depth in stageDepths):
      raise ValueError('stageDepths must contain four block counts that are all >= 1')
    self.activationFunction = activationFunction
    self.inputShape = inputShape
    self.stageDepths = stageDepths

  def convolutionalBlock(self, X, f, filters, s = 2):
    """Residual block whose shortcut is projected by a strided f x f convolution.

    Parameters
    ----------
    X : input tensor.
    f : kernel size used by every convolution in the block.
    filters : number of filters of every convolution in the block.
    s : stride of the first main-path convolution and of the shortcut convolution.

    Returns
    -------
    The activated sum of the main path and the projected shortcut.
    """
    X_shortcut = X
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (s,s), kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    # The shortcut uses the same kernel size and stride as the first main-path convolution.
    X_shortcut = tf.keras.layers.Conv2D(filters, (f, f), strides = (s,s), padding = 'valid', kernel_initializer = "glorot_uniform",)(X_shortcut)
    X_shortcut = tf.keras.layers.BatchNormalization(axis = 3)(X_shortcut)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def identityBlock(self, X, f, filters):
    """Residual block of two 'same'-padded convolutions added to the unmodified input."""
    X_shortcut = X
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def buildModel(self, classes=10):
    """Build the (uncompiled) functional model ending in a `classes`-way softmax."""
    # The input shape comes from the constructor instead of a notebook-level global.
    X_input = tf.keras.layers.Input(shape=self.inputShape)
    X = tf.keras.layers.Resizing(224, 224)(X_input)

    # Stem: padded, regularised 7x7 strided convolution followed by max pooling.
    X = tf.keras.layers.ZeroPadding2D((3, 3))(X)
    X = tf.keras.layers.Conv2D(64, (7, 7), strides = (2, 2), kernel_initializer = "glorot_uniform",
                               kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                               bias_regularizer=tf.keras.regularizers.L2(1e-4),
                               activity_regularizer=tf.keras.regularizers.L2(1e-5))(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Each stage is one convolutionalBlock followed by (depth - 1) identityBlocks.
    stageFilters = (64, 128, 256, 512)
    stageStrides = (1, 2, 2, 2)
    for filters, depth, stride in zip(stageFilters, self.stageDepths, stageStrides):
      X = self.convolutionalBlock(X, f = 3, filters = filters, s = stride)
      for _ in range(depth - 1):
        X = self.identityBlock(X, 3, filters)

    # Classifier head.
    X = tf.keras.layers.AveragePooling2D(pool_size=(2, 2))(X)
    X = tf.keras.layers.Flatten()(X)
    X = tf.keras.layers.Dense(classes, activation='softmax', kernel_initializer = "glorot_uniform",
                              kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                              bias_regularizer=tf.keras.regularizers.L2(1e-4),
                              activity_regularizer=tf.keras.regularizers.L2(1e-5))(X)
    model = tf.keras.Model(inputs = X_input, outputs = X)
    return model


# Build and compile the ReLU and ELU variants of ResNet-34 (each gets its own Adam instance).
ResInstance = ResNetImproved(activationFunction = 'relu')
ResnetReluModel = ResInstance.buildModel()
ResnetReluModel.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm = 1, decay=0.005), metrics=['accuracy'])

ResInstance = ResNetImproved(activationFunction = 'elu')
ResnetEluModel = ResInstance.buildModel()
ResnetEluModel.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm = 1, decay=0.005), metrics=['accuracy'])

batch_size = 64
num_classes = 10
epochs = 30

startTime = datetime.datetime.now()
print("Training Of ResNet-34 Using ReLU Activation Function")
# Each run keeps its history under its own name. Both runs used to write to
# `model_log`, so the ReLU history was lost as soon as the ELU run finished.
ResNet34ReluModelLog = ResnetReluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
model_log = ResNet34ReluModelLog
print('Training Time For ResNet-34 Using ReLU: %s' % (datetime.datetime.now() - startTime))

startTime = datetime.datetime.now()
print("Training Of ResNet-34 Using ELU Activation Function")
ResNet34EluModelLog = ResnetEluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
# `model_log` is kept as an alias so code that reads it still sees the most recent (ELU) history.
model_log = ResNet34EluModelLog

print('Training Time For ResNet-34 Using ELU: %s' % (datetime.datetime.now() - startTime))

Training Of ResNet-34 Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-34 Using ReLU: 0:22:52.962625
Training Of ResNet-34 Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-34 Using ELU: 0:23:01.175216


#### ResNet-18 For CIFAR-10 (Best Model) (No Colab Pro)

ReLU Timing: 1h 21min 27.45s

ReLU Timing (Adjusted To Colab Pro): 22 min 23.95s

*(A100 is 1.47x faster than V100, which is 2.46x faster than T4)*

ReLU Accuracy: 0.8310


In [None]:
class ResNetImproved:
  """Residual network with two-convolution blocks, built with the Keras functional API.

  Inputs are resized to 224x224 inside the model before the convolutional stem.

  Parameters
  ----------
  activationFunction : str
      Keras activation name applied after batch normalisation throughout the network.
  inputShape : tuple
      Shape of a single input image. Defaults to (32, 32, 3).
  stageDepths : sequence of four ints
      Number of residual blocks in each of the four stages (64, 128, 256, 512 filters).
      The default (2, 2, 2, 2) reproduces the block layout this class had before the
      parameter existed; other depths can be requested without copying the class.

  Raises
  ------
  ValueError
      If `stageDepths` does not contain exactly four entries that are all >= 1.
  """

  def __init__(self, activationFunction='relu', inputShape=(32, 32, 3), stageDepths=(2, 2, 2, 2)):
    stageDepths = tuple(stageDepths)
    if len(stageDepths) != 4 or any(depth < 1 for depth in stageDepths):
      raise ValueError('stageDepths must contain four block counts that are all >= 1')
    self.activationFunction = activationFunction
    self.inputShape = inputShape
    self.stageDepths = stageDepths

  def convolutionalBlock(self, X, f, filters, s = 2):
    """Residual block whose shortcut is projected by a strided f x f convolution.

    Parameters
    ----------
    X : input tensor.
    f : kernel size used by every convolution in the block.
    filters : number of filters of every convolution in the block.
    s : stride of the first main-path convolution and of the shortcut convolution.

    Returns
    -------
    The activated sum of the main path and the projected shortcut.
    """
    X_shortcut = X
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (s,s), kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    # The shortcut uses the same kernel size and stride as the first main-path convolution.
    X_shortcut = tf.keras.layers.Conv2D(filters, (f, f), strides = (s,s), padding = 'valid', kernel_initializer = "glorot_uniform",)(X_shortcut)
    X_shortcut = tf.keras.layers.BatchNormalization(axis = 3)(X_shortcut)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def identityBlock(self, X, f, filters):
    """Residual block of two 'same'-padded convolutions added to the unmodified input."""
    X_shortcut = X
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_initializer = "glorot_uniform",)(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Add()([X_shortcut, X])
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    return X

  def buildModel(self, classes=10):
    """Build the (uncompiled) functional model ending in a `classes`-way softmax."""
    # The input shape comes from the constructor instead of a notebook-level global.
    X_input = tf.keras.layers.Input(shape=self.inputShape)
    X = tf.keras.layers.Resizing(224, 224)(X_input)

    # Stem: padded, regularised 7x7 strided convolution followed by max pooling.
    X = tf.keras.layers.ZeroPadding2D((3, 3))(X)
    X = tf.keras.layers.Conv2D(64, (7, 7), strides = (2, 2), kernel_initializer = "glorot_uniform",
                               kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                               bias_regularizer=tf.keras.regularizers.L2(1e-4),
                               activity_regularizer=tf.keras.regularizers.L2(1e-5))(X)
    X = tf.keras.layers.BatchNormalization(axis = 3)(X)
    X = tf.keras.layers.Activation(self.activationFunction)(X)
    X = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Each stage is one convolutionalBlock followed by (depth - 1) identityBlocks.
    stageFilters = (64, 128, 256, 512)
    stageStrides = (1, 2, 2, 2)
    for filters, depth, stride in zip(stageFilters, self.stageDepths, stageStrides):
      X = self.convolutionalBlock(X, f = 3, filters = filters, s = stride)
      for _ in range(depth - 1):
        X = self.identityBlock(X, 3, filters)

    # Classifier head.
    X = tf.keras.layers.AveragePooling2D(pool_size=(2, 2))(X)
    X = tf.keras.layers.Flatten()(X)
    X = tf.keras.layers.Dense(classes, activation='softmax', kernel_initializer = "glorot_uniform",
                              kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                              bias_regularizer=tf.keras.regularizers.L2(1e-4),
                              activity_regularizer=tf.keras.regularizers.L2(1e-5))(X)
    model = tf.keras.Model(inputs = X_input, outputs = X)
    return model


# Build and compile the ReLU and ELU variants of ResNet-18 (each gets its own Adam instance).
ResInstance = ResNetImproved(activationFunction = 'relu')
ResnetReluModel = ResInstance.buildModel()
ResnetReluModel.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm = 1, decay=0.005), metrics=['accuracy'])

ResInstance = ResNetImproved(activationFunction = 'elu')
ResnetEluModel = ResInstance.buildModel()
ResnetEluModel.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm = 1, decay=0.005), metrics=['accuracy'])

batch_size = 64
num_classes = 10
epochs = 30

startTime = datetime.datetime.now()
print("Training Of ResNet-18 Using ReLU Activation Function")
# Each run keeps its history under its own name. Both runs used to write to
# `model_log`, so the ReLU history was lost as soon as the ELU run finished.
ResNet18ReluModelLog = ResnetReluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
model_log = ResNet18ReluModelLog
print('Training Time For ResNet-18 Using ReLU: %s' % (datetime.datetime.now() - startTime))

startTime = datetime.datetime.now()
print("Training Of ResNet-18 Using ELU Activation Function")
ResNet18EluModelLog = ResnetEluModel.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_split = 0.1,
                     )
# `model_log` is kept as an alias so code that reads it still sees the most recent (ELU) history.
model_log = ResNet18EluModelLog

print('Training Time For ResNet-18 Using ELU: %s' % (datetime.datetime.now() - startTime))

Training Of ResNet-18 Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-18 Using ReLU: 1:21:27.453080
Training Of ResNet-18 Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For ResNet-18 Using ELU: 1:21:55.957091


#### VGG-16 Implementation For CIFAR-10 (Using Colab Pro)
ReLU Timing: 1 h 1 min 28.50s

ReLU Accuracy: 0.8524

ELU Timing: 1 h 02 min 50.60s

ELU Accuracy: 0.8636

In [None]:
class VGG16:
  """VGG-16 style classifier with batch normalisation, dropout and L2 weight decay.

  Inputs are resized to 224x224 inside the model before the first convolution.

  Parameters
  ----------
  inputShape : tuple
      Shape of a single input image; used for the model's Input layer.
  weightDecay : float
      L2 regularisation factor applied to the convolution kernels and to the
      512-unit dense layer.
  activationFunction : str
      Keras activation name applied after each convolution and after the 512-unit dense layer.
  """

  def __init__(self, inputShape=(32,32,3), weightDecay = 0.0005, activationFunction='elu'):
    self.inputShape = inputShape
    self.weightDecay = weightDecay
    self.activationFunction = activationFunction

  def buildModel(self):
    """Build the (uncompiled) Sequential model ending in a 10-way softmax."""
    model = tf.keras.Sequential()

    # The input shape comes from the constructor argument, not from a notebook-level
    # global, so VGG16(inputShape=...) is actually honoured.
    model.add(tf.keras.layers.Input(shape=self.inputShape))
    model.add(tf.keras.layers.Resizing(224, 224))

    # (filters, dropout rate of each convolution block in the stage); a rate of 0
    # means "no dropout layer". Every stage is followed by 2x2 max pooling.
    # The first entry of the first stage is the input block.
    stages = (
        (64, (0.3, 0)),
        (128, (0.4, 0)),
        (256, (0.4, 0.4, 0)),
        (512, (0.4, 0.4, 0)),
        (512, (0.4, 0.4, 0)),
    )
    for filters, dropoutRates in stages:
      for rate in dropoutRates:
        self.ConvolutionBlock(model, filters, rate)
      model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(512,kernel_regularizer=tf.keras.regularizers.L2(self.weightDecay)))
    model.add(tf.keras.layers.Activation(self.activationFunction))
    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(10))
    model.add(tf.keras.layers.Activation('softmax'))

    return model

  def ConvolutionBlock(self, model, filter, dropout=0):
    """Append a 3x3 'same' convolution, activation and batch normalisation to `model`.

    Parameters
    ----------
    model : the Sequential model to extend in place.
    filter : number of convolution filters.
    dropout : dropout rate appended after batch normalisation; 0 adds no dropout layer.
    """
    model.add(tf.keras.layers.Conv2D(filter, (3, 3), padding='same',kernel_regularizer=tf.keras.regularizers.L2(self.weightDecay)))
    model.add(tf.keras.layers.Activation(self.activationFunction))
    model.add(tf.keras.layers.BatchNormalization())

    if dropout != 0:
      model.add(tf.keras.layers.Dropout(dropout))

# ReLU variant: build, then compile with its own Adam instance.
VGGInstance = VGG16(activationFunction='relu')
VGGReluModel = VGGInstance.buildModel()
VGGReluModel.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm=1, decay=0.005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ELU variant (VGGInstance is rebound to the ELU builder).
VGGInstance = VGG16(activationFunction='elu')
VGGEluModel = VGGInstance.buildModel()
VGGEluModel.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm=1, decay=0.005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Hyper-parameters shared by both runs.
batch_size, num_classes, epochs = 64, 10, 30

# Timed run: ReLU. The clock starts before the banner is printed.
startTime = datetime.datetime.now()
print("Training Of VGG-16 Using ReLU Activation Function")
VGGReluModelLog = VGGReluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For VGG-16 Using ReLU: %s' % elapsed)

# Timed run: ELU.
startTime = datetime.datetime.now()
print("Training Of VGG-16 Using ELU Activation Function")
VGGEluModelLog = VGGEluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For VGG-16 Using ELU: %s' % elapsed)

Training Of VGG-16 Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For VGG-16 Using ReLU: 1:01:28.497404
Training Of VGG-16 Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For VGG-16 Using ELU: 1:02:50.607803


#### AlexNet Implementation For CIFAR-10 (Using Colab Pro)
ReLU Timing: 32 min 24.64s

ReLU Accuracy: 0.8450

ELU Timing: 32 min 15.08s

ELU Accuracy: 0.8336

In [None]:
class AlexNet:
  """AlexNet-style classifier with batch normalisation, built as a Keras Sequential model.

  Inputs are resized to 224x224 inside the model before the first convolution.

  Parameters
  ----------
  inputShape : tuple
      Shape of a single input image; used for the model's Input layer.
  activationFunction : str
      Keras activation name applied after each hidden convolution / dense layer.
  """

  def __init__(self, inputShape=(32,32,3), activationFunction='relu'):
    self.inputShape = inputShape
    self.activationFunction = activationFunction

  def buildModel(self):
    """Build the (uncompiled) Sequential model ending in a 10-way softmax."""
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    # Declare the input once, on an explicit Input layer in front of the resize.
    # The shape used to be attached to the first Conv2D (which sits after Resizing)
    # and to the first Dense (which sits after Flatten), where it does not describe
    # what those layers actually receive.
    model.add(layers.Input(shape=self.inputShape))
    model.add(layers.Resizing(224, 224))

    # Convolutional Layer 1
    model.add(layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation(self.activationFunction))
    model.add(layers.MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # Convolutional Layer 2
    model.add(layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1,1), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation(self.activationFunction))
    model.add(layers.MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # Convolutional Layer 3
    model.add(layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation(self.activationFunction))

    # Convolutional Layer 4
    model.add(layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation(self.activationFunction))

    # Convolutional Layer 5
    model.add(layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation(self.activationFunction))
    model.add(layers.MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # Pass To Fully Connected Layers
    model.add(layers.Flatten())

    # Fully Connected Layers 1-3: Dense -> BatchNorm -> activation -> Dropout(0.4)
    for units in (4096, 4096, 1000):
      model.add(layers.Dense(units))
      model.add(layers.BatchNormalization())
      model.add(layers.Activation(self.activationFunction))
      model.add(layers.Dropout(0.4))

    # Output Layer
    model.add(layers.Dense(10))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('softmax'))

    return model

# Build and compile the ReLU variant.
AlexNetInstance = AlexNet(activationFunction='relu')
AlexNetReluModel = AlexNetInstance.buildModel()
AlexNetReluModel.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['accuracy'])

# Build and compile the ELU variant (AlexNetInstance is rebound to the ELU builder).
AlexNetInstance = AlexNet(activationFunction='elu')
AlexNetEluModel = AlexNetInstance.buildModel()
AlexNetEluModel.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])

# Hyper-parameters shared by both runs.
batch_size, num_classes, epochs = 64, 10, 30

# Timed run: ReLU. The clock starts before the banner is printed.
startTime = datetime.datetime.now()
print("Training Of AlexNet Using ReLU Activation Function")
AlexNetReluModelLog = AlexNetReluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For AlexNet Using ReLU: %s' % elapsed)

# Timed run: ELU.
startTime = datetime.datetime.now()
print("Training Of AlexNet Using ELU Activation Function")
AlexNetEluModelLog = AlexNetEluModel.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
)
elapsed = datetime.datetime.now() - startTime
print('Training Time For AlexNet Using ELU: %s' % elapsed)

Training Of AlexNet Using ReLU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For AlexNet Using ReLU: 0:32:24.645190
Training Of AlexNet Using ELU Activation Function
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training Time For AlexNet Using ELU: 0:32:15.084036
