# Table of Contents
[I.   Benchmark with Floating-point Operations Per Second (FLOPS) Calculator](#Test01)

[II.  Benchmark with MNIST dataset](#Test02)

[III. Benchmark with Zalando MNIST dataset](#Test02)

In [None]:
##
# Import required libraries:
#
import os
import time

import tensorflow
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import pandas as pd

##
# Import functions:
#
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend
from tensorflow.keras.applications import VGG19
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras import regularizers
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import mixed_precision
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from keras_flops import get_flops

from utils import process_mnist

In [None]:
##
# Report whether TensorFlow lists at least one physical GPU device:
#
gpu = bool(tensorflow.config.list_physical_devices('GPU'))
gpu_status = "available" if gpu else "NOT AVAILABLE"
print("GPU is", gpu_status)

In [None]:
##
# Select 'mixed_float16' as the global Keras mixed-precision policy:
#
policy = mixed_precision.Policy(name = 'mixed_float16')
mixed_precision.set_global_policy(policy)

In [None]:
##
# Implement the model architecture:
#
backend.clear_session()

# Output directory for the (optional) ModelCheckpoint callback further below.
# exist_ok = True keeps re-running this cell harmless, while genuine failures
# (e.g. missing permissions) are raised instead of being silently swallowed.
os.makedirs('classifiers', exist_ok = True)
full_name = 'Model A'

# VGG19 convolutional base with ImageNet weights and without its classifier head.
convolution_base = VGG19(weights = 'imagenet', include_top = False, input_shape = (32,32,3))

###
# Use this command to load the weights if you cannot access to the Internet.
# Remember to replace <file_name> with the actual filename.
# convolution_base = VGG19(weights = 'weights/<file_name>.h5', include_top = False, input_shape = (32,32,3))

# Freeze the base so that only the layers added below are trained.
convolution_base.trainable = False
###
model = Sequential()
model.add(convolution_base)
x = model.output
###
# Four identical 3x3 convolutions on top of the base, followed by global average pooling.
for _ in range(4):
    x = Conv2D(128, kernel_size=(3,3), activation = 'relu', padding = 'same')(x)
x = GlobalAveragePooling2D()(x)
###
# Regularized fully connected head (4096 -> 2048 -> 128 units).
# Dropout follows the first two layers only; the last one feeds the classifier directly.
for units, dropout_rate in ((4096, 0.5), (2048, 0.5), (128, None)):
    x = Dense(units, activation = 'relu', kernel_regularizer = regularizers.l1_l2(l1 = 1e-5, l2 = 1e-4),
              bias_regularizer = regularizers.l2(1e-4),
              activity_regularizer = regularizers.l2(1e-5))(x)
    if dropout_rate is not None:
        x = Dropout(dropout_rate)(x)
# The notebook enables the 'mixed_float16' policy; the softmax output layer is
# pinned to float32 so that the class probabilities and the loss computed from
# them do not suffer from float16 precision limits.
out = Dense(10, activation = 'softmax', dtype = 'float32')(x)
finalModel = Model(inputs = model.input, outputs = out)

##
# Compile the model with defined optimizer and metrics:
#
opt = Adam(learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-07)
finalModel.compile(loss = 'categorical_crossentropy', optimizer = opt, metrics = ['accuracy', 'Precision', 'Recall'])

##
# Extract the best model based on validation accuracy:
# (disabled; uncomment and pass callbacks_list to fit() to store checkpoints)
#
#filepath = "classifiers/%s-{epoch:02d}-{val_accuracy:.4f}-MNIST.hdf5"%full_name
#checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'max')
#callbacks_list = [checkpoint]

# I. Benchmark with Floating-point Operations Per Second (FLOPS) Calculator <a name = 'Test01'></a>

In [None]:
##
# Calculate FLOPS for a batch of one sample and print the figure in units of 10**9:
#
flops = get_flops(finalModel, batch_size = 1)
giga_flops = flops / 10**9
print(f"FLOPS: {giga_flops:.2f} G")

# II. Benchmark with MNIST dataset <a name = 'Test02'></a>

In [None]:
##
# Load the MNIST dataset:
#

def _to_rgb_32(images):
    """Resize the images to 32x32 via process_mnist and replicate them into three channels."""
    resized = process_mnist.resize_mnist(images, 32, 28)
    return np.stack((resized,) * 3, axis = -1)


data_location = int(input('Select the option to load data (0 = from the server; 1 = manually from the directory): '))

if data_location == 0:
    # Option 0: download the complete dataset from OpenML as NumPy arrays.
    X, y = fetch_openml('mnist_784', version = 1, return_X_y = True, as_frame = False)

    # Convert the input data into RGB image type with a resolution of 32x32.
    X = _to_rgb_32(X)

    # One-hot encode the output labels.
    y = to_categorical(y, num_classes = 10)

elif data_location == 1:
    # Option 1: read the separate train and test files from the local directory.
    X_train, y_train = process_mnist.load_mnist('data/mnist', kind = 'train')
    X_test, y_test = process_mnist.load_mnist('data/mnist', kind = 't10k')

    # Convert the input data into RGB image type with a resolution of 32x32.
    X_train = _to_rgb_32(X_train)
    X_test = _to_rgb_32(X_test)

    # One-hot encode the output labels.
    y_train = to_categorical(y_train, num_classes = 10)
    y_test = to_categorical(y_test, num_classes = 10)
else:
    print('Invalid selection!')

In [None]:
##
# Sanity check for input and output dimensions:
#
if data_location == 0:
    # Server download: one combined set of 70000 samples, split in a later cell.
    assert X.shape == (70000, 32, 32, 3), "X should have a dimension of (70000, 32, 32, 3)!"
    assert y.shape == (70000,10), "y should have a dimension of (70000,10)!"
elif data_location == 1:
    # Local files: train and test subsets are loaded separately, so each message
    # names the array that is checked together with its own expected shape.
    assert X_train.shape == (60000, 32, 32, 3), "X_train should have a dimension of (60000, 32, 32, 3)!"
    assert y_train.shape == (60000,10), "y_train should have a dimension of (60000,10)!"
    assert X_test.shape == (10000, 32, 32, 3), "X_test should have a dimension of (10000, 32, 32, 3)!"
    assert y_test.shape == (10000,10), "y_test should have a dimension of (10000,10)!"
else:
    print('Invalid selection!')

In [None]:
##
# Prepare the train and test subsets:
#
# Only the server download (option 0) yields a single (X, y) pair that still has
# to be split; 10000 samples are held out for testing. Nothing is done otherwise.
if data_location == 0:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 10000)

In [None]:
##
# Train the model and store the execution time for evaluation:
#
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments during the run.
start = time.perf_counter()
history = finalModel.fit(X_train, y_train, epochs = 100, verbose = 1,
                         shuffle = True, validation_data = (X_test, y_test))
end = time.perf_counter()
runtime_mnist = end - start  # elapsed time in seconds

In [None]:
##
# Visualize the train/validation accuracy and loss after the training duration:
#
# One figure per metric. Keras reports accuracy as a fraction in [0, 1], so the
# axis is labelled without a percent sign. The 'Test' curve is the validation
# series, because the test subset was passed as validation_data during fit().
for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title("MNIST " + label)
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc = 'upper left')
    plt.grid(True)
    plt.show()

# III. Benchmark with Zalando MNIST dataset <a name = 'Test03'></a>

In [None]:
##
# Load the Zalando MNIST dataset (train and test files from the local directory):
#
X_train, y_train = process_mnist.load_mnist('data/fashion', kind = 'train')
X_test, y_test = process_mnist.load_mnist('data/fashion', kind = 't10k')

##
# Convert the input data into RGB image type and resize the resolution to 32x32.
# The resized images are stacked three times along a new last axis:
#
X_train = np.stack([process_mnist.resize_mnist(X_train, 32, 28)] * 3, axis = -1)
X_test = np.stack([process_mnist.resize_mnist(X_test, 32, 28)] * 3, axis = -1)

##
# One-hot encoding the output labels:
#
y_train, y_test = (to_categorical(labels, num_classes = 10) for labels in (y_train, y_test))

In [None]:
##
# Sanity check for input and output dimensions:
#
# Each entry pairs an array with its expected shape and the failure message.
shape_checks = (
    (X_train, (60000,32,32,3), "X_train should have a dimension of (60000,32,32,3)!"),
    (X_test, (10000,32,32,3), "X_test should have a dimension of (10000,32,32,3)!"),
    (y_train, (60000,10), "y_train should have a dimension of (60000,10)"),
    (y_test, (10000,10), "y_test should have a dimension of (10000,10)"),
)
for array, expected_shape, message in shape_checks:
    assert array.shape == expected_shape, message

In [None]:
##
# Extract the best model based on validation accuracy:
# (disabled; uncomment and pass callbacks_list to fit() to store checkpoints)
#
#filepath = "classifiers/%s-{epoch:02d}-{val_accuracy:.4f}-fashion.hdf5"%full_name
#checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'max')
#callbacks_list = [checkpoint]

###
# Train the model and store the execution time for evaluation:
#
# NOTE(review): finalModel is the same instance that was already fitted on MNIST
# and it is not re-created in between, so this run starts from the MNIST-trained
# weights rather than from a fresh initialization - confirm this is intended.
#
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments during the run.
start = time.perf_counter()
history = finalModel.fit(X_train, y_train, epochs = 100, verbose = 1,
                         shuffle = True, validation_data = (X_test, y_test))
end = time.perf_counter()
runtime_fashion = end - start  # elapsed time in seconds

In [None]:
##
# Visualize the train/validation accuracy and loss after the training duration:
#
# One figure per metric. Keras reports accuracy as a fraction in [0, 1], so the
# axis is labelled without a percent sign. The 'Test' curve is the validation
# series, because the test subset was passed as validation_data during fit().
for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title("Zalando " + label)
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc = 'upper left')
    plt.grid(True)
    plt.show()

In [None]:
##
# Visualize the execution time on each dataset, respectively:
#
names = ['MNIST', 'Zalando']
values = [round(runtime, 0) for runtime in (runtime_mnist, runtime_fashion)]

plt.bar(names, values)
plt.ylabel('Time')
plt.grid(True)

# Write each rounded runtime just above its bar.
for position, value in enumerate(values):
    plt.text(x = position, y = value + 1, s = str(value), fontdict = {'fontsize': 12}, ha = 'center')

plt.show()