# Table of Contents
[I. Benchmark with General Matrix to Matrix Multiplication (GEMM) Operations](#Test01)

[II. Benchmark with Convolution](#Test02)

[III. Benchmark with Recurrent Neural Network](#Test03)

In [None]:
##
# Import important libraries:
#
import numpy as np
import tensorflow
import pandas as pd
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import mixed_precision
from utils import gemm, convolve, process_mnist, rnn
import time

In [None]:
##
# Confirm that TensorFlow can see a GPU before any benchmark runs;
# abort with SystemError when no GPU device name is reported.
#
device_name = tensorflow.test.gpu_device_name()
if device_name:
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

# I. Benchmark with General Matrix to Matrix Multiplication (GEMM) operations <a name = 'Test01'></a>

In [None]:
##
# Evaluate the total execution time for Dense Matrix Multiplication.
# The five calls below are timed together as one interval.
# time.perf_counter() is used instead of time.time() because it is monotonic
# and high-resolution, so system clock adjustments cannot distort the result.
# NOTE(review): the arguments are presumably matrix dimensions - confirm
# against utils.gemm.
#
start = time.perf_counter()
gemm.dense_matrix_multiplication(3072,128,1024)
gemm.dense_matrix_multiplication(5124,9124,2560)
gemm.dense_matrix_multiplication(2560,64,2560)
gemm.dense_matrix_multiplication(7860,64,2560)
gemm.dense_matrix_multiplication(1760,128,1760)
end = time.perf_counter()
duration_dense = end - start
print(f"Duration: {round(duration_dense,2)} s")

In [None]:
##
# Evaluate the total execution time for Sparse Matrix Multiplication.
# The four calls below are timed together as one interval.
# time.perf_counter() is used instead of time.time() because it is monotonic
# and high-resolution, so system clock adjustments cannot distort the result.
# NOTE(review): the last argument (0.9 / 0.95) is presumably the sparsity
# ratio - confirm against utils.gemm.
#
start = time.perf_counter()
gemm.sparse_matrix_multiplication(10752,1,3584,0.9)
gemm.sparse_matrix_multiplication(7680,1500,2560,0.95)
gemm.sparse_matrix_multiplication(7680,2,2560,0.95)
gemm.sparse_matrix_multiplication(7680,1,2560,0.95)
end = time.perf_counter()
duration_sparse = end - start
print(f"Duration: {round(duration_sparse,2)} s")

# II. Benchmark with Convolution operations <a name = 'Test02'></a>

In [None]:
##
# Load the Zalando MNIST dataset ('train' split and 't10k' split):
#
X_train, y_train = process_mnist.load_mnist('data/fashion', kind = 'train')
X_test, y_test = process_mnist.load_mnist('data/fashion', kind = 't10k')


##
# Resize both image sets to a 32x32 resolution first ...
# NOTE(review): (32, 28) are presumably the target and source sizes -
# confirm against process_mnist.resize_mnist.
#
X_train = process_mnist.resize_mnist(X_train, 32, 28)
X_test = process_mnist.resize_mnist(X_test, 32, 28)

##
# ... then replicate each image along a new last axis three times so the
# data takes an RGB-like, 3-channel form:
#
X_train = np.stack([X_train, X_train, X_train], axis = -1)
X_test = np.stack([X_test, X_test, X_test], axis = -1)

##
# One-hot encode the output labels of both splits (10 classes):
#
y_train, y_test = (to_categorical(labels, num_classes = 10)
                   for labels in (y_train, y_test))

In [None]:
##
# Sanity check for input and output dimensions.
# Each entry pairs an array with the shape it must have and the message to
# raise otherwise; entries are checked in order, stopping at the first failure.
#
_shape_checks = (
    (X_train, (60000,32,32,3), "X_train should have a dimension of (60000,32,32,3)!"),
    (X_test, (10000,32,32,3), "X_test should have a dimension of (10000,32,32,3)!"),
    (y_train, (60000,10), "y_train should have a dimension of (60000,10)"),
    (y_test, (10000,10), "y_test should have a dimension of (10000,10)"),
)
for _array, _expected_shape, _message in _shape_checks:
    assert _array.shape == _expected_shape, _message

In [None]:
##
# Set up the precision target: make 'mixed_float16' the global Keras policy.
# The `experimental` mixed-precision namespace is deprecated and no longer
# ships with newer TensorFlow releases, so the stable API is used whenever it
# is present. The experimental calls remain only as a fallback for older
# TensorFlow versions that lack the stable API.
#
if hasattr(tensorflow.keras.mixed_precision, 'set_global_policy'):
    policy = tensorflow.keras.mixed_precision.Policy('mixed_float16')
    tensorflow.keras.mixed_precision.set_global_policy(policy)
else:
    policy = tensorflow.keras.mixed_precision.experimental.Policy('mixed_float16')
    tensorflow.keras.mixed_precision.experimental.set_policy(policy)

In [None]:
##
# Compute the number of flops for the convolution network.
# The model is constructed inside a TF1-style graph/session so that the v1
# profiler can count the float operations registered in that graph.
#
graph = tensorflow.compat.v1.get_default_graph()

with graph.as_default():
    # Using the session as a context manager makes it the default session and
    # closes it on exit, so it does not keep holding resources while the
    # benchmarks that follow are running.
    with tensorflow.compat.v1.Session(graph = graph) as session:
        convolve.convolve2D()
        run_meta = tensorflow.compat.v1.RunMetadata()
        opts = tensorflow.compat.v1.profiler.ProfileOptionBuilder.float_operation()
        flops = tensorflow.compat.v1.profiler.profile(graph = graph,
                                                      run_meta = run_meta, cmd ='op', options=opts)

# Drop the profiled graph so later cells start from a clean default graph.
tensorflow.compat.v1.reset_default_graph()

flops_conv = round(flops.total_float_ops, 0)
print(f"Number of flops: {flops_conv}")

In [None]:
##
# Evaluate the total execution time for convolution network.
# The timed interval covers both building the model and fitting it for
# 10 epochs. time.perf_counter() is used instead of time.time() because it is
# monotonic and high-resolution, so system clock adjustments during the long
# training run cannot distort the result.
#

start = time.perf_counter()
model = convolve.convolve2D()
model.fit(X_train, y_train, epochs = 10, verbose = 1, batch_size = 64,
                         shuffle = True, validation_data = (X_test, y_test))
end = time.perf_counter()
duration_conv = end - start
print(f"Duration: {round(duration_conv,2)} s")

# III. Benchmark with recurrent neural network <a name = 'Test03'></a>

In [None]:
##
# Compute the number of flops for the recurrent neural network.
# The model is constructed inside a TF1-style graph/session so that the v1
# profiler can count the float operations registered in that graph.
#
graph = tensorflow.compat.v1.get_default_graph()

with graph.as_default():
    # Using the session as a context manager makes it the default session and
    # closes it on exit, so it does not keep holding resources while the
    # benchmark that follows is running.
    with tensorflow.compat.v1.Session(graph = graph) as session:
        rnn.rnn()
        run_meta = tensorflow.compat.v1.RunMetadata()
        opts = tensorflow.compat.v1.profiler.ProfileOptionBuilder.float_operation()
        flops = tensorflow.compat.v1.profiler.profile(graph = graph,
                                                      run_meta = run_meta, cmd ='op', options=opts)

# Drop the profiled graph so later cells start from a clean default graph.
tensorflow.compat.v1.reset_default_graph()

flops_rnn = round(flops.total_float_ops, 0)
# Report the RNN count (the original printed flops_conv here by mistake).
print(f"Number of flops: {flops_rnn}")

In [None]:
##
# Evaluate the total execution time for recurrent neural network.
# The timed interval covers both building the model and fitting it for
# 10 epochs. time.perf_counter() is used instead of time.time() because it is
# monotonic and high-resolution, so system clock adjustments during the long
# training run cannot distort the result.
#
start = time.perf_counter()
model = rnn.rnn()
model.fit(X_train, y_train, epochs = 10, verbose = 1, batch_size = 64,
                         shuffle = True, validation_data = (X_test, y_test))
end = time.perf_counter()
# The elapsed time is end - start; subtracting the `time` module itself
# (as the original did) raises a TypeError.
duration_rnn = end - start
# Rounded to 2 decimals to match the other benchmark cells.
print(f"Duration: {round(duration_rnn,2)} s")