# Table of Contents
[I. Benchmark with General Matrix to Matrix Multiplication (GEMM) Operations](#Test01)

[II. Benchmark with Convolution Operations](#Test02)


In [None]:
##
# Import important libraries:
#
import numpy as np
import tensorflow
import pandas as pd
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import mixed_precision
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from utils import gemm, convolution, process_mnist, profiler
import time

In [None]:
##
# Abort early unless TensorFlow reports a GPU device:
#
# gpu_device_name() yields an empty (falsy) name when no GPU is found, in
# which case the notebook cannot continue.
device_name = tensorflow.test.gpu_device_name()
if device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

# I. Benchmark with General Matrix to Matrix Multiplication (GEMM) operations <a name = 'Test01'></a>

In [None]:
##
# Evaluate the total execution time for Dense Matrix Multiplication:
#
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, so the measured interval cannot be skewed
# by system clock adjustments while the benchmark is running.
start = time.perf_counter()
gemm.dense_matrix_multiplication(3072,128,1024)
gemm.dense_matrix_multiplication(5124,9124,2560)
gemm.dense_matrix_multiplication(2560,64,2560)
gemm.dense_matrix_multiplication(7860,64,2560)
gemm.dense_matrix_multiplication(1760,128,1760)
end = time.perf_counter()
duration_dense = end - start

##
# Test for Dense Matrix Multiplication:
#
# A non-positive duration means the timed section did not actually run.
if duration_dense <= 0:
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# Evaluate the total execution time for Sparse Matrix Multiplication:
#
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, so the measured interval cannot be skewed
# by system clock adjustments while the benchmark is running.
start = time.perf_counter()
gemm.sparse_matrix_multiplication(10752,1,3584,0.9)
gemm.sparse_matrix_multiplication(7680,1500,2560,0.95)
gemm.sparse_matrix_multiplication(7680,2,2560,0.95)
gemm.sparse_matrix_multiplication(7680,1,2560,0.95)
end = time.perf_counter()
duration_sparse = end - start

##
# Test for Sparse Matrix Multiplication:
#
# A non-positive duration means the timed section did not actually run.
if duration_sparse <= 0:
    raise SystemError('Error when executing this cell block.')

# II. Benchmark with Convolution operations <a name = 'Test02'></a>

In [None]:
##
# Load the Zalando MNIST dataset (training and 't10k' test splits):
#
X_train, y_train = process_mnist.load_mnist('data/fashion', kind = 'train')
X_test, y_test = process_mnist.load_mnist('data/fashion', kind = 't10k')


##
# Resize each split from 28x28 to 32x32, then replicate the result three
# times along a new trailing axis so the images have three (RGB) channels:
#
X_train = process_mnist.resize_mnist(X_train, 28, 28, 32, 32)
X_train = np.stack([X_train, X_train, X_train], axis = -1)
X_test = process_mnist.resize_mnist(X_test, 28, 28, 32, 32)
X_test = np.stack([X_test, X_test, X_test], axis = -1)

##
# One-hot encode the output labels over 10 classes:
#
y_train, y_test = (to_categorical(y_train, num_classes = 10),
                   to_categorical(y_test, num_classes = 10))

##
# Test for dataset:
#
# Every array must contain at least one non-zero entry; an all-zero (or empty)
# array indicates that loading or preprocessing went wrong.
if not all(np.any(data) for data in (X_train, X_test, y_train, y_test)):
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# Sanity check for input and output dimensions:
#
# Each entry pairs an array with the shape it must have and the message shown
# when the check fails; entries are verified in the order listed.
expected_shapes = (
    (X_train, (60000, 32, 32, 3), "X_train should have a dimension of (60000,32,32,3)!"),
    (X_test, (10000, 32, 32, 3), "X_test should have a dimension of (10000,32,32,3)!"),
    (y_train, (60000, 10), "y_train should have a dimension of (60000,10)"),
    (y_test, (10000, 10), "y_test should have a dimension of (10000,10)"),
)
for array, shape, message in expected_shapes:
    assert array.shape == shape, message

In [None]:
##
# Set up the precision target:
#
# Prefer the stable mixed-precision API (Policy / set_global_policy). The
# `experimental` namespace used previously is deprecated and absent from
# recent TensorFlow releases, so it is kept only as a fallback for older
# releases that do not provide the stable API yet.
_mixed_precision = tensorflow.keras.mixed_precision
if hasattr(_mixed_precision, 'set_global_policy'):
    policy = _mixed_precision.Policy('mixed_float16')
    _mixed_precision.set_global_policy(policy)
else:
    policy = _mixed_precision.experimental.Policy('mixed_float16')
    _mixed_precision.experimental.set_policy(policy)

##
# Test for precision target:
#
if not policy:
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# Compute the flops of the Conv2D model:
#
# The model is built inside a TF1-style graph/session so that the v1 profiler
# can count the floating-point operations recorded in that graph.
session = tensorflow.compat.v1.Session()
graph = tensorflow.compat.v1.get_default_graph()

try:
    with graph.as_default():
        with session.as_default():
            model = convolution.convolve2D(X_train.shape[1],X_train.shape[2],X_train.shape[3])
            run_meta = tensorflow.compat.v1.RunMetadata()
            opts = tensorflow.compat.v1.profiler.ProfileOptionBuilder.float_operation()
            flops = tensorflow.compat.v1.profiler.profile(graph=graph,
                                                          run_meta=run_meta, cmd = 'op', options=opts)
finally:
    # Release the session's resources even if building or profiling fails;
    # the session is not used again after this point.
    session.close()

# Drop the profiled graph so the models built below start from a clean state.
tensorflow.compat.v1.reset_default_graph()

##
# Compute the memory usage of this model:
#
# The model is rebuilt because the previous instance belonged to the graph
# that was just reset.
batch_size = 64
model = convolution.convolve2D(X_train.shape[1],X_train.shape[2],X_train.shape[3])
memory_usage = profiler.memory_usage(model,batch_size)

##
# Compute the memory required to store the weights of this model:
#
memory_weights = profiler.memory_weights(model)

##
# Test for model profiler:
#
# Any zero/empty metric means the corresponding profiling step failed.
if (not flops.total_float_ops) or (not memory_usage) or (not memory_weights):
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# Evaluate the total execution time for the convolution network:
#
# The timed section covers both building the model and training it for
# 10 epochs. time.perf_counter() is used instead of time.time(): it is
# monotonic and has the highest available resolution, so the measured interval
# cannot be skewed by system clock adjustments during the long training run.

start = time.perf_counter()
model = convolution.convolve2D(X_train.shape[1],X_train.shape[2],X_train.shape[3])
model.fit(X_train, y_train, epochs = 10, verbose = 1, batch_size = 64,
                         shuffle = True, validation_data = (X_test, y_test))
end = time.perf_counter()
duration_conv = end - start

##
# Test for Convolution Neural Network:
#
# A non-positive duration means the timed section did not actually run.
if duration_conv <= 0:
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# Save the results into a .csv file:
#
import os

# NOTE(review): 'FLOPS' and the two memory figures are scalars, so pandas
# repeats them on all three rows (DMM and SMM included) although they were
# measured on the Conv2D model only -- confirm this is intended.
results = {'Test': ['DMM','SMM','Convolve2D'],
        'Duration (s)': [duration_dense,duration_sparse,duration_conv],
        'FLOPS': flops.total_float_ops,
        'Memory Usage (Gbytes)': memory_usage,
        'Memory for Weights (Mbytes)': memory_weights
          }
df = pd.DataFrame(results, columns= ['Test', 'Duration (s)','FLOPS',
                                     'Memory Usage (Gbytes)', 'Memory for Weights (Mbytes)'])

# Create the output directory first: to_csv() does not create missing parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs('results', exist_ok = True)
df.to_csv('results/simple.csv', index = False)

##
# Test saving results:
#
if (not np.any(df)):
    raise SystemError('Error when executing this cell block.')

In [None]:
##
# (Optional): Automated debugging:
#
# Each entry pairs a report heading with the value that must be truthy for the
# corresponding step to count as passing. A failing step is reported
# explicitly instead of printing only its heading.
checks = [
    ('1. Total Execution Time for Dense MM:', duration_dense),
    ('2. Total Execution Time for Sparse MM:', duration_sparse),
    ('3. Total Execution Time for Conv2D:', duration_conv),
    ('4. The number of FLOPS in the Conv2D model:', flops.total_float_ops),
    ('5. Memory usage of the model:', memory_usage),
    ('6. Memory required to store the model weights:', memory_weights),
    ('7. Save the results:', np.any(df)),
]

for title, value in checks:
    print(title)
    if value:
        print('Build:passing\n')
    else:
        print('Build:failing\n')