# Lab 2.1 : Image classification [Keras model]

<img src="details/Residual.png">
### Image credit: https://arxiv.org/pdf/1512.03385.pdf

In [None]:
%matplotlib inline
import warnings; warnings.simplefilter('ignore')

### Standard and third-party libraries
import cv2
import time
import PIL.Image
import IPython.display
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

### Keras Library
import keras
from keras.models import load_model
import keras.backend as K
from keras.applications.resnet50 import ResNet50

### TensorRT and Tensorflow Library
from tensorflow.python.platform import gfile
from tensorflow.core.protobuf import config_pb2 as cpb2
from tensorflow.python.framework import ops as ops
from tensorflow.python.ops import array_ops as aops
from tensorflow.python.framework import importer as importer
from tensorflow.python.client import session as csess
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

# Select the GPU used by this notebook: CUDA_VISIBLE_DEVICES is set to "0",
# i.e. a single device index rather than a count of GPUs.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
# Restore the classifier from the Keras HDF5 file at model_path.
model_path = "ResNet50.h5"
infer_model = load_model(filepath=model_path)

# The HDF5 file can be (re)created from the stock ImageNet weights with:
#   infer_model = ResNet50(weights='imagenet')
#   infer_model.save("ResNet50.h5")

In [None]:
# Read the ImageNet class names from the labels file, one entry per line.
# Only the text before the first comma of each line is kept, and lines of at
# most one character (e.g. a bare newline) are skipped.
# The `with` block guarantees the file handle is closed once the labels are
# read, and strip() removes the trailing newline that would otherwise remain
# on entries that contain no comma.
with open('imagenet_classes.txt', 'r') as imagenet_list:
    imagenet_label = [
        line.split(",")[0].strip()
        for line in imagenet_list
        if len(line) > 1
    ]

In [None]:
# Prepare data
img_path = 'data/image/piano.jpg'
img = cv2.imread(img_path)

# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so stop here with a clear message rather than failing in resize.
if img is None:
    raise FileNotFoundError("Could not read image file: %s" % img_path)

# Resize image to 224 x 224
img = cv2.resize(img, (224, 224))

# Convert BGR to RGB
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Reshape input into a batch holding a single image
# NOTE(review): the raw RGB pixels are fed to the model without any further
# preprocessing (e.g. keras.applications.resnet50.preprocess_input) — confirm
# this is intended for this lab.
img = np.reshape(img, [1, 224, 224, 3])

print(img.shape)

In [None]:
# Display the prepared image (first item of the batch) with the axes hidden.
plt.axis('off')
plt.imshow(img[0])
# plt.show must be called; the bare name only evaluates to the function
# object and leaves its repr as the cell output.
plt.show()

In [None]:
# Benchmark the Keras model: 101 predictions on the same single-image batch.
# The first call is treated as a warm-up: it is reported but kept out of the
# average, which is taken over the remaining 100 calls.
k_time = []
for step in range(101):
    started = time.time()
    result = infer_model.predict(img)
    predict = np.argmax(result)
    elapsed = time.time() - started

    if step == 0:
        print("Warm up : %f" % elapsed)
        continue

    k_time.append(elapsed)
    if step % 10 == 0:
        print("Step %d : %f" % (step, elapsed))

print("Predict time average: %f" % (sum(k_time) / 100.))
print("Predict class : " + imagenet_label[predict])

# Lab 2.2 : Model optimization using TensorRT 4.0

<img src="details/TensorRT4.png">
### Image credit: https://developer.nvidia.com/tensorrt

In [None]:
# Convert the Keras model (.h5) into a TensorFlow Protobuf file (.pb) by
# running the keras_to_tensorflow.py script on ResNet50.h5 in a shell.
# NOTE(review): presumably this produces the ResNet50.pb file loaded by the
# following cell — confirm against the script's output settings.

!python3 keras_to_tensorflow.py -input_model_file ResNet50.h5

In [None]:
# Load the classifier GraphDef from its binary Protobuf (.pb) file.
classifier_model_file = 'ResNet50.pb'
with tf.gfile.Open(classifier_model_file, 'rb') as pb_file:
    serialized_graph = pb_file.read()

# Parse the raw bytes into a GraphDef message.
classifier_graph_def = tf.GraphDef()
classifier_graph_def.ParseFromString(serialized_graph)
print('Loaded classifier graph def')

In [None]:
# TensorRT graph conversion parameters.
batch_size = 128
workspace_size_bytes = 32 * 1024 * 1024  # 32 MiB, the same value as 1 << 25
trt_gpu_ops = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)

#### Open the file "model.ascii" to find the input node name (first layer, e.g. name: "input_1") and the output node name (last layer, e.g. name: "output_node0").

In [None]:
def execute_graph(gdef, dumm_inp, input_name="input_1:0",
                  output_name="output_node0:0", num_runs=100):
    """Import a GraphDef, benchmark it on one input and report the top class.

    The graph is imported into a fresh ``Graph`` and run ``num_runs + 1``
    times on ``dumm_inp``. The first run is a warm-up and is left out of the
    timing. Every 10th timed run prints its duration; afterwards the average
    time per run and the highest-scoring class (looked up in the module-level
    ``imagenet_label`` list) are printed.

    Args:
        gdef: GraphDef to import and execute.
        dumm_inp: Value fed to the input tensor on every run.
        input_name: Name of the input tensor inside ``gdef``.
        output_name: Name of the output tensor inside ``gdef``.
        num_runs: Number of timed runs performed after the warm-up run.

    Returns:
        Total wall-clock time in seconds, summed over the timed runs
        (the warm-up run is excluded).

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1, got %r" % (num_runs,))

    print("Executing ...")
    gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=1)
    sessconfig = cpb2.ConfigProto(gpu_options=gpu_options)
    ops.reset_default_graph()
    g = ops.Graph()
    keeptime = 0.0
    with g.as_default():
        inp, out = importer.import_graph_def(
            graph_def=gdef, return_elements=[input_name, output_name])
    with csess.Session(config=sessconfig, graph=g) as sess:
        for i in range(num_runs + 1):
            # Measure wall-clock time. time.process_time() counts only CPU
            # time of this process, which is not comparable with the
            # wall-clock timings taken for the Keras model.
            start_time = time.perf_counter()
            val = sess.run([out], {inp: dumm_inp})
            stop_time = time.perf_counter()
            if i != 0:  # run 0 is the warm-up and is not timed
                keeptime += stop_time - start_time
                if i % 10 == 0:
                    print("Step %d : %f seconds" % (i, stop_time - start_time))

        # val holds one array per fetched tensor; take the scores of the
        # first (and only) item of the batch.
        scores = val[0][0]
        # Index and score of the highest-scoring class (first one on ties).
        top_index, top_score = max(enumerate(scores), key=lambda pair: pair[1])
        print("Predict time average %f seconds" % (keeptime / num_runs))
        print("Predict class : " + imagenet_label[top_index]
              + " with confidence: %.3f" % top_score)
    return keeptime

In [None]:
# Convert the classifier graph with TensorRT using FP32 precision.
fp32_graph = trt.create_inference_graph(
    precision_mode="FP32",
    input_graph_def=classifier_graph_def,
    outputs=["output_node0"],
    max_workspace_size_bytes=workspace_size_bytes,
    max_batch_size=batch_size,
)
# Optional: write the converted graph to disk.
# with gfile.FastGFile("resnetV150_TRTFP32.pb", 'wb') as f:
#     f.write(fp32_graph.SerializeToString())

In [None]:
# Benchmark the FP32 TensorRT graph on the prepared image batch.
fp32_time = execute_graph(gdef=fp32_graph, dumm_inp=img)

In [None]:
# Convert the classifier graph with TensorRT using FP16 precision.
fp16_graph = trt.create_inference_graph(
    precision_mode="FP16",
    input_graph_def=classifier_graph_def,
    outputs=["output_node0"],
    max_workspace_size_bytes=workspace_size_bytes,
    max_batch_size=batch_size,
)
# Optional: write the converted graph to disk.
# with gfile.FastGFile("resnetV150_TRTFP16.pb", 'wb') as f:
#     f.write(fp16_graph.SerializeToString())

In [None]:
# Benchmark the FP16 TensorRT graph on the prepared image batch.
fp16_time = execute_graph(gdef=fp16_graph, dumm_inp=img)

In [None]:
### The speed-up depends on the GPU
# Mean time per image over the 100 timed runs of each backend, then its
# reciprocal as throughput in images per second.
mean_latency_sec = [sum(k_time) / 100., fp32_time / 100., fp16_time / 100.]
image_per_sec = [1. / latency for latency in mean_latency_sec]
x = np.arange(len(image_per_sec))


def time_sec(x, pos):
    """Format an axis tick value as a throughput label.

    The two args are the value and tick position; ``pos`` is accepted but
    not used.

    Returns:
        A string such as ``'50 images/sec'``.
    """
    # An explicit conversion is required: in '% images/sec' the sequence
    # '% i' is itself parsed as a conversion (space flag + integer), which
    # swallows the "i" and renders e.g. ' 5mages/sec'.
    return '%g images/sec' % x


# Bar chart of throughput per backend; y-axis ticks are labelled by time_sec.
fig, ax = plt.subplots()
formatter = FuncFormatter(time_sec)
ax.yaxis.set_major_formatter(formatter)

ax.bar([0, 1, 2], image_per_sec)
ax.set_xticks(x)
ax.set_xticklabels(('Keras', 'TensorRT_FP32', 'TensorRT_FP16'))
plt.show()

<img src="details/Nvidia_K80.png">
### Image credit: https://www.nvidia.com/en-us/data-center/tesla-k80/

<img src="details/efficient_RT.png">
### Image credit: https://devblogs.nvidia.com/wp-content/uploads/2017/04/TensorRT1_perf.png