# TensorRT benchmarking with 2D U-Net
In this tutorial, we will use the TensorRT to perform benchmarking on InceptionV3 model.

This tutorial assumes that you running on [AWS Ubuntu DLAMI](https://aws.amazon.com/marketplace/pp/B07Y43P7X5). 

Following are steps:

1. Convert our Keras model to a Tensorflow model. 
1. Freeze the Tensorflow saved format model
1. Convert the above freezed-model to the TensorRT formats: FP32 and FP16 (for V100)
1. Benchmark with BZ=1, run the inference with BZ=1 for 1 min.


In [1]:
import keras
import os
import tensorflow as tf
import numpy as np
import keras as K
import shutil, sys  

Using TensorFlow backend.





In [2]:
def dice_coef(y_true, y_pred, axis=(1, 2), smooth=1):
    """
    Sorenson (Soft) Dice
    \frac{  2 \times \left | T \right | \cap \left | P \right |}{ \left | T \right | +  \left | P \right |  }
    where T is ground truth mask and P is the prediction mask
    """
    intersection = tf.reduce_sum(y_true * y_pred, axis=axis)
    union = tf.reduce_sum(y_true + y_pred, axis=axis)
    numerator = tf.constant(2.) * intersection + smooth
    denominator = union + smooth
    coef = numerator / denominator

    return tf.reduce_mean(coef)

def soft_dice_coef(target, prediction, axis=(1, 2), smooth=0.01):
    """
    Sorenson (Soft) Dice  - Don't round the predictions
    \frac{  2 \times \left | T \right | \cap \left | P \right |}{ \left | T \right | +  \left | P \right |  }
    where T is ground truth mask and P is the prediction mask
    """

    intersection = tf.reduce_sum(target * prediction, axis=axis)
    union = tf.reduce_sum(target + prediction, axis=axis)
    numerator = tf.constant(2.) * intersection + smooth
    denominator = union + smooth
    coef = numerator / denominator

    return tf.reduce_mean(coef)

def dice_coef_loss(target, prediction, axis=(1, 2), smooth=1.):
    """
    Sorenson (Soft) Dice loss
    Using -log(Dice) as the loss since it is better behaved.
    Also, the log allows avoidance of the division which
    can help prevent underflow when the numbers are very small.
    """
    intersection = tf.reduce_sum(prediction * target, axis=axis)
    p = tf.reduce_sum(prediction, axis=axis)
    t = tf.reduce_sum(target, axis=axis)
    numerator = tf.reduce_mean(intersection + smooth)
    denominator = tf.reduce_mean(t + p + smooth)
    dice_loss = -tf.log(2.*numerator) + tf.log(denominator)

    return dice_loss


def combined_dice_ce_loss(y_true, y_pred, axis=(1, 2), smooth=1.,
                          weight=0.9):
    """
    Combined Dice and Binary Cross Entropy Loss
    """
    return weight*dice_coef_loss(y_true, y_pred, axis, smooth) + \
        (1-weight)*K.losses.binary_crossentropy(y_true, y_pred)


In [3]:
inference_filename = "unet_decathlon_4_8814_128x128_randomcrop-any-input.h5"
model_filename = os.path.join("/home/ubuntu/models/unet", inference_filename)

# Load model
print("Loading Model... ")
model = K.models.load_model(model_filename, custom_objects={
    "combined_dice_ce_loss": combined_dice_ce_loss,
    "dice_coef_loss": dice_coef_loss,
    "soft_dice_coef": soft_dice_coef,
    "dice_coef": dice_coef})
print("Model loaded successfully from: " + model_filename)

sess = keras.backend.get_session()
sess.run(tf.global_variables_initializer())

Loading Model... 





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model loaded successfully from: /home/ubuntu/models/unet/unet_decathlon_4_8814_128x128_randomcrop-any-input.h5


In [4]:
import shutil, sys   

output_directory = "/home/ubuntu/models/unet/output"
print("Freezing the graph.")
keras.backend.set_learning_phase(0)

signature = tf.saved_model.signature_def_utils.predict_signature_def(
    inputs={'input': model.input}, outputs={'output': model.output})

#If directory exists, delete it and let builder rebuild the TF model.
if os.path.isdir(output_directory):
    print (output_directory, "exists already. Deleting the folder")
    shutil.rmtree(output_directory)

builder = tf.saved_model.builder.SavedModelBuilder(output_directory)
builder.add_meta_graph_and_variables(sess=sess,    
                                     tags=[tf.saved_model.tag_constants.SERVING],    
                                     signature_def_map={
                                         tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:signature
                                     }, saver=tf.train.Saver())
builder.save() 
print("TensorFlow protobuf version of model is saved in:", output_directory)

print("Model input name = ", model.input.op.name)
print("Model input shape = ", model.input.shape)
print("Model output name = ", model.output.op.name)
print("Model output shape = ", model.output.shape)

Freezing the graph.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
/home/ubuntu/models/unet/output exists already. Deleting the folder
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: /home/ubuntu/models/unet/output/saved_model.pb
TensorFlow protobuf version of model is saved in: /home/ubuntu/models/unet/output
Model input name =  MRImages
Model input shape =  (?, ?, ?, 4)
Model output name =  PredictionMask/Sigmoid
Model output shape =  (?, ?, ?, 1)


In [5]:
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

input_saved_model_dir = output_directory
output_saved_model_dir = "/home/ubuntu/models/unet/trt-output/"

#If directory exists, delete it and let builder rebuild the TF model.
if os.path.isdir(output_saved_model_dir):
    print (output_saved_model_dir, "exists already. Deleting the folder")
    shutil.rmtree(output_saved_model_dir)
    
converter = trt.TrtGraphConverter(
    input_saved_model_dir=input_saved_model_dir,
    precision_mode="FP32",
    maximum_cached_engines=100)

_ = converter.convert()
_ = converter.save(output_saved_model_dir)

/home/ubuntu/models/unet/trt-output/ exists already. Deleting the folder
INFO:tensorflow:Linked TensorRT version: (0, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)
INFO:tensorflow:Running against TensorRT version 0.0.0
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/output/variables/variables
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 38 variables.
INFO:tensorflow:Converted 38 variables to const ops.
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: /home/ubuntu/models/unet/trt-output/saved_model.pb


In [6]:
# Copy the variables. As the frozen graph donot have any variables and it raises error while serving.
!cp -pra /home/ubuntu/models/unet/output/variables/ /home/ubuntu/models/unet/trt-output/  

In [7]:
# Benchmark on one sample
import time
output_saved_model_dir = "/home/ubuntu/models/unet/trt-output/"
output_tensor =  'PredictionMask/Sigmoid:0'
input_tensor = 'MRImages:0'
input_data = np.random.randint(0, 255, size=(1,160,160,4))

with tf.Session() as sess:
    # First load the SavedModel into the session
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING],
       output_saved_model_dir)
    start_time = time.time()
    output = sess.run([output_tensor], feed_dict={input_tensor: input_data})
    delta = (time.time() - start_time)

print("\nModel: {}, Input shape: {} , Output shape: {} \nCompleted Inference with one sample in {:.3f} sec,"
      .format(output_saved_model_dir, input_data.shape, output[0].shape, delta))

INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/trt-output/variables/variables

Model: /home/ubuntu/models/unet/trt-output/, Input shape: (1, 160, 160, 4) , Output shape: (1, 160, 160, 1) 
Completed Inference with one sample in 2.189 sec,


In [8]:
def benchmark_1min(output_saved_model_dir):
    output_tensor =  'PredictionMask/Sigmoid:0'
    input_tensor = 'MRImages:0'
    input_data = np.random.randint(0, 255, size=(1,160,160,4))

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_sess = tf.Session(config=tf_config)

    # First load the SavedModel into the session
    tf.saved_model.loader.load(
            tf_sess, [tf.saved_model.tag_constants.SERVING],
           output_saved_model_dir)

    tf_sess.run(tf.global_variables_initializer())

    import time
    times = []
    # Run inference for 1 min.
    end = time.time() + 60
    print(time.strftime("%H:%M:%S"))
    print("Running inference for 1 min, with BZ=1")
    print("Model: {}, Input shape: {}  "
          .format(output_saved_model_dir, input_data.shape))
    while time.time() < end:
        start_time = time.time()
        output = tf_sess.run([output_tensor], feed_dict={input_tensor: input_data})
        delta = (time.time() - start_time)
        times.append(delta)

    mean_delta = np.array(times).mean()
    fps = 1 / mean_delta
    print('Output Shape: {}, \naverage(sec):{:.3f} , average(msec):{:.2f} , fps:{:.2f}'
          .format(output[0].shape, mean_delta, mean_delta*1000, fps))

In [9]:
benchmark_1min("/home/ubuntu/models/unet/trt-output/")

INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/trt-output/variables/variables
23:53:29
Running inference for 1 min, with BZ=1
Model: /home/ubuntu/models/unet/trt-output/, Input shape: (1, 160, 160, 4)  
Output Shape: (1, 160, 160, 1), 
average(sec):0.007 , average(msec):7.15 , fps:139.83


## Convert and benchmark FP16

In [10]:
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

print("Converting the graph to TensorRT.")
input_saved_model_dir = "/home/ubuntu/models/unet/output"
output_saved_model_dir = "/home/ubuntu/models/unet/trt-output-fp16/"

#If directory exists, delete it and let builder rebuild the TF model.
if os.path.isdir(output_saved_model_dir):
    print (output_saved_model_dir, "exists already. Deleting the folder")
    shutil.rmtree(output_saved_model_dir)
    
converter = trt.TrtGraphConverter(
    input_saved_model_dir=input_saved_model_dir,
    precision_mode="FP16",
    maximum_cached_engines=100)

_ = converter.convert()
_ = converter.save(output_saved_model_dir)


print("Done. Converting the graph to TensorRT-FP16.")

Converting the graph to TensorRT.
INFO:tensorflow:Linked TensorRT version: (0, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)
INFO:tensorflow:Running against TensorRT version 0.0.0
INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/output/variables/variables
INFO:tensorflow:Froze 38 variables.
INFO:tensorflow:Converted 38 variables to const ops.
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: /home/ubuntu/models/unet/trt-output-fp16/saved_model.pb
Done. Converting the graph to TensorRT-FP16.


In [11]:
# Copy the variables. As the frozen graph donot have any variables and it raises error while serving.
!cp -pra /home/ubuntu/models/unet/output/variables/ /home/ubuntu/models/unet/trt-output-fp16/  

In [12]:
# Benchmark on one sample
import time
output_saved_model_dir = "/home/ubuntu/models/unet/trt-output-fp16/"
output_tensor =  'PredictionMask/Sigmoid:0'
input_tensor = 'MRImages:0'
input_data = np.random.randint(0, 255, size=(1,160,160,4))

with tf.Session() as sess:
    # First load the SavedModel into the session
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING],
       output_saved_model_dir)
    start_time = time.time()
    output = sess.run([output_tensor], feed_dict={input_tensor: input_data})
    delta = (time.time() - start_time)

print("\nModel: {}, Input shape: {} , Output shape: {} \nCompleted Inference with one sample in {:.3f} sec,"
      .format(output_saved_model_dir, input_data.shape, output[0].shape, delta))

INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/trt-output-fp16/variables/variables

Model: /home/ubuntu/models/unet/trt-output-fp16/, Input shape: (1, 160, 160, 4) , Output shape: (1, 160, 160, 1) 
Completed Inference with one sample in 0.226 sec,


In [13]:
## Benchmark
benchmark_1min("/home/ubuntu/models/unet/trt-output-fp16/")

INFO:tensorflow:Restoring parameters from /home/ubuntu/models/unet/trt-output-fp16/variables/variables
23:54:43
Running inference for 1 min, with BZ=1
Model: /home/ubuntu/models/unet/trt-output-fp16/, Input shape: (1, 160, 160, 4)  
Output Shape: (1, 160, 160, 1), 
average(sec):0.007 , average(msec):7.16 , fps:139.72
