# Inference with saved TRT plan based on INT8 model

In [1]:
import time
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorrt as trt
from tensorflow.python.compiler.tensorrt import trt_convert
from tensorflow.python.saved_model import tag_constants

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            print("Setting memory_growth")
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

Setting memory_growth


In [3]:
print('Loading optimized model...')
optimized_model = tf.saved_model.load('saved_model/mobilenetv2_TFTRT_INT8', tags=[tag_constants.SERVING])
graph_func = optimized_model.signatures[trt_convert.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
trt_graph_def = graph_func.graph.as_graph_def()
print('Done loading model')



Loading optimized model...


KeyboardInterrupt: 

In [4]:
print('Converting model to stand-alone TensorRT Plan...')
for n in trt_graph_def.node:
    if n.op == "TRTEngineOp":
        print("Node: %s, %s" % (n.op, n.name.replace("/", "_")))
        with tf.io.gfile.GFile("%s.plan" % (n.name.replace("/", "_")), 'wb') as f:
            f.write(n.attr["serialized_segment"].s)
    else:
        print("Exclude Node: %s, %s" % (n.op, n.name.replace("/", "_")))
print('Done writing plan')

Converting model to stand-alone TensorRT Plan...
Exclude Node: Placeholder, input
Exclude Node: PartitionedCall, PartitionedCall
Exclude Node: Identity, Identity
Done writing plan


In [None]:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
with open("engine.plan", "rb") as f:
    engine = trt_runtime.deserialize_cuda_engine(f.read())

In [None]:
with tf.saved_model.builder.Builder(TRT_LOGGER) as builder, builder.create_builder_config() as config:
    config.max_workspace_size = 1 << 20 # This determines the amount of memory available to the builder when building an optimized engine and should generally be set as high as possible.
    with builder.build_engine(network, config) as engine:
        with open("mobilenetv2.engine", "wb") as f:
        		f.write(engine.serialize())

In [10]:
optimized_model = tf.saved_model.load('saved_model/mobilenetv2_TFTRT_INT8', tags=[tag_constants.SERVING])
signature_keys = list(optimized_model.signatures.keys())
print('Signature keys of optimized model: ',signature_keys)
infer = optimized_model.signatures['serving_default']
print('Outputs of serving_default: ', infer.structured_outputs)

Signature keys of optimized model:  ['serving_default']
Outputs of serving_default:  {'predictions': TensorSpec(shape=<unknown>, dtype=tf.float32, name='predictions')}


In [11]:
tfds.disable_progress_bar()
ds, metadata = tfds.load(
    'cats_vs_dogs',
    split='train',
    with_info=True,
    as_supervised=True)
get_label_name = metadata.features['label'].int2str
decode_prediction = lambda x: 1 if x>=0 else 0

In [14]:
n_predictions = 0
n_correct_predictions = 0
start_time = time.time()
for image, label in ds.take(1000):
    x = tf.cast(image, tf.float32)
    x = (x/127.5)-1
    x = tf.image.resize(x, (160,160))
    x = tf.expand_dims(x, axis=0)

    preds = infer(x)
    n_predictions += 1
    prediction = preds['predictions'][0,0] # only process first object at first batch index
    decoded_pred = decode_prediction(prediction)
    correct_prediction = label == decoded_pred
    if correct_prediction:
        n_correct_predictions += 1
elapsed_time = time.time() - start_time
accuracy = n_correct_predictions / n_predictions
print('predicteded {} images with accuracy of {:.2f}% with a rate of {:.2f} images/s'.format(n_predictions, accuracy * 100, n_predictions/elapsed_time))


dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
cat[0] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
dog[1] image - correct prediction: True
cat[0] image - correct prediction: True
cat[0] image - correct prediction: True
