# Inference with TRT FP32 model

In [10]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.python.compiler.tensorrt import trt_convert as trt
from tensorflow.python.saved_model import tag_constants

In [6]:
# Paths and TensorRT settings for the conversion, named once instead of being
# repeated inline.
SAVED_MODEL_DIR = 'saved_model/mobilenetv2'
TRT_FP32_MODEL_DIR = 'saved_model/mobilenetv2_TFTRT_FP32'
MAX_WORKSPACE_SIZE_BYTES = 8000000000  # 8e9 bytes handed to TensorRT as workspace

# NOTE(review): `model` is not used by the conversion below -- the converter
# reads the SavedModel straight from SAVED_MODEL_DIR. Kept so the name stays
# defined for any other cell that relies on it; confirm whether it can be dropped.
model = tf.keras.models.load_model(SAVED_MODEL_DIR)

print('Converting to TF-TRT FP32...')
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.FP32,
    max_workspace_size_bytes=MAX_WORKSPACE_SIZE_BYTES,
)
converter = trt.TrtGraphConverterV2(
    input_saved_model_dir=SAVED_MODEL_DIR,
    conversion_params=conversion_params,
)

# build() is not called here, so (as the conversion log reports) the TensorRT
# engines are built and cached at runtime rather than at conversion time.
converter.convert()
converter.save(output_saved_model_dir=TRT_FP32_MODEL_DIR)
print('Done Converting to TF-TRT FP32')

Converting to TF-TRT FP32...
INFO:tensorflow:Linked TensorRT version: (7, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (7, 0, 0)
INFO:tensorflow:Could not find TRTEngineOp_0_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Assets written to: saved_model/mobilenetv2_TFTRT_FP32/assets
Done Converting to TF-TRT FP32
2020-06-14 20:14:27.539616: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
usage: saved_model_cli show [-h] --dir DIR [--all] [--tag_set TAG_SET]
                            [--signature_def SIGNATURE_DEF_KEY]
saved_model_cli show: error: the following arguments are required: --dir


INFO:tensorflow:Linked TensorRT version: (7, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (7, 0, 0)
INFO:tensorflow:Could not find TRTEngineOp_0_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Assets written to: saved_model/mobilenetv2_TFTRT_FP32/assets


In [7]:
# Inspect the converted SavedModel: list every signature with the name, dtype
# and shape of its input and output tensors.
!saved_model_cli show --all --dir saved_model/mobilenetv2_TFTRT_FP32

2020-06-14 20:15:06.535649: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 160, 160, 3)
        name: serving_default_input:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['predictions'] tensor_info:
        dtype: DT_FLOAT
        shape: unknown_rank
        name: PartitionedCall:0
  Method name is: tensorflow/serving/predict

In [11]:
# Load the TF-TRT converted SavedModel and grab its default serving signature.
# The tag and signature key come from the public `tf.saved_model` namespace
# rather than from private `tensorflow.python` modules (or attributes that
# merely happen to be importable through `trt_convert`).
optimized_model = tf.saved_model.load('saved_model/mobilenetv2_TFTRT_FP32',
                                      tags=[tf.saved_model.SERVING])
signature_keys = list(optimized_model.signatures.keys())
print('Signature keys of optimized model: ',signature_keys)

# `infer` is the callable used for inference; it returns a dict of output tensors
# keyed by output name (see `structured_outputs` printed below).
infer = optimized_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
print('Outputs of serving_default: ', infer.structured_outputs)

Signature keys of optimized model:  ['serving_default']
Outputs of serving_default:  {'predictions': TensorSpec(shape=<unknown>, dtype=tf.float32, name='predictions')}


In [12]:
# Turn off the tfds progress bars before loading the dataset.
tfds.disable_progress_bar()

# Load the 'train' split of cats_vs_dogs together with its metadata.
# Each dataset element is consumed below as an (image, label) pair.
ds, metadata = tfds.load('cats_vs_dogs', split='train', with_info=True, as_supervised=True)

# Converts an integer label id into its class name.
get_label_name = metadata.features['label'].int2str


def decode_prediction(x):
    """Turn a raw model score into a class id: 1 if the score is >= 0, else 0.

    NOTE(review): thresholding at 0 presumably means the model emits a logit
    rather than a probability -- confirm against the training notebook.
    """
    if x >= 0:
        return 1
    return 0

In [14]:
def preprocess(image):
    """Prepare one image for the serving signature.

    Casts to float32, rescales with x / 127.5 - 1, resizes to 160x160 (the
    spatial size the signature's input expects) and adds a leading batch axis.

    NOTE(review): the rescale maps to [-1, 1] only if pixels are in [0, 255] --
    confirm for this dataset.
    """
    scaled = tf.cast(image, tf.float32) / 127.5 - 1
    resized = tf.image.resize(scaled, (160, 160))
    return tf.expand_dims(resized, axis=0)


# Run the TF-TRT model on the first three dataset samples and report whether
# each decoded prediction matches the ground-truth label.
for image, label in ds.take(3):
    batch = preprocess(image)

    # The batch holds a single image, so only element [0, 0] of the output is read.
    score = infer(batch)['predictions'][0, 0]
    is_correct = label == decode_prediction(score)

    print('{}[{}] image - correct prediction: {}'.format(get_label_name(label), label, is_correct))

dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
dog[1] image - correct prediction: True
