# Convert ONNX to TensorRT (TRT)

Build the engine with trtexec

In [2]:
# Build an engine from the ONNX model with the trtexec command-line tool;
# --saveEngine writes the result to saved_model/mobilenetv2_ONNX/model.trt.
# NOTE: the Python cells further down write and load model.engine instead,
# so the file produced here is not the one they read.
!/usr/src/tensorrt/bin/trtexec --onnx=saved_model/mobilenetv2_ONNX/model.onnx --explicitBatch --saveEngine=saved_model/mobilenetv2_ONNX/model.trt

&&&& RUNNING TensorRT.trtexec # /usr/src/tensorrt/bin/trtexec --onnx=saved_model/mobilenetv2_ONNX/model.onnx --explicitBatch --saveEngine=saved_model/mobilenetv2_ONNX/model.trt
[09/27/2020-23:45:26] [I] === Model Options ===
[09/27/2020-23:45:26] [I] Format: ONNX
[09/27/2020-23:45:26] [I] Model: saved_model/mobilenetv2_ONNX/model.onnx
[09/27/2020-23:45:26] [I] Output:
[09/27/2020-23:45:26] [I] === Build Options ===
[09/27/2020-23:45:26] [I] Max batch: explicit
[09/27/2020-23:45:26] [I] Workspace: 16 MB
[09/27/2020-23:45:26] [I] minTiming: 1
[09/27/2020-23:45:26] [I] avgTiming: 8
[09/27/2020-23:45:26] [I] Precision: FP32
[09/27/2020-23:45:26] [I] Calibration: 
[09/27/2020-23:45:26] [I] Safe mode: Disabled
[09/27/2020-23:45:26] [I] Save engine: saved_model/mobilenetv2_ONNX/model.trt
[09/27/2020-23:45:26] [I] Load engine: 
[09/27/2020-23:45:26] [I] Builder Cache: Enabled
[09/27/2020-23:45:26] [I] NVTX verbosity: 0
[09/27/2020-23:45:26] [I] Inputs format: fp32:CHW
[09/27/

In [2]:
import time
import common
import tensorflow as tf
import tensorrt as trt
import tensorflow_datasets as tfds
# Logger instance shared by the builder, ONNX parser and runtime created in
# the cells below; constructed with the WARNING severity level.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

Build the TRT engine with Python

In [3]:
# Network-creation flag bitmask requesting an explicit-batch network.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

# Paths and input description used by this cell.
ONNX_MODEL_PATH = 'saved_model/mobilenetv2_ONNX/model.onnx'
ENGINE_PATH = 'saved_model/mobilenetv2_ONNX/model.engine'
INPUT_NAME = "input:0"
INPUT_SHAPE = (1, 160, 160, 3)

with trt.Builder(TRT_LOGGER) as builder, builder.create_builder_config() as config, \
    builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    print("Parsing ONNX model...")
    with open(ONNX_MODEL_PATH, 'rb') as model:
        parsed_ok = parser.parse(model.read())
    if not parsed_ok:
        # Report every parser error, then stop: building from a partially
        # parsed network would only fail later with a less helpful message.
        for error in range(parser.num_errors):
            print(parser.get_error(error))
        raise RuntimeError("Failed to parse ONNX model: {}".format(ONNX_MODEL_PATH))
    print("Done parsing ONNX model.")

    # The same shape is passed for all three shape arguments of the profile,
    # so the engine is built for this single input shape.
    profile = builder.create_optimization_profile()
    profile.set_shape(INPUT_NAME, INPUT_SHAPE, INPUT_SHAPE, INPUT_SHAPE)
    config.add_optimization_profile(profile)

    engine = builder.build_engine(network, config)
    if engine is None:
        # Fail with a clear message instead of the AttributeError that
        # `with None` would raise when the build does not produce an engine.
        raise RuntimeError("TensorRT failed to build an engine from {}".format(ONNX_MODEL_PATH))
    with engine:
        print("Serializing engine...")
        with open(ENGINE_PATH, 'wb') as f:
            f.write(engine.serialize())
        print("Done serializing engine.")

Parsing ONNX model...
Done parsing ONNX model.
Serializing engine...
Done serializing engine.


Load the TRT engine and do inference

In [4]:
# Turn off the tfds progress bars before loading the dataset.
tfds.disable_progress_bar()

# Load the 'train' split of cats_vs_dogs. The result is unpacked into the
# dataset itself and its accompanying info object (requested via with_info).
ds, metadata = tfds.load('cats_vs_dogs', split='train', as_supervised=True, with_info=True)

# Bound method of the 'label' feature; presumably maps an integer label to
# its class-name string -- confirm against the tfds documentation.
get_label_name = metadata.features['label'].int2str
def decode_prediction(x):
    """Map a raw model output to a binary class label.

    Returns 1 when ``x >= 0`` and 0 otherwise.
    """
    return 1 if x >= 0 else 0

In [5]:
# Evaluation settings for this cell.
N_IMAGES = 1000          # number of dataset samples to run through the engine
INPUT_SIZE = (160, 160)  # spatial size; matches the (1, 160, 160, 3) profile used when building the engine

with open('saved_model/mobilenetv2_ONNX/model.engine', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    if engine is None:
        # Fail with a clear message instead of an AttributeError from `with None`.
        raise RuntimeError("Failed to deserialize the TensorRT engine")
    with engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        n_predictions = 0
        n_correct_predictions = 0
        # The timed region covers the whole loop, so the reported rate includes
        # the TensorFlow preprocessing as well as the TensorRT inference.
        start_time = time.perf_counter()
        for image, label in ds.take(N_IMAGES):
            # Preprocess: cast to float32, rescale with x/127.5 - 1 (maps
            # [0, 255] to [-1, 1]; assumes 8-bit pixel values -- TODO confirm),
            # resize to the engine's input size and add a leading batch axis.
            x = tf.cast(image, tf.float32)
            x = (x / 127.5) - 1
            x = tf.image.resize(x, INPUT_SIZE)
            x = tf.expand_dims(x, axis=0)
            inputs[0].host = x

            preds = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            n_predictions += 1
            prediction = preds[0][0]  # only process first object at first batch index
            if label == decode_prediction(prediction):
                n_correct_predictions += 1
        elapsed_time = time.perf_counter() - start_time

        if n_predictions == 0:
            # Nothing was iterated; avoid dividing by zero below.
            print('no images were processed')
        else:
            accuracy = n_correct_predictions / n_predictions
            print('predicted {} images with accuracy of {:.2f}% with a rate of {:.2f} images/s'.format(n_predictions, accuracy * 100, n_predictions/elapsed_time))


predicteded 1000 images with accuracy of 99.90% with a rate of 446.88 images/s
