## Convert the model to ONNX format

In [9]:
# Convert the SavedModel in saved_model/mobilenetv2 to ONNX (opset 11, with the
# --fold_const option) and write the result to saved_model/mobilenetv2_ONNX/model.onnx.
!python3 -m tf2onnx.convert --opset 11 --fold_const --saved-model saved_model/mobilenetv2 --output saved_model/mobilenetv2_ONNX/model.onnx

2020-10-04 20:48:25.646147: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-10-04 20:48:26.658669: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-10-04 20:48:26.662605: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-10-04 20:48:26.663143: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:42:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.65GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-10-04 20:48:26.663162: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-10-04 20:48:26.664183: I tensorflow/stream_executo

## Create a TensorRT engine

In [10]:
import logging

import sys
import tensorrt as trt

from Calibrator import _Calibrator

In [11]:
# Python-side logging for this notebook: DEBUG level, timestamped records.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Logger handed to the TensorRT builder/parser, created with WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

In [12]:
def add_profiles(config, inputs, opt_profiles):
    """Log every optimization profile and register it on the builder config.

    Args:
        config: Object exposing ``add_optimization_profile(profile)``.
        inputs: Iterable of network inputs; only their ``name`` is read.
        opt_profiles: Iterable of profiles whose ``get_shape(name)`` yields a
            ``(min, opt, max)`` triple for each input name.
    """
    logger.debug("=== Optimization Profiles ===")
    for profile_index, opt_profile in enumerate(opt_profiles):
        # Report the shape triple of each input before the profile is added.
        for tensor in inputs:
            shape_min, shape_opt, shape_max = opt_profile.get_shape(tensor.name)
            message = "{} - OptProfile {} - Min {} Opt {} Max {}".format(
                tensor.name, profile_index, shape_min, shape_opt, shape_max
            )
            logger.debug(message)
        config.add_optimization_profile(opt_profile)

In [13]:
def mark_outputs(network):
    """Mark every output of the network's final layer as a network output.

    The final layer is the one at index ``num_layers - 1``. If that layer has
    no outputs, an error is logged and nothing is marked.

    NOTE: treating the last layer as the output layer may not be correct for
    every network.
    """
    final_layer = network.get_layer(network.num_layers - 1)
    output_count = final_layer.num_outputs

    if output_count:
        for index in range(output_count):
            network.mark_output(final_layer.get_output(index))
        return

    logger.error("Last layer contains no outputs.")

In [14]:
def check_network(network):
    """Ensure the network has outputs, then log its inputs and outputs.

    When the network reports no outputs, a warning is logged and
    ``mark_outputs`` is asked to mark the last layer's outputs. Afterwards each
    input and output is logged at DEBUG level with its index, name and shape;
    names are padded to the longest name so the columns line up.
    """
    if not network.num_outputs:
        logger.warning("No output nodes found, marking last layer's outputs as network outputs. Correct this if wrong.")
        mark_outputs(network)

    inputs = [network.get_input(idx) for idx in range(network.num_inputs)]
    outputs = [network.get_output(idx) for idx in range(network.num_outputs)]
    # Column width for the name field: the longest input/output name.
    name_width = max(len(tensor.name) for tensor in inputs + outputs)

    logger.debug("=== Network Description ===")
    sections = (
        ("Input  {0} | Name: {1:{2}} | Shape: {3}", inputs),
        ("Output {0} | Name: {1:{2}} | Shape: {3}", outputs),
    )
    for template, tensors in sections:
        for idx, tensor in enumerate(tensors):
            logger.debug(template.format(idx, tensor.name, name_width, tensor.shape))

In [15]:
def create_optimization_profiles(builder, inputs, batch_sizes = None ):
    """Create optimization profiles that pin every input to a fixed shape.

    Each profile sets ``min == opt == max`` for every input. Only the leading
    (batch) dimension is varied; the remaining dimensions are copied verbatim
    from ``inp.shape[1:]``.

    Args:
        builder: Object exposing ``create_optimization_profile()``.
        inputs: Network inputs; ``name`` and ``shape`` are read from each.
        batch_sizes: Batch sizes to generate profiles for when at least one
            input has a dynamic (``-1``) batch dimension. Defaults to
            ``[1, 8, 16, 32, 64]``.

    Returns:
        A list of profiles. If every input has a fixed batch dimension, the
        list holds a single profile covering all inputs. Otherwise it holds
        one profile per distinct entry of ``batch_sizes`` (in first-seen
        order); inputs with a fixed batch dimension keep their own batch size
        in every profile.
    """
    if batch_sizes is None:
        batch_sizes = [1,8,16,32,64]

    # If all inputs are fixed explicit batch, one profile is enough; creating
    # one per batch size would only produce duplicates.
    if all(inp.shape[0] > -1 for inp in inputs):
        profile = builder.create_optimization_profile()
        for inp in inputs:
            fbs, shape = inp.shape[0], inp.shape[1:]
            profile.set_shape(inp.name, min=(fbs, *shape), opt=(fbs, *shape), max=(fbs, *shape))
        # Return only after *every* input has been given a shape.
        return [profile]

    # Otherwise for mixed fixed+dynamic explicit batch inputs, create several profiles
    profiles = {}
    for bs in batch_sizes:
        if bs in profiles:
            # Duplicate batch size requested; its profile already exists.
            continue

        profile = builder.create_optimization_profile()
        for inp in inputs:
            # A fixed explicit batch input keeps its own batch size. Use a
            # separate name so the profile key `bs` is never overwritten.
            inp_bs = inp.shape[0] if inp.shape[0] > -1 else bs
            full_shape = (inp_bs, *inp.shape[1:])
            profile.set_shape(inp.name, min=full_shape, opt=full_shape, max=full_shape)
        profiles[bs] = profile

    return list(profiles.values())

In [16]:
# Create the network definition in explicit-batch mode.
network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network(network_flags) as network, \
        builder.create_builder_config() as config, \
        trt.OnnxParser(network, TRT_LOGGER) as parser:

    config.max_workspace_size = 2**30 # 1GiB

    # Request INT8 precision and supply the calibrator used for it.
    config.set_flag(trt.BuilderFlag.INT8)

    if not builder.platform_has_fast_int8:
        # trt.Logger offers log(severity, msg) but no warning() method, so the
        # message is reported through the notebook's Python logger instead.
        logger.warning("INT8 not supported on this platform.")

    config.int8_calibrator = _Calibrator()

    print("Parsing ONNX model...")
    model_file = 'saved_model/mobilenetv2_ONNX/model.onnx'
    # Fill network attributes with information by parsing model
    with open(model_file, 'rb') as f:
        if not parser.parse(f.read()):
            print('ERROR: Failed to parse the ONNX file: {}'.format(model_file))
            # Dump every parser error before giving up.
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)
    print("Done parsing ONNX model.")

    # Display network info and check certain properties
    check_network(network)

    # Add optimization profiles
    batch_sizes = [1, 8, 16, 32, 64]
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    opt_profiles = create_optimization_profiles(builder, inputs, batch_sizes)
    add_profiles(config, inputs, opt_profiles)

    print("Building engine...")
    engine = builder.build_engine(network, config)
    # A failed build hands back None rather than raising; fail with a clear
    # message instead of an opaque error from the `with` statement below.
    if engine is None:
        raise RuntimeError("Failed to build the TensorRT engine; see the TensorRT log output for details.")
    with engine:
        # Serialize the built engine next to the ONNX model.
        with open('saved_model/mobilenetv2_ONNX/model.engine', 'wb') as f:
            f.write(engine.serialize())
        print("Done building engine.")

2020-10-04 20:48:40 - __main__ - DEBUG - === Network Description ===
2020-10-04 20:48:40 - __main__ - DEBUG - Input  0 | Name: image:0    | Shape: (-1, 160, 160, 3)
2020-10-04 20:48:40 - __main__ - DEBUG - Output 0 | Name: Identity:0 | Shape: (-1, -1)
2020-10-04 20:48:40 - __main__ - DEBUG - === Optimization Profiles ===
2020-10-04 20:48:40 - __main__ - DEBUG - image:0 - OptProfile 0 - Min (1, 160, 160, 3) Opt (1, 160, 160, 3) Max (1, 160, 160, 3)
2020-10-04 20:48:40 - __main__ - DEBUG - image:0 - OptProfile 1 - Min (8, 160, 160, 3) Opt (8, 160, 160, 3) Max (8, 160, 160, 3)
2020-10-04 20:48:40 - __main__ - DEBUG - image:0 - OptProfile 2 - Min (16, 160, 160, 3) Opt (16, 160, 160, 3) Max (16, 160, 160, 3)
2020-10-04 20:48:40 - __main__ - DEBUG - image:0 - OptProfile 3 - Min (32, 160, 160, 3) Opt (32, 160, 160, 3) Max (32, 160, 160, 3)
2020-10-04 20:48:40 - __main__ - DEBUG - image:0 - OptProfile 4 - Min (64, 160, 160, 3) Opt (64, 160, 160, 3) Max (64, 160, 160, 3)
2020-10-04 20:50:03 - C

Parsing ONNX model...
Done parsing ONNX model.
Building engine...
Done building engine.
