# Convert SSD Mobilenet from TensorFlow to TensorRT

TensorFlow has a large selection of pre-trained SSD models in its [object detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md).

This tutorial shows how to convert them to UFF and then to TensorRT.

**NOTE**: NVIDIA has deprecated the Caffe Parser and UFF Parser in TensorRT 7.0. It is expected that converting to ONNX will be required in the future, but for now we will convert to UFF.  
https://docs.nvidia.com/deeplearning/tensorrt/release-notes/tensorrt-7.html

This notebook is based on  
https://github.com/jkjung-avt/tensorrt_demos/blob/9dd56b3b8d841dcfc2e5d1868f4bd785a50cbe98/ssd/build_engine.py  
which is released under the MIT License.

Copyright (c) 2019 JK Jung  
https://github.com/jkjung-avt/tensorrt_demos/blob/9dd56b3b8d841dcfc2e5d1868f4bd785a50cbe98/LICENSE

## Downgrading TensorFlow to make it compatible

In [8]:
# NOTE(review): the recorded output of this cell shows that apt cannot locate a
# "tensorflow" package, so this command did not change the installed version.
# TensorFlow is presumably meant to be installed with pip instead -- confirm the
# right install command for the target platform before relying on this cell.
!apt install --upgrade tensorflow=1.15.0

Reading package lists... Done
Building dependency tree       
Reading state information... Done
E: Unable to locate package tensorflow


In [1]:
import ctypes
import numpy as np
import tensorflow as tf
import uff
import tensorrt as trt
import graphsurgeon as gs
print(tf.__version__)

1.15.2


## Define functions

In [2]:
def replace_addv2(graph):
    """Rewrite every 'AddV2' node of the graph as a plain 'Add' node.

    The UFF parser does not support the 'AddV2' op.  The same graph object
    that was passed in is returned.

    Reference:
    1. https://github.com/jkjung-avt/tensorrt_demos/issues/113#issuecomment-629900809
    """
    addv2_nodes = graph.find_nodes_by_op('AddV2')
    for addv2_node in addv2_nodes:
        # Only the op type is changed; the node is otherwise left untouched.
        gs.update_node(addv2_node, op='Add')
    return graph


def replace_fusedbnv3(graph):
    """Rewrite every 'FusedBatchNormV3' node of the graph as 'FusedBatchNorm'.

    The UFF parser does not support the 'FusedBatchNormV3' op.  The same
    graph object that was passed in is returned.

    Reference:
    1. https://devtalk.nvidia.com/default/topic/1066445/tensorrt/tensorrt-6-0-1-tensorflow-1-14-no-conversion-function-registered-for-layer-fusedbatchnormv3-yet/post/5403567/#5403567
    2. https://github.com/jkjung-avt/tensorrt_demos/issues/76#issuecomment-607879831
    """
    bnv3_nodes = graph.find_nodes_by_op('FusedBatchNormV3')
    for bnv3_node in bnv3_nodes:
        # Only the op type is changed; the node is otherwise left untouched.
        gs.update_node(bnv3_node, op='FusedBatchNorm')
    return graph


def add_anchor_input(graph):
    """Give the GridAnchor node the constant input it is missing.

    A 'Const' node named 'AnchorInput' holding the float32 values [1, 1] is
    appended to the graph, and its name is inserted as the first input of the
    first 'GridAnchor_TRT' node.  The same graph object is returned.

    Reference:
    1. https://www.minds.ai/post/deploying-ssd-mobilenet-v2-on-the-nvidia-jetson-and-nano-platforms
    """
    const_values = np.array([1, 1], dtype=np.float32)
    graph.append(gs.create_node('AnchorInput', 'Const', value=const_values))

    # Only the first GridAnchor_TRT node found is wired to the new constant.
    grid_anchor = graph.find_nodes_by_op('GridAnchor_TRT')[0]
    grid_anchor.input.insert(0, 'AnchorInput')
    return graph

def add_plugin(graph, num_classes, min_size, max_size, input_order):
    """Patch an SSD TensorFlow graph with TensorRT plugin nodes for UFF conversion.

    The graph is edited through the graphsurgeon API:

    * 'Assert' nodes are removed and 'Identity' nodes are bypassed.
    * The preprocessor, anchor generator, post-processor and concat
      namespaces are collapsed into an 'Input' placeholder and
      'GridAnchor_TRT', 'NMS_TRT', 'ConcatV2' and 'FlattenConcat_TRT' nodes.
    * 'AddV2' and 'FusedBatchNormV3' ops are renamed to 'Add' and
      'FusedBatchNorm'.
    * Leftover inputs/outputs are cleaned up so that 'NMS' is a graph output.

    Args:
        graph: graphsurgeon graph to modify (this notebook passes a
            ``gs.DynamicGraph``).
        num_classes: value of the 'numClasses' field of the NMS_TRT node.
        min_size: value of the 'minSize' field of the GridAnchor_TRT node.
        max_size: value of the 'maxSize' field of the GridAnchor_TRT node.
        input_order: value of the 'inputOrder' field of the NMS_TRT node.

    Returns:
        The modified graph.

    Raises:
        RuntimeError: if 'NMS' is still not among the graph outputs after the
            other graph outputs have been removed.

    Reference:
    1. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v1_coco_2018_01_28.py
    2. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
    3. https://devtalk.nvidia.com/default/topic/1050465/jetson-nano/how-to-write-config-py-for-converting-ssd-mobilenetv2-to-uff-format/post/5333033/#5333033
    """
    all_assert_nodes = graph.find_nodes_by_op('Assert')
    graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)

    all_identity_nodes = graph.find_nodes_by_op('Identity')
    graph.forward_inputs(all_identity_nodes)

    input_node = gs.create_plugin_node(
        name='Input',
        op='Placeholder',
        shape=(1, 3, 300, 300)
    )

    prior_box = gs.create_plugin_node(
        name='MultipleGridAnchorGenerator',
        op='GridAnchor_TRT',
        minSize=min_size,  # was 0.2
        maxSize=max_size,  # was 0.95
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1],
        numLayers=6
    )

    nms = gs.create_plugin_node(
        name='NMS',
        op='NMS_TRT',
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=0.3,  # was 1e-8
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=num_classes,  # was 91
        inputOrder=input_order,
        confSigmoid=1,
        isNormalized=1
    )

    concat_priorbox = gs.create_node(
        'concat_priorbox',
        op='ConcatV2',
        axis=2
    )

    # Compare the major version as an integer.  Comparing only the first
    # character of the version string would treat "10.x" as "1" and pick the
    # wrong branch.
    trt_major = int(trt.__version__.split('.')[0])
    if trt_major >= 7:
        # For TensorRT 7 and later the FlattenConcat nodes are created with
        # explicit 'axis' and 'ignoreBatch' fields.
        concat_box_loc = gs.create_plugin_node(
            'concat_box_loc',
            op='FlattenConcat_TRT',
            axis=1,
            ignoreBatch=0
        )
        concat_box_conf = gs.create_plugin_node(
            'concat_box_conf',
            op='FlattenConcat_TRT',
            axis=1,
            ignoreBatch=0
        )
    else:
        concat_box_loc = gs.create_plugin_node(
            'concat_box_loc',
            op='FlattenConcat_TRT'
        )
        concat_box_conf = gs.create_plugin_node(
            'concat_box_conf',
            op='FlattenConcat_TRT'
        )

    # Maps a TensorFlow namespace to the node that replaces it.
    namespace_plugin_map = {
        'MultipleGridAnchorGenerator': prior_box,
        'Postprocessor': nms,
        'Preprocessor': input_node,
        'ToFloat': input_node,
        'Cast': input_node,  # added for models trained with tf 1.15+
        'image_tensor': input_node,
        'MultipleGridAnchorGenerator/Concatenate': concat_priorbox,  # for 'ssd_mobilenet_v1_coco'
        'Concatenate': concat_priorbox,  # for other models
        'concat': concat_box_loc,
        'concat_1': concat_box_conf
    }

    graph.remove(graph.find_nodes_by_path(['Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3']), remove_exclusive_dependencies=False)  # for 'ssd_inception_v2_coco'

    graph.collapse_namespaces(namespace_plugin_map)
    graph = replace_addv2(graph)
    graph = replace_fusedbnv3(graph)

    # Drop inputs left over from the collapsed namespaces.
    collapsed_input = graph.find_nodes_by_name('Input')[0]
    if 'image_tensor:0' in collapsed_input.input:
        collapsed_input.input.remove('image_tensor:0')
    collapsed_nms = graph.find_nodes_by_name('NMS')[0]
    if 'Input' in collapsed_nms.input:
        collapsed_nms.input.remove('Input')
    # Remove the Squeeze to avoid "Assertion 'isPlugin(layerName)' failed"
    graph.forward_inputs(graph.find_node_inputs_by_name(graph.graph_outputs[0], 'Squeeze'))
    if 'anchors' in [node.name for node in graph.graph_outputs]:
        graph.remove('anchors', remove_exclusive_dependencies=False)
    if len(graph.find_nodes_by_op('GridAnchor_TRT')[0].input) < 1:
        graph = add_anchor_input(graph)
    if 'NMS' not in [node.name for node in graph.graph_outputs]:
        # Remove the current outputs, then check again whether 'NMS' has
        # become a graph output.
        graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
        if 'NMS' not in [node.name for node in graph.graph_outputs]:
            # We expect 'NMS' to be one of the outputs
            raise RuntimeError('bad graph_outputs')

    return graph

In [3]:
# initialize
# For TensorRT major versions below 7, load the FlattenConcat plugin library.
# The major version is compared as an integer: comparing only the first
# character of the version string would treat "10.x" as "1".
if int(trt.__version__.split('.')[0]) < 7:
    try:
        from jnmouse.ssd_tensorrt import load_flattenconcat_plugin
        load_flattenconcat_plugin()
    except Exception as load_error:
        # Fallback: build the plugin from the uff_ssd sample shipped with
        # TensorRT.  `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate instead of starting a build.
        import ctypes
        import os
        from pathlib import Path
        import subprocess

        print("could not load the FlattenConcat plugin via jnmouse ({!r}); "
              "building it from the TensorRT samples".format(load_error))

        work_dir = str(Path().resolve())
        build_dir = "{}/uff_ssd/build".format(work_dir)
        # Only single-digit major versions reach this branch, so the first
        # character of the version string is the whole major version.
        lib_name = "libflattenconcat.so.{}".format(trt.__version__[0])

        # (command, working directory) pairs, executed in order.
        build_steps = (
            (["cp", "-r", "/usr/src/tensorrt/samples/python/uff_ssd/", "."], work_dir),
            (["mkdir", "-p", "uff_ssd/build"], work_dir),
            (["cmake", ".."], build_dir),
            (["make"], build_dir),
            # NOTE(review): "-x" clears the execute bits; if the intent was to
            # make the library executable this should be "+x" -- confirm.
            (["chmod", "-x", "libflattenconcat.so"], build_dir),
            (["cp", "libflattenconcat.so", "../../{}".format(lib_name)], build_dir),
        )
        for command, cwd in build_steps:
            print(command)
            # stderr is merged into stdout, so res.stdout holds all output.
            res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd)
            print(res.stdout.decode("utf8"))
            if res.returncode != 0:
                # Report the failure but keep going, as the original
                # best-effort loop did.
                print("WARNING: {} exited with status {}".format(command, res.returncode))

        LIB_FILE = os.path.abspath(os.path.join(work_dir, lib_name))
        ctypes.CDLL(LIB_FILE)

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

# load plugins
trt.init_libnvinfer_plugins(TRT_LOGGER, '')

True

## Download the pre-trained model and configure parameters

Download the pre-trained ssd_mobilenet_v2_coco model.  
http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz

In [38]:
# SSD MobileNetV2 COCO
# Stream the archive to stdout (`wget -O -`) and unpack it on the fly:
# `tar zxvf -` reads the gzip-compressed archive from stdin, so no .tar.gz
# file is written to disk.
!wget -O - http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz | tar zxvf -

--2024-06-11 11:23:52--  http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... ^C


In [4]:
# SSD MobileNetV2 COCO
# Frozen TensorFlow graph that is read by the conversion step.
# NOTE(review): the download cell fetches ssd_mobilenet_v2_coco_2018_03_29.tar.gz,
# while this path points into "SSD-Mobilenet-V2-tf/" -- confirm the frozen
# graph is actually located here.
tensorflow_model_path = "SSD-Mobilenet-V2-tf/frozen_inference_graph.pb"
# Output files: the intermediate UFF model and the serialized TensorRT engine
# (the engine name embeds the first character of the TensorRT version).
uff_model_path = "ssd_mobilenet_v2_coco.uff"
trt_engine_path = "ssd_mobilenet_v2_coco_trt{}_t210.engine".format(trt.__version__[0])
# Values handed to add_plugin() for the NMS and grid-anchor plugin nodes.
num_classes = 91
min_size, max_size = 0.2, 0.95
input_order = [1, 0, 2]

## Convert the pre-trained model to the TensorRT engine file

NOTE: If you get `TypeError: Cannot convert value 0 to a TensorFlow DType.` in part 1,  
`/usr/lib/python3.6/dist-packages/graphsurgeon/node_manipulation.py` should be updated.  
https://github.com/AastaNV/TRT_object_detection#update-graphsurgeon-converter

In [5]:
# Convert the model into a TensorRT engine.
# Part 1: patch the TensorFlow graph and compile it into a UFF file.
print("build .uff file")
dynamic_graph = add_plugin(
    graph=gs.DynamicGraph(tensorflow_model_path),
    num_classes=num_classes,
    min_size=min_size,
    max_size=max_size,
    input_order=input_order,
)

# The return value is not needed here; the result is written to uff_model_path.
_ = uff.from_tensorflow(
    dynamic_graph.as_graph_def(),
    output_filename=uff_model_path,
    output_nodes=['NMS'],
    debug_mode=False,
    text=True,
)

print("built .uff file successfully")

build .uff file
NOTE: UFF has been tested with TensorFlow 1.15.0.
UFF Version 0.6.9
=== Automatically deduced input nodes ===
[name: "Input"
op: "Placeholder"
attr {
  key: "dtype"
  value {
    type: DT_FLOAT
  }
}
attr {
  key: "shape"
  value {
    shape {
      dim {
        size: 1
      }
      dim {
        size: 3
      }
      dim {
        size: 300
      }
      dim {
        size: 300
      }
    }
  }
}
]

Using output node NMS
Converting to UFF graph
Converting NMS as custom op: NMS_TRT

Converting MultipleGridAnchorGenerator as custom op: GridAnchor_TRT
Converting concat_box_loc as custom op: FlattenConcat_TRT
Converting concat_box_conf as custom op: FlattenConcat_TRT
DEBUG [/usr/lib/python3.6/dist-packages/uff/converters/tensorflow/converter.py:143] Marking ['NMS'] as outputs
No. nodes: 1094
UFF Output written to ssd_mobilenet_v2_coco.uff
UFF Text Output written to ssd_mobilenet_v2_coco.pbtxt
built .uff file successfully


In [6]:
# convert the model into TensorRT engine
# part 2: compile the .engine/.bin file
print("build .engine file")
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

    builder.max_workspace_size = 1 << 28  # 256 MiB
    builder.max_batch_size = 1
    builder.fp16_mode = True

    parser.register_input('Input', (3, 300, 300))
    parser.register_output('MarkOutput_0')

    print("parsing model")
    # parse() reports failure through its boolean return value; stop here
    # instead of trying to build an engine from an incomplete network.
    if not parser.parse(uff_model_path, network):
        raise RuntimeError("failed to parse UFF model: {}".format(uff_model_path))

    print("building engine")
    engine = builder.build_cuda_engine(network)
    # A failed build returns None; check for it explicitly so the error is
    # clearer than an AttributeError raised by the `with` statement.
    if engine is None:
        raise RuntimeError("failed to build TensorRT engine from {}".format(uff_model_path))
    with engine:
        print("saving engine")
        with open(trt_engine_path, 'wb') as f:
            f.write(engine.serialize())

print("built .engine file successfully")
print("Finished converting the pre-trained model into TensorRT engine successfully")

build .engine file
parsing model
building engine
saving engine
built .engine file successfully
Finished converting the pre-trained model into TensorRT engine successfully
