# Download ResNet50 and convert it to ONNX and TensorRT

In [1]:
from tensorflow.keras.applications import ResNet50

# Instantiate the Keras ResNet50 application with its ImageNet weights.
model = ResNet50(weights='imagenet')

In [3]:
# tf2onnx is not called from Python in this cell; the conversion itself is run
# later through `python3 -m tf2onnx.convert`.
import tf2onnx

# Export the Keras model to the `my_model` directory, which the converter reads.
model.save('my_model')

INFO:tensorflow:Assets written to: my_model/assets




In [5]:
# Convert the saved model in `my_model` to ONNX and write it to temp.onnx.
# No opset is requested on the command line.
!python3 -m tf2onnx.convert --saved-model my_model --output temp.onnx

2021-10-04 18:37:44,243 - INFO - Signatures found in model: [serving_default].
2021-10-04 18:37:44,244 - INFO - Output names: ['predictions']
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2021-10-04 18:37:47,437 - INFO - Using tensorflow=2.6.0, onnx=1.9.0, tf2onnx=1.9.2/0f28b7
2021-10-04 18:37:47,438 - INFO - Using opset <onnx, 9>
2021-10-04 18:37:51,416 - INFO - Computed 0 values for constant folding
2021-10-04 18:37:54,082 - INFO - Optimizing ONNX model
2021-10-04 18:37:54,942 - INFO - After optimization: Add -1 (18->17), BatchNormalization -53 (53->0), Const -162 (270->108), GlobalAveragePool +1 (0->1), Identity -57 (57->0), ReduceMean -1 (1->0), Squeeze +1 (0->1), Transpose -213 (214->1)
2021-10-04 18:37:55,007 - INFO - 
2021-10-04 18:37:55,007 - INFO - Successfully converted TensorFlow model my_model to ONNX
2021-10-04 18:37:55,007 - INFO - Model inputs: ['input_1']
2021-10-04 1

In [7]:
import onnx

In [8]:
# Load the ONNX file produced by the tf2onnx conversion step so it can be edited in memory.
onnx_model = onnx.load_model('temp.onnx')

In [9]:
# Pin the first (batch) dimension of every graph input to BATCH_SIZE so the
# model that gets saved afterwards has a fixed batch size.
BATCH_SIZE = 1
inputs = onnx_model.graph.input
# Use a name other than `input` so the builtin is not shadowed for the rest of
# the kernel session.
for graph_input in inputs:
    batch_dim = graph_input.type.tensor_type.shape.dim[0]
    batch_dim.dim_value = BATCH_SIZE

In [10]:
# Write the batch-size-adjusted model under a new name; temp.onnx is left untouched.
model_name = "resnet50_onnx_model.onnx"
onnx.save_model(onnx_model, model_name)

In [11]:
# Build a TensorRT engine from the fixed-batch ONNX model with trtexec and
# serialize it to resnet_engine.trt. No precision flag is passed.
!trtexec --onnx=resnet50_onnx_model.onnx --saveEngine=resnet_engine.trt  --explicitBatch

&&&& RUNNING TensorRT.trtexec # trtexec --onnx=resnet50_onnx_model.onnx --saveEngine=resnet_engine.trt --explicitBatch
[10/04/2021-18:38:36] [I] === Model Options ===
[10/04/2021-18:38:36] [I] Format: ONNX
[10/04/2021-18:38:36] [I] Model: resnet50_onnx_model.onnx
[10/04/2021-18:38:36] [I] Output:
[10/04/2021-18:38:36] [I] === Build Options ===
[10/04/2021-18:38:36] [I] Max batch: explicit
[10/04/2021-18:38:36] [I] Workspace: 16 MiB
[10/04/2021-18:38:36] [I] minTiming: 1
[10/04/2021-18:38:36] [I] avgTiming: 8
[10/04/2021-18:38:36] [I] Precision: FP32
[10/04/2021-18:38:36] [I] Calibration: 
[10/04/2021-18:38:36] [I] Refit: Disabled
[10/04/2021-18:38:36] [I] Safe mode: Disabled
[10/04/2021-18:38:36] [I] Save engine: resnet_engine.trt
[10/04/2021-18:38:36] [I] Load engine: 
[10/04/2021-18:38:36] [I] Builder Cache: Enabled
[10/04/2021-18:38:36] [I] NVTX verbosity: 0
[10/04/2021-18:38:36] [I] Tactic sources: Using default tactic sources
[10/04/2021-18:38:36] [I] Input(s)s format: fp32:CHW
[1

# RESTART NOTEBOOK

# Test Converted Model

In [1]:
import os
import sys
import shutil
import random
import time
import cv2
import numpy as np

# The project root comes from the RNB_PLANNING_DIR environment variable;
# indexing os.environ raises KeyError when it is not set.
PROJ_DIR = os.environ["RNB_PLANNING_DIR"]
# Put <PROJ_DIR>/src on the import path so the `pkg` package below can be found.
sys.path.append(os.path.join(PROJ_DIR, "src"))
# NOTE(review): star import; GlobalTimer used later presumably comes from here — confirm.
from pkg.utils.utils_python3 import *

In [2]:
#
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import tensorflow as tf
import tensorrt as trt

import pycuda.driver as cuda
import pycuda.autoinit

# For ONNX:

class ONNXClassifierWrapper():
    """Run a serialized TensorRT classification engine on NumPy batches.

    The engine file is deserialized once at construction time. The host output
    buffer, the device buffers and the CUDA stream are created lazily on the
    first `predict` call and are sized from that first batch.

    Args:
        file: Path to the serialized TensorRT engine.
        num_classes: Shape handed to `np.empty` for the host output buffer
            (an int, or a sequence such as `[batch_size, n_classes]`).
        target_dtype: NumPy dtype of the host output buffer. Input batches are
            converted to this dtype before being uploaded.
    """

    def __init__(self, file, num_classes, target_dtype = np.float32):
        self.target_dtype = target_dtype
        self.num_classes = num_classes
        # None means "buffers not allocated yet"; predict() checks this.
        self.stream = None
        self.load(file)

    def load(self, file):
        """Deserialize the engine stored at `file` and create an execution context.

        Raises:
            RuntimeError: If TensorRT returns no engine for the file contents.
        """
        # Read through a context manager so the file handle is always closed.
        with open(file, "rb") as f:
            serialized_engine = f.read()

        # Keep the runtime and the engine referenced on the instance so they
        # live as long as the execution context created from them.
        self.runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
        self.engine = self.runtime.deserialize_cuda_engine(serialized_engine)
        if self.engine is None:
            raise RuntimeError("Failed to deserialize TensorRT engine from {!r}".format(file))
        self.context = self.engine.create_execution_context()

    def allocate_memory(self, batch):
        """Allocate the host output buffer, the device buffers and the CUDA stream.

        The device input buffer is sized from `batch.nbytes`, so batches passed
        to `predict` afterwards must not be larger than this one.
        """
        # Need to set both input and output precisions to FP16 to fully enable FP16
        self.output = np.empty(self.num_classes, dtype = self.target_dtype)

        # Allocate device memory
        self.d_input = cuda.mem_alloc(1 * batch.nbytes)
        self.d_output = cuda.mem_alloc(1 * self.output.nbytes)

        # Device pointers in the order they are handed to the execution
        # context: input first, then output.
        self.bindings = [int(self.d_input), int(self.d_output)]

        self.stream = cuda.Stream()

    def predict(self, batch): # result gets copied into output
        """Run inference on `batch` and return the host output buffer.

        The batch is converted to a C-contiguous array of `target_dtype` before
        upload, so the bytes copied to the device match the precision this
        wrapper was configured with.

        Returns:
            `self.output`. The same array object is reused on every call, so
            copy it if the result must outlive the next `predict`.
        """
        # Raw bytes are copied to the device, so dtype and memory layout matter.
        batch = np.ascontiguousarray(batch, dtype = self.target_dtype)

        if self.stream is None:
            self.allocate_memory(batch)

        # Transfer input data to device
        cuda.memcpy_htod_async(self.d_input, batch, self.stream)
        # Execute model
        self.context.execute_async_v2(self.bindings, self.stream.handle, None)
        # Transfer predictions back
        cuda.memcpy_dtoh_async(self.output, self.d_output, self.stream)
        # Synchronize the stream so the output buffer is complete before returning
        self.stream.synchronize()

        return self.output

def convert_onnx_to_engine(onnx_filename, engine_filename = None, max_batch_size = 32, max_workspace_size = 1 << 30, fp16_mode = True):
    """Build a TensorRT engine from an ONNX file and optionally serialize it.

    Args:
        onnx_filename: Path of the ONNX model to parse.
        engine_filename: If truthy, the serialized engine is written to this path.
        max_batch_size: Value assigned to `builder.max_batch_size`.
        max_workspace_size: Value assigned to `builder.max_workspace_size`
            (defaults to `1 << 30`).
        fp16_mode: Value assigned to `builder.fp16_mode`.

    Returns:
        Tuple `(engine, logger)`.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or no engine is built.
    """
    logger = trt.Logger(trt.Logger.WARNING)
    # The ONNX parser works on explicit-batch networks, so request that flag
    # when creating the network definition.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(logger) as builder, builder.create_network(explicit_batch) as network, trt.OnnxParser(network, logger) as parser:
        builder.max_workspace_size = max_workspace_size
        builder.fp16_mode = fp16_mode
        builder.max_batch_size = max_batch_size

        print("Parsing ONNX file.")
        with open(onnx_filename, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # Report every parser error, then stop: building from a partially
            # parsed network cannot produce a usable engine.
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            raise RuntimeError("Failed to parse ONNX file {!r}".format(onnx_filename))

        print("Building TensorRT engine. This may take a few minutes.")
        engine = builder.build_cuda_engine(network)
        if engine is None:
            raise RuntimeError("TensorRT failed to build an engine from {!r}".format(onnx_filename))

        if engine_filename:
            with open(engine_filename, 'wb') as f:
                f.write(engine.serialize())

        return engine, logger

In [3]:

BATCH_SIZE = 1
PRECISION = np.float32
N_CLASSES = 1000 # Our ResNet-50 is trained on a 1000 class ImageNet task

# The second argument is used by the wrapper as the shape of its host output
# buffer, hence [BATCH_SIZE, N_CLASSES] rather than a bare class count.
trt_model = ONNXClassifierWrapper("resnet_engine.trt", [BATCH_SIZE, N_CLASSES], target_dtype = PRECISION)

In [4]:
gtimer = GlobalTimer.instance()

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than on the slicing below.
img = cv2.imread("test-container.png")
if img is None:
    raise FileNotFoundError("Could not read image: test-container.png")

# Crop a 224x224 window (rows 0-223, columns 50-273) and add a leading batch axis.
# Cast to PRECISION so the bytes uploaded to the engine match the dtype the
# wrapper was configured with.
input_batch = img[np.newaxis, :224, 50:274, :].astype(PRECISION)

In [5]:
# Reset the shared timer before measuring.
gtimer.reset()

# The first call is timed on its own: the wrapper allocates its device buffers
# and CUDA stream lazily on first use.
with gtimer.block("first_run"):
    predictions = trt_model.predict(input_batch)

# 100 further calls on the same batch, each recorded under the "test" label.
for run_idx in range(100):
    with gtimer.block("test"):
        predictions = trt_model.predict(input_batch)

In [6]:
# Print the timer's summary for the "first_run" and "test" blocks recorded above.
print(gtimer)

first_run: 	620.0 ms/1 = 620.04 ms (620.04/620.04)
test: 	163.0 ms/100 = 1.63 ms (1.528/2.922)



# RESTART NOTEBOOK

# Test Original Model

In [1]:
import os
import sys
import shutil
import random
import time
import cv2
import numpy as np

# The project root comes from the RNB_PLANNING_DIR environment variable;
# indexing os.environ raises KeyError when it is not set.
PROJ_DIR = os.environ["RNB_PLANNING_DIR"]
# Put <PROJ_DIR>/src on the import path so the `pkg` package below can be found.
sys.path.append(os.path.join(PROJ_DIR, "src"))
# NOTE(review): star import; GlobalTimer used later presumably comes from here — confirm.
from pkg.utils.utils_python3 import *

In [2]:
from tensorflow.keras.applications import ResNet50

# Baseline for the comparison: the unconverted Keras ResNet50 with ImageNet weights.
model = ResNet50(weights='imagenet')

In [3]:
gtimer = GlobalTimer.instance()

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than on the slicing below.
img = cv2.imread("test-container.png")
if img is None:
    raise FileNotFoundError("Could not read image: test-container.png")

# Crop a 224x224 window (rows 0-223, columns 50-273) and add a leading batch axis.
# Use float32 so this baseline is fed the same precision as the TensorRT
# benchmark instead of a float64 array.
input_batch = img[np.newaxis, :224, 50:274, :].astype(np.float32)

In [4]:
# Reset the shared timer before measuring.
gtimer.reset()

# The first call is recorded separately from the repeated calls.
with gtimer.block("first_run"):
    predictions = model.predict(input_batch)

# 100 further calls on the same batch, each recorded under the "test" label.
for run_idx in range(100):
    with gtimer.block("test"):
        predictions = model.predict(input_batch)

In [5]:
# Print the timer's summary for the "first_run" and "test" blocks recorded above.
print(gtimer)

first_run: 	2445.0 ms/1 = 2445.432 ms (2445.432/2445.432)
test: 	6079.0 ms/100 = 60.79 ms (36.351/103.676)

