In [5]:
# Assert that you are running on tensorrt container
! which trtexec
# If you want to check trtexec running options, run trtexec -h 
# ! trtexec -h

/opt/tensorrt/bin/trtexec


In [24]:
# prepare resnet-18 model
import timm
import torch
import inspect

model = timm.create_model("resnet18").cpu()

# check model forward's input parameter name
SHAPE = (3, 224, 224)
signature = inspect.signature(model.forward)
print(signature)

# export model to onnx format
# For more details, please refer to https://pytorch.org/docs/stable/onnx.html
dummy_input = (torch.randn(*((1,)+SHAPE)).cpu(),)
input_names = ["x"]
output_names = ["outputs"]

model = model.eval() # Need to set model to eval

torch.onnx.export(
    model,
    dummy_input,
    "resnet18.onnx",
    dynamic_axes={
        "x" : {0: "batch"}, # To support dynamic shape on target axis
    },
    verbose=True,
    input_names=input_names, # Need to be aligned with actual parameter name in forward function
    output_names=output_names # Required to match with number of actual output 
)


(x)
Exported graph: graph(%x : Float(*, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cpu),
      %fc.weight : Float(1000, 512, strides=[512, 1], requires_grad=1, device=cpu),
      %fc.bias : Float(1000, strides=[1], requires_grad=1, device=cpu),
      %onnx::Conv_193 : Float(64, 3, 7, 7, strides=[147, 49, 7, 1], requires_grad=0, device=cpu),
      %onnx::Conv_194 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_196 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_199 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_202 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_208 : Float(128, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_209 : Float(128, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_211 : Float(128, 128, 3, 3, strides=[1152, 9, 3, 1], requires_gr

In [3]:
# visualize model graph with netron
! netron resnet18.onnx -b

Serving 'resnet18.onnx' at http://localhost:8080
^C

Stopping http://localhost:8080


In [28]:
# check onnx integrity

import onnx
import onnxruntime as ort
from tqdm import tqdm 

NUM_TEST = 10
B = 16

onnx_model = onnx.load("resnet18.onnx")
sess = ort.InferenceSession(
    onnx_model.SerializeToString(), 
    providers=["CPUExecutionProvider"]
)

mean_diff = 0
with torch.no_grad():
    for _ in tqdm(range(NUM_TEST)):
        input_dict = {"x" : torch.randn(*((B,) + SHAPE))}
        torch_output = model(**input_dict)
        onnx_output = sess.run(output_names, {k : v.numpy() for k, v in input_dict.items()})
        mean_diff += (torch_output - torch.from_numpy(onnx_output[0])).square().mean()


2023-07-24 05:31:11.263182515 [E:onnxruntime:Default, env.cc:251 ThreadMain] pthread_setaffinity_np failed for thread: 1525182, index: 8, mask: {9, 33, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.
2023-07-24 05:31:11.268547193 [E:onnxruntime:Default, env.cc:251 ThreadMain] pthread_setaffinity_np failed for thread: 1525191, index: 17, mask: {18, 42, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.
2023-07-24 05:31:11.268570122 [E:onnxruntime:Default, env.cc:251 ThreadMain] pthread_setaffinity_np failed for thread: 1525189, index: 15, mask: {16, 40, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.
2023-07-24 05:31:11.272535812 [E:onnxruntime:Default, env.cc:251 ThreadMain] pthread_setaffinity_np failed for thread: 1525184, index: 10, mask: {11, 35, }, error code: 22 error msg: Invalid a

In [29]:
print(mean_diff / NUM_TEST)

tensor(9.0126e-17)


In [7]:
# change onnx model to tensorrt using trtexec

! trtexec --onnx=resnet18.onnx --minShapes=x:1x3x224x224 --optShapes=x:16x3x224x224 --maxShapes=x:32x3x224x224 --useCudaGraph --saveEngine=resnet18.plan --verbose=true 


&&&& RUNNING TensorRT.trtexec [TensorRT v8601] # trtexec --onnx=resnet18.onnx --minShapes=x:1x3x224x224 --optShapes=x:16x3x224x224 --maxShapes=x:32x3x224x224 --useCudaGraph --saveEngine=resnet18.plan --verbose=true
[07/24/2023-05:44:16] [I] === Model Options ===
[07/24/2023-05:44:16] [I] Format: ONNX
[07/24/2023-05:44:16] [I] Model: resnet18.onnx
[07/24/2023-05:44:16] [I] Output:
[07/24/2023-05:44:16] [I] === Build Options ===
[07/24/2023-05:44:16] [I] Max batch: explicit batch
[07/24/2023-05:44:16] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[07/24/2023-05:44:16] [I] minTiming: 1
[07/24/2023-05:44:16] [I] avgTiming: 8
[07/24/2023-05:44:16] [I] Precision: FP32
[07/24/2023-05:44:16] [I] LayerPrecisions: 
[07/24/2023-05:44:16] [I] Layer Device Types: 
[07/24/2023-05:44:16] [I] Calibration: 
[07/24/2023-05:44:16] [I] Refit: Disabled
[07/24/2023-05:44:16] [I] Version Compatible: Disabled
[07/24/2023-05:44:16] [I] TensorRT runtime: f

In [None]:
from tensorrt_handson_lab.tensorrt_utils import common
import numpy as np
from typing import Dict

def infer(engine, input_bindings, output_bindings, batch: Dict[str, np.ndarray], batch_size):
    for k, val in batch.items():
        if input_bindings[k]["shape"][0] > val.shape[0]:
            padded = np.zeros(dtype=input_bindings[k]["dtype"], shape=input_bindings[k]["shape"])
            padded[: len(val)] = val
            batch_size = val.shape[0]
        common.memcpy_host_to_device(
            input_bindings[k]["allocation"],
            np.ascontiguousarray(val.astype(input_bindings[k]["dtype"])),
        )
