# Benchmark of TensorRT on GPU


|                     |                                         |
|---------------------|-------------------------------------------|
|**Hard ware**         | Intel Ice Lake with NVIDIA® Tesla® T4 with 4 CPUs and 16 Gb RAM.  |
|**Software Platform**| host |
|**Tools to compare**| Torch Script, Torch Trace, ONNX, Open Vino|

### Common

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import timm
import torch as th
import typing as tp
import numpy as np
import cv2
import yaml
import pandas as pd

from src.utils import get_batch, MAX_UINT8, benchmark


DATA_DIR = "./images"
BATCH_SIZES = [1,2,4,8,16]
PLATFORM = "1GPU"
WARMUP = 5
N_RUNS = 10
VERBOSE = False
N_PRINT = 5
DEVICE = "cuda"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
batches = dict()
for batch_sz in BATCH_SIZES:
    batch = get_batch(DATA_DIR, batch_sz)
    batches[batch_sz] = batch
    del batch

In [4]:
inference_records = []

### Torch Model

In [5]:
from src.model.torch import ModelTorch

In [6]:
records = []
with open("./config/torch.yaml") as fp:
    cfg_torch = yaml.safe_load(fp)

cfg_torch["device"] = DEVICE
model_torch = ModelTorch(cfg_torch)
for batch_sz in BATCH_SIZES:
    avg, stdev = benchmark(
        model=model_torch,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "Torch"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.039872,1GPU,1,Torch
1,0.073134,1GPU,2,Torch
2,0.14164,1GPU,4,Torch
3,0.280348,1GPU,8,Torch
4,0.562738,1GPU,16,Torch


### TensorRT32 Model

In [7]:
import tensorrt as trt
import torch_tensorrt
from src.model.torch_jit import ModelTorchJIT

In [8]:
for batch_sz in BATCH_SIZES:
    trt_model = torch_tensorrt.compile(
        model_torch.model,                                       
        inputs = [
            torch_tensorrt.Input(
                (batch_sz, 3, MAX_UINT8, MAX_UINT8)
            )
        ], 
        enabled_precisions = th.float32,                # <- изменения здесь
        workspace_size = 1 << 30,                     
    )
    th.jit.save(trt_model, f"weights/model_trt_fp32_{batch_sz}.ts")

ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)
ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)


In [9]:

records = []
for batch_sz in BATCH_SIZES:
    with open(
        f"./config/tensorrt_fp32.yaml"
    ) as fp:
        cfg_trt = yaml.safe_load(fp)
    cfg_trt["device"] = DEVICE
    cfg_trt["path"] = f"weights/model_trt_fp32_{batch_sz}.ts"
    model_trt = ModelTorchJIT(cfg_trt)
    avg, _ = benchmark(
        model=model_trt,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "TensorRT32"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)

ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)
ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)
ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)
ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Err

Unnamed: 0,time,platform,batch_sz,tool
0,0.004407,1GPU,1,TensorRT32
1,0.006015,1GPU,2,TensorRT32
2,0.009285,1GPU,4,TensorRT32
3,0.014662,1GPU,8,TensorRT32
4,0.020792,1GPU,16,TensorRT32


### TensorRT16 Model

In [10]:
import tensorrt as trt
import torch_tensorrt
from src.model.torch_jit import ModelTorchJIT

In [11]:
for batch_sz in BATCH_SIZES:
    trt_model = torch_tensorrt.compile(
        model_torch.model,                                       
        inputs = [torch_tensorrt.Input((1, 3, MAX_UINT8, MAX_UINT8))], 
        enabled_precisions = th.float16,                # <- изменения здесь
        workspace_size = 1 << 30,                     
    )
    th.jit.save(trt_model, f"weights/model_trt_fp16_{batch_sz}.ts")

ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)
ERROR: [Torch-TensorRT] - 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)


In [12]:

records = []
for batch_sz in BATCH_SIZES:
    with open("./config/tensorrt_fp16.yaml") as fp:
        cfg_trt = yaml.safe_load(fp)
    cfg_trt["device"] = DEVICE
    cfg_trt["path"] = f"weights/model_trt_fp16_{batch_sz}.ts"
    model_trt = ModelTorchJIT(cfg_trt)
    avg, _ = benchmark(
        model=model_trt,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "TensorRT16"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)

ERROR: [Torch-TensorRT] - 3: [executionContext.cpp::setInputShape::2264] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setInputShape::2264, condition: engineDims.d[i] == dims.d[i]. Static dimension mismatch while setting input shape.
)
ERROR: [Torch-TensorRT] - 3: [executionContext.cpp::setInputShape::2264] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setInputShape::2264, condition: engineDims.d[i] == dims.d[i]. Static dimension mismatch while setting input shape.
)
ERROR: [Torch-TensorRT] - 3: [executionContext.cpp::setInputShape::2264] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setInputShape::2264, condition: engineDims.d[i] == dims.d[i]. Static dimension mismatch while setting input shape.
)
ERROR: [Torch-TensorRT] - 3: [executionContext.cpp::setInputShape::2264] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionCo

Unnamed: 0,time,platform,batch_sz,tool
0,0.002287,1GPU,1,TensorRT16
1,0.013059,1GPU,2,TensorRT16
2,0.013825,1GPU,4,TensorRT16
3,0.014197,1GPU,8,TensorRT16
4,0.014572,1GPU,16,TensorRT16


In [13]:
import json
with open(
    "results/inference_results_trt.json", 
    "w",
    encoding="utf8"
) as fp:
    json.dump(fp=fp, obj=inference_records)

In [14]:
pd.DataFrame(inference_records)

Unnamed: 0,time,platform,batch_sz,tool
0,0.039872,1GPU,1,Torch
1,0.073134,1GPU,2,Torch
2,0.14164,1GPU,4,Torch
3,0.280348,1GPU,8,Torch
4,0.562738,1GPU,16,Torch
5,0.004407,1GPU,1,TensorRT32
6,0.006015,1GPU,2,TensorRT32
7,0.009285,1GPU,4,TensorRT32
8,0.014662,1GPU,8,TensorRT32
9,0.020792,1GPU,16,TensorRT32
