In [1]:
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import onnx

In [8]:
def build_engine(model_file, max_ws=512*1024*1024, fp16=False):
    print("building engine")
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    #builder.fp16_mode = fp16
    config = builder.create_builder_config()
    config.max_workspace_size = max_ws
    if fp16:
        config.flags |= 1 << int(trt.BuilderFlag.FP16)
    
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.\
                                                  EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)
    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())
            print("network.num_layers", network.num_layers)
            #last_layer = network.get_layer(network.num_layers - 1)
            #network.mark_output(last_layer.get_output(0))
            engine = builder.build_engine(network, config=config)
            return engine


In [9]:
engine = build_engine("/home/airi/yolo/yolov8_files/detect/train/weights/best.onnx")

building engine
[08/09/2023-16:48:51] [TRT] [W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
network.num_layers 248


  config.max_workspace_size = max_ws
  engine = builder.build_engine(network, config=config)


In [11]:
engine_path = '/home/airi/yolo/yolov8_files/detect/train/weights/engine.engine'
serialized_engine = engine.serialize()
with open(engine_path, 'wb') as f:
    f.write(serialized_engine)