In [1]:
import os
import numpy as np
import tensorrt as trt
from cuda import cudart

In [2]:
# Path of the serialized engine (.plan) file: run() loads it when the file
# exists, otherwise builds an engine and writes it to this path.
trtFile = "./model.plan"

In [3]:
def run():
    """Load (or build and cache) a TensorRT engine, then run one inference.

    If ``trtFile`` exists, its bytes are used as the serialized engine.
    Otherwise a network consisting of a single identity layer over a
    3-dimensional float32 input with dynamic shape is built, serialized and
    written to ``trtFile``.

    The engine is then deserialized, the input binding shape is set to
    (3, 4, 5), ``np.arange(60)`` is copied to the device, the engine is
    executed, and every binding's name and host buffer are printed.

    Returns:
        None. On any failure a message is printed and the function returns
        early; device buffers allocated so far are always released.
    """
    logger = trt.Logger(trt.Logger.ERROR)

    if os.path.isfile(trtFile):
        with open(trtFile, 'rb') as f:
            engineString = f.read()
        # read() returns empty bytes (never None) for an empty file, so test
        # for emptiness rather than comparing against None.
        if not engineString:
            print("Failed getting serialized engine!")
            return
        print("Succeeded getting serialized engine!")
    else:
        builder = trt.Builder(logger)
        network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        profile = builder.create_optimization_profile()
        config = builder.create_builder_config()
        # NOTE(review): max_workspace_size is reportedly deprecated in newer
        # TensorRT releases; kept because the rest of this file uses the
        # same API generation (binding-index calls, execute_v2).
        config.max_workspace_size = 1 << 30

        inputTensor = network.add_input('inputT0', trt.float32, [-1, -1, -1])
        # min / opt / max shapes for the dynamic input
        profile.set_shape(inputTensor.name, [1, 1, 1], [3, 4, 5], [6, 8, 10])
        config.add_optimization_profile(profile)

        identityLayer = network.add_identity(inputTensor)
        network.mark_output(identityLayer.get_output(0))

        engineString = builder.build_serialized_network(network, config)
        if engineString is None:
            print("Failed getting serialized engine!")
            return
        print("Succeeded getting serialized engine!")
        with open(trtFile, 'wb') as f:
            f.write(engineString)
            print("Succeeded saving .plan file!")

    engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)
    if engine is None:
        print("Failed building engine!")
        return
    print("success building")

    context = engine.create_execution_context()
    context.set_binding_shape(0, [3, 4, 5])

    # The loops below assume input bindings occupy indices [0, nInput) and
    # outputs follow them.
    nInput = sum(1 for i in range(engine.num_bindings) if engine.binding_is_input(i))
    nOutput = engine.num_bindings - nInput
    for i in range(nInput):
        print("Bind[%2d]:i[%2d]->" % (i, i), engine.get_binding_dtype(i), engine.get_binding_shape(i), context.get_binding_shape(i), engine.get_binding_name(i))
    for i in range(nInput, nInput + nOutput):
        print("Bind[%2d]:o[%2d]->" % (i, i - nInput), engine.get_binding_dtype(i), engine.get_binding_shape(i), context.get_binding_shape(i), engine.get_binding_name(i))

    # Host buffers: the flattened input first, then one empty array per output.
    data = np.arange(3 * 4 * 5, dtype=np.float32).reshape(3, 4, 5)
    bufferH = [np.ascontiguousarray(data.reshape(-1))]
    for i in range(nInput, nInput + nOutput):
        bufferH.append(np.empty(context.get_binding_shape(i), dtype=trt.nptype(engine.get_binding_dtype(i))))

    bufferD = []
    try:
        # cudaMalloc returns (error, pointer); keep only the device pointer.
        for i in range(nInput + nOutput):
            bufferD.append(cudart.cudaMalloc(bufferH[i].nbytes)[1])

        for i in range(nInput):
            cudart.cudaMemcpy(bufferD[i], bufferH[i].ctypes.data, bufferH[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)

        if not context.execute_v2(bufferD):
            print("Failed running inference!")
            return

        # cudaMemcpy takes (dst, src, count, kind): the host buffer is the
        # destination when copying results back from the device.
        for i in range(nInput, nInput + nOutput):
            cudart.cudaMemcpy(bufferH[i].ctypes.data, bufferD[i], bufferH[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)

        for i in range(nInput + nOutput):
            print(engine.get_binding_name(i))
            print(bufferH[i].reshape(context.get_binding_shape(i)))
    finally:
        # Release device memory even if a step above raised or returned early.
        for b in bufferD:
            cudart.cudaFree(b)

In [6]:
# Notebook-level TensorRT logger created with trt.Logger.ERROR; passed to the builder below.
logger = trt.Logger(trt.Logger.ERROR)

In [10]:
# Builder constructed from the notebook-level `logger`.
builder = trt.Builder(logger)

In [11]:
# Create the network definition with the EXPLICIT_BATCH creation flag bit set.
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

In [12]:
# Create an optimization profile from the builder.
profile = builder.create_optimization_profile()


In [13]:
# Builder configuration object.
config = builder.create_builder_config()
# Workspace limit of 1 << 30 (presumably bytes, i.e. 1 GiB — confirm against the TensorRT docs).
config.max_workspace_size = 1 << 30

  


In [14]:
# Declare a float32 network input named 'inputT0' with all three dimensions given as -1
# (presumably TensorRT's marker for a runtime-determined dimension — confirm).
inputTensor = network.add_input('inputT0', trt.float32, [-1, -1, -1])
