In [27]:
import cv2
import numpy as np

In [28]:
siamRPN = cv2.dnn.readNet("dasiamrpn_model_271_.onnx");


In [29]:
siamRPN.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
siamRPN.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

In [30]:
frame = cv2.imread("../9_1.jpg");

In [31]:
frame = cv2.resize(frame, (271,271));


In [32]:
frame= cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

In [33]:
resized_image = frame.astype(np.float32)

In [38]:
resized_image = resized_image.reshape(1,3,271,271)

In [39]:
resized_image.shape

(1, 3, 271, 271)

In [47]:
outNames = siamRPN.getUnconnectedOutLayersNames()

In [49]:
siamRPN.setInput(resized_image)
outputs = siamRPN.forward(outNames)

In [52]:
outputs[0].shape

(1, 20, 19, 19)

In [53]:
outputs[1].shape

(1, 10, 19, 19)

---

In [54]:
import os
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
# def load_engine(trt_runtime, engine_path):
#     with open(engine_path, 'rb') as f:
#         engine_data = f.read()
#     engine = trt_runtime.deserialize_cuda_engine(engine_data)
#     return engine

def load_engine(trt_runtime, engine_path):
    trt.init_libnvinfer_plugins(None, "")             ### Try to add here
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine_path = "dasiamrpn_model32_r_271.trt"
trt_engine = load_engine(trt_runtime, trt_engine_path)

if trt_engine is not None:
    print("Success")                                        
else:
    print("Failed")

Success


In [55]:
def infer(context, input_img, output_size, batch_size):
    # Load engine
    # engine = context.get_engine()
    # assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Create host buffer to receive data
    output_0 = np.empty(output_size[0], dtype = np.float32)
    output_1 = np.empty(output_size[1], dtype = np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output_0 = cuda.mem_alloc(batch_size * output_0.size * output_0.dtype.itemsize)
    d_output_1 = cuda.mem_alloc(batch_size * output_1.size * output_1.dtype.itemsize)


    bindings = [int(d_input), int(d_output_0), int(d_output_1) ]
    stream = cuda.Stream()
    # Transfer input data to device
    #print(type(input_img))
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.execute_async(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output_0, d_output_0, stream)
    cuda.memcpy_dtoh_async(output_1, d_output_1, stream)

    # Synchronize threads
    stream.synchronize()
    # Return predictions
    return output_0, output_1

In [56]:
context = trt_engine.create_execution_context()

In [59]:
trt_outputs = infer(context, resized_image, ((20, 19, 19),(10, 19, 19)), 1)

In [63]:
trt_outputs[0].shape

(20, 19, 19)

---

In [60]:
def MSE(Y, YH):
     return np.square(Y - YH).mean()

In [64]:
MSE(trt_outputs[0],outputs[0][0])

4.3490377e-13

In [65]:
MSE(trt_outputs[1],outputs[1][0])

4.982093e-11

In [67]:
trt_outputs[0]

array([[[ 0.62413144,  0.37625077,  0.39105883, ...,  0.89146537,
          0.68924814,  0.5945139 ],
        [ 0.64111656,  0.28482875,  0.296942  , ...,  0.8964713 ,
          0.6460773 ,  0.60448545],
        [ 0.45329237,  0.22925846,  0.37725642, ...,  0.81064445,
          0.42859083,  0.34351513],
        ...,
        [ 0.7738891 ,  0.8510453 ,  0.78433746, ...,  0.5530611 ,
          0.4002137 ,  0.46479753],
        [ 0.6979913 ,  0.86116743,  0.7195456 , ...,  0.5003079 ,
          0.2606023 ,  0.25208303],
        [ 0.29469752,  0.55350906,  0.44854686, ...,  0.54339296,
          0.46993694,  0.45106155]],

       [[ 0.4972058 ,  0.22878574,  0.2870435 , ...,  0.7740341 ,
          0.62201446,  0.47663295],
        [ 0.43245113,  0.14138657,  0.21727616, ...,  0.8188428 ,
          0.5941189 ,  0.42802143],
        [ 0.23891452,  0.11920982,  0.39351636, ...,  0.74957573,
          0.39137334,  0.21183448],
        ...,
        [ 0.72834206,  0.8286162 ,  0.76011866, ...,  

In [68]:
outputs[0][0]

array([[[ 0.6241312 ,  0.3762508 ,  0.39105868, ...,  0.8914651 ,
          0.68924767,  0.5945127 ],
        [ 0.6411166 ,  0.28482914,  0.29694262, ...,  0.89647144,
          0.6460778 ,  0.60448444],
        [ 0.45329198,  0.2292584 ,  0.37725693, ...,  0.8106448 ,
          0.42859033,  0.34351477],
        ...,
        [ 0.77389   ,  0.85104597,  0.78433764, ...,  0.55306125,
          0.40021372,  0.46479768],
        [ 0.6979912 ,  0.8611672 ,  0.7195449 , ...,  0.5003085 ,
          0.26060316,  0.25208274],
        [ 0.29469737,  0.5535086 ,  0.4485465 , ...,  0.5433933 ,
          0.46993628,  0.45106164]],

       [[ 0.49720567,  0.22878608,  0.28704336, ...,  0.7740336 ,
          0.6220139 ,  0.4766319 ],
        [ 0.43245113,  0.14138691,  0.21727662, ...,  0.8188431 ,
          0.59411937,  0.42802054],
        [ 0.23891407,  0.11920988,  0.39351705, ...,  0.74957603,
          0.39137274,  0.21183382],
        ...,
        [ 0.72834295,  0.828617  ,  0.7601186 , ...,  

In [69]:
import tensorrt as trt

In [70]:
trt.__version__

'8.2.1.8'