In [11]:
import numpy as np
import os
import onnxruntime as ort
import tqdm
import cv2

model_path = '../xfeat.onnx'

#Load some example images
im1 = cv2.imread('../assets/ref.png', cv2.IMREAD_COLOR)

In [12]:
tmp_ort_session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])

# print the input,output names and shapes
for i in range(len(tmp_ort_session.get_inputs())):
    print(f"Input name: {tmp_ort_session.get_inputs()[i].name}, shape: {tmp_ort_session.get_inputs()[i].shape}")
for i in range(len(tmp_ort_session.get_outputs())):
    print(f"Output name: {tmp_ort_session.get_outputs()[i].name}, shape: {tmp_ort_session.get_outputs()[i].shape}")


providers = [
    # ('TensorrtExecutionProvider', { 
    #     'device_id': 0,
    #     'trt_max_workspace_size': 1 * 1024 * 1024 * 1024,
    #     'trt_fp16_enable': True,
    #     'trt_engine_cache_enable': True,
    #     'trt_engine_cache_path': './trt_engine_cache',
    #     'trt_engine_cache_prefix': 'xfeat',
    #     'trt_dump_subgraphs': False,
    #     'trt_timing_cache_enable': True,
    #     'trt_timing_cache_path': './trt_engine_cache',
    #     #'trt_builder_optimization_level': 3,
    # }),
    # ('CUDAExecutionProvider', {
    #     'device_id': 0,
    #     'gpu_mem_limit': 1 * 1024 * 1024 * 1024,
    # }),
    ('CPUExecutionProvider',{ 
    })
]
ort_session = ort.InferenceSession(model_path, providers=providers)

Input name: images, shape: ['batch_size', 3, 'height', 'width']
Input name: top_k, shape: []
Output name: keypoints, shape: ['ScatterNDkeypoints_dim_0', 'ScatterNDkeypoints_dim_1', 2]
Output name: scores, shape: ['GatherElementsscores_dim_0', 'GatherElementsscores_dim_1']
Output name: descriptors, shape: ['Divdescriptors_dim_0', 'Divdescriptors_dim_1', 64]


## Prepare the input tensor

In [13]:
# im1 = cv2.resize(im1, (640, 640)) # for bechmarking

input_array_1 = im1.transpose(2, 0, 1).astype(np.float32)
input_array_1 = np.expand_dims(input_array_1, axis=0)

batch_size = 8

# Psuedo-batch the input images
input_array_1 = np.concatenate([input_array_1 for _ in range(batch_size)], axis=0)

inputs = {
  ort_session.get_inputs()[0].name: input_array_1,
}

## Run

In [14]:
outputs = ort_session.run(None, inputs)

ValueError: Required inputs (['top_k']) are missing from input feed (['images']).

In [5]:
for i in range(len(outputs)):
    print(f"Output name: {ort_session.get_outputs()[i].name}, shape: {outputs[i].shape}")

Output name: feats, shape: (8, 64, 75, 100)
Output name: keypoints, shape: (8, 65, 75, 100)
Output name: heatmaps, shape: (8, 1, 75, 100)


In [6]:
# Validate the outputs of the psuedo-batched inputs

feats = outputs[0]
keypoints = outputs[1]
heatmaps = outputs[2]

feats_0 = feats[0]
keypoints_0 = keypoints[0]
heatmaps_0 = heatmaps[0]

valid = []
for i in range(1, input_array_1.shape[0]):
    valid.append(np.all(feats_0 == feats[i]))
    valid.append(np.all(keypoints_0 == keypoints[i]))
    valid.append(np.all(heatmaps_0 == heatmaps[i]))
print(f"equal: {valid}")

equal: [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]


In [7]:
import time

# Run the model 100 times to get an average time
loop = 100
start = time.perf_counter()
for i in tqdm.tqdm(range(loop)):
    outputs = ort_session.run(None, inputs)
duration = time.perf_counter()-start

print(f"Average time per batch: {duration/loop:.4f} seconds")
print(f"Average time per image: {duration/loop/batch_size:.4f} seconds")
print(f"Average FPS per image: {batch_size*loop/duration:.4f}")

100%|██████████| 100/100 [00:03<00:00, 31.91it/s]

Average time per batch: 0.0314 seconds
Average time per image: 0.0039 seconds
Average FPS per image: 254.7242



