In [1]:
import numpy as np
import os
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

import matplotlib.pyplot as plt
from PIL import Image

TRT_LOGGER = trt.Logger()

# Filenames of TensorRT plan file and input/output images.
engine_file = "superpoint.trt"



In [2]:
def load_engine(engine_file_path):
    assert os.path.exists(engine_file_path)
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

In [7]:

def infer(engine, input):
    # print("Reading input image from file {}".format(input_file))
    # with Image.open(input_file) as img:
    #     input_image = preprocess(img)
    #     image_width = img.width
    #     image_height = img.height
    img_height = input.shape[-2]
    img_width = input.shape[-1]
    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference
        context.set_binding_shape(engine.get_binding_index("input"), (1, 1, img_height, img_width))
        # Allocate host and device buffers
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                input_buffer = np.ascontiguousarray(input)
                input_memory = cuda.mem_alloc(input.nbytes)
                bindings.append(int(input_memory))
            else:
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer prediction output from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream
        stream.synchronize()

    return output_buffer.cpu().numpy()


In [2]:
import cv2
from superpoint.model import SuperPointNet
import torch
from time import time

# onnx_file_path = "/home/plus/tensorrt/tensorrt/trt_models/superpoint.onnx"
# engine_file_path = "/home/plus/tensorrt/tensorrt/trt_models/superpoint.trt"
pth_path = "/home/plus/tensorrt/tensorrt/superpoint/superpoint_v1.pth"
img_path = "/home/plus/tensorrt/tensorrt/img/test.jpg"
img = cv2.imread(img_path )[:, :, ::-1]
gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# input = torch.tensor(gray_img, dtype=torch.float, device=device)[None, None] / 255.
input = gray_img.astype('float32')[None, None]/255.
input = torch.from_numpy(input).cuda()
print(gray_img.shape)
model = SuperPointNet()
model.load_state_dict(torch.load(pth_path))
model = model.cuda()
model.eval()
start_time = time()
out = model.forward(input)
end_time = time()
semi, coarse_desc = out[0], out[1]  # [1,65,h/8,w/8], [1,256,h/8,w/8]
print(semi.shape, coarse_desc.shape)
print("inference time:", end_time-start_time)

ImportError: cannot import name 'SuperPointNet' from partially initialized module 'superpoint.model' (most likely due to a circular import) (/home/plus/tensorrt/tensorrt/superpoint/model.py)

In [4]:
dense = np.exp(semi) # Softmax.
dense = dense / (np.sum(dense, axis=0)+.00001) # Should sum to 1.
# Remove dustbin.
nodust = dense[:-1, :, :]
# Reshape to get full resolution heatmap.
Hc = int(H / self.cell)
Wc = int(W / self.cell)
nodust = nodust.transpose(1, 2, 0)
heatmap = np.reshape(nodust, [Hc, Wc, self.cell, self.cell])
heatmap = np.transpose(heatmap, [0, 2, 1, 3])
heatmap = np.reshape(heatmap, [Hc*self.cell, Wc*self.cell])
xs, ys = np.where(heatmap >= self.conf_thresh) # Confidence threshold.
if len(xs) == 0:
  return np.zeros((3, 0)), None, None
pts = np.zeros((3, len(xs))) # Populate point data sized 3xN.
pts[0, :] = ys
pts[1, :] = xs
pts[2, :] = heatmap[xs, ys]
pts, _ = self.nms_fast(pts, H, W, dist_thresh=self.nms_dist) # Apply NMS.
inds = np.argsort(pts[2,:])
pts = pts[:,inds[::-1]] # Sort by confidence.
# Remove points along border.
bord = self.border_remove
toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord))
toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord))
toremove = np.logical_or(toremoveW, toremoveH)
pts = pts[:, ~toremove]
# --- Process descriptor.
D = coarse_desc.shape[1]
if pts.shape[1] == 0:
  desc = np.zeros((D, 0))
else:
  # Interpolate into descriptor map using 2D point locations.
  samp_pts = torch.from_numpy(pts[:2, :].copy())
  samp_pts[0, :] = (samp_pts[0, :] / (float(W)/2.)) - 1.
  samp_pts[1, :] = (samp_pts[1, :] / (float(H)/2.)) - 1.
  samp_pts = samp_pts.transpose(0, 1).contiguous()
  samp_pts = samp_pts.view(1, 1, -1, 2)
  samp_pts = samp_pts.float()
  if self.cuda:
    samp_pts = samp_pts.cuda()
  desc = torch.nn.functional.grid_sample(coarse_desc, samp_pts)
  desc = desc.data.cpu().numpy().reshape(D, -1)
  desc /= np.linalg.norm(desc, axis=0)[np.newaxis, :]


NameError: name 'np' is not defined