## Run inference with the PyTorch model

In [None]:
import torch
import lightglue_dynamo.models.superpoint_pytorch as superpoint_pytorch

# SuperPoint settings forwarded to the model constructor below.
detection_thresh = 0.005
nms_radius = 5
max_keypoints = 256

# Choose the device once so that checkpoint loading and model placement
# agree, and so this cell also runs on a machine without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sp_th = superpoint_pytorch.SuperPointOpen(
    detection_threshold=detection_thresh,
    nms_radius=nms_radius,
    max_num_keypoints=max_keypoints,
).eval()
print('Config:', sp_th.conf)

# Load the checkpoint directly onto the selected device.
ckpt = torch.load(
    "/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth",
    map_location=device,
)

# Some checkpoints may be wrapped in {"state_dict": ...}
if "state_dict" in ckpt:
    ckpt = ckpt["state_dict"]

# strict=True makes a missing or unexpected key an error instead of a
# silently half-initialised model.
sp_th.load_state_dict(ckpt, strict=True)

# Previously `.cuda()` was called unconditionally, which raises on CPU-only
# hosts even though the checkpoint load above already falls back to CPU.
sp_th.eval().to(device)
# Optional: sp_th = torch.compile(sp_th, mode="reduce-overhead")  # "max-autotune" , "reduce-overhead"

In [None]:
import torch
import cv2
import numpy as np
from lightglue_dynamo.preprocessors import SuperPointOpenPreprocessor
from lightglue_dynamo import viz

# --- 1. Load, resize, batch and preprocess the image pair ---
image_path1 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/debug1.png'
image_path2 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/debug2.png'

h, w = 400, 640


def _load_resized(path, width, height):
    """Read the image at `path` with OpenCV and resize it to (width, height).

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. `cv2.imread`
            signals failure by returning None rather than raising, which
            would otherwise surface as an opaque error inside `cv2.resize`.
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (width, height))


# Load color images and resize them to a common size so they can be stacked.
raw_img1 = _load_resized(image_path1, w, h)
raw_img2 = _load_resized(image_path2, w, h)

# Batch along a new leading axis; the un-preprocessed batch is kept for plotting.
image_batch_bgr = np.stack([raw_img1, raw_img2], axis=0)
preprocessed_batch = SuperPointOpenPreprocessor.preprocess(image_batch_bgr)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor = torch.from_numpy(preprocessed_batch).to(device)

# Expected layout per the original note: torch.Size([2, 1, H_padded, W_padded])
# (depends on the preprocessor; confirm against the printed value).
print(f"Final tensor shape: {tensor.shape}")

In [None]:
# Forward pass with gradient tracking disabled.
with torch.no_grad():
    pred = sp_th({"image": tensor})

# Move each model output to the CPU and convert it to a NumPy array.
kpts, scores, descriptors, num_kpts = (
    pred[field].cpu().numpy()
    for field in ("keypoints", "keypoint_scores", "descriptors", "num_keypoints")
)

# Report the shape of every output, one line per array.
print(
    "\n".join(
        f"{title}: {array.shape}"
        for title, array in (
            ("Keypoints", kpts),
            ("Keypoint scores", scores),
            ("Descriptors", descriptors),
            ("Number of keypoints", num_kpts),
        )
    )
)

# Draw the detected keypoints on top of the original (un-preprocessed) images.
viz.plot_sp_open(
    image_batch_bgr,
    image_batch_bgr.shape[0],
    kpts,
    num_kpts,
)

In [None]:
import onnxruntime as ort

onnx_model_path = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_open_b2_h400_w640_kp256.onnx'

# Providers are listed in order of preference: CUDA first, CPU as fallback.
session = ort.InferenceSession(onnx_model_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
# Show which providers the session actually registered, so an unexpected CPU
# fallback is visible when comparing against the PyTorch run.
print(f"Active providers: {session.get_providers()}")

input_name = session.get_inputs()[0].name
output_names = [output.name for output in session.get_outputs()]
print(f"Input Name: {input_name}")
print(f"Output Names: {output_names}")

# Run Inference on the same preprocessed batch that was fed to the PyTorch model.
onnx_inputs = {input_name: preprocessed_batch}
print("Preprocessed batch:", preprocessed_batch.shape, preprocessed_batch.dtype,
      preprocessed_batch.min(), preprocessed_batch.max())

outputs_onnx = session.run(output_names, onnx_inputs)

# Unpack the list of outputs.
# NOTE(review): this assumes the exported graph has exactly four outputs in the
# order (keypoints, scores, descriptors, num_keypoints) -- confirm against the
# "Output Names" printed above.
kpts_onnx, scores_onnx, desc_onnx, num_kpts_onnx = outputs_onnx

print("\n--- ONNX Runtime Output ---")
print(f"Keypoints shape: {kpts_onnx.shape}")
print(f"Scores shape: {scores_onnx.shape}")
print(f"Descriptors shape: {desc_onnx.shape}")
print(f"Num Keypoints: {num_kpts_onnx}")
# ndarray.sum() totals over all elements for any rank (including 0-d), so no
# ndim special case is needed and 2-D outputs give a scalar total.
print("Num keypoints (sum):", num_kpts_onnx.sum())

viz.plot_sp_open(image_batch_bgr, image_batch_bgr.shape[0], kpts_onnx, num_kpts_onnx)