## Run the inference with the PyTorch model

In [None]:
import torch
import lightglue_dynamo.models.superpoint_pytorch as superpoint_pytorch

detection_thresh = 0.005
nms_radius = 5
max_keypoints = 256

sp_th = superpoint_pytorch.SuperPointOpen(detection_threshold=detection_thresh, nms_radius=nms_radius, max_num_keypoints=max_keypoints).eval()
print('Config:', sp_th.conf)

# Load state dict (map_location="cpu" if no GPU)
ckpt = torch.load("/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth", map_location="cuda" if torch.cuda.is_available() else "cpu")

# Some checkpoints may be wrapped in {"state_dict": ...}
if "state_dict" in ckpt:
    ckpt = ckpt["state_dict"]

# Load weights
sp_th.load_state_dict(ckpt, strict=True)
sp_th.eval().cuda()
# sp_th = torch.compile(sp_th, mode="reduce-overhead")  # "max-autotune" , "reduce-overhead"

In [None]:
import torch
import cv2
import numpy as np
from lightglue_dynamo.preprocessors import SuperPointOpenPreprocessor
from lightglue_dynamo import viz

# --- 1. Load and Normalize Images Individually ---
image_path1 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/sacre_coeur1.jpg' # sacre_coeur1.jpg, debug1.png
image_path2 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/sacre_coeur2.jpg' # sacre_coeur2.jpg, debug2.png

h, w = 400, 640 

# Load color images and resize them
raw_img1 = cv2.resize(cv2.imread(image_path1), (w, h))
raw_img2 = cv2.resize(cv2.imread(image_path2), (w, h))
image_batch_bgr = np.stack([raw_img1, raw_img2], axis=0)
preprocessed_batch = SuperPointOpenPreprocessor.preprocess(image_batch_bgr)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor = torch.from_numpy(preprocessed_batch).to(device)

print(f"Final tensor shape: {tensor.shape}") #torch.Size([2, 1, H_padded, W_padded])

In [None]:
with torch.no_grad():
    pred = sp_th({"image": tensor})

kpts = pred['keypoints'].cpu().numpy()
scores = pred['keypoint_scores'].cpu().numpy()
descriptors = pred['descriptors'].cpu().numpy()
num_kpts = pred['num_keypoints'].cpu().numpy()

print(f"Keypoints: {kpts.shape}")
print(f"Keypoint scores: {scores.shape}")
print(f"Descriptors: {descriptors.shape}")
print(f"Number of keypoints: {num_kpts.shape}")

viz.plot_sp_open(image_batch_bgr, image_batch_bgr.shape[0], kpts, num_kpts)

In [None]:
import onnxruntime as ort

onnx_model_path = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_open_b2_h400_w640_kp256.onnx'
session = ort.InferenceSession(onnx_model_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

input_name = session.get_inputs()[0].name
output_names = [output.name for output in session.get_outputs()]
print(f"Input Name: {input_name}")
print(f"Output Names: {output_names}")

# Run Inference
onnx_inputs = {input_name: preprocessed_batch}
print("Preprocessed batch:", preprocessed_batch.shape, preprocessed_batch.dtype,
      preprocessed_batch.min(), preprocessed_batch.max())

outputs_onnx = session.run(output_names, onnx_inputs)

# Unpack the list of outputs
kpts_onnx, scores_onnx, desc_onnx, num_kpts_onnx = outputs_onnx

print("\n--- ONNX Runtime Output ---")
print(f"Keypoints shape: {kpts_onnx.shape}")
print(f"Scores shape: {scores_onnx.shape}")
print(f"Descriptors shape: {desc_onnx.shape}")
print(f"Num Keypoints: {num_kpts_onnx}")
print("Num keypoints (ONNX):", num_kpts_onnx)
print("Num keypoints (sum):", sum(num_kpts_onnx) if num_kpts_onnx.ndim > 0 else num_kpts_onnx)

viz.plot_sp_open(image_batch_bgr, image_batch_bgr.shape[0], kpts_onnx, num_kpts_onnx)

In [13]:
import torch
import onnxruntime as ort
import numpy as np

def validate_export():
    """Run this right after your export to validate it matches PyTorch"""
    
    # Load your model exactly like in the export
    from lightglue_dynamo.models.superpoint_pytorch import SuperPointOpen
    
    detection_thresh = 0.005
    nms_radius = 5
    max_keypoints = 256
    
    # Create model
    extractor = SuperPointOpen(
        detection_threshold=detection_thresh, 
        nms_radius=nms_radius, 
        max_num_keypoints=max_keypoints
    )
    
    # Load same weights as export
    ckpt = torch.load("/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth", map_location="cpu")
    if "state_dict" in ckpt:
        ckpt = ckpt["state_dict"]
    
    extractor.load_state_dict(ckpt, strict=True)
    extractor.eval()
    
    # Same random seed as export
    torch.manual_seed(42)
    
    # Create test input
    test_input = torch.randn(2, 1, 400, 640, dtype=torch.float32)
    
    # PyTorch inference
    with torch.no_grad():
        torch_output = extractor({"image": test_input})
    
    torch_kpts = torch_output["keypoints"].numpy()
    torch_scores = torch_output["keypoint_scores"].numpy()
    torch_desc = torch_output["descriptors"].numpy()
    torch_num = torch_output["num_keypoints"].numpy()
    
    print("=== PyTorch (reference) ===")
    print(f"Keypoints: {torch_kpts.shape}, range: [{torch_kpts.min():.3f}, {torch_kpts.max():.3f}]")
    print(f"Scores: {torch_scores.shape}, range: [{torch_scores.min():.3f}, {torch_scores.max():.3f}]")
    print(f"Descriptors: {torch_desc.shape}, range: [{torch_desc.min():.3f}, {torch_desc.max():.3f}]")
    print(f"Num keypoints: {torch_num}")
    
    # ONNX inference
    onnx_path = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_open_b2_h400_w640_kp256.onnx'
    session = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    
    onnx_output = session.run(None, {"images": test_input.numpy()})
    onnx_kpts, onnx_scores, onnx_desc, onnx_num = onnx_output
    
    print("\n=== ONNX ===")
    print(f"Keypoints: {onnx_kpts.shape}, range: [{onnx_kpts.min():.3f}, {onnx_kpts.max():.3f}]")
    print(f"Scores: {onnx_scores.shape}, range: [{onnx_scores.min():.3f}, {onnx_scores.max():.3f}]")
    print(f"Descriptors: {onnx_desc.shape}, range: [{onnx_desc.min():.3f}, {onnx_desc.max():.3f}]")
    print(f"Num keypoints: {onnx_num}")
    
    # Compare
    print("\n=== Differences ===")
    kpt_diff = np.max(np.abs(torch_kpts - onnx_kpts))
    score_diff = np.max(np.abs(torch_scores - onnx_scores))
    desc_diff = np.max(np.abs(torch_desc - onnx_desc))
    num_diff = np.max(np.abs(torch_num - onnx_num))
    
    print(f"Max keypoint diff: {kpt_diff:.6f}")
    print(f"Max score diff: {score_diff:.6f}")
    print(f"Max descriptor diff: {desc_diff:.6f}")
    print(f"Max num_keypoints diff: {num_diff}")

validate_export()

  ckpt = torch.load("/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth", map_location="cpu")


=== PyTorch (reference) ===
Keypoints: (2, 256, 2), range: [8.000, 632.000]
Scores: (2, 256), range: [1.000, 1.000]
Descriptors: (2, 256, 256), range: [-0.160, 0.152]
Num keypoints: [256 256]

=== ONNX ===
Keypoints: (2, 256, 2), range: [4.000, 632.000]
Scores: (2, 256), range: [1.000, 1.000]
Descriptors: (2, 256, 256), range: [-0.145, 0.160]
Num keypoints: [256 256]

=== Differences ===
Max keypoint diff: 592.000000
Max score diff: 0.000000
Max descriptor diff: 0.161384
Max num_keypoints diff: 0
