## Run the inference with the PyTorch model

In [None]:
import torch
import lightglue_dynamo.models.superpoint_pytorch as superpoint_pytorch

# --- SuperPoint configuration ---
detection_thresh = 0.005
nms_radius = 5
max_keypoints = 256

# Pick the device once so checkpoint loading and model placement always agree.
# Calling .cuda() unconditionally would raise on a CPU-only host even though
# the checkpoint itself was loaded with a CPU fallback.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sp_th = superpoint_pytorch.SuperPointOpen(
    detection_threshold=detection_thresh,
    nms_radius=nms_radius,
    max_num_keypoints=max_keypoints,
)
print('Config:', sp_th.conf)

# Load the checkpoint directly onto the selected device.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
ckpt = torch.load(
    "/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth",
    map_location=device,
)

# Some checkpoints may be wrapped in {"state_dict": ...}
if isinstance(ckpt, dict) and "state_dict" in ckpt:
    ckpt = ckpt["state_dict"]

# Load weights; strict=True makes missing or unexpected keys an error.
sp_th.load_state_dict(ckpt, strict=True)

# Switch to inference mode and move to the chosen device. Both calls return
# the module itself, so the notebook still displays it as the cell output.
sp_th.eval().to(device)
# sp_th = torch.compile(sp_th, mode="reduce-overhead")  # "max-autotune" , "reduce-overhead"

In [None]:
import torch
import cv2
import numpy as np

# --- 1. Load and Normalize Images Individually ---
image_path1 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/debug1.png'
image_path2 = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/debug2.png'


def _load_gray_normalized(path):
    """Read ``path`` as a grayscale image and return it as float32 divided by 255."""
    raw = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if raw is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque AttributeError below.
        raise OSError(f"Could not read image (missing or unsupported file): {path}")
    return raw.astype(np.float32) / 255.0


def _pad_bottom_right(img, target_h, target_w):
    """Zero-pad a 2-D image on its bottom and right edges up to the target size."""
    h, w = img.shape
    # np.pad takes a tuple of tuples for padding ((top, bottom), (left, right))
    return np.pad(img, ((0, target_h - h), (0, target_w - w)), mode='constant')


img1 = _load_gray_normalized(image_path1)
img2 = _load_gray_normalized(image_path2)

# --- 2. Pad each image to a common, stride-aligned size ---
# SuperPoint stride is 8
stride = 8
H, W = img1.shape
H2, W2 = img2.shape

# Round the larger extent up to the next multiple of the stride. When both
# images share a shape this is exactly the per-image stride padding; when they
# differ, the smaller image is padded further so the two can be stacked.
target_h = -(-max(H, H2) // stride) * stride
target_w = -(-max(W, W2) // stride) * stride

# Padding applied to the first image (kept under the original names).
pad_h = target_h - H
pad_w = target_w - W

# Padding is added only at the bottom/right, so pixel coordinates of the
# original image content are unchanged.
img1_padded = _pad_bottom_right(img1, target_h, target_w)
img2_padded = _pad_bottom_right(img2, target_h, target_w)

# --- 3. Stack to create a batch and add channel dimension ---
# Stack along axis=0 to create the batch. Shape: (2, H_padded, W_padded)
image_batch = np.stack([img1_padded, img2_padded], axis=0)

# Add the channel dimension. Shape: (2, 1, H_padded, W_padded)
image_batch_unsqueezed = image_batch[:, None, :, :]

# --- 4. Convert to tensor and move it to the inference device ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor = torch.from_numpy(image_batch_unsqueezed).to(device)

print(f"Final tensor shape: {tensor.shape}") #torch.Size([2, 1, H_padded, W_padded])

In [None]:
# Forward pass only: gradient tracking is switched off for the duration of the
# call. The model is invoked with a dict carrying the batch under "image".
model_input = {"image": tensor}
with torch.no_grad():
    pred = sp_th(model_input)

In [None]:
import matplotlib.pyplot as plt

# Move every model output to host memory as NumPy arrays.
kpts = pred['keypoints'].cpu().numpy()
scores = pred['keypoint_scores'].cpu().numpy()
descriptors = pred['descriptors'].cpu().numpy()
num_kpts = pred['num_keypoints'].cpu().numpy()

print(f"Keypoints: {kpts.shape}")
print(f"Keypoint scores: {scores.shape}")
print(f"Descriptors: {descriptors.shape}")
print(f"Number of keypoints: {num_kpts.shape}")


def _load_color(path):
    """Read ``path`` with OpenCV's default (BGR colour) mode, failing loudly."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None on failure instead of raising.
        raise OSError(f"Could not read image (missing or unsupported file): {path}")
    return img


# Reload original images in color for better visualization
img1_color = _load_color(image_path1)
img2_color = _load_color(image_path2)
images = [img1_color, img2_color]
batch_size = tensor.shape[0]

output_images = []
for image, image_kpts, count in zip(images, kpts, num_kpts):
    # Keep only the first `count` rows, as indicated by num_keypoints.
    # NOTE(review): assumes rows past `count` are padding — confirm against the model.
    valid_kpts = image_kpts[:int(count)]

    # Convert keypoints to OpenCV's format: cv2.KeyPoint(x, y, size).
    # Cast to built-in float so the binding never has to coerce NumPy scalars.
    cv_kpts = [cv2.KeyPoint(float(p[0]), float(p[1]), 5) for p in valid_kpts]

    # Draw keypoints on the image
    output_images.append(
        cv2.drawKeypoints(image, cv_kpts, None, color=(0, 255, 0))
    )

# np.hstack needs equal heights; pad shorter images at the bottom with black
# rows so differently sized inputs can still be shown side by side. This is a
# no-op when all images already share the same height.
max_height = max(img.shape[0] for img in output_images)
output_images = [
    np.pad(img, ((0, max_height - img.shape[0]), (0, 0), (0, 0)), mode='constant')
    for img in output_images
]

# Combine images side-by-side
combined_image = np.hstack(output_images)

# Display using matplotlib (more portable than cv2.imshow)
plt.figure(figsize=(16, 8))
# OpenCV loads as BGR, matplotlib displays as RGB
plt.imshow(cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB))
plt.title('SuperPoint Keypoints')
plt.axis('off')
plt.show()