## Run inference with the PyTorch model

In [None]:
import torch
import superpoint_pytorch

# Detector settings forwarded to the SuperPoint constructor below.
detection_thresh = 0.005
nms_radius = 5
max_keypoints = 256

# Resolve the target device once so that checkpoint loading and model placement
# agree, and the cell still runs on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sp_th = superpoint_pytorch.SuperPoint(
    detection_threshold=detection_thresh,
    nms_radius=nms_radius,
    max_num_keypoints=max_keypoints,
).eval()
print('Config:', sp_th.conf)

# Load the checkpoint straight onto the selected device.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
ckpt = torch.load(
    "/home/nvidia/third_party/LightGlue-ONNX-Jetson/weights/superpoint_v6_from_tf.pth",
    map_location=device,
)

# Some checkpoints may be wrapped in {"state_dict": ...}
if "state_dict" in ckpt:
    ckpt = ckpt["state_dict"]

# strict=True makes missing or unexpected keys an error instead of a silent skip.
sp_th.load_state_dict(ckpt, strict=True)
# Move to the resolved device instead of calling .cuda() unconditionally,
# which raises when no GPU is available.
sp_th.eval().to(device)
# sp_th = torch.compile(sp_th, mode="reduce-overhead")  # "max-autotune" , "reduce-overhead"

In [None]:
import torch, cv2, numpy as np
import matplotlib.pyplot as plt

# Load image (grayscale)
image_path = '/home/nvidia/third_party/LightGlue-ONNX-Jetson/assets/debug1.png'

img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # [H,W]
# cv2.imread returns None (it does not raise) when the file is missing or cannot
# be decoded; fail here with a clear message instead of an AttributeError below.
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# Scale 8-bit intensities to float32 in [0, 1].
img = img.astype(np.float32) / 255.0

# Pad image so that H,W are divisible by 8 (SuperPoint stride requirement).
# (-x) % 8 is the distance from x up to the next multiple of 8 (0 if already aligned).
H, W = img.shape
pad_h = (-H) % 8
pad_w = (-W) % 8
# Zero-pad on the bottom and right only, so existing pixel coordinates are unchanged.
img = np.pad(img, ((0, pad_h), (0, pad_w)))

# Convert to tensor [B,1,H,W] and send to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor = torch.from_numpy(img)[None, None].to(device)  # shape [1,1,H,W]

In [None]:
# Forward pass with autograd disabled; the model takes a dict holding the image
# tensor under the "image" key.
inputs = {"image": tensor}
with torch.no_grad():
    pred = sp_th(inputs)

In [None]:
# Take the first batch entry of each output and convert it to a NumPy array on the host.
kpts = pred["keypoints"][0].cpu().numpy()       # (N,2)
scores = pred["keypoint_scores"][0].cpu().numpy()

# Draw the (padded) input image and overlay each keypoint, using column 0 as x
# and column 1 as y.
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(img, cmap='gray')
ax.scatter(kpts[:, 0], kpts[:, 1], s=4, c='lime')
ax.set_title(f"SuperPoint detected {len(kpts)} keypoints")
plt.show()