# 08 - SuperPoint + LightGlue + MAGSAC with Gray/Edge, Anchor Crop, and Optuna

Pipeline goal:
- gray filtering for map and frame images (optional edge enhancement)
- SuperPoint + LightGlue matching with MAGSAC outlier filtering
- per-frame intrinsics are used for scale/height handling
- anchor-based search from last known position (`id-1` preferred)
- test IDs below the anchor ID are processed in descending order, walking backwards from the anchor (for this dataset: IDs 12 down to 1, anchor at 13)
- Optuna optimization for Kaggle metric




## Evaluation Metric (Kaggle style)

For each frame:
`d = sqrt((x_pred - x_gt)^2 + (y_pred - y_gt)^2)`

Score (`acc@T` is the fraction of frames with `d <= T` pixels):
`100 * mean(acc@25px, acc@125px, acc@500px)`




In [None]:
from dataclasses import dataclass
from pathlib import Path
from time import perf_counter
from typing import Dict, List, Optional, Tuple

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch

# Optuna is optional: record whether it can be imported so later cells can
# refuse to run the hyperparameter search instead of failing at import time.
try:
    import optuna
except Exception:
    OPTUNA_AVAILABLE = False
else:
    OPTUNA_AVAILABLE = True

print(f"Optuna available: {OPTUNA_AVAILABLE}")




In [None]:
# Dependency check for SuperPoint + LightGlue
# Fail fast with an actionable install hint: the matcher class defined later
# in this notebook uses LightGlue, SuperPoint and rbd.
try:
    from lightglue import LightGlue, SuperPoint
    from lightglue.utils import rbd
except Exception as e:
    raise ImportError('This notebook requires LightGlue+SuperPoint. Install with: pip install lightglue') from e

# Paths (robust for running from project root or notebooks/)
# Probe the working directory and its parent (absolute and relative spellings)
# and keep the first resolved location that contains a data/data directory.
CANDIDATE_ROOTS = [Path.cwd(), Path.cwd().parent, Path('.'), Path('..')]
PROJECT_ROOT = None
_seen_roots = set()  # resolved paths already examined, so duplicates are skipped
for cand in CANDIDATE_ROOTS:
    try:
        root = cand.resolve()
    except Exception:
        # A candidate that cannot be resolved is ignored; try the next one.
        continue
    key = str(root)
    if key in _seen_roots:
        continue
    _seen_roots.add(key)
    if (root / 'data' / 'data').exists():
        PROJECT_ROOT = root
        break
if PROJECT_ROOT is None:
    raise FileNotFoundError('Could not find project root containing data/data.')

# Dataset layout, all relative to the discovered project root.
DATA_ROOT = PROJECT_ROOT / 'data' / 'data'
TRAIN_IMG_DIR = DATA_ROOT / 'train_data' / 'train_images'
TEST_IMG_DIR = DATA_ROOT / 'test_data' / 'test_images'
TRAIN_POS_CSV = DATA_ROOT / 'train_data' / 'train_pos.csv'
TRAIN_CAM_CSV = DATA_ROOT / 'train_data' / 'train_cam.csv'
TEST_CAM_CSV = DATA_ROOT / 'test_data' / 'test_cam.csv'
MAP_PATH = DATA_ROOT / 'map.png'

# Split / runtime
SPLIT_SEED = 42  # seed for the fit/val shuffle
TRAIN_FRACTION = 0.8  # share of train IDs that go to the fit split
IMAGE_MAX_SIDE = 1400  # set None for full-res

# Preprocessing (gray + optional edge)
ENABLE_GRAY_FILTER = True
ENABLE_EDGE_FILTER = True
EDGE_CANNY_LOW = 50  # lower Canny threshold
EDGE_CANNY_HIGH = 150  # upper Canny threshold
EDGE_DILATE_KERNEL = 3  # square dilation kernel size; values <= 1 skip dilation
EDGE_BLEND_GRAY = 0.70  # weight of the gray image in the gray/edge blend
EDGE_BLEND_EDGE = 0.30  # weight of the edge map in the gray/edge blend

# Matcher settings
LIGHTGLUE_MAX_NUM_KEYPOINTS = 2048  # passed to the matcher as max_num_keypoints

# Anchor / ordering
TEST_REVERSE_ANCHOR_ID = 13  # test IDs below this value are processed in descending order

# Base hyperparameters
BASE_HPARAMS = {
    'match_min_conf': 0.20,  # minimum match score kept by the matcher
    'magsac_reproj_thr': 3.0,  # reprojection threshold for the homography fit
    'virtual_zoom_out': 1.80,  # expected scale is divided by this before resizing the query
    'crop_width_factor': 3.0,  # map crop width as a multiple of the scaled query width
    'crop_height_factor_n': 4.5,  # map crop height as a multiple of the scaled query height
    'min_inliers': 12,  # fewer inliers than this falls back to the anchor position
    'scale_low_mul': 0.85,  # multiplier on the calibrated lower scale bound
    'scale_high_mul': 1.20,  # multiplier on the calibrated upper scale bound
}

# Height calibration settings
HEIGHT_CALIB_FRAMES = 30  # upper limit on fit frames sampled for calibration (at least 6 are allowed)
HEIGHT_LOCAL_RADIUS = 900  # minimum half-size (px) of the map window searched around the GT
HEIGHT_SCALE_CANDIDATES = [0.09, 0.11, 0.13, 0.15, 0.17, 0.19, 0.22, 0.25, 0.28]
HEIGHT_MIN_NCC = 0.08  # calibration samples with a lower best NCC are discarded

# Optuna settings
RUN_OPTUNA = False
OPTUNA_TRIALS = 15
OPTUNA_VAL_LIMIT = 45  # number of validation frames (lowest IDs) used per trial
OPTUNA_TIMEOUT_S = None  # passed to study.optimize as timeout

SUBMISSION_OUT = PROJECT_ROOT / 'build' / 'submission_08_superpoint_lightglue_gray_magsac.csv'

# Load data
train_pos_df = pd.read_csv(TRAIN_POS_CSV)
train_cam_df = pd.read_csv(TRAIN_CAM_CSV)
test_cam_df = pd.read_csv(TEST_CAM_CSV)

# One row per train frame that has both camera data and a position (inner join on id).
train_df = train_cam_df.merge(train_pos_df, on='id', how='inner').copy()
train_df['id'] = train_df['id'].astype(int)
test_cam_df['id'] = test_cam_df['id'].astype(int)
train_df = train_df.sort_values('id').reset_index(drop=True)
test_cam_df = test_cam_df.sort_values('id').reset_index(drop=True)

# cv2.imread returns None instead of raising when the file cannot be read.
map_bgr = cv2.imread(str(MAP_PATH), cv2.IMREAD_COLOR)
if map_bgr is None:
    raise FileNotFoundError(f'Map not found: {MAP_PATH}')
map_rgb = cv2.cvtColor(map_bgr, cv2.COLOR_BGR2RGB)
MAP_H, MAP_W = map_rgb.shape[:2]

print('project_root:', PROJECT_ROOT)
print('train:', len(train_df), 'test:', len(test_cam_df), 'map:', (MAP_W, MAP_H))
print('optuna_available:', OPTUNA_AVAILABLE)





In [None]:
# Reproducible train/val split
# Shuffle the unique train IDs with a fixed seed, then cut at TRAIN_FRACTION.
rng = np.random.default_rng(SPLIT_SEED)
all_ids = train_df['id'].unique().copy()
rng.shuffle(all_ids)

# Clamp so the fit split gets at least one ID and, when there are two or more
# IDs, the validation split gets at least one as well.
n_fit = max(1, min(len(all_ids) - 1, int(round(len(all_ids) * TRAIN_FRACTION))))
fit_ids = set(int(x) for x in all_ids[:n_fit])
val_ids = set(int(x) for x in all_ids[n_fit:])

fit_df = train_df[train_df['id'].isin(fit_ids)].copy().sort_values('id').reset_index(drop=True)
val_df = train_df[train_df['id'].isin(val_ids)].copy().sort_values('id').reset_index(drop=True)

print('fit:', len(fit_df), 'val:', len(val_df))




In [None]:
# Utilities: image loading, preprocessing, metric, ordering
# Resized RGB images, keyed by (split, image_id, max_side).
_IMAGE_CACHE: Dict[Tuple[str, int, Optional[int]], np.ndarray] = {}
# Preprocessed match images, keyed by split, image id and the preprocessing
# settings that were active when the image was produced.
_PROC_CACHE: Dict[Tuple[str, int, Optional[int], bool, bool, int, int, int], np.ndarray] = {}


def resolve_image_path(image_id: int, is_train: bool) -> Path:
    """Locate the image file for ``image_id`` in the train or test image folder.

    Candidate names are tried in a fixed order: the zero-padded four-digit
    stem first, then the plain integer stem, each combined with the known
    extensions. The first path that exists is returned.

    Raises:
        FileNotFoundError: if none of the candidate files exists.
    """
    folder = TRAIN_IMG_DIR if is_train else TEST_IMG_DIR
    numeric_id = int(image_id)
    candidates = (
        folder / f'{stem}{suffix}'
        for stem in (f'{numeric_id:04d}', str(numeric_id))
        for suffix in ('.JPG', '.jpg', '.jpeg', '.JPEG', '.png', '.PNG')
    )
    found = next((path for path in candidates if path.exists()), None)
    if found is None:
        raise FileNotFoundError(f'Image not found for id={image_id} in {folder}')
    return found


def resize_keep_aspect(img_rgb: np.ndarray, max_side: Optional[int]) -> np.ndarray:
    """Downscale ``img_rgb`` so that its longer side is at most ``max_side``.

    The input is returned untouched when ``max_side`` is None or the image
    already fits. Each output dimension is floored at 32 pixels.
    """
    if max_side is None:
        return img_rgb

    height, width = img_rgb.shape[:2]
    longest = max(height, width)
    if longest <= int(max_side):
        return img_rgb

    ratio = float(max_side) / float(longest)
    # cv2.resize expects the target size as (width, height).
    target = tuple(max(32, int(round(dim * ratio))) for dim in (width, height))
    return cv2.resize(img_rgb, target, interpolation=cv2.INTER_AREA)


def load_image_cached(image_id: int, split: str, max_side: Optional[int]) -> np.ndarray:
    """Return the RGB image for ``image_id``, resized to ``max_side``, with memoization.

    Splits named 'train', 'fit' or 'val' read from the train image folder;
    any other split name reads from the test image folder.

    Raises:
        RuntimeError: if the resolved file cannot be decoded by OpenCV.
    """
    cache_key = (split, int(image_id), max_side)
    try:
        return _IMAGE_CACHE[cache_key]
    except KeyError:
        pass

    path = resolve_image_path(int(image_id), is_train=split in {'train', 'fit', 'val'})
    pixels_bgr = cv2.imread(str(path), cv2.IMREAD_COLOR)
    if pixels_bgr is None:
        raise RuntimeError(f'Cannot read image: {path}')

    image = resize_keep_aspect(cv2.cvtColor(pixels_bgr, cv2.COLOR_BGR2RGB), max_side=max_side)
    _IMAGE_CACHE[cache_key] = image
    return image


def to_gray_rgb(img_rgb: np.ndarray) -> np.ndarray:
    """Convert an RGB image to grayscale and expand it back to three channels."""
    return cv2.cvtColor(cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY), cv2.COLOR_GRAY2RGB)


def preprocess_for_matching(
    img_rgb: np.ndarray,
    enable_gray: bool,
    enable_edge: bool,
    canny_low: int,
    canny_high: int,
) -> np.ndarray:
    """Apply the optional gray filter and the optional edge enhancement.

    With edge enhancement on, Canny edges (dilated when EDGE_DILATE_KERNEL > 1)
    are blended into the gray image using the EDGE_BLEND_* weights and the
    result is returned as a three-channel image. With both options off the
    input array itself is returned.
    """
    base = to_gray_rgb(img_rgb) if bool(enable_gray) else img_rgb

    if bool(enable_edge):
        luminance = cv2.cvtColor(base, cv2.COLOR_RGB2GRAY)
        edge_map = cv2.Canny(luminance, int(canny_low), int(canny_high))

        kernel_size = int(max(1, EDGE_DILATE_KERNEL))
        if kernel_size > 1:
            # Thicken the edges so they survive later downscaling.
            structuring = np.ones((kernel_size, kernel_size), dtype=np.uint8)
            edge_map = cv2.dilate(edge_map, structuring, iterations=1)

        blended = cv2.addWeighted(luminance, float(EDGE_BLEND_GRAY), edge_map, float(EDGE_BLEND_EDGE), 0.0)
        return cv2.cvtColor(blended, cv2.COLOR_GRAY2RGB)

    return base


def get_match_image(image_id: int, split: str) -> np.ndarray:
    """Return the preprocessed (gray/edge filtered) image used for matching.

    Results are memoized in ``_PROC_CACHE``. The cache key contains every
    module-level setting that influences the output, including the gray/edge
    blend weights, so changing any of them in the notebook cannot hand back
    an image produced with the old settings.

    Args:
        image_id: numeric frame ID.
        split: split name forwarded to ``load_image_cached``.

    Returns:
        The preprocessed image as produced by ``preprocess_for_matching``.
    """
    key = (
        split,
        int(image_id),
        IMAGE_MAX_SIDE,
        bool(ENABLE_GRAY_FILTER),
        bool(ENABLE_EDGE_FILTER),
        int(EDGE_CANNY_LOW),
        int(EDGE_CANNY_HIGH),
        int(EDGE_DILATE_KERNEL),
        # The blend weights are read inside preprocess_for_matching, so they
        # must be part of the key as well.
        float(EDGE_BLEND_GRAY),
        float(EDGE_BLEND_EDGE),
    )
    if key in _PROC_CACHE:
        return _PROC_CACHE[key]

    rgb = load_image_cached(int(image_id), split=split, max_side=IMAGE_MAX_SIDE)
    proc = preprocess_for_matching(
        rgb,
        enable_gray=ENABLE_GRAY_FILTER,
        enable_edge=ENABLE_EDGE_FILTER,
        canny_low=EDGE_CANNY_LOW,
        canny_high=EDGE_CANNY_HIGH,
    )
    _PROC_CACHE[key] = proc
    return proc


def choose_processing_order(ids: List[int], reverse_anchor_id: int) -> List[int]:
    """Order IDs so those below the anchor come first, descending, then the rest ascending."""
    pivot = int(reverse_anchor_id)
    below: List[int] = []
    at_or_above: List[int] = []
    for value in ids:
        (below if value < pivot else at_or_above).append(value)
    below.sort(reverse=True)
    at_or_above.sort()
    return below + at_or_above


def eval_metric_from_df(df: pd.DataFrame) -> Dict[str, float]:
    """Compute the pixel-error statistics and the Kaggle-style score.

    ``df`` needs the columns pred_x, pred_y, gt_x and gt_y. The score is
    100 times the mean of the accuracies at 25, 125 and 500 pixels. An empty
    frame yields n = 0.0 and NaN for every other entry.
    """
    if len(df) == 0:
        nan_fields = ('mean_err_px', 'median_err_px', 'acc_25', 'acc_125', 'acc_500', 'score')
        return {'n': 0.0, **{field: np.nan for field in nan_fields}}

    dx = df['pred_x'] - df['gt_x']
    dy = df['pred_y'] - df['gt_y']
    dist = np.sqrt(dx ** 2 + dy ** 2)

    # Fraction of frames whose error is within each pixel threshold.
    acc = [float((dist <= limit).mean()) for limit in (25.0, 125.0, 500.0)]

    return {
        'n': float(len(df)),
        'mean_err_px': float(dist.mean()),
        'median_err_px': float(dist.median()),
        'acc_25': acc[0],
        'acc_125': acc[1],
        'acc_500': acc[2],
        'score': 100.0 * float(np.mean(acc)),
    }





In [None]:
# SuperPoint + LightGlue + MAGSAC matcher
class SuperPointLightGlueMagsacMatcher:
    """SuperPoint keypoints matched with LightGlue, plus a MAGSAC homography fit.

    The two networks are created on the first call to ``match`` so that
    constructing the object stays cheap. ``min_conf`` is a plain attribute
    and may be reassigned between runs.
    """

    def __init__(self, min_conf: float = 0.2, max_num_keypoints: int = 2048, device: Optional[str] = None):
        self.min_conf = float(min_conf)
        self.max_num_keypoints = int(max_num_keypoints)
        if device:
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Filled in by _lazy_init().
        self.extractor = None
        self.matcher = None

    def _lazy_init(self):
        """Build the extractor and the matcher unless both already exist."""
        if self.extractor is None or self.matcher is None:
            self.extractor = SuperPoint(max_num_keypoints=self.max_num_keypoints).eval().to(self.device)
            self.matcher = LightGlue(features='superpoint').eval().to(self.device)

    def _to_tensor(self, img_rgb: np.ndarray) -> torch.Tensor:
        """Turn an RGB image into a float gray tensor with two leading singleton axes, scaled by 1/255."""
        luminance = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
        tensor = torch.from_numpy(luminance).float()[None, None] / 255.0
        return tensor.to(self.device)

    @torch.inference_mode()
    def match(self, img0_rgb: np.ndarray, img1_rgb: np.ndarray):
        """Match two RGB images.

        Returns a dict with the keypoints of both images ('k0', 'k1'), the
        index pairs of the kept matches ('matches') and their scores
        ('conf'). Matches scoring below ``self.min_conf`` are dropped.
        """
        self._lazy_init()

        feats0 = self.extractor.extract(self._to_tensor(img0_rgb))
        feats1 = self.extractor.extract(self._to_tensor(img1_rgb))
        result = self.matcher({'image0': feats0, 'image1': feats1})
        feats0, feats1, result = rbd(feats0), rbd(feats1), rbd(result)

        k0 = feats0['keypoints'].detach().cpu().numpy().astype(np.float32)
        k1 = feats1['keypoints'].detach().cpu().numpy().astype(np.float32)

        pairs = result['matches'].detach().cpu().numpy().astype(np.int32)
        if pairs.size == 0:
            return {
                'k0': k0,
                'k1': k1,
                'matches': np.zeros((0, 2), dtype=np.int32),
                'conf': np.zeros((0,), dtype=np.float32),
            }

        if pairs.ndim != 2:
            pairs = pairs.reshape(-1, 2)

        scores = None
        if 'scores' in result:
            scores = result['scores'].detach().cpu().numpy().astype(np.float32)
        # Missing or mis-sized scores are replaced by a uniform confidence of 1.
        if scores is None or scores.shape[0] != pairs.shape[0]:
            scores = np.ones((pairs.shape[0],), dtype=np.float32)

        confident = scores >= self.min_conf
        return {'k0': k0, 'k1': k1, 'matches': pairs[confident], 'conf': scores[confident]}

    @staticmethod
    def magsac_homography(k0: np.ndarray, k1: np.ndarray, matches: np.ndarray, reproj_thr: float = 3.0):
        """Fit a homography from image 0 to image 1 with OpenCV's USAC_MAGSAC.

        Returns ``(H, inlier_mask)``. ``H`` is None and the mask is all False
        when fewer than 8 matches are given or OpenCV finds no model.
        """
        n_matches = matches.shape[0]
        if n_matches < 8:
            return None, np.zeros((n_matches,), dtype=bool)

        src_pts = k0[matches[:, 0]].astype(np.float32)
        dst_pts = k1[matches[:, 1]].astype(np.float32)

        H, mask = cv2.findHomography(
            src_pts,
            dst_pts,
            method=cv2.USAC_MAGSAC,
            ransacReprojThreshold=float(reproj_thr),
            maxIters=10000,
            confidence=0.999,
        )

        if H is None or mask is None:
            return None, np.zeros((n_matches,), dtype=bool)

        return H, mask.ravel().astype(bool)


# Shared matcher instance used by the localization code below; its min_conf
# attribute is reassigned before each validation/optimization/test run.
matcher = SuperPointLightGlueMagsacMatcher(
    min_conf=BASE_HPARAMS['match_min_conf'],
    max_num_keypoints=LIGHTGLUE_MAX_NUM_KEYPOINTS,
)
print('matcher device:', matcher.device)





In [None]:
# Height/scale model from train GT + per-frame intrinsics
@dataclass
class HeightScaleModel:
    """Calibration result that links a frame's focal length to its query-to-map scale."""

    # Median of (best template scale * focal length) over the calibration frames used.
    rel_height_ref: float
    # Median best template scale over the calibration frames used.
    scale_ref: float
    # Lower bound for the expected scale before the hyperparameter multiplier is applied.
    scale_low: float
    # Upper bound for the expected scale before the hyperparameter multiplier is applied.
    scale_high: float
    # Number of calibration frames behind the statistics (0 for the hard-coded fallback).
    n_samples: int


def focal_from_row(row) -> float:
    """Return a single focal length for a camera row.

    Uses the mean of ``fx`` and ``fy`` when both are finite, whichever one is
    finite otherwise, and 1.0 when neither attribute is available or finite.
    """
    fx, fy = (float(getattr(row, name, np.nan)) for name in ('fx', 'fy'))
    usable = [value for value in (fx, fy) if np.isfinite(value)]
    if len(usable) == 2:
        return 0.5 * (fx + fy)
    if usable:
        return usable[0]
    return 1.0


def extract_map_crop(map_img: np.ndarray, center_xy: Tuple[float, float], radius_px: int) -> Tuple[np.ndarray, int, int]:
    """Cut a square window of half-size ``radius_px`` (at least 32) around ``center_xy``.

    The window is clipped to the map and is never empty. Returns the crop
    together with the x and y map coordinates of its top-left corner.
    """
    map_h, map_w = map_img.shape[0], map_img.shape[1]
    half = int(max(32, radius_px))
    col_c = round(float(center_xy[0]))
    row_c = round(float(center_xy[1]))

    left = int(np.clip(col_c - half, 0, max(0, map_w - 1)))
    top = int(np.clip(row_c - half, 0, max(0, map_h - 1)))
    right = int(np.clip(col_c + half, 0, map_w))
    bottom = int(np.clip(row_c + half, 0, map_h))

    # Guarantee at least one column and one row when the centre lies off the map.
    if right <= left:
        right = min(map_w, left + 1)
    if bottom <= top:
        bottom = min(map_h, top + 1)

    return map_img[top:bottom, left:right], left, top


def template_best_scale_for_gt(
    q_proc: np.ndarray,
    gt_xy: Tuple[float, float],
    map_proc: np.ndarray,
    scale_candidates: List[float],
    local_radius: int,
) -> Tuple[float, float]:
    """Find the query scale whose template correlates best with the map near the GT.

    Each candidate scale resizes the query, which is then matched
    (TM_CCOEFF_NORMED) against a map window centred on ``gt_xy``. Returns
    ``(best_scale, best_ncc)``, or ``(nan, -1.0)`` when no candidate could
    be evaluated.
    """
    top_scale = np.nan
    top_ncc = -1.0

    for candidate in scale_candidates:
        factor = float(candidate)
        template = cv2.resize(q_proc, None, fx=factor, fy=factor, interpolation=cv2.INTER_AREA)
        tpl_h, tpl_w = template.shape[:2]
        if tpl_h < 8 or tpl_w < 8:
            continue

        # The search window must be large enough to contain the template.
        search_radius = max(int(local_radius), int(max(tpl_h, tpl_w) * 1.2))
        window, _, _ = extract_map_crop(map_proc, center_xy=gt_xy, radius_px=search_radius)
        if window.shape[0] < tpl_h or window.shape[1] < tpl_w:
            continue

        response = cv2.matchTemplate(window, template, method=cv2.TM_CCOEFF_NORMED)
        peak = float(cv2.minMaxLoc(response)[1])
        if peak > top_ncc:
            top_ncc = peak
            top_scale = factor

    return float(top_scale), float(top_ncc)


def derive_height_scale_model(fit_subset: pd.DataFrame, map_proc: np.ndarray) -> Tuple[HeightScaleModel, pd.DataFrame]:
    """Calibrate the focal-length-to-scale relation from frames with known positions.

    For an evenly spaced sample of frames in ``fit_subset`` the best template
    scale around the ground-truth position is searched; samples whose best
    NCC reaches HEIGHT_MIN_NCC contribute ``rel_height = scale * focal``.

    Args:
        fit_subset: frames with ``id``, ``x_pixel``, ``y_pixel`` and the
            camera fields read by ``focal_from_row``.
        map_proc: preprocessed map image to match against.

    Returns:
        ``(model, diagnostics)``. The diagnostics frame has one row per
        accepted sample. When no sample is accepted, a hard-coded fallback
        model with ``n_samples=0`` and an empty frame are returned.
    """
    ids = fit_subset['id'].astype(int).tolist()
    max_frames = int(max(6, HEIGHT_CALIB_FRAMES))
    if len(ids) > max_frames:
        # Subsample evenly over the ID-ordered list instead of taking a prefix.
        idx = np.linspace(0, len(ids) - 1, max_frames, dtype=int)
        ids = [ids[i] for i in idx]

    row_by_id = {int(r.id): r for r in fit_subset.itertuples(index=False)}

    rows = []
    for qid in ids:
        row = row_by_id[int(qid)]
        q_proc = get_match_image(int(qid), split='fit')
        gt_xy = (float(row.x_pixel), float(row.y_pixel))

        s_best, ncc_best = template_best_scale_for_gt(
            q_proc=q_proc,
            gt_xy=gt_xy,
            map_proc=map_proc,
            scale_candidates=[float(x) for x in HEIGHT_SCALE_CANDIDATES],
            local_radius=int(HEIGHT_LOCAL_RADIUS),
        )

        # Keep only samples with a usable scale and a sufficiently strong correlation peak.
        if np.isfinite(s_best) and float(ncc_best) >= float(HEIGHT_MIN_NCC):
            f = focal_from_row(row)
            rel_h = float(s_best * f)
            rows.append({
                'id': int(qid),
                'focal': float(f),
                'best_scale': float(s_best),
                'best_ncc': float(ncc_best),
                'rel_height': float(rel_h),
            })

    diag = pd.DataFrame(rows)
    if len(diag) == 0:
        # No usable calibration sample: fall back to fixed defaults.
        hm = HeightScaleModel(
            rel_height_ref=180.0,
            scale_ref=0.18,
            scale_low=0.10,
            scale_high=0.30,
            n_samples=0,
        )
        return hm, diag

    # Outlier rejection on rel_height: MAD-based z-score, keep |z| <= 3.
    med_h = float(diag['rel_height'].median())
    mad_h = float(np.median(np.abs(diag['rel_height'].to_numpy() - med_h))) + 1e-6  # epsilon avoids division by zero
    z = 0.6745 * (diag['rel_height'] - med_h) / mad_h
    inlier_mask = np.abs(z) <= 3.0
    diag['height_inlier'] = inlier_mask

    inl = diag[diag['height_inlier']].copy()
    if len(inl) < 5:
        # Too few inliers for stable statistics: use every accepted sample.
        inl = diag.copy()

    rel_h = float(inl['rel_height'].median())
    scale_ref = float(inl['best_scale'].median())
    # Scale bounds: interquartile range widened by one IQR per side, limited to [0.05, 0.6].
    q1 = float(inl['best_scale'].quantile(0.25))
    q3 = float(inl['best_scale'].quantile(0.75))
    iqr = max(1e-4, q3 - q1)
    s_low = max(0.05, q1 - 1.0 * iqr)
    s_high = min(0.6, q3 + 1.0 * iqr)

    hm = HeightScaleModel(
        rel_height_ref=float(rel_h),
        scale_ref=float(scale_ref),
        scale_low=float(s_low),
        scale_high=float(s_high),
        n_samples=int(len(inl)),
    )
    return hm, diag


# Preprocess the full map once with the same gray/edge settings as the queries.
map_proc = preprocess_for_matching(
    map_rgb,
    enable_gray=ENABLE_GRAY_FILTER,
    enable_edge=ENABLE_EDGE_FILTER,
    canny_low=EDGE_CANNY_LOW,
    canny_high=EDGE_CANNY_HIGH,
)

# Calibrate the scale model on the fit split only.
height_model, height_diag_df = derive_height_scale_model(fit_df, map_proc=map_proc)
print('height_model:', height_model)
if len(height_diag_df) > 0:
    # .get with an empty default covers a diagnostics frame that lacks the inlier column.
    print('height_samples:', len(height_diag_df), 'inliers:', int(height_diag_df.get('height_inlier', pd.Series(dtype=bool)).sum()))
else:
    print('height_samples: 0')

display(height_diag_df.head(20))





In [None]:
# Anchor-based crop/zoom/padding localization
# Ground-truth positions by frame ID: every train frame, and the fit split only.
all_train_gt = {int(r.id): (float(r.x_pixel), float(r.y_pixel)) for r in train_df.itertuples(index=False)}
fit_train_gt = {int(r.id): (float(r.x_pixel), float(r.y_pixel)) for r in fit_df.itertuples(index=False)}


def get_anchor_xy(image_id: int, known_xy: Dict[int, Tuple[float, float]]) -> Tuple[Tuple[float, float], int, str]:
    """Pick the search anchor for a frame from the positions known so far.

    Preference order: the previous ID, the next ID, the known ID closest in
    number, and finally the map centre (reported with anchor ID -1). Returns
    ``(anchor_xy, anchor_id, source_label)``.
    """
    frame = int(image_id)

    for neighbor, label in ((frame - 1, 'prev_id'), (frame + 1, 'next_id')):
        if neighbor in known_xy:
            return known_xy[neighbor], neighbor, label

    if len(known_xy) == 0:
        return (MAP_W * 0.5, MAP_H * 0.5), -1, 'map_center'

    # On equal distance min() keeps the key that comes first in dict order.
    closest = min(known_xy.keys(), key=lambda k: abs(int(k) - frame))
    return known_xy[closest], int(closest), f'nearest_id_{closest}'


def compute_expected_scale(row, hm: HeightScaleModel, hparams: Dict[str, float]) -> float:
    """Predict the query-to-map scale of a frame from its focal length.

    The raw estimate ``rel_height_ref / focal`` is clipped to the calibrated
    scale range after widening it with the ``scale_low_mul`` and
    ``scale_high_mul`` hyperparameters.
    """
    focal = max(1e-6, focal_from_row(row))  # guard against a zero focal length
    raw_scale = float(hm.rel_height_ref / focal)
    lower = hm.scale_low * float(hparams['scale_low_mul'])
    upper = hm.scale_high * float(hparams['scale_high_mul'])
    return float(np.clip(raw_scale, lower, upper))


def resize_rgb(img: np.ndarray, scale: float) -> np.ndarray:
    """Resize ``img`` by ``scale`` using area interpolation; each side is at least 8 pixels."""
    factor = float(scale)
    src_h, src_w = img.shape[:2]
    # cv2.resize expects the target size as (width, height).
    target = (max(8, int(round(src_w * factor))), max(8, int(round(src_h * factor))))
    return cv2.resize(img, target, interpolation=cv2.INTER_AREA)


def make_centered_canvas(img: np.ndarray, out_w: int, out_h: int) -> Tuple[np.ndarray, Tuple[int, int]]:
    """Place ``img`` in the middle of a black three-channel canvas.

    The canvas is at least 8x8. An image that exceeds the canvas is shrunk to
    fit first. Returns the canvas and the ``(x, y)`` offset of the image's
    top-left corner inside it.
    """
    canvas_w = int(max(8, out_w))
    canvas_h = int(max(8, out_h))
    canvas = np.zeros((canvas_h, canvas_w, 3), dtype=img.dtype)

    src_h, src_w = img.shape[:2]
    if src_h > canvas_h or src_w > canvas_w:
        # Shrink uniformly by the tighter of the two fit ratios.
        fit = min(float(canvas_w) / max(1, src_w), float(canvas_h) / max(1, src_h))
        img = resize_rgb(img, max(1e-3, fit))
        src_h, src_w = img.shape[:2]

    left = (canvas_w - src_w) // 2
    top = (canvas_h - src_h) // 2
    canvas[top:top + src_h, left:left + src_w] = img
    return canvas, (left, top)


def extract_crop_by_size(map_img: np.ndarray, center_xy: Tuple[float, float], crop_w: int, crop_h: int) -> Tuple[np.ndarray, int, int]:
    """Cut a ``crop_w`` x ``crop_h`` window (each at least 16) centred on ``center_xy``.

    Near the borders the window is shifted back inside the map rather than
    shrunk. Returns the crop and the map coordinates of its top-left corner.
    """
    map_h, map_w = map_img.shape[0], map_img.shape[1]
    width = int(max(16, crop_w))
    height = int(max(16, crop_h))

    # Ideal top-left corner, then shifted so the window stays on the map.
    left = int(round(float(center_xy[0]) - width / 2.0))
    top = int(round(float(center_xy[1]) - height / 2.0))
    left = int(np.clip(left, 0, max(0, map_w - width)))
    top = int(np.clip(top, 0, max(0, map_h - height)))

    right = min(map_w, left + width)
    bottom = min(map_h, top + height)
    return map_img[top:bottom, left:right], left, top


def project_query_center(H: np.ndarray, q_shape: Tuple[int, int, int], offset_xy: Tuple[float, float]) -> Tuple[float, float]:
    """Map the centre of the query image through ``H`` and add ``offset_xy``.

    Returns ``(nan, nan)`` when the projected point has a (near-)zero
    homogeneous coordinate.
    """
    rows, cols = q_shape[:2]
    projected = H @ np.array([cols / 2.0, rows / 2.0, 1.0], dtype=np.float64)
    if abs(float(projected[2])) < 1e-12:
        return np.nan, np.nan

    # Dehomogenize, then shift by the caller's offset.
    planar = projected[:2] / projected[2]
    return float(planar[0] + offset_xy[0]), float(planar[1] + offset_xy[1])


def localize_one_frame(
    row,
    split_name: str,
    anchor_xy: Tuple[float, float],
    hm: HeightScaleModel,
    hparams: Dict[str, float],
) -> Dict[str, object]:
    """Localize one query frame inside a map crop centred on ``anchor_xy``.

    The query is rescaled to the expected map scale (divided by
    ``virtual_zoom_out``), padded onto a black canvas the size of the map
    crop, matched against the crop, and its centre is projected through the
    fitted homography. If no homography is found, the inlier count is below
    ``min_inliers``, or the projection is not finite, the anchor position is
    returned with ``used_match=False``.

    Returns:
        A dict with 'pred_xy', 'raw_matches', 'inliers', 'used_match',
        'scale_expected', 'scale_effective', 'crop_w', 'crop_h' and
        'runtime_ms'.
    """
    frame_id = int(row.id)
    # Only 'val' reads from the train image folder here; every other split name reads test images.
    image_split = 'val' if split_name == 'val' else 'test'
    query = get_match_image(frame_id, split=image_split)

    scale_expected = compute_expected_scale(row, hm=hm, hparams=hparams)
    scale_effective = float(scale_expected / max(1e-6, float(hparams['virtual_zoom_out'])))

    query_scaled = resize_rgb(query, scale_effective)
    scaled_h, scaled_w = query_scaled.shape[:2]

    # The crop size is a multiple of the scaled query size, limited to the map size.
    crop_w = int(round(float(hparams['crop_width_factor']) * float(scaled_w)))
    crop_h = int(round(float(hparams['crop_height_factor_n']) * float(scaled_h)))
    crop_w = int(np.clip(crop_w, 64, MAP_W))
    crop_h = int(np.clip(crop_h, 64, MAP_H))

    q_canvas, _ = make_centered_canvas(query_scaled, out_w=crop_w, out_h=crop_h)
    map_crop, x0, y0 = extract_crop_by_size(
        map_proc,
        center_xy=anchor_xy,
        crop_w=crop_w,
        crop_h=crop_h,
    )

    # If crop got clipped at borders, resize query canvas to exact crop size.
    if q_canvas.shape[:2] != map_crop.shape[:2]:
        q_canvas, _ = make_centered_canvas(query_scaled, out_w=map_crop.shape[1], out_h=map_crop.shape[0])

    started = perf_counter()
    matched = matcher.match(q_canvas, map_crop)
    H, inlier_mask = matcher.magsac_homography(
        matched['k0'],
        matched['k1'],
        matched['matches'],
        reproj_thr=float(hparams['magsac_reproj_thr']),
    )
    elapsed_ms = (perf_counter() - started) * 1000.0

    raw_matches = int(matched['matches'].shape[0])
    inliers = 0 if inlier_mask is None else int(inlier_mask.sum())

    def _result(xy: Tuple[float, float], used: bool) -> Dict[str, object]:
        # Single place that assembles the result record for all outcomes.
        return {
            'pred_xy': (float(xy[0]), float(xy[1])),
            'raw_matches': raw_matches,
            'inliers': inliers,
            'used_match': bool(used),
            'scale_expected': float(scale_expected),
            'scale_effective': float(scale_effective),
            'crop_w': int(map_crop.shape[1]),
            'crop_h': int(map_crop.shape[0]),
            'runtime_ms': float(elapsed_ms),
        }

    if H is None or inliers < int(hparams['min_inliers']):
        return _result(anchor_xy, False)

    px, py = project_query_center(H, q_canvas.shape, offset_xy=(x0, y0))
    if np.isfinite(px) and np.isfinite(py):
        # Keep the prediction on the map.
        clipped = (float(np.clip(px, 0, MAP_W - 1)), float(np.clip(py, 0, MAP_H - 1)))
        return _result(clipped, True)

    return _result(anchor_xy, False)


def localize_split(
    query_df: pd.DataFrame,
    split_name: str,
    known_xy_init: Dict[int, Tuple[float, float]],
    hm: HeightScaleModel,
    hparams: Dict[str, float],
    reverse_prefix_for_test: bool = False,
) -> pd.DataFrame:
    """Localize every frame of ``query_df`` sequentially, chaining anchors.

    Each prediction is added to the pool of known positions, so later frames
    can anchor on earlier predictions. Frames are processed in ascending ID
    order unless ``split_name == 'test'`` and ``reverse_prefix_for_test`` is
    set, in which case ``choose_processing_order`` decides the order.

    Returns:
        One row per frame, sorted by ID, with the prediction, the ground
        truth and error (NaN when ``query_df`` has no GT columns), the anchor
        used and the matching diagnostics.
    """
    known_xy = dict(known_xy_init)  # work on a copy; the caller's dict stays untouched

    frame_ids = query_df['id'].astype(int).tolist()
    use_reverse = split_name == 'test' and bool(reverse_prefix_for_test)
    if use_reverse:
        order = choose_processing_order(frame_ids, reverse_anchor_id=int(TEST_REVERSE_ANCHOR_ID))
    else:
        order = sorted(frame_ids)

    row_by_id = {int(r.id): r for r in query_df.itertuples(index=False)}
    has_gt = 'x_pixel' in query_df.columns and 'y_pixel' in query_df.columns
    gt_by_id = (
        {int(r.id): (float(r.x_pixel), float(r.y_pixel)) for r in query_df.itertuples(index=False)}
        if has_gt
        else {}
    )

    records = []
    t_loop = perf_counter()
    for j, qid in enumerate(order, 1):
        frame_id = int(qid)
        anchor_xy, anchor_id, anchor_src = get_anchor_xy(frame_id, known_xy)

        out = localize_one_frame(
            row=row_by_id[frame_id],
            split_name=split_name,
            anchor_xy=anchor_xy,
            hm=hm,
            hparams=hparams,
        )

        pred_x, pred_y = out['pred_xy']
        known_xy[frame_id] = out['pred_xy']

        gt_x, gt_y = gt_by_id.get(frame_id, (np.nan, np.nan))
        err = np.nan if np.isnan(gt_x) else float(np.hypot(pred_x - gt_x, pred_y - gt_y))

        records.append({
            'id': frame_id,
            'pred_x': float(pred_x),
            'pred_y': float(pred_y),
            'gt_x': float(gt_x),
            'gt_y': float(gt_y),
            'err_px': err,
            'anchor_id': int(anchor_id),
            'anchor_source': anchor_src,
            'raw_matches': int(out['raw_matches']),
            'inliers': int(out['inliers']),
            'used_match': bool(out['used_match']),
            'scale_expected': float(out['scale_expected']),
            'scale_effective': float(out['scale_effective']),
            'crop_w': int(out['crop_w']),
            'crop_h': int(out['crop_h']),
            'runtime_ms': float(out['runtime_ms']),
        })

        # Progress line every ten frames.
        if j % 10 == 0:
            dt = perf_counter() - t_loop
            print(f'[{split_name}] done {j}/{len(order)} | avg {dt/max(1,j):.2f}s/frame')

    return pd.DataFrame(records).sort_values('id').reset_index(drop=True)




In [None]:
# Baseline validation run
# Make sure the shared matcher uses the baseline confidence threshold.
matcher.min_conf = float(BASE_HPARAMS['match_min_conf'])

# Validation frames may anchor on fit-split ground truth and on earlier validation predictions.
val_pred_df = localize_split(
    query_df=val_df,
    split_name='val',
    known_xy_init=fit_train_gt,
    hm=height_model,
    hparams=dict(BASE_HPARAMS),
    reverse_prefix_for_test=False,
)

val_metrics = eval_metric_from_df(val_pred_df.dropna(subset=['gt_x', 'gt_y']))
print(pd.Series(val_metrics))
display(val_pred_df.head(20))




In [None]:
# Optuna optimization
# Refuse early, with an install hint, when a search was requested but optuna could not be imported.
if RUN_OPTUNA and not OPTUNA_AVAILABLE:
    raise ImportError('RUN_OPTUNA=True but optuna is not available. Install with: pip install optuna')


def run_optuna_optimization(
    n_trials: int = 15,
    val_limit: int = 45,
    timeout_s: Optional[int] = None,
    seed: int = 123,
):
    """Search the localization hyperparameters with Optuna (TPE sampler).

    Each trial localizes the first ``val_limit`` validation frames (by ID)
    and is scored with the metric from ``eval_metric_from_df``; the study
    maximizes that score. The shared matcher's ``min_conf`` is overwritten
    by every trial.

    Returns:
        ``(study, trials_df, best_hp)`` where ``trials_df`` lists all trials
        sorted by score (best first) and ``best_hp`` is BASE_HPARAMS updated
        with the best trial's parameters.
    """
    subset_size = int(min(val_limit, len(val_df)))
    val_sub = val_df.sort_values('id').head(subset_size)

    # (name, kind, low, high); the order is the order in which values are suggested.
    search_space = (
        ('match_min_conf', 'float', 0.05, 0.40),
        ('magsac_reproj_thr', 'float', 1.5, 6.0),
        ('virtual_zoom_out', 'float', 1.1, 3.0),
        ('crop_width_factor', 'float', 2.6, 3.8),
        ('crop_height_factor_n', 'float', 4.0, 5.8),
        ('min_inliers', 'int', 8, 30),
        ('scale_low_mul', 'float', 0.70, 1.0),
        ('scale_high_mul', 'float', 1.0, 1.5),
    )

    def objective(trial: 'optuna.trial.Trial') -> float:
        hp = {}
        for name, kind, low, high in search_space:
            if kind == 'int':
                hp[name] = int(trial.suggest_int(name, low, high))
            else:
                hp[name] = float(trial.suggest_float(name, low, high))

        matcher.min_conf = float(hp['match_min_conf'])

        started = perf_counter()
        pred = localize_split(
            query_df=val_sub,
            split_name='val',
            known_xy_init=fit_train_gt,
            hm=height_model,
            hparams=hp,
            reverse_prefix_for_test=False,
        )
        met = eval_metric_from_df(pred.dropna(subset=['gt_x', 'gt_y']))
        elapsed = perf_counter() - started

        trial.set_user_attr('mean_err_px', float(met['mean_err_px']))
        trial.set_user_attr('runtime_s', float(elapsed))
        return float(met['score'])

    study = optuna.create_study(
        direction='maximize',
        sampler=optuna.samplers.TPESampler(seed=int(seed)),
    )
    study.optimize(objective, n_trials=int(n_trials), timeout=timeout_s)

    records = [
        {
            'trial': int(t.number),
            'score': float(t.value) if t.value is not None else np.nan,
            **t.params,
            'mean_err_px': t.user_attrs.get('mean_err_px', np.nan),
            'runtime_s': t.user_attrs.get('runtime_s', np.nan),
        }
        for t in study.trials
    ]

    trials_df = pd.DataFrame(records).sort_values('score', ascending=False).reset_index(drop=True)
    best_hp = {**BASE_HPARAMS, **study.best_trial.params}
    return study, trials_df, best_hp


# Run the search only when requested; otherwise define the same names with
# empty placeholders so the following cells work either way.
if RUN_OPTUNA:
    study, trials_df, best_hp = run_optuna_optimization(
        n_trials=int(OPTUNA_TRIALS),
        val_limit=int(OPTUNA_VAL_LIMIT),
        timeout_s=OPTUNA_TIMEOUT_S,
        seed=777,
    )
    print('best score:', study.best_value)
    print('best params:', study.best_trial.params)
    display(trials_df.head(10))
else:
    study = None
    trials_df = pd.DataFrame()
    best_hp = None




In [None]:
# Final validation + test run
# Start from the baseline and overlay the Optuna result when a search was run.
final_hp = dict(BASE_HPARAMS)
if best_hp is not None:
    final_hp.update(best_hp)

matcher.min_conf = float(final_hp['match_min_conf'])

val_final_df = localize_split(
    query_df=val_df,
    split_name='val',
    known_xy_init=fit_train_gt,
    hm=height_model,
    hparams=final_hp,
    reverse_prefix_for_test=False,
)
val_final_metrics = eval_metric_from_df(val_final_df.dropna(subset=['gt_x', 'gt_y']))
print('Final validation metrics:')
print(pd.Series(val_final_metrics))

# For test: reverse order for IDs < 13, anchor at id 13
# Test frames may anchor on every train ground-truth position (fit and val).
known_for_test = dict(all_train_gt)
if TEST_REVERSE_ANCHOR_ID in all_train_gt:
    # NOTE(review): the anchor entry is already in known_for_test with the same
    # value, so this only moves it to the front of the dict's iteration order
    # (which is what min() in get_anchor_xy uses to break distance ties) —
    # confirm that this is the intent.
    known_for_test = {int(TEST_REVERSE_ANCHOR_ID): all_train_gt[int(TEST_REVERSE_ANCHOR_ID)], **known_for_test}

test_final_df = localize_split(
    query_df=test_cam_df,
    split_name='test',
    known_xy_init=known_for_test,
    hm=height_model,
    hparams=final_hp,
    reverse_prefix_for_test=True,
)
print('test predicted:', len(test_final_df))
display(test_final_df.head())




In [None]:
# Write submission.csv
# Keep only the ID and the predicted position, renamed to x_pixel / y_pixel.
submission_df = test_final_df[['id', 'pred_x', 'pred_y']].copy()
submission_df = submission_df.rename(columns={'pred_x': 'x_pixel', 'pred_y': 'y_pixel'})
submission_df = submission_df.sort_values('id').reset_index(drop=True)

# Create the output directory on demand.
SUBMISSION_OUT.parent.mkdir(parents=True, exist_ok=True)
submission_df.to_csv(SUBMISSION_OUT, index=False)
print('written:', SUBMISSION_OUT)
print(submission_df.head())




In [None]:
# Visualization on map.png
# Overlays: fit GT (blue), validation GT (lime) and predictions (red) joined by
# yellow error lines, and test predictions (orange).
fit_xy = fit_df[['x_pixel', 'y_pixel']].to_numpy(dtype=np.float32)

plt.figure(figsize=(16, 10))
plt.imshow(map_rgb)

if len(fit_xy) > 0:
    plt.scatter(fit_xy[:, 0], fit_xy[:, 1], s=8, c='deepskyblue', alpha=0.60, label='fit GT anchors')

if len(val_final_df) > 0:
    v = val_final_df.dropna(subset=['gt_x', 'gt_y']).copy()
    # One line per validation frame from its prediction to its ground truth.
    for r in v.itertuples(index=False):
        plt.plot([r.pred_x, r.gt_x], [r.pred_y, r.gt_y], color='yellow', alpha=0.45, linewidth=1.0)
    plt.scatter(v['gt_x'], v['gt_y'], s=20, c='lime', label='val GT')
    plt.scatter(v['pred_x'], v['pred_y'], s=20, c='red', label='val Pred')

if len(test_final_df) > 0:
    plt.scatter(test_final_df['pred_x'], test_final_df['pred_y'], s=14, c='orange', alpha=0.75, label='test Pred')

plt.title(
    '08 SuperPoint+LightGlue+MAGSAC Gray/Edge Anchor Localization | '
    f"score={val_final_metrics['score']:.2f}"
)
plt.legend(loc='upper right')
plt.axis('off')
plt.show()




In [None]:
# Match diagnostics: gray/edge filtered+scaled query vs filtered map crop

def resolve_anchor_xy_for_debug(
    res_row: pd.Series,
    pred_df: pd.DataFrame,
    known_xy_init: Dict[int, Tuple[float, float]],
) -> Tuple[float, float]:
    """Recover the anchor position that belongs to one result row.

    Lookup order: the initial known positions when the row's anchor source
    is 'train_gt'; the finite prediction stored in ``pred_df`` for the anchor
    ID; the initial known positions; the row's own finite prediction; and
    finally the map centre.

    NOTE(review): get_anchor_xy in this file emits the labels 'prev_id',
    'next_id', 'nearest_id_*' and 'map_center', so the 'train_gt' branch
    looks unreachable — confirm.
    """
    anchor_id = int(round(float(res_row['anchor_id'])))
    source = str(res_row.get('anchor_source', ''))
    in_known = anchor_id in known_xy_init

    if source == 'train_gt' and in_known:
        gx, gy = known_xy_init[anchor_id]
        return float(gx), float(gy)

    hits = pred_df[pred_df['id'].astype(int) == anchor_id]
    if len(hits) > 0:
        first = hits.iloc[0]
        from_pred = (float(first['pred_x']), float(first['pred_y']))
        if np.isfinite(from_pred[0]) and np.isfinite(from_pred[1]):
            return from_pred

    if in_known:
        gx, gy = known_xy_init[anchor_id]
        return float(gx), float(gy)

    own = (float(res_row['pred_x']), float(res_row['pred_y']))
    if np.isfinite(own[0]) and np.isfinite(own[1]):
        return own

    return float(MAP_W / 2.0), float(MAP_H / 2.0)


def build_match_debug_for_id(
    qid: int,
    pred_df: pd.DataFrame,
    cam_df: pd.DataFrame,
    hm: HeightScaleModel,
    hparams: Dict[str, float],
    known_xy_init: Dict[int, Tuple[float, float]],
    split_name: str = 'val',
) -> Dict[str, object]:
    """Re-run query-vs-map-crop matching for one frame and collect the pieces to plot.

    The frame's image is loaded, resized by the effective scale, padded onto a
    canvas of the crop size, and matched against a map crop centred on the
    resolved anchor position. A homography is then estimated from the matches.

    Args:
        qid: Frame id to inspect.
        pred_df: Prediction table; must contain a row with this id and the
            ``anchor_id`` / ``anchor_source`` columns.
        cam_df: Per-frame table passed to ``compute_expected_scale``; must
            contain a row with this id.
        hm: Height/scale model forwarded to ``compute_expected_scale``.
        hparams: Hyperparameters; reads ``virtual_zoom_out``,
            ``crop_width_factor``, ``crop_height_factor_n``,
            ``magsac_reproj_thr`` and ``min_inliers``.
        known_xy_init: Known positions used when resolving the anchor.
        split_name: ``'val'`` loads the image from the validation split; any
            other value loads it from the test split.

    Returns:
        A dict with the intermediate images, crop origin, scales, raw match
        output, homography, inlier mask and counts, plus the anchor and (when
        available) the projected query centre in crop-local pixel coordinates.

    Raises:
        ValueError: If ``qid`` is missing from ``pred_df`` or ``cam_df``.
    """
    frame_id = int(qid)

    pred_rows = pred_df[pred_df['id'].astype(int) == frame_id]
    if len(pred_rows) == 0:
        raise ValueError(f'id={qid} not found in pred_df')
    pred_row = pred_rows.iloc[0]

    cam_rows = cam_df[cam_df['id'].astype(int) == frame_id]
    if len(cam_rows) == 0:
        raise ValueError(f'id={qid} not found in cam_df')
    cam_row = cam_rows.iloc[0]

    # Anything that is not explicitly 'val' is read from the test images.
    image_split = 'test' if split_name != 'val' else 'val'
    anchor_xy = resolve_anchor_xy_for_debug(pred_row, pred_df=pred_df, known_xy_init=known_xy_init)

    query_img = get_match_image(frame_id, split=image_split)

    # Effective scale = expected scale divided by the (floored) virtual zoom-out.
    scale_expected = compute_expected_scale(cam_row, hm=hm, hparams=hparams)
    zoom_out = max(1e-6, float(hparams['virtual_zoom_out']))
    scale_effective = float(scale_expected / zoom_out)

    q_scaled = resize_rgb(query_img, scale_effective)
    scaled_h, scaled_w = q_scaled.shape[:2]

    # Crop size is a multiple of the scaled query size, kept within [64, map size].
    want_w = int(round(float(hparams['crop_width_factor']) * float(scaled_w)))
    want_h = int(round(float(hparams['crop_height_factor_n']) * float(scaled_h)))
    crop_w = int(np.clip(want_w, 64, MAP_W))
    crop_h = int(np.clip(want_h, 64, MAP_H))

    q_canvas, q_offset = make_centered_canvas(q_scaled, out_w=crop_w, out_h=crop_h)
    map_crop, x0, y0 = extract_crop_by_size(
        map_proc,
        center_xy=anchor_xy,
        crop_w=crop_w,
        crop_h=crop_h,
    )

    # If the returned crop has a different size than requested, rebuild the
    # canvas so both matcher inputs have identical height and width.
    crop_hw = map_crop.shape[:2]
    if q_canvas.shape[:2] != crop_hw:
        q_canvas, q_offset = make_centered_canvas(q_scaled, out_w=crop_hw[1], out_h=crop_hw[0])

    match_out = matcher.match(q_canvas, map_crop)
    H, inl = matcher.magsac_homography(
        match_out['k0'],
        match_out['k1'],
        match_out['matches'],
        reproj_thr=float(hparams['magsac_reproj_thr']),
    )

    n_raw = int(match_out['matches'].shape[0])
    n_inliers = 0 if inl is None else int(inl.sum())

    # The projected centre is only computed when a homography exists and has
    # enough inliers; the match counts as used only if that point is finite.
    pred_local_xy = (np.nan, np.nan)
    used_match = False
    if H is not None and n_inliers >= int(hparams['min_inliers']):
        cx_local, cy_local = project_query_center(H, q_canvas.shape, offset_xy=(0.0, 0.0))
        pred_local_xy = (float(cx_local), float(cy_local))
        used_match = bool(np.isfinite(cx_local) and np.isfinite(cy_local))

    # Anchor expressed relative to the crop's top-left corner.
    anchor_local_xy = (float(anchor_xy[0] - x0), float(anchor_xy[1] - y0))

    return {
        'id': frame_id,
        'anchor_id': int(pred_row['anchor_id']),
        'anchor_source': str(pred_row['anchor_source']),
        'q_scaled': q_scaled,
        'q_canvas': q_canvas,
        'q_offset': (int(q_offset[0]), int(q_offset[1])),
        'map_crop': map_crop,
        'x0': int(x0),
        'y0': int(y0),
        'scale_expected': float(scale_expected),
        'scale_effective': float(scale_effective),
        'match': match_out,
        'H': H,
        'inlier_mask': inl,
        'raw_matches': n_raw,
        'inliers': n_inliers,
        'used_match': bool(used_match),
        'pred_local_xy': pred_local_xy,
        'anchor_local_xy': anchor_local_xy,
    }


def make_side_by_side_rgb(img_left: np.ndarray, img_right: np.ndarray) -> Tuple[np.ndarray, int]:
    """Place two images next to each other on a black 3-channel canvas.

    Both images are aligned to the top edge; the shorter one is padded with
    zeros below. Besides ``(H, W, 3)`` images, 2-D grayscale ``(H, W)`` and
    single-channel ``(H, W, 1)`` images are accepted and replicated across the
    three channels.

    Args:
        img_left: Image drawn on the left; its dtype is used for the canvas.
        img_right: Image drawn on the right, cast to the canvas dtype on copy.

    Returns:
        ``(canvas, x_shift)`` where ``canvas`` has shape
        ``(max(H_left, H_right), W_left + W_right, 3)`` and ``x_shift`` is the
        column at which the right image starts (the left image's width).
    """
    # A 2-D array cannot broadcast into an (h, w, 3) slice (its last axis would
    # be matched against the channel axis), so give grayscale inputs an explicit
    # trailing channel axis and let NumPy replicate it on assignment.
    left = img_left[:, :, None] if img_left.ndim == 2 else img_left
    right = img_right[:, :, None] if img_right.ndim == 2 else img_right

    h = max(left.shape[0], right.shape[0])
    w0 = left.shape[1]
    w1 = right.shape[1]
    canvas = np.zeros((h, w0 + w1, 3), dtype=left.dtype)
    canvas[:left.shape[0], :w0] = left
    canvas[:right.shape[0], w0:w0 + w1] = right
    return canvas, int(w0)


def draw_match_debug(debug: Dict[str, object], max_lines: int = 220, seed: int = 7) -> None:
    """Plot the query canvas beside the map crop with match lines drawn across them.

    Inlier matches are drawn in lime and the rest in orangered. All keypoints
    are scattered on both halves, a dashed cyan box marks where the scaled
    query sits on its canvas, a white cross marks the anchor in the crop and a
    yellow star marks the projected query centre when it is finite.

    Args:
        debug: Dict as produced by ``build_match_debug_for_id``; reads
            ``q_canvas``, ``map_crop``, ``match``, ``inlier_mask``,
            ``q_offset``, ``q_scaled``, ``anchor_local_xy``, ``pred_local_xy``,
            ``id``, ``raw_matches`` and ``inliers``.
        max_lines: Maximum number of match lines to draw; when there are more
            matches, a random subset of this size is drawn.
        seed: Seed for the subset selection, so the same lines are drawn on
            every call.
    """
    combo, x_shift = make_side_by_side_rgb(debug['q_canvas'], debug['map_crop'])

    m = debug['match']
    pairs = m['matches']
    n_matches = int(pairs.shape[0])

    # Build a per-match boolean inlier flag. A missing mask means "no inliers";
    # a mask of the wrong length is copied into a zero-filled array of the
    # right length (truncated or padded with False).
    raw_mask = debug['inlier_mask']
    inlier_flags = np.zeros((n_matches,), dtype=bool)
    if raw_mask is not None:
        as_bool = raw_mask.astype(bool)
        if as_bool.shape[0] == n_matches:
            inlier_flags = as_bool
        else:
            n_copy = min(n_matches, as_bool.shape[0])
            inlier_flags[:n_copy] = as_bool[:n_copy]

    # Cap the number of drawn lines with a seeded sample, kept in index order.
    if n_matches > int(max_lines):
        rng = np.random.default_rng(int(seed))
        draw_idx = np.sort(rng.choice(np.arange(n_matches), size=int(max_lines), replace=False))
    else:
        draw_idx = np.arange(n_matches)

    _fig, ax = plt.subplots(figsize=(20, 8))
    ax.imshow(combo)
    # Thin separator on the seam between the two images.
    ax.axvline(x=float(x_shift) - 0.5, color='white', linewidth=1.0, alpha=0.9)

    if n_matches > 0:
        kpts_query = m['k0']
        kpts_map = m['k1']

        for idx in draw_idx:
            src = kpts_query[int(pairs[idx, 0])]
            dst = kpts_map[int(pairs[idx, 1])]
            if bool(inlier_flags[idx]):
                line_color, line_width, line_alpha = 'lime', 1.2, 0.85
            else:
                line_color, line_width, line_alpha = 'orangered', 0.6, 0.25
            # Map-side x coordinates are shifted right by the query canvas width.
            ax.plot(
                [float(src[0]), float(dst[0] + x_shift)],
                [float(src[1]), float(dst[1])],
                color=line_color,
                linewidth=line_width,
                alpha=line_alpha,
            )

        ax.scatter(kpts_query[:, 0], kpts_query[:, 1], s=7, c='deepskyblue', alpha=0.55)
        ax.scatter(kpts_map[:, 0] + x_shift, kpts_map[:, 1], s=7, c='orange', alpha=0.55)

    # Outline of the scaled query content inside its canvas.
    box_x, box_y = debug['q_offset']
    box_h, box_w = debug['q_scaled'].shape[:2]
    ax.add_patch(
        plt.Rectangle(
            (float(box_x), float(box_y)),
            float(box_w),
            float(box_h),
            fill=False,
            linewidth=1.2,
            edgecolor='cyan',
            linestyle='--',
            alpha=0.9,
        )
    )

    # Anchor position, drawn on the map-crop half.
    anchor_x, anchor_y = debug['anchor_local_xy'][0], debug['anchor_local_xy'][1]
    ax.scatter(
        [float(anchor_x + x_shift)],
        [float(anchor_y)],
        marker='x',
        c='white',
        s=70,
        linewidths=1.4,
    )

    # Projected query centre; skipped when it is NaN/inf.
    star_x, star_y = debug['pred_local_xy']
    if np.isfinite(star_x) and np.isfinite(star_y):
        ax.scatter([float(star_x + x_shift)], [float(star_y)], marker='*', c='yellow', s=120)

    ax.set_title(
        f"ID {debug['id']} | left: gray/edge filtered+scaled query (canvas, cyan box=scaled content) | "
        f"right: gray/edge filtered map crop | matches={debug['raw_matches']} inliers={debug['inliers']}"
    )
    ax.axis('off')
    plt.tight_layout()
    plt.show()


# Set DEBUG_VIS_ID to inspect a specific validation frame, e.g. DEBUG_VIS_ID = 28
DEBUG_VIS_ID = None

# Use val_final_df / final_hp when they exist in the notebook namespace;
# otherwise fall back to val_pred_df and a copy of BASE_HPARAMS.
if 'val_final_df' in globals():
    diag_pred_df = val_final_df
else:
    diag_pred_df = val_pred_df

if 'final_hp' in globals():
    diag_hparams = final_hp
else:
    diag_hparams = dict(BASE_HPARAMS)

if len(diag_pred_df) > 0:
    if DEBUG_VIS_ID is None:
        # Default to the frame with the largest error; when no row has an
        # error value, take the row with the smallest id instead.
        by_err = diag_pred_df.dropna(subset=['err_px'])
        if len(by_err) == 0:
            DEBUG_VIS_ID = int(diag_pred_df.sort_values('id').iloc[0]['id'])
        else:
            DEBUG_VIS_ID = int(by_err.sort_values('err_px', ascending=False).iloc[0]['id'])

    # Temporarily apply the diagnostic confidence threshold to the shared
    # matcher; the previous value is restored even if the rebuild fails.
    old_min_conf = float(matcher.min_conf)
    matcher.min_conf = float(diag_hparams['match_min_conf'])
    try:
        debug = build_match_debug_for_id(
            qid=int(DEBUG_VIS_ID),
            pred_df=diag_pred_df,
            cam_df=val_df,
            hm=height_model,
            hparams=diag_hparams,
            known_xy_init=fit_train_gt,
            split_name='val',
        )
    finally:
        matcher.min_conf = old_min_conf

    draw_match_debug(debug, max_lines=220, seed=7)

    # Compact textual summary of the frame that was just drawn.
    print({
        'id': int(debug['id']),
        'anchor_id': int(debug['anchor_id']),
        'anchor_source': debug['anchor_source'],
        'raw_matches': int(debug['raw_matches']),
        'inliers': int(debug['inliers']),
        'used_match': bool(debug['used_match']),
        'scale_effective': float(debug['scale_effective']),
        'query_scaled_shape_hw': tuple(map(int, debug['q_scaled'].shape[:2])),
        'query_canvas_shape_hw': tuple(map(int, debug['q_canvas'].shape[:2])),
        'map_crop_shape_hw': tuple(map(int, debug['map_crop'].shape[:2])),
    })
else:
    print('No validation predictions available for debug visualization.')




In [None]:
# Diagnostics: value counts for the categorical columns, summary statistics for
# the inlier counts, then the first 40 rows of the key per-frame columns.
for _heading, _column in (
    ('Anchor sources (val):', 'anchor_source'),
    ('Matching usage (val):', 'used_match'),
):
    print(_heading)
    print(val_final_df[_column].value_counts(dropna=False))

print('Inliers stats (val):')
print(val_final_df['inliers'].describe())

_diag_columns = [
    'id', 'err_px', 'anchor_source', 'raw_matches', 'inliers',
    'scale_expected', 'scale_effective', 'crop_w', 'crop_h', 'runtime_ms',
]
display(val_final_df[_diag_columns].head(40))


