In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

# Root folder of the dataset; every other path below is derived from it.
DATA_PATH = Path("../data/data")
MAP_PATH = DATA_PATH.joinpath("map.png")

# Image folders for the train and test splits.
TRAIN_IMG_DIR = DATA_PATH.joinpath("train_data", "train_images")
TEST_IMG_DIR = DATA_PATH.joinpath("test_data", "test_images")

# CSV tables shipped next to the image folders.
TRAIN_POS_CSV = DATA_PATH.joinpath("train_data", "train_pos.csv")
TRAIN_CAM_CSV = DATA_PATH.joinpath("train_data", "train_cam.csv")
TEST_CAM_CSV = DATA_PATH.joinpath("test_data", "test_cam.csv")



In [None]:
# Load the reference map. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so fail loudly with the offending path.
map_bgr = cv2.imread(str(MAP_PATH), cv2.IMREAD_COLOR)
if map_bgr is None:
    raise FileNotFoundError(f"Could not read map image: {MAP_PATH}")
# OpenCV decodes to BGR channel order; matplotlib expects RGB.
map_rgb = cv2.cvtColor(map_bgr, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(14, 8))
plt.imshow(map_rgb)
plt.title(f"map.png shape={map_rgb.shape}")
plt.axis("off")
plt.show()


In [None]:
# Ground-truth table of training positions; ids are forced to integers.
pos_df = pd.read_csv(TRAIN_POS_CSV).astype({"id": int})

# Scatter every ground-truth position on top of the map.
fig, ax = plt.subplots(figsize=(14, 8))
ax.imshow(map_rgb)
ax.scatter(pos_df["x_pixel"], pos_df["y_pixel"], s=8)
ax.set_title("Train GT positions on map.png")
ax.axis("off")
plt.show()

print(pos_df.describe())


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2


def overlay_train_images_on_map(
    map_rgb,
    pos_df,
    train_img_dir,
    start_idx=0,
    end_idx=20,
    scale=0.25,
    alpha=0.5,
    draw_center_dot=True
):
    """
    Draw down-scaled training images on the map, each centred on its GT position.

    map_rgb: RGB image of the map
    pos_df: dataframe with columns ['id', 'x_pixel', 'y_pixel']
    train_img_dir: path object of the train_images folder (joined with `/`)
    start_idx, end_idx: half-open row range in pos_df (row positions, not image IDs);
        end_idx is clipped to len(pos_df)
    scale: scale factor applied to each drone image before drawing
    alpha: transparency of the overlaid images
    draw_center_dot: whether to mark the GT point of each image

    Rows whose image file is missing or cannot be decoded are reported with a
    printed message and skipped. Returns None; the figure is shown via plt.show().
    """

    fig, ax = plt.subplots(figsize=(16, 10))
    ax.imshow(map_rgb)
    ax.axis("off")

    H_map, W_map = map_rgb.shape[:2]

    for idx in range(start_idx, min(end_idx, len(pos_df))):
        row = pos_df.iloc[idx]
        img_id = int(row["id"])
        x = float(row["x_pixel"])
        y = float(row["y_pixel"])

        # Probe zero-padded and plain file names for each known extension.
        img_path = None
        for ext in [".JPG", ".jpg", ".png", ".jpeg"]:
            for stem in (f"{img_id:04d}", f"{img_id}"):
                candidate = train_img_dir / f"{stem}{ext}"
                if candidate.exists():
                    img_path = candidate
                    break
            if img_path is not None:
                break

        if img_path is None:
            print(f"Image not found for id {img_id}")
            continue

        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            # imread signals a decode failure with None; cvtColor would crash on it.
            print(f"Image could not be read for id {img_id}: {img_path}")
            continue
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Resize; clamp to at least one pixel so a tiny scale cannot request a 0-sized image.
        h, w = img_rgb.shape[:2]
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        img_small = cv2.resize(img_rgb, (new_w, new_h), interpolation=cv2.INTER_AREA)

        # Overlay extent in map pixel coordinates, centred on the GT position.
        x_min = x - new_w / 2
        x_max = x + new_w / 2
        y_min = y - new_h / 2
        y_max = y + new_h / 2

        # extent is [left, right, bottom, top]; bottom=y_max keeps the image
        # upright on the map's downward-pointing y axis.
        ax.imshow(
            img_small,
            extent=[x_min, x_max, y_max, y_min],
            alpha=alpha
        )

        if draw_center_dot:
            ax.scatter([x], [y], s=40)

        ax.text(
            x, y,
            str(img_id),
            color="red",
            fontsize=10,
            ha="center",
            va="bottom"
        )

    # imshow with an explicit extent may move the axes limits to the overlay;
    # pin the view back to the full map (same limits imshow(map_rgb) uses).
    ax.set_xlim(-0.5, W_map - 0.5)
    ax.set_ylim(H_map - 0.5, -0.5)

    ax.set_title(f"Train images overlayed | idx range [{start_idx}, {end_idx})")
    plt.show()

# Overlay a single row of pos_df (row 19), drawn at 5% of its original size.
# draw_center_dot is left at its default (True).
overlay_train_images_on_map(
    map_rgb, pos_df, TRAIN_IMG_DIR,
    start_idx=19, end_idx=20,
    scale=0.05, alpha=0.8,
)

In [None]:
def read_train_image(img_id: int):
    """
    Locate and load the training image belonging to `img_id`.

    For every known extension the zero-padded name (e.g. 0013.JPG) is probed
    first, then the plain one (13.JPG), inside TRAIN_IMG_DIR. The first file
    that exists and decodes wins.

    Returns (path, rgb_image), or (None, None) when nothing usable was found.
    """
    extensions = (".JPG", ".jpg", ".png", ".jpeg", ".JPEG", ".PNG")
    stems = (f"{img_id:04d}", f"{img_id}")

    for ext in extensions:
        for stem in stems:
            candidate = TRAIN_IMG_DIR / f"{stem}{ext}"
            if not candidate.exists():
                continue
            bgr = cv2.imread(str(candidate), cv2.IMREAD_COLOR)
            if bgr is None:
                # File exists but could not be decoded; keep probing.
                continue
            return candidate, cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    return None, None

# Show six consecutive training images (rows 20..25 of pos_df) with their GT position.
sample_ids = pos_df["id"][20:26]
print(sample_ids)
plt.figure(figsize=(14, 8))
for i, img_id in enumerate(sample_ids, 1):
    p, img = read_train_image(img_id)
    ax = plt.subplot(2, 3, i)
    ax.axis("off")
    if img is None:
        # read_train_image returns (None, None) for missing files; imshow(None)
        # would raise, so keep the panel and flag the gap instead.
        ax.set_title(f"id={img_id} | image not found")
        continue
    ax.imshow(img)
    row = pos_df[pos_df["id"] == img_id].iloc[0]
    ax.set_title(f"id={img_id} | gt=({row.x_pixel:.0f},{row.y_pixel:.0f})")
plt.tight_layout()
plt.show()


In [None]:
!git clone https://github.com/cvg/LightGlue.git

In [None]:
import sys
sys.path.append("LightGlue")

import torch
from lightglue import LightGlue, SuperPoint
from lightglue.utils import rbd

In [None]:
def draw_inlier_matches(
    img0_rgb,
    img1_rgb,
    kpts0,
    kpts1,
    matches01,
    inlier_mask,
    max_draw=50,
    line_thickness=4,
    point_radius=6,
    line_color=(255, 0, 0),
    darken_bg=True
):
    """
    Show both images side by side and connect their inlier keypoint matches.

    kpts0 / kpts1 are indexed by the two columns of matches01; inlier_mask
    selects which rows of matches01 are drawn. At most max_draw matches are
    drawn (a random subset when there are more). Prints a note and returns
    without plotting when the mask selects nothing.
    """
    selected = np.nonzero(inlier_mask)[0]
    if selected.size == 0:
        print("No inliers to draw.")
        return

    # Thin out dense match sets so the picture stays readable.
    if selected.size > max_draw:
        selected = np.random.choice(selected, size=max_draw, replace=False)

    left_pts = kpts0[matches01[selected, 0]]
    right_pts = kpts1[matches01[selected, 1]]

    h0, w0 = img0_rgb.shape[:2]
    h1, w1 = img1_rgb.shape[:2]

    # Black canvas tall enough for the taller image, wide enough for both.
    canvas = np.zeros((max(h0, h1), w0 + w1, 3), dtype=np.uint8)
    canvas[:h0, :w0] = img0_rgb
    canvas[:h1, w0:] = img1_rgb

    if darken_bg:
        canvas = (canvas * 0.6).astype(np.uint8)

    dot_color = (0, 255, 0)
    for left, right in zip(left_pts, right_pts):
        start = (int(left[0]), int(left[1]))
        # Right-image points are shifted by the width of the left image.
        end = (int(right[0]) + w0, int(right[1]))

        cv2.line(canvas, start, end, line_color, thickness=line_thickness)
        for center in (start, end):
            cv2.circle(canvas, center, point_radius, dot_color, -1)

    plt.figure(figsize=(18, 8))
    plt.imshow(canvas)
    plt.axis("off")
    plt.show()


In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

# SuperPoint extractor (capped at 4096 keypoints) and the LightGlue matcher
# configured for SuperPoint features; both in eval mode on `device`.
extractor = SuperPoint(max_num_keypoints=4096).eval().to(device)
matcher = LightGlue(features="superpoint").eval().to(device)

def to_tensor_image(img_rgb):
    """
    Convert an RGB image array into a grayscale float tensor on the module-level `device`.

    The result has two leading singleton axes added in front of the grayscale
    image and is scaled by 1/255.
    """
    grayscale = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    batched = torch.from_numpy(grayscale).float().unsqueeze(0).unsqueeze(0)
    return (batched / 255.0).to(device)

def match_two_images(img0_rgb, img1_rgb):
    """
    Extract features from two RGB images and match them.

    Uses the module-level `extractor` and `matcher`.

    Returns (kpts0, kpts1, matches) as numpy arrays, where the two columns of
    `matches` index into kpts0 and kpts1 respectively.
    """
    image0 = to_tensor_image(img0_rgb)
    image1 = to_tensor_image(img1_rgb)

    # Pure inference: no gradients are needed, so do not let autograd record
    # the forward passes (saves memory when this runs once per tile).
    with torch.no_grad():
        feats0 = extractor.extract(image0)
        feats1 = extractor.extract(image1)
        matches01 = matcher({"image0": feats0, "image1": feats1})

    # rbd is applied to each output dict before converting to numpy.
    feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]]

    kpts0 = feats0["keypoints"].cpu().numpy()
    kpts1 = feats1["keypoints"].cpu().numpy()
    matches = matches01["matches"].cpu().numpy()  # (M,2) indices into kpts0/kpts1

    return kpts0, kpts1, matches


In [None]:
def magsac_inliers_homography(kpts0, kpts1, matches, reproj_thr=3.0):
    """
    Fit a homography kpts0 -> kpts1 on the matched keypoints with MAGSAC.

    kpts0, kpts1: keypoint coordinate arrays, one (x, y) row per keypoint
    matches: (M, 2) index pairs into kpts0 / kpts1; an empty sequence is accepted
    reproj_thr: reprojection threshold forwarded to cv2.findHomography

    Returns (H, mask, inlier_mask):
      H           3x3 homography, or None when no model was estimated
      mask        raw mask returned by OpenCV, or None
      inlier_mask boolean array of length M (all False when H is None)

    Fewer than 8 matches are treated as "no model" without calling OpenCV.
    """
    # Normalise to an (M, 2) integer array so an empty match set does not
    # break the column indexing below.
    matches = np.asarray(matches, dtype=np.int64).reshape(-1, 2)
    no_inliers = np.zeros((len(matches),), dtype=bool)

    if len(matches) < 8:
        return None, None, no_inliers

    pts0 = np.asarray(kpts0)[matches[:, 0]].astype(np.float32)
    pts1 = np.asarray(kpts1)[matches[:, 1]].astype(np.float32)

    H, mask = cv2.findHomography(
        pts0, pts1,
        method=cv2.USAC_MAGSAC,
        ransacReprojThreshold=reproj_thr,
        maxIters=10000,
        confidence=0.999
    )
    # Without a model there are no meaningful inliers to count, whatever the
    # returned mask contains.
    if H is None or mask is None:
        return None, mask, no_inliers

    inlier_mask = mask.ravel().astype(bool)
    return H, mask, inlier_mask


In [None]:
# Sanity check: match the first two training images (rows 0 and 1) against each other.
ids = pos_df["id"][0:2].tolist()
p0, img0 = read_train_image(ids[0])
p1, img1 = read_train_image(ids[1])
if img0 is None or img1 is None:
    # read_train_image returns (None, None) when no readable file was found.
    raise FileNotFoundError(f"Could not load training images for ids {ids}")

k0, k1, matches = match_two_images(img0, img1)
H, mask, inliers = magsac_inliers_homography(k0, k1, matches)

print("train-train")
print("ids:", ids, "matches:", len(matches), "inliers:", int(inliers.sum()))
draw_inlier_matches(
    img0,
    img1,
    k0,
    k1,
    matches,
    inliers,
    max_draw=40,        # keep the number of drawn lines small so the plot stays readable
    line_thickness=5,
    point_radius=7,
    line_color=(0, 255, 255)
)

In [None]:
def iter_tiles(map_rgb, tile_size=768, stride=512):
    """
    Yield (x0, y0, tile) for square tiles covering the whole map.

    Tiles are tile_size x tile_size views into map_rgb whose top-left corners
    lie on a grid with the given stride. When the stride grid does not end
    exactly at the border, one extra tile flush with the right / bottom edge
    is added so that no strip of the map is left uncovered.

    Nothing is yielded along an axis where the map is smaller than tile_size.
    """
    def _origins(extent):
        # Top-left coordinates along one axis, including the edge-flush one.
        last = extent - tile_size
        if last < 0:
            return []
        starts = list(range(0, last + 1, stride))
        if starts[-1] != last:
            starts.append(last)
        return starts

    H, W = map_rgb.shape[:2]
    x_starts = _origins(W)
    for y0 in _origins(H):
        for x0 in x_starts:
            tile = map_rgb[y0:y0+tile_size, x0:x0+tile_size]
            yield (x0, y0, tile)


In [None]:
train_id = int(pos_df["id"].sample(1, random_state=2).iloc[0])
p_q, img_q = read_train_image(train_id)
if img_q is None:
    # read_train_image returns (None, None) when no readable file was found.
    raise FileNotFoundError(f"Could not load training image for id {train_id}")
gt = pos_df[pos_df["id"] == train_id].iloc[0]
print("train_id", train_id, "gt", (gt.x_pixel, gt.y_pixel))

best = None

# Debug: only try tiles near the GT position (to check that matching works in principle).
tile_size = 768
stride = 512
radius = 1000  # pixels around GT to search (debug only)

for (x0, y0, tile_rgb) in iter_tiles(map_rgb, tile_size=tile_size, stride=stride):
    cx_tile = x0 + tile_size/2
    cy_tile = y0 + tile_size/2
    if abs(cx_tile - gt.x_pixel) > radius or abs(cy_tile - gt.y_pixel) > radius:
        continue

    kq, kt, m = match_two_images(img_q, tile_rgb)
    Hqt, _, inl = magsac_inliers_homography(kq, kt, m, reproj_thr=3.0)
    score = int(inl.sum())

    # Keep the tile with the most inliers seen so far.
    if best is None or score > best["score"]:
        best = {"x0": x0, "y0": y0, "tile": tile_rgb, "kq": kq, "kt": kt, "m": m, "inl": inl, "H": Hqt, "score": score}

if best is None:
    # No tile centre fell inside the search window, so nothing was matched.
    print("no tile within radius", radius, "of GT; nothing to show")
else:
    print("best inliers:", best["score"], "tile origin:", (best["x0"], best["y0"]))
    draw_inlier_matches(img_q, best["tile"], best["kq"], best["kt"], best["m"], best["inl"], max_draw=250)


In [None]:
import numpy as np
import cv2

def apply_homography(H: np.ndarray, xy: np.ndarray) -> np.ndarray:
    """
    Map a single 2D point through the homography H.

    xy: (x, y) in source image coordinates
    returns: float64 array of shape (2,) in destination coordinates;
             [nan, nan] when the homogeneous scale is (almost) zero
    """
    homogeneous = np.array([xy[0], xy[1], 1.0], dtype=np.float64)
    mapped = H @ homogeneous
    scale = mapped[2]

    # A vanishing third coordinate has no finite image point.
    if abs(scale) < 1e-12:
        return np.full(2, np.nan, dtype=np.float64)

    return (mapped[:2] / scale).astype(np.float64)

def euclidean_px(a_xy, b_xy) -> float:
    """Return the straight-line distance between two (x, y) points as a Python float."""
    ax_, ay_ = float(a_xy[0]), float(a_xy[1])
    bx_, by_ = float(b_xy[0]), float(b_xy[1])
    dx = ax_ - bx_
    dy = ay_ - by_
    squared = dx * dx + dy * dy
    return float(np.sqrt(squared))


In [None]:
from dataclasses import dataclass
from typing import List, Tuple

@dataclass
class Tile:
    """One square crop of the map together with its position on the map."""
    tile_id: int     # running index assigned by build_tiles
    x0: int          # column of the tile's top-left corner in the map
    y0: int          # row of the tile's top-left corner in the map
    rgb: np.ndarray  # tile image

def build_tiles(map_rgb: np.ndarray, tile_size: int = 768, stride: int = 512) -> List[Tile]:
    """
    Cut the map into tile_size x tile_size tiles and return them as a list.

    Tile corners lie on a grid with the given stride, rows first. When the
    stride grid does not end exactly at the border, one extra tile flush with
    the right / bottom edge is added so that no strip of the map is left
    uncovered. Each tile holds its own copy of the pixels; tile_id counts up
    from 0 in creation order.

    Returns an empty list when the map is smaller than tile_size along an axis.
    """
    def _origins(extent: int) -> List[int]:
        # Top-left coordinates along one axis, including the edge-flush one.
        last = extent - tile_size
        if last < 0:
            return []
        starts = list(range(0, last + 1, stride))
        if starts[-1] != last:
            starts.append(last)
        return starts

    H, W = map_rgb.shape[:2]
    x_starts = _origins(W)
    tiles: List[Tile] = []
    for y0 in _origins(H):
        for x0 in x_starts:
            tile_rgb = map_rgb[y0:y0+tile_size, x0:x0+tile_size].copy()
            tiles.append(Tile(tile_id=len(tiles), x0=x0, y0=y0, rgb=tile_rgb))
    return tiles

# Tile the map once; tile_size and stride stay available as notebook-level settings.
tile_size, stride = 768, 512
tiles = build_tiles(map_rgb, tile_size, stride)
print("tiles:", len(tiles), "tile_size:", tile_size, "stride:", stride)


In [None]:
import torch
from typing import Dict, Any
import torch
import cv2
import numpy as np

def to_tensor_image(img_rgb: np.ndarray, device: "str | None" = None) -> torch.Tensor:
    """
    Convert an RGB image array into a grayscale float tensor of shape [1,1,H,W], scaled by 1/255.

    device: target device for the tensor. It is optional so that this
        definition stays call-compatible with the earlier one-argument
        to_tensor_image it replaces in the notebook namespace (still used by
        match_two_images); when omitted, CUDA is used if available, else CPU.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    t = torch.from_numpy(gray).float()[None, None] / 255.0  # [1,1,H,W]
    return t.to(device)

def extract_features_rgb(extractor, img_rgb: np.ndarray, device: str):
    """
    Run `extractor` on an RGB image and return its feature dict on the CPU.

    The image is converted with to_tensor_image on `device`, passed to
    extractor.extract, and every returned value is detached and copied to the
    CPU so the result can be cached without holding device memory.

    The original notes list the expected entries as keypoints [1, N, 2],
    descriptors [1, N, D], scores [1, N] and image_size [1, 2]
    (NOTE(review): not verifiable from this file - confirm against the extractor).
    """
    batched_image = to_tensor_image(img_rgb, device)
    device_feats = extractor.extract(batched_image)

    cached = {}
    for name, value in device_feats.items():
        cached[name] = value.detach().cpu()
    return cached


# Extract features for every tile once and keep them keyed by tile_id.
device = "cuda" if torch.cuda.is_available() else "cpu"

tile_feats = {
    tile.tile_id: extract_features_rgb(extractor, tile.rgb, device)
    for tile in tiles
}
print("cached tile feats:", len(tile_feats))



In [None]:
from lightglue.utils import rbd
import numpy as np
import cv2
import torch

def match_query_to_tile(
    matcher,
    query_feats,
    tile_feats,
    device: str,
    reproj_thr: float = 3.0,
):
    """
    Match cached query features against cached tile features and fit a homography.

    matcher: callable taking {"image0": ..., "image1": ...} feature dicts
    query_feats, tile_feats: feature dicts with a batched "keypoints" entry
    device: device the features are moved to for matching
    reproj_thr: reprojection threshold forwarded to cv2.findHomography

    Returns a dict that always has the same keys:
      matches  (M, 2) index pairs into kq / kt
      inliers  boolean array of length M
      H        query->tile homography, or None when none was found
      score    number of inliers (0 when H is None)
      kq, kt   query / tile keypoints as numpy arrays (batch axis removed)
    """
    def _on_device(feats):
        # Move tensor values to the matching device; leave anything else untouched.
        return {k: (v.to(device) if torch.is_tensor(v) else v) for k, v in feats.items()}

    # Pure inference: keep autograd from recording the matcher forward pass.
    with torch.no_grad():
        out = matcher({"image0": _on_device(query_feats), "image1": _on_device(tile_feats)})
    out = rbd(out)

    matches = out["matches"].cpu().numpy()  # (M,2)

    # keypoints: [1, N, 2] -> remove batch for numpy indexing.
    # detach().cpu() makes this work regardless of where the cached tensors live.
    kq = query_feats["keypoints"][0].detach().cpu().numpy()
    kt = tile_feats["keypoints"][0].detach().cpu().numpy()

    # Result used whenever no homography is available.
    failed = {
        "matches": matches,
        "inliers": np.zeros((len(matches),), dtype=bool),
        "H": None,
        "score": 0,
        "kq": kq,
        "kt": kt,
    }

    if len(matches) < 8:
        return failed

    pts0 = kq[matches[:, 0]].astype(np.float32)
    pts1 = kt[matches[:, 1]].astype(np.float32)

    H, mask = cv2.findHomography(
        pts0, pts1,
        method=cv2.USAC_MAGSAC,
        ransacReprojThreshold=reproj_thr,
        maxIters=10000,
        confidence=0.999
    )

    if mask is None or H is None:
        return failed

    inl = mask.ravel().astype(bool)
    score = int(inl.sum())
    return {"matches": matches, "inliers": inl, "H": H, "score": score, "kq": kq, "kt": kt}


In [None]:
def predict_xy_from_best_tile(
    query_rgb: np.ndarray,
    best_tile: Tile,
    H_query_to_tile: np.ndarray,
) -> Tuple[float, float]:
    """
    Estimate the map position of the query image centre.

    The centre of query_rgb is mapped through H_query_to_tile into tile
    coordinates and then shifted by the tile's (x0, y0) offset to obtain map
    coordinates. Returns (x_map, y_map) as floats.
    """
    height, width = query_rgb.shape[:2]
    query_center = np.array([width / 2.0, height / 2.0], dtype=np.float64)

    tile_xy = apply_homography(H_query_to_tile, query_center)  # (x, y) inside the tile
    return float(tile_xy[0] + best_tile.x0), float(tile_xy[1] + best_tile.y0)


In [None]:
def localize_query_against_map_tiles(
    query_rgb,
    tiles,
    tile_feats,
    device: str,
    top_k: int = 5,
    reproj_thr: float = 3.0,
    tile_subsample: int = 1,
):
    """
    Match one query image against the cached tiles and rank the tiles by inlier count.

    Uses the notebook-level `extractor` and `matcher`. With tile_subsample > 1
    only every tile_subsample-th tile (by position in `tiles`) is tried.

    Returns (best, q_feats): `best` holds up to top_k tuples
    (score, tile, match_result) in descending score order, `q_feats` the
    features extracted from the query image.
    """
    q_feats = extract_features_rgb(extractor, query_rgb, device)

    use_every_tile = tile_subsample <= 1
    candidates = [
        tile for position, tile in enumerate(tiles)
        if use_every_tile or position % tile_subsample == 0
    ]

    scored = []
    for tile in candidates:
        outcome = match_query_to_tile(
            matcher, q_feats, tile_feats[tile.tile_id], device=device, reproj_thr=reproj_thr
        )
        scored.append((outcome["score"], tile, outcome))

    ranked = sorted(scored, key=lambda entry: entry[0], reverse=True)
    return ranked[:top_k], q_feats


In [None]:
# Localise every 10th training sample from row 210 up to (not including) row 300.
# The range is clipped to the table length so a shorter pos_df does not raise.
for row_idx in range(210, min(300, len(pos_df)), 10):
    # Positional lookup: does not depend on the dataframe's index labels.
    train_id = int(pos_df["id"].iloc[row_idx])
    p_q, img_q = read_train_image(train_id)
    gt_row = pos_df[pos_df["id"] == train_id].iloc[0]
    gt_xy = (float(gt_row.x_pixel), float(gt_row.y_pixel))
    print("train_id:", train_id, "GT:", gt_xy, "file:", p_q.name if p_q else None)

    if img_q is None:
        # read_train_image returns (None, None) when no readable file was found.
        print("Image not found, skipping.")
        continue

    best_list, q_feats = localize_query_against_map_tiles(
        img_q, tiles, tile_feats,
        device=device,
        top_k=4, reproj_thr=3.0,
        tile_subsample=1
    )

    if not best_list:
        # Happens when there are no tiles to match against.
        print("No candidate tiles.")
        continue

    for rank, (score, tile, res) in enumerate(best_list, 1):
        print(f"rank={rank} score(inliers)={score} tile_id={tile.tile_id} origin=({tile.x0},{tile.y0}) H={'yes' if res['H'] is not None else 'no'}")

    # Take the tile with the most inliers.
    best_score, best_tile, best_res = best_list[0]
    if best_res["H"] is None:
        print("No homography found.")
    else:
        pred_xy = predict_xy_from_best_tile(img_q, best_tile, best_res["H"])
        err = euclidean_px(pred_xy, gt_xy)
        print("pred:", pred_xy, "err_px:", err)

        # Visualize inlier matches (query vs best tile).
        draw_inlier_matches(img_q, best_tile.rgb, best_res["kq"], best_res["kt"], best_res["matches"], best_res["inliers"], max_draw=250)

        # Visualize GT vs predicted position on the global map.
        plt.figure(figsize=(14,8))
        plt.imshow(map_rgb)
        plt.scatter([gt_xy[0]], [gt_xy[1]], s=80, label="GT")
        plt.scatter([pred_xy[0]], [pred_xy[1]], s=80, label="Pred")
        plt.legend()
        plt.title(f"id={train_id} | inliers={best_score} | err={err:.1f}px")
        plt.axis("off")
        plt.show()


In [None]:
import numpy as np

# Centre of every tile in map pixels as [cx, cy], assuming each tile is tile_size wide and high.
half_tile = tile_size * 0.5
tile_centers = np.array(
    [(t.x0 + half_tile, t.y0 + half_tile) for t in tiles],
    dtype=np.float32,
).reshape(-1, 2)

# Tile ids in the same order as the rows of tile_centers.
tile_ids = np.array([t.tile_id for t in tiles], dtype=np.int32)
