## Example submission

Image Matching Challenge 2025: https://www.kaggle.com/competitions/image-matching-challenge-2025

This notebook creates a simple submission using ALIKED and LightGlue, plus DINO for shortlisting, on GPU. Adapted from [last year](https://www.kaggle.com/code/oldufo/imc-2024-submission-example).

Remember to select an accelerator on the sidebar to the right, and to disable internet access when submitting a notebook to the competition.

In [26]:
# IMPORTANT 
#Install dependencies and copy model weights to run the notebook without internet access when submitting to the competition.

!pip install --no-index /kaggle/input/imc2024-packages-lightglue-rerun-kornia/* --no-deps
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp /kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv-pth

Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia-0.7.2-py2.py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_moons-0.2.9-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_rs-0.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/lightglue-0.0-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/pycolmap-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/rerun_sdk-0.15.0a2-cp38-abi3-manylinux_2_31_x86_64.whl
kornia is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
kornia-moons is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
kornia-rs is already installed 

In [27]:
!cp /kaggle/input/disk-depth/disk_lightglue.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/disk-depth/disk_lightglue.pth /root/.cache/torch/hub/checkpoints/disk_lightglue_v0-1_arxiv-pth
!cp /kaggle/input/disk-depth/disk_lightglue.pth /root/.cache/torch/hub/checkpoints/depth-save.pth

In [28]:
import os
print(os.path.exists("/root/.cache/torch/hub/checkpoints/disk_lightglue_v0-1_arxiv-pth"))

True


In [29]:
import sys
import os
from tqdm import tqdm
from time import time, sleep
import gc
import numpy as np
import h5py
import dataclasses
import pandas as pd
from IPython.display import clear_output
from collections import defaultdict
from copy import deepcopy
from PIL import Image

import cv2
import torch
import torch.nn.functional as F
import kornia as K
import kornia.feature as KF

import torch
from lightglue import match_pair
from lightglue import ALIKED, LightGlue
from lightglue.utils import load_image, rbd
from transformers import AutoImageProcessor, AutoModel

# from lightglue import DISK
from kornia.feature import LightGlueMatcher as KF_LightGlueMatcher
from scipy.spatial import cKDTree # For efficient nearest neighbor search to remove duplicate keypoints

# IMPORTANT Utilities: importing data into colmap and competition metric
import pycolmap
sys.path.append('/kaggle/input/imc25-utils')
from database import *
from h5_to_db import *
import metric


# LightGlue
from lightglue import match_pair
from lightglue import ALIKED, SuperPoint,DISK, DoGHardNet, LightGlue, SIFT
from fastprogress import progress_bar


In [30]:
from collections import defaultdict
from copy import deepcopy
import concurrent.futures


In [31]:

print("PyTorch version:", torch.__version__)
import sys
print("Python version:", sys.version)

print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Device count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device())
print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))


PyTorch version: 2.5.1+cu121
Python version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
CUDA available: True
CUDA version: 12.1
Device count: 2
Current device: 0
Device name: Tesla T4


In [32]:
# Do not forget to select an accelerator on the sidebar to the right.
device = K.utils.get_cuda_device_if_available(0)
print(f'{device=}')

device=device(type='cuda', index=0)


In [33]:
VERBOSE = True

In [34]:
class CONFIG:
    # DEBUG Settings
    DRY_RUN = False
    DRY_RUN_MAX_IMAGES = 10

    # Pipeline settings
    NUM_CORES = 2
    
    # COLMAP Reconstruction
    CAMERA_MODEL = "simple-radial"
    
    # Rotation correction
    ROTATION_CORRECTION = True
    
    # Keypoints handling
    MERGE_PARAMS = {
        "min_matches" : 15,
        # When merging keypoints, it is enable to filtering matches with cv2.findFundamentalMatrix.
        "filter_FundamentalMatrix" : False,
        "filter_iterations" : 10,
        "filter_threshold" : 8,
    }
    
    # Keypoints Extraction
    use_aliked_lightglue = True
    use_doghardnet_lightglue = False
    use_superpoint_lightglue = False
    use_disk_lightglue = False
    use_sift_lightglue = False
    use_loftr = False
    use_dkm = False
    use_superglue = False
    use_matchformer = False
        
    # Keypoints Extraction Parameters
    params_aliked_lightglue = {
        "num_features" : 8192,
        "detection_threshold" : 0.05,
        "min_matches" : 100,
        "resize_to" : 2048,
    }

    params_rot_detection_aliked_lightglue={
        "num_features" : 4096,
        "resize_to":960,
        "min_matches":60,
        "min_inliers":40
        }
    
    params_doghardnet_lightglue = {
        "num_features" : 8192,
        "detection_threshold" : 0.001,
        "min_matches" : 15,
        "resize_to" : 1024,
    }
    
    params_superpoint_lightglue = {
        "num_features" : 4096,
        "detection_threshold" : 0.005,
        "min_matches" : 15,
        "resize_to" : 1024,
    }
    
    params_disk_lightglue = {
        "num_features" : 4096,
        "detection_threshold" : 0.3,
        "min_matches" : 100,
        "resize_to" : 1024,
    }

    params_sift_lightglue = {
        "num_features" : 8192,
        "detection_threshold" : 0.001,
        "min_matches" : 15,
        "resize_to" : 1024,
    }

    params_loftr = {
        "resize_small_edge_to" : 750,
        "min_matches" : 15,
    }
    
    params_dkm = {
        "num_features" : 2048,
        "detection_threshold" : 0.4,
        "min_matches" : 15,
        "resize_to" : (540, 720),    
    }
    
    # superpoint + superglue  ...  https://www.kaggle.com/competitions/image-matching-challenge-2023/discussion/416873
    params_sg1 = {
        "sg_config" : 
        {
            "superpoint": {
                "nms_radius": 4, 
                "keypoint_threshold": 0.005,
                "max_keypoints": -1,
            },
            "superglue": {
                "weights": "outdoor",
                "sinkhorn_iterations": 20,
                "match_threshold": 0.2,
            },
        },
        "resize_to": 1088,
        "min_matches": 15,
    }
    params_sg2 = {
        "sg_config" : 
        {
            "superpoint": {
                "nms_radius": 4, 
                "keypoint_threshold": 0.005,
                "max_keypoints": -1,
            },
            "superglue": {
                "weights": "outdoor",
                "sinkhorn_iterations": 20,
                "match_threshold": 0.2,
            },
        },
        "resize_to": 1280,
        "min_matches": 15,
    }
    params_sg3 = {
        "sg_config" : 
        {
            "superpoint": {
                "nms_radius": 4, 
                "keypoint_threshold": 0.005,
                "max_keypoints": -1,
            },
            "superglue": {
                "weights": "outdoor",
                "sinkhorn_iterations": 20,
                "match_threshold": 0.2,
            },
        },
        "resize_to": 1376,
        "min_matches": 15,
    }
    params_sgs = [params_sg1, params_sg2, params_sg3]
    
    params_matchformer = {
        "detection_threshold" : 0.15,
        "resize_to" : (560, 750),
        "num_features" : 2000,
        "min_matches" : 15, 
    }

In [35]:
# --- Helper function for image loading (if not already defined) ---
def load_torch_image(fname, device=torch.device('cpu')):
    img = K.io.load_image(fname, K.io.ImageLoadType.RGB32, device=device)[None, ...]
    return img

In [36]:
def init_cache_from_imglist(img_fnames, n_rotations=4, n_groups=5):
    """
    Initializes a nested defaultdict cache structure for each image and rotation.
    Structure: cache[image_key][rotation][index] = dict()
    """
    cache = defaultdict(lambda: [[] for _ in range(n_rotations)])
    for fname in img_fnames:
        key = fname.split('/')[-1]  # use basename
        for r in range(n_rotations):
            cache[key][r] = [dict() for _ in range(n_groups)]
    return cache

In [37]:
from collections import defaultdict
from PIL import Image
from torchvision import transforms

class RotationEstimator:
    def __init__(self, device='cuda'):
        self.device = torch.device(device)
        self.extractor = ALIKED(weights=f"/kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth", 
                                # detection_threshold=CONFIG.params_rot_detection_aliked_lightglue["detection_threshold"],
                                num_features=CONFIG.params_rot_detection_aliked_lightglue["num_features"],
                                min_matches=CONFIG.params_rot_detection_aliked_lightglue["min_matches"]
                               ).to(self.device, dtype = torch.float32).eval()
        lg_cfg = {
            "features": "aliked",
            "depth_confidence": -1,         # 禁用深度置信度过滤
            "width_confidence": -1,         # 禁用宽度置信度过滤
            "filter_threshold": 0.1,        # 设置匹配分数阈值
            "mp": True                     # 禁用多进程加速
        }
        self.lightglue = LightGlue(**lg_cfg).eval().to(self.device)
        self.verbose = VERBOSE
        
    def extract(self, img):
        with torch.inference_mode():
            return self.extractor.extract(img, resize=CONFIG.params_rot_detection_aliked_lightglue["resize_to"])

    def match_and_filter(self, desc0, desc1, kpts0, kpts1):
        data = {
            "image0": {"keypoints": kpts0, "descriptors": desc0},
            "image1": {"keypoints": kpts1, "descriptors": desc1},
        }
        with torch.inference_mode():
            pred = self.lightglue(data)
        matches0 = pred["matches0"][0].cpu().numpy()
        valid = matches0 > -1
        if np.sum(valid) == 0:
            return 0
        pts0 = kpts0[0][valid].cpu().numpy()
        pts1 = kpts1[0][matches0[valid]].cpu().numpy()
        try:
            _, inliers = cv2.findFundamentalMat(pts0, pts1, cv2.USAC_MAGSAC, 5, 0.9999, 50000)
            return int(np.sum(inliers)) if inliers is not None else 0
        except:
            return 0

    def run(self,img_fnames, index_pairs):
        rot_dict = defaultdict(dict)
        for idx1, idx2 in tqdm(index_pairs, desc="Finding valid rotations"):
            fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
            key1, key2 = os.path.basename(fname1), os.path.basename(fname2)
            try:
                img0 = load_torch_image(fname1, device = self.device)
                img1 = load_torch_image(fname2, device = self.device)
            except Exception as e:
                print(f"Failed to load image: {fname1}, {fname2}, reason: {e}")
                continue

            desc0 = self.extract(img0)
            kpts0, desc0 = desc0["keypoints"], desc0["descriptors"]

            for rot in range(4):  # 0, 90, 180, 270 degrees
                rotated_img1 = torch.rot90(img1, k=rot, dims=[2, 3])
                desc1 = self.extract(rotated_img1)
                kpts1, desc1 = desc1["keypoints"], desc1["descriptors"]

                inliers = self.match_and_filter(desc0, desc1, kpts0, kpts1)
                if inliers > CONFIG.params_rot_detection_aliked_lightglue['min_inliers']:
                    rot_dict[key1][key2] = rot
                    # if self.verbose:
                    #     print(f"matching {key1}-{key2} with {inliers} inliers!")
                    break
        del self.extractor
        gc.collect()
        del self.lightglue
        gc.collect()    
        return dict(rot_dict)

In [38]:
import numpy as np
import h5py
from collections import defaultdict

def merge_and_save_keypoints_matches(
    unique_kpts,
    out_match,
    rot_dict,
    feature_dir
):
    # Step 1: Merge all keypoints per image (across rotations)
    merged_kpts = {}
    kpt_offset = {}

    for img_name, rot_kpts_dict in unique_kpts.items():
        merged = []
        offset = {}
        total = 0
        for rot in sorted(rot_kpts_dict):  # rot 0,1,2,3
            kpts = rot_kpts_dict[rot]
            offset[rot] = total
            total += len(kpts)
            merged.append(kpts)
        if total == 0:
            continue
        merged_kpts[img_name] = np.concatenate(merged, axis=0)
        kpt_offset[img_name] = offset

    # Step 2: Remap match indices based on offset
    updated_out_match = defaultdict(dict)

    for k1, match_group in out_match.items():
        for k2, match_data in match_group.items():
            matches = match_data["matches"]
            rot = match_data["rot"]
            if len(matches) == 0:
                continue

            offset0 = kpt_offset[k1][rot]
            offset1 = kpt_offset[k2][0]  # image2 is always rot 0

            updated_matches = matches.copy()
            updated_matches[:, 0] += offset0
            updated_matches[:, 1] += offset1
            updated_out_match[k1][k2] = updated_matches.astype(np.int32)

    unified_kp_path = f'{feature_dir}/keypoints.h5'
    remapped_matches_path = f'{feature_dir}/matches.h5'
    # Step 3: Save merged keypoints
    with h5py.File(f'{feature_dir}/keypoints.h5', mode='w') as f_kp:
        for img_name, kpts in merged_kpts.items():
            f_kp[img_name] = kpts.astype(np.float32)  # shape (N, 2)

    # Step 4: Save remapped matches
    with h5py.File(f'{feature_dir}/matches.h5', mode='w') as f_match:
        for k1, match_group in updated_out_match.items():
            g = f_match.require_group(k1)
            for k2, matches in match_group.items():
                if len(matches) > 0:
                    g[k2] = matches  # shape (M, 2), dtype=int32

    # print("✔ Merged keypoints and remapped matches saved.")
    return unified_kp_path, remapped_matches_path

In [39]:
class LightGlueCustomMatching_sep(torch.nn.Module):
    def __init__(self, device=None, extractor_cfg=None):
        super().__init__()
        self.device=device
        self.extractor = ALIKED(weights=f"/kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth", \
                                **extractor_cfg).to(self.device, dtype = torch.float32).eval()
        lg_cfg = {
            "features": "aliked",
            "depth_confidence": -1,         # 禁用深度置信度过滤
            "width_confidence": -1,         # 禁用宽度置信度过滤
            "filter_threshold": 0.1,        # 设置匹配分数阈值
            "mp": False                     # 禁用多进程加速
        }
        self.lightglue = LightGlue(**lg_cfg).eval().to(self.device)
        self.ttas = list(range(5))
        self.tta2id = {k: i for i, k in enumerate(self.ttas)}
        self.tta_combination = [[i, j] for i in range(5) for j in range(5)]

    def forward_flat(self, data, cache_args):
        # print("in forward")
        pred = {}        
        cache, key1, key2, quad = cache_args # quad: Rotation times of image1. 0:No rotation, 1:90deg, 2:180deg, 3:270deg
        img_key_list = [key1,key2]
        data["image0"] = {"image": data["image0"]}
        data["image1"] = {"image": data["image1"]}
        keypoints_dict, descriptors_dict = {}, {}
        for i, img_key in enumerate(['image0', 'image1']):
            keypoints_list, descriptors_list = [], []
            if i == 0:
                _quad = 0
            else:
                _quad = quad
            if "pred" not in cache[img_key_list[i]][_quad][0]:
                no_cache = True
            else:
                no_cache = False
            # Get ALIKED descriptors
            for j, img in enumerate(data[img_key]["image"]):
                if no_cache:
                    img = img.unsqueeze(0)
                    cache[img_key_list[i]][_quad][j]["pred"] = self.extractor.extract(img, resize=None)
                    # print(f"aliked extraction done {j}")
                pred = cache[img_key_list[i]][_quad][j]["pred"]
                keypoints_list.append(pred['keypoints'])
                descriptors_list.append(pred['descriptors'])
            keypoints_dict[img_key] = keypoints_list
            descriptors_dict[img_key] = descriptors_list
        # print("get keypoints and descirptions")
        # Prepare data for LightGlue and run matching one by one
        group_pred_list = []
        for tta_group in self.tta_combination:
            group_idx = self.tta2id[tta_group[0]], self.tta2id[tta_group[1]]
            i0, i1 = group_idx[0], group_idx[1]
            data["image0"]["keypoints"], data["image0"]["descriptors"]= keypoints_dict['image0'][i0], descriptors_dict['image0'][i0]
            data["image1"]["keypoints"], data["image1"]["descriptors"]= keypoints_dict['image1'][i1], descriptors_dict['image1'][i1]
            group_pred = self.lightglue(data)
            group_pred.update({"keypoints0":data['image0']["keypoints"],
                                "keypoints1":data['image1']["keypoints"]})
            group_pred_list.append(group_pred)
        # print("out forward")
        return group_pred_list, cache

class LightGlueMatcherPipeline_sep:
    def __init__(self, device=None, conf_th=None, extractor_cfg=None, lg_cfg=None):
        self.device = device
        self.extractor_cfg = extractor_cfg
        self.lg_cfg = lg_cfg
        self._lightglue_matcher = LightGlueCustomMatching_sep(
            device=self.device, extractor_cfg=self.extractor_cfg)
        self.conf_thresh = conf_th
        self.tta_combination = self._lightglue_matcher.tta_combination

    def prep_img(self, img, long_side=None):
        """Resize the tensor image to a specified long side."""
        img = img.clone()
        if long_side is not None:
            scale = long_side / max(img.shape[2], img.shape[3])
            w = int(img.shape[3] * scale)
            h = int(img.shape[2] * scale)
            img = torch.nn.functional.interpolate(img, size=(h, w), mode='bilinear', align_corners=False)
        else:
            scale = 1.0
        return img, scale

    def split_image(self, image):
        """Split the image into 4 quadrants and return them along with a resized version."""
        h, w = image.shape[2], image.shape[3]
        if h % 2 != 0:
            h = h - 1
        if w % 2 != 0:
            w = w - 1
        image = image[:, :, :h, :w]
        return [image[:, :, :h//2, :w//2], 
            image[:, :, :h//2, w//2:], 
            image[:, :, h//2:, :w//2], 
            image[:, :, h//2:, w//2:],
            transforms.functional.resize(image, size=(h//2,w//2))]

    def reconstruct_coords(self, coords, quadrant, w, h):
        """Reconstruct coordinates based on the separation quadrant."""
        if quadrant == 1:
            coords[:, 0] += w//2
        elif quadrant == 2:
            coords[:, 1] += h//2
        elif quadrant == 3:
            coords[:, 0] += w//2
            coords[:, 1] += h//2
        elif quadrant == 4:
            coords = [[y*2, x*2] for y, x in coords]            
        return coords

    def __call__(self, img_ts0, img_ts1, cache_args, input_longside=None):
        with torch.no_grad():
            img_ts0, scale0 = self.prep_img(img_ts0, input_longside)
            img_ts1, scale1 = self.prep_img(img_ts1, input_longside)
            img_parts0 = self.split_image(img_ts0) 
            img_parts1 = self.split_image(img_ts1)
            cat_mkpts0, cat_mkpts1 = [], []
            pred, cache = self._lightglue_matcher.forward_flat(
                data={
                    "image0": torch.cat(img_parts0),
                    "image1": torch.cat(img_parts1),
                },
            cache_args=cache_args)
            # print("self._lightglue_matcher.forward_flat done")
        kpts0_all, kpts1_all = [], []  # Reserve original keypoints
        cat_kpts0_all, cat_kpts1_all = [], []
        matched_kpts0, matched_kpts1 = [], []
        matched_ids = []
        pre_len1 = 0
        pre_len2 = 0
        cur_len1 = 0
        cur_len2 = 0
        for idx, [i0, i1] in enumerate(self.tta_combination):
            group_pred = pred[idx]
            pred_aug = {}
            use_keys = ["keypoints0", "keypoints1", "matches0", "matching_scores0"]
            for k in use_keys:
                v = group_pred[k]
                if isinstance(v, torch.Tensor):
                    pred_aug[k] = v[0].detach().cpu().numpy().squeeze()
                else:
                    pred_aug[k] = v
        
            kpts0, kpts1 = pred_aug["keypoints0"], pred_aug["keypoints1"]
            matches = pred_aug["matches0"]
            valid = matches > -1
        
            if np.sum(valid) == 0:
                continue
        
            kpts0_all.append(self.reconstruct_coords(kpts0, i0, img_ts0.shape[3], img_ts0.shape[2]))
            kpts1_all.append(self.reconstruct_coords(kpts1, i1, img_ts1.shape[3], img_ts1.shape[2]))
            cur_len1 = len(kpts0)
            cur_len2 = len(kpts1)
            # print(f"cur_len1:{cur_len1}, cur_len2:{cur_len2}")

            # Keep matched coords
            matched_kpts0.append(self.reconstruct_coords(kpts0[valid], i0, img_ts0.shape[3], img_ts0.shape[2]))
            matched_kpts1.append(self.reconstruct_coords(kpts1[matches[valid]], i1, img_ts1.shape[3], img_ts1.shape[2]))
        
            # Keep index pairs
            matched_ids.append(np.stack([np.where(valid)[0]+pre_len1, matches[valid]+pre_len2], axis=1))
            pre_len1 += cur_len1
            pre_len2 += cur_len2
            
        if len(matched_kpts0) == 0:
            _, key1, key2, _ = cache_args
            print(f"No matches at {key1} vs. {key2}")
            return np.empty((0, 2)), np.empty((0, 2)), np.empty((0, 2), dtype=np.int32), cache
        
        cat_mkpts0 = np.concatenate(matched_kpts0)
        cat_mkpts1 = np.concatenate(matched_kpts1)
        cat_kpts0_all = np.concatenate(kpts0_all) 
        cat_kpts1_all = np.concatenate(kpts1_all) 
        
        matched_ids = np.concatenate(matched_ids).astype(np.int32)
        # print("before ransac")
        # Apply RANSAC
        try:
            _, inliers = cv2.findFundamentalMat(cat_mkpts0, cat_mkpts1, cv2.USAC_MAGSAC, ransacReprojThreshold=5, confidence=0.9999, maxIters=50000)
            inliers = inliers.ravel() > 0
            cat_mkpts0 = cat_mkpts0[inliers]
            cat_mkpts1 = cat_mkpts1[inliers]
            matched_ids = matched_ids[inliers]
        except Exception:
            _, key1, key2, _ = cache_args
            print(f"Error in findFundamentalMat: {key1}-{key2}")
            return np.empty((0, 2)), np.empty((0, 2)), np.empty((0, 2), dtype=np.int32), cache
        
        # Bounds check
        mask0 = (cat_mkpts0[:, 0] >= 0) & (cat_mkpts0[:, 0] < img_ts0.shape[3]) & \
                (cat_mkpts0[:, 1] >= 0) & (cat_mkpts0[:, 1] < img_ts0.shape[2])
        mask1 = (cat_mkpts1[:, 0] >= 0) & (cat_mkpts1[:, 0] < img_ts1.shape[3]) & \
                (cat_mkpts1[:, 1] >= 0) & (cat_mkpts1[:, 1] < img_ts1.shape[2])
        mask = mask0 & mask1
        
        return cat_kpts0_all/scale0, \
               cat_kpts1_all/scale1, \
               matched_ids[mask], \
               cache

In [40]:
def save_keypoints_and_matches_split(
    img_fnames, rot_dict, matcher_pip, feature_dir,
    device = "cuda",
    verbose=VERBOSE
):
    os.makedirs(feature_dir, exist_ok=True)
    
    cache = init_cache_from_imglist(img_fnames)
    # print("cache initializetion done")
    unique_kpts = {}
    out_match = {}
    for key1 in tqdm(rot_dict, desc="Matching and storing"):
        fname1 = next((f for f in img_fnames if os.path.basename(f) == key1), None)
        if fname1 is None:
            continue
        img0 = load_torch_image(fname1, device = device)

        for key2 in rot_dict[key1]:
            fname2 = next((f for f in img_fnames if os.path.basename(f) == key2), None)
            if fname2 is None:
                continue
            rot = rot_dict[key1][key2]
            # print(f"{key1}-{key2}-rot{rot}")
            img1 = load_torch_image(fname2, device = device)
            with torch.inference_mode():
                pts0, pts1, matches, cache = matcher_pip(
                    img0, img1,
                    cache_args=[cache, key1, key2, rot],
                    input_longside = 1216
                )
            print(f"{key1}_{rot}-{key2}:{len(matches)}")

            # Save keypoints once per image per rotation
            if key1 not in unique_kpts and len(pts0) > 0:
                unique_kpts[key1] = {}
            if rot not in unique_kpts[key1] and len(pts0) > 0:
                unique_kpts[key1][rot] = pts0.astype(np.float32)
            
            if key2 not in unique_kpts and len(pts1) > 0:
                unique_kpts[key2] = {}
            if 0 not in unique_kpts[key2] and len(pts1) > 0:
                unique_kpts[key2][0] = pts1.astype(np.float32)
            
            if len(matches) > 0:
                if key1 not in out_match:
                    out_match[key1] = {}
                out_match[key1][key2] = {
                    "rot": rot,
                    "matches": matches}

    unified_kp_path, remapped_matches_path = merge_and_save_keypoints_matches(
        unique_kpts,
        out_match,
        rot_dict,
        feature_dir)
    return unified_kp_path, remapped_matches_path

In [41]:
# Assume these are available from your environment or previous code
# from .utils import load_torch_image # Assuming load_torch_image is defined elsewhere
# from kornia.feature import ALIKED # Already in your detect_aliked
# from kornia.feature import LightGlueMatcher as KF_LightGlueMatcher # Already in your match_with_lightglue
# from kornia.geometry import laf_from_center_scale_ori # Already in your match_with_lightglue
# from colmap_database import COLMAPDatabase, add_keypoints, add_matches # Already in your colmap_import


def convert_coord(r, w, h, rotk):
    if rotk == 0:
        return r
    elif rotk == 1:
        rx = w-1-r[:, 1]
        ry = r[:, 0]
        return torch.concat([rx[None], ry[None]], dim=0).T
    elif rotk == 2:
        rx = w-1-r[:, 0]
        ry = h-1-r[:, 1]
        return torch.concat([rx[None], ry[None]], dim=0).T
    elif rotk == 3:
        rx = r[:, 1]
        ry = h-1-r[:, 0]
        return torch.concat([rx[None], ry[None]], dim=0).T

def detect_common(img_fnames,
                  model_name,
                  rots,
                  file_keypoints,
                  feature_dir = '.featureout',
                  num_features = 4096,
                  resize_to = 1024,
                  detection_threshold = 0.01,
                  device=torch.device('cpu'),
                  min_matches=15,verbose=VERBOSE
                 ):
    if not os.path.isdir(feature_dir):
        os.makedirs(feature_dir)

    #####################################################
    # Extract keypoints and descriptions
    #####################################################
    dict_model = {
        "aliked" : ALIKED,
        "superpoint" : SuperPoint,
        "doghardnet" : DoGHardNet,
        "disk" : DISK,
        "sift" : SIFT,
    }
    extractor_class = dict_model[model_name]
    dtype = torch.float32 # ALIKED has issues with float16
    # extractor = extractor_class(max_num_keypoints=num_features, detection_threshold=detection_threshold, 
    #                             resize=resize_to).eval().to(device, dtype)
    if model_name == 'disk':
        # print("dissskkkkkk")
        # extractor = DISK(
        #     max_num_keypoints=num_features,
        #     detection_threshold=detection_threshold,
        #     resize=resize_to
        # ).to(device).eval()
        # checkpoint = torch.load(ckpt_path, map_location=device)
        # extractor.load_state_dict(checkpoint['model'])
        
        extractor = DISK(
            max_num_keypoints=num_features,
            detection_threshold=detection_threshold,
            resize=resize_to
        ).eval().to(device)
        
        print("get extractor")
        
    else:
        extractor_class = dict_model[model_name]
        extractor = extractor_class(
            max_num_keypoints=num_features,
            detection_threshold=detection_threshold,
            resize=resize_to
        ).to(device, dtype).eval()

    
    dict_kpts_cuda = {}
    dict_descs_cuda = {}
    for (img_path, rot_k) in zip(img_fnames, rots):
        img_fname = img_path.split('/')[-1]
        key = img_fname
        with torch.inference_mode():
            image0 = load_torch_image(img_path, device=device).to(dtype)
            h, w = image0.shape[2], image0.shape[3]
            image1 = torch.rot90(image0, rot_k, [2, 3])
            feats0 = extractor.extract(image1)  # auto-resize the image, disable with resize=None
            kpts = feats0['keypoints'].reshape(-1, 2).detach()
            descs = feats0['descriptors'].reshape(len(kpts), -1).detach()
            kpts = convert_coord(kpts, w, h, rot_k)
            dict_kpts_cuda[f"{key}"] = kpts
            dict_descs_cuda[f"{key}"] = descs
            if verbose:
                print(f"{model_name} > rot_k={rot_k}, kpts.shape={kpts.shape}, descs.shape={descs.shape}")
    del extractor
    gc.collect()

    #####################################################
    # Matching keypoints
    #####################################################
    # print("KF glue matcher")
    lg_matcher = KF.LightGlueMatcher(model_name, {"width_confidence": -1,
                                            "depth_confidence": -1,
                                             "mp": True if 'cuda' in str(device) else False}).eval().to(device)
    
    cnt_pairs = 0
    with h5py.File(file_keypoints, mode='w') as f_match:
        for pair_idx in tqdm(index_pairs):
            idx1, idx2 = pair_idx
            fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
            
            key1, key2 = fname1.split('/')[-1], fname2.split('/')[-1]
            
            kp1 = dict_kpts_cuda[key1]
            kp2 = dict_kpts_cuda[key2]
            desc1 = dict_descs_cuda[key1]
            desc2 = dict_descs_cuda[key2]
            with torch.inference_mode():
                dists, idxs = lg_matcher(desc1,
                                     desc2,
                                     KF.laf_from_center_scale_ori(kp1[None]),
                                     KF.laf_from_center_scale_ori(kp2[None]))
            if len(idxs)  == 0:
                continue
            n_matches = len(idxs)
            kp1 = kp1[idxs[:,0], :].cpu().numpy().reshape(-1, 2).astype(np.float32)
            kp2 = kp2[idxs[:,1], :].cpu().numpy().reshape(-1, 2).astype(np.float32)
            group  = f_match.require_group(key1)
            if n_matches >= min_matches:
                group.create_dataset(key2, data=np.concatenate([kp1, kp2], axis=1))
                cnt_pairs+=1
                if verbose:
                    print (f'{model_name}> {key1}-{key2}: {n_matches} matches @ {cnt_pairs}th pair({model_name}+lightglue)')            
            else:
                if verbose:
                    print (f'{model_name}> {key1}-{key2}: {n_matches} matches --> skipped')
    del lg_matcher
    torch.cuda.empty_cache()
    gc.collect()
    return

def detect_lightglue_common(
    img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
    resize_to=1024,
    detection_threshold=0.01, 
    num_features=4096, 
    min_matches=15,
):
    t=time()
    detect_common(
        img_fnames, model_name, rots, file_keypoints, feature_dir, 
        resize_to=resize_to,
        num_features=num_features, 
        detection_threshold=detection_threshold, 
        device=device,
        min_matches=min_matches,
    )
    gc.collect()
    t=time() -t 
    print(f'Features matched in  {t:.4f} sec ({model_name}+LightGlue)')
    return t

In [42]:
def get_unique_idxs(A, dim=0):
    # https://stackoverflow.com/questions/72001505/how-to-get-unique-elements-and-their-firstly-appeared-indices-of-a-pytorch-tenso
    unique, idx, counts = torch.unique(A, dim=dim, sorted=True, return_inverse=True, return_counts=True)
    _, ind_sorted = torch.sort(idx, stable=True)
    cum_sum = counts.cumsum(0)
    cum_sum = torch.cat((torch.tensor([0],device=cum_sum.device), cum_sum[:-1]))
    first_indices = ind_sorted[cum_sum]
    return first_indices

def get_keypoint_from_h5(fp, key1, key2):
    rc = -1
    try:
        kpts = np.array(fp[key1][key2])
        rc = 0
        return (rc, kpts)
    except:
        return (rc, None)

def get_keypoint_from_multi_h5(fps, key1, key2):
    list_mkpts = []
    for fp in fps:
        rc, mkpts = get_keypoint_from_h5(fp, key1, key2)
        if rc == 0:
            list_mkpts.append(mkpts)
    if len(list_mkpts) > 0:
        list_mkpts = np.concatenate(list_mkpts, axis=0)
    else:
        list_mkpts = None
    return list_mkpts

def matches_merger(
    img_fnames,
    index_pairs,
    files_keypoints,
    save_file,
    feature_dir = 'featureout',
    filter_FundamentalMatrix = False,
    filter_iterations = 10,
    filter_threshold = 8,
    verbose = VERBOSE
):
    # open h5 files
    fps = [ h5py.File(file, mode="r") for file in files_keypoints ]

    with h5py.File(save_file, mode='w') as f_match:
        counter = 0
        for pair_idx in progress_bar(index_pairs):
            idx1, idx2 = pair_idx
            fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
            key1, key2 = fname1.split('/')[-1], fname2.split('/')[-1]

            # extract keypoints
            mkpts = get_keypoint_from_multi_h5(fps, key1, key2)
            if mkpts is None:
                if verbose:
                    print(f"skipped key1={key1}, key2={key2}")
                continue

            ori_size = mkpts.shape[0]
            if mkpts.shape[0] < CONFIG.MERGE_PARAMS["min_matches"]:
                continue
            
            if filter_FundamentalMatrix:
                store_inliers = { idx:0 for idx in range(mkpts.shape[0]) }
                idxs = np.array(range(mkpts.shape[0]))
                for iter in range(filter_iterations):
                    try:
                        Fm, inliers = cv2.findFundamentalMat(
                            mkpts[:,:2], mkpts[:,2:4], cv2.USAC_MAGSAC, 0.15, 0.9999, 20000)
                        if Fm is not None:
                            inliers = inliers > 0
                            inlier_idxs = idxs[inliers[:, 0]]
                            #print(inliers.shape, inlier_idxs[:5])
                            for idx in inlier_idxs:
                                store_inliers[idx] += 1
                    except:
                        print(f"Failed to cv2.findFundamentalMat. mkpts.shape={mkpts.shape}")
                inliers = np.array([ count for (idx, count) in store_inliers.items() ]) >= filter_threshold
                mkpts = mkpts[inliers]
                if mkpts.shape[0] < 15:
                    if verbose:
                        print(f"skipped key1={key1}, key2={key2}: mkpts.shape={mkpts.shape} after filtered.")
                    continue
                #print(f"filter_FundamentalMatrix: {len(store_inliers)} matches --> {mkpts.shape[0]} matches")
            
            if verbose:
                print (f'{key1}-{key2}: {ori_size} --> {mkpts.shape[0]} matches')            
            # regist tmp file
            group  = f_match.require_group(key1)
            group.create_dataset(key2, data=mkpts)
            counter += 1
    print( f"Ensembled pairs : {counter} pairs" )
    for fp in fps:
        fp.close()

def keypoints_merger(
    img_fnames,
    index_pairs,
    files_keypoints,
    feature_dir = 'featureout',
    filter_FundamentalMatrix = False,
    filter_iterations = 10,
    filter_threshold = 8,
):
    save_file = f'{feature_dir}/merge_tmp.h5'
    !rm -rf {save_file}
    matches_merger(
        img_fnames,
        index_pairs,
        files_keypoints,
        save_file,
        feature_dir = feature_dir,
        filter_FundamentalMatrix = filter_FundamentalMatrix,
        filter_iterations = filter_iterations,
        filter_threshold = filter_threshold,
    )
        
    # Let's find unique loftr pixels and group them together.
    kpts = defaultdict(list)
    match_indexes = defaultdict(dict)
    total_kpts=defaultdict(int)
    with h5py.File(save_file, mode='r') as f_match:
        for k1 in f_match.keys():
            group  = f_match[k1]
            for k2 in group.keys():
                matches = group[k2][...]
                total_kpts[k1]
                kpts[k1].append(matches[:, :2])
                kpts[k2].append(matches[:, 2:])
                current_match = torch.arange(len(matches)).reshape(-1, 1).repeat(1, 2)
                current_match[:, 0]+=total_kpts[k1]
                current_match[:, 1]+=total_kpts[k2]
                total_kpts[k1]+=len(matches)
                total_kpts[k2]+=len(matches)
                match_indexes[k1][k2]=current_match

    for k in kpts.keys():
        kpts[k] = np.round(np.concatenate(kpts[k], axis=0))
    unique_kpts = {}
    unique_match_idxs = {}
    out_match = defaultdict(dict)
    for k in kpts.keys():
        uniq_kps, uniq_reverse_idxs = torch.unique(torch.from_numpy(kpts[k]),dim=0, return_inverse=True)
        unique_match_idxs[k] = uniq_reverse_idxs
        unique_kpts[k] = uniq_kps.numpy()
    for k1, group in match_indexes.items():
        for k2, m in group.items():
            m2 = deepcopy(m)
            m2[:,0] = unique_match_idxs[k1][m2[:,0]]
            m2[:,1] = unique_match_idxs[k2][m2[:,1]]
            mkpts = np.concatenate([unique_kpts[k1][ m2[:,0]],
                                    unique_kpts[k2][  m2[:,1]],
                                   ],
                                   axis=1)
            unique_idxs_current = get_unique_idxs(torch.from_numpy(mkpts), dim=0)
            m2_semiclean = m2[unique_idxs_current]
            unique_idxs_current1 = get_unique_idxs(m2_semiclean[:, 0], dim=0)
            m2_semiclean = m2_semiclean[unique_idxs_current1]
            unique_idxs_current2 = get_unique_idxs(m2_semiclean[:, 1], dim=0)
            m2_semiclean2 = m2_semiclean[unique_idxs_current2]
            out_match[k1][k2] = m2_semiclean2.numpy()
    with h5py.File(f'{feature_dir}/keypoints.h5', mode='w') as f_kp:
        for k, kpts1 in unique_kpts.items():
            f_kp[k] = kpts1
    
    with h5py.File(f'{feature_dir}/matches.h5', mode='w') as f_match:
        for k1, gr in out_match.items():
            group  = f_match.require_group(k1)
            for k2, match in gr.items():
                group[k2] = match
    return

In [43]:
def get_img_pairs_exhaustive(img_fnames):
    index_pairs = []
    for i in range(len(img_fnames)):
        for j in range(i+1, len(img_fnames)):
            index_pairs.append((i,j))
    return index_pairs

In [44]:
# Must Use efficientnet global descriptor to get matching shortlists.
def get_global_desc(fnames, device = torch.device('cpu')):
    processor = AutoImageProcessor.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
    model = AutoModel.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
    model = model.eval()
    model = model.to(device)
    global_descs_dinov2 = []
    for i, img_fname_full in tqdm(enumerate(fnames),total= len(fnames)):
        key = os.path.splitext(os.path.basename(img_fname_full))[0]
        timg = load_torch_image(img_fname_full)
        with torch.inference_mode():
            inputs = processor(images=timg, return_tensors="pt", do_rescale=False).to(device)
            outputs = model(**inputs)
            dino_mac = F.normalize(outputs.last_hidden_state[:,1:].max(dim=1)[0], dim=1, p=2)
        global_descs_dinov2.append(dino_mac.detach().cpu())
    global_descs_dinov2 = torch.cat(global_descs_dinov2, dim=0)
    return global_descs_dinov2


def get_img_pairs_exhaustive(img_fnames):
    index_pairs = []
    for i in range(len(img_fnames)):
        for j in range(i+1, len(img_fnames)):
            index_pairs.append((i,j))
    return index_pairs

In [45]:
def get_image_pairs_shortlist_org(fnames,
                              sim_th = 0.6, # should be strict
                              min_pairs = 60,
                              exhaustive_if_less = 20,
                              device=torch.device('cpu')):
    num_imgs = len(fnames)
    if num_imgs <= exhaustive_if_less:
        return get_img_pairs_exhaustive(fnames)
    descs = get_global_desc(fnames, device=device)
    dm = torch.cdist(descs, descs, p=2).detach().cpu().numpy()

    
    mask = dm <= sim_th
    total = 0
    matching_list = []
    ar = np.arange(num_imgs)
    already_there_set = []
    for st_idx in range(num_imgs-1):
        mask_idx = mask[st_idx]
        to_match = ar[mask_idx]
        if len(to_match) < min_pairs:
            to_match = np.argsort(dm[st_idx])[:min_pairs]  
        for idx in to_match:
            if st_idx == idx:
                continue
            if dm[st_idx, idx] < 10000:
                matching_list.append(tuple(sorted((st_idx, idx.item()))))
                total+=1
    matching_list = sorted(list(set(matching_list)))
    return matching_list

In [46]:
def wrapper_keypoints(
    img_fnames, index_pairs, feature_dir, device, timings, rots
):
    #############################################################
    # get keypoints
    #############################################################
    files_keypoints = []
    
    if CONFIG.use_superglue:
        for params_sg in CONFIG.params_sgs:
            resize_to = params_sg["resize_to"]
            file_keypoints = f"{feature_dir}/matches_superglue_{resize_to}pix.h5"
            !rm -rf {file_keypoints}
            t = detect_superglue(
                img_fnames, index_pairs, feature_dir, device, 
                params_sg["sg_config"], file_keypoints, 
                resize_to=params_sg["resize_to"], 
                min_matches=params_sg["min_matches"],
            )
            gc.collect()
            files_keypoints.append( file_keypoints )
            timings['feature_matching'].append(t)

    if CONFIG.use_aliked_lightglue:
        model_name = "aliked"
        file_keypoints = f'{feature_dir}/matches_lightglue_{model_name}.h5'
        t = detect_lightglue_common(
            img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
            resize_to=CONFIG.params_aliked_lightglue["resize_to"],
            detection_threshold=CONFIG.params_aliked_lightglue["detection_threshold"],
            num_features=CONFIG.params_aliked_lightglue["num_features"],
            min_matches=CONFIG.params_aliked_lightglue["min_matches"],
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_doghardnet_lightglue:
        model_name = "doghardnet"
        file_keypoints = f'{feature_dir}/matches_lightglue_{model_name}.h5'
        t = detect_lightglue_common(
            img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
            resize_to=CONFIG.params_doghardnet_lightglue["resize_to"],
            detection_threshold=CONFIG.params_doghardnet_lightglue["detection_threshold"],
            num_features=CONFIG.params_doghardnet_lightglue["num_features"],
            min_matches=CONFIG.params_doghardnet_lightglue["min_matches"],
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_superpoint_lightglue:
        model_name = "superpoint"
        file_keypoints = f'{feature_dir}/matches_lightglue_{model_name}.h5'
        t = detect_lightglue_common(
            img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
            resize_to=CONFIG.params_superpoint_lightglue["resize_to"],
            detection_threshold=CONFIG.params_superpoint_lightglue["detection_threshold"],
            num_features=CONFIG.params_superpoint_lightglue["num_features"],
            min_matches=CONFIG.params_superpoint_lightglue["min_matches"],
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_disk_lightglue:
        model_name = "disk"
        file_keypoints = f'{feature_dir}/matches_lightglue_{model_name}.h5'
        t = detect_lightglue_common(
            img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
            resize_to=CONFIG.params_disk_lightglue["resize_to"],
            detection_threshold=CONFIG.params_disk_lightglue["detection_threshold"],
            num_features=CONFIG.params_disk_lightglue["num_features"],
            min_matches=CONFIG.params_disk_lightglue["min_matches"],
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_sift_lightglue:
        model_name = "sift"
        file_keypoints = f'{feature_dir}/matches_lightglue_{model_name}.h5'
        t = detect_lightglue_common(
            img_fnames, model_name, index_pairs, feature_dir, device, file_keypoints, rots,
            resize_to=CONFIG.params_sift_lightglue["resize_to"],
            detection_threshold=CONFIG.params_sift_lightglue["detection_threshold"],
            num_features=CONFIG.params_sift_lightglue["num_features"],
            min_matches=CONFIG.params_sift_lightglue["min_matches"],
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_loftr:
        file_keypoints = f'{feature_dir}/matches_loftr_{CONFIG.params_loftr["resize_small_edge_to"]}pix.h5'
        t = detect_loftr(
            img_fnames, index_pairs, feature_dir, device, file_keypoints,
            resize_small_edge_to=CONFIG.params_loftr["resize_small_edge_to"],
            min_matches=CONFIG.params_loftr["min_matches"],
        )
        gc.collect()
        files_keypoints.append( file_keypoints )
        timings['feature_matching'].append(t)

    if CONFIG.use_dkm:
        file_keypoints = f'{feature_dir}/matches_dkm.h5'
        t = detect_dkm(
            img_fnames, index_pairs, feature_dir, device, file_keypoints,
            resize_to=CONFIG.params_dkm["resize_to"], 
            detection_threshold=CONFIG.params_dkm["detection_threshold"], 
            num_features=CONFIG.params_dkm["num_features"], 
            min_matches=CONFIG.params_dkm["min_matches"]
        )
        gc.collect()
        files_keypoints.append(file_keypoints)
        timings['feature_matching'].append(t)

    if CONFIG.use_matchformer:
        file_keypoints = f'{feature_dir}/matches_matchformer_{CONFIG.params_matchformer["resize_to"]}pix.h5'
        t = detect_matchformer(
            img_fnames, index_pairs, feature_dir, device, file_keypoints,
            resize_to=CONFIG.params_matchformer["resize_to"],
            num_features=CONFIG.params_matchformer["num_features"], 
            min_matches=CONFIG.params_matchformer["min_matches"]
        )
        gc.collect()
        files_keypoints.append( file_keypoints )
        timings['feature_matching'].append(t)

    #############################################################
    # merge keypoints
    #############################################################
    keypoints_merger(
        img_fnames,
        index_pairs,
        files_keypoints,
        feature_dir = feature_dir,
        filter_FundamentalMatrix = CONFIG.MERGE_PARAMS["filter_FundamentalMatrix"],
        filter_iterations = CONFIG.MERGE_PARAMS["filter_iterations"],
        filter_threshold = CONFIG.MERGE_PARAMS["filter_threshold"],
    )    
    return timings


def import_into_colmap(img_dir, feature_dir ='.featureout', database_path = 'colmap.db'):
    db = COLMAPDatabase.connect(database_path)
    db.create_tables()
    single_camera = False
    fname_to_id = add_keypoints(db, feature_dir, img_dir, '', 'simple-pinhole', single_camera)
    add_matches(
        db,
        feature_dir,
        fname_to_id,
    )
    db.commit()
    return

In [47]:
def reconstruct_from_db(feature_dir, img_dir):
    result = {}
    local_timings = {'RANSAC': [], 'Reconstruction': []}
    #############################################################
    # regist keypoints from h5 into colmap db
    #############################################################
    database_path = f'{feature_dir}/colmap.db'
    if os.path.isfile(database_path):
        os.remove(database_path)
    gc.collect()
    import_into_colmap(img_dir, feature_dir=feature_dir, database_path=database_path)
    output_path = f'{feature_dir}/colmap_rec'
    os.makedirs(output_path, exist_ok=True)
    print("colmap database")
    #############################################################
    # Calculate fundamental matrix with colmap api
    #############################################################
    t=time()
    # options = pycolmap.SiftMatchingOptions()
    # options.confidence = 0.9999
    # options.max_num_trials = 20000
    # pycolmap.match_exhaustive(database_path, sift_options=options)
    pycolmap.match_exhaustive(database_path)
    # print("matching done!!!!")
    local_timings['RANSAC'].append(time() - t)
    print(f'RANSAC in {local_timings["RANSAC"][-1]:.4f} sec')

    #############################################################
    # Execute bundle adjustmnet with colmap api
    # --> Bundle adjustment Calcs Camera matrix, R and t
    #############################################################
    t=time()
    # By default colmap does not generate a reconstruction if less than 10 images are registered. Lower it to 3.
    mapper_options = pycolmap.IncrementalPipelineOptions()
    # mapper_options.mapper.filter_max_reproj_error	 = 10.0
    mapper_options.min_model_size = 8
    mapper_options.max_num_models = 25
    maps = pycolmap.incremental_mapping(database_path=database_path, image_path=img_dir, 
                                        output_path=output_path, options=mapper_options)
    print(maps)
    for map_index, rec in maps.items():
        result[map_index] = {}
        for img_id, image in rec.images.items():
            result[map_index][image.name] = {
                'R': image.cam_from_world.rotation.matrix().tolist(),
                't': image.cam_from_world.translation.tolist()
            }
    # clear_output(wait=False)
    local_timings['Reconstruction'].append(time() - t)
    print(f'Reconstruction done in {local_timings["Reconstruction"][-1]:.4f} sec')

    #############################################################
    # Extract R,t from maps 
    #############################################################            
    return result, local_timings

In [48]:
# Collect vital info from the dataset

@dataclasses.dataclass
class Prediction:
    image_id: str | None  # A unique identifier for the row -- unused otherwise. Used only on the hidden test set.
    dataset: str
    filename: str
    cluster_index: int | None = None
    rotation: np.ndarray | None = None
    translation: np.ndarray | None = None

# Set is_train=True to run the notebook on the training data.
# Set is_train=False if submitting an entry to the competition (test data is hidden, and different from what you see on the "test" folder).
is_train = True
data_dir = '/kaggle/input/image-matching-challenge-2025'
workdir = '/kaggle/working/result/'
os.makedirs(workdir, exist_ok=True)

if is_train:
    sample_submission_csv = os.path.join(data_dir, 'train_labels.csv')
else:
    sample_submission_csv = os.path.join(data_dir, 'sample_submission.csv')

samples = {}
competition_data = pd.read_csv(sample_submission_csv)
for _, row in competition_data.iterrows():
    # Note: For the test data, the "scene" column has no meaning, and the rotation_matrix and translation_vector columns are random.
    if row.dataset not in samples:
        samples[row.dataset] = []
    samples[row.dataset].append(
        Prediction(
            image_id=None if is_train else row.image_id,
            dataset=row.dataset,
            filename=row.image
        )
    )

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [49]:
import cv2
import h5py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches


def draw_keypoints_and_matches(images_input, unified_kp_path, remapped_matches_path, feature_dir='visualization_output'):
    output_dir = os.path.join(feature_dir, 'visualization_output')
    os.makedirs(output_dir, exist_ok=True)

    # Load images and determine image_keys for HDF5 lookup
    if isinstance(images_input[0], str):
        loaded_images = [cv2.imread(img_path) for img_path in images_input]
        image_keys = [os.path.basename(img_path) for img_path in images_input]
    else:
        loaded_images = images_input
        # If images_input are already arrays, you need to provide the corresponding keys
        # This part is crucial: image_keys MUST align with the HDF5 keys
        image_keys = image_keys_in_h5 # Use the predefined list for the dummy case

    # Load unified keypoints
    keypoints_data = {}
    with h5py.File(unified_kp_path, 'r') as f_kp:
        for img_name_raw in f_kp.keys():
            img_name = img_name_raw.decode('utf-8') if isinstance(img_name_raw, bytes) else img_name_raw
            keypoints_data[img_name] = f_kp[img_name_raw][()] # Access with raw key if bytes

    # Load remapped matches - CORRECTED LOGIC
    # Store (img1_key, img2_key) directly with matches for robust iteration
    matches_data_pairs = [] # Will store (img1_key, img2_key, matches_array)
    with h5py.File(remapped_matches_path, 'r') as f_matches:
        print("\n--- Loading remapped matches from HDF5 ---")
        for img1_group_key_candidate in tqdm(f_matches.keys(), desc="Loading matches"):
            img1_key = img1_group_key_candidate.decode('utf-8') if isinstance(img1_group_key_candidate, bytes) else img1_group_key_candidate

            img1_group = f_matches[img1_group_key_candidate] # Access with raw key

            if isinstance(img1_group, h5py.Group):
                for img2_dataset_key_candidate in img1_group.keys():
                    img2_key = img2_dataset_key_candidate.decode('utf-8') if isinstance(img2_dataset_key_candidate, bytes) else img2_dataset_key_candidate

                    try:
                        matches_array = img1_group[img2_dataset_key_candidate][()]
                        matches_data_pairs.append((img1_key, img2_key, matches_array))
                    except Exception as e:
                        print(f"Error loading matches for pair ({img1_key}, {img2_key}): {e}")
            else:
                print(f"Warning: Expected '{img1_key}' to be a group, but found {type(img1_group)}. Skipping its contents.")


    # --- Drawing Keypoints ---
    print("\n--- Drawing Keypoints ---")
    for i, img_key in enumerate(image_keys):
        if img_key in keypoints_data:
            img = loaded_images[i].copy()
            kpts = keypoints_data[img_key]

            for kp in kpts:
                x, y = int(kp[0]), int(kp[1])
                cv2.circle(img, (x, y), 3, (0, 255, 0), -1) # Green circle for keypoint

            output_kp_path = os.path.join(output_dir, f"keypoints_{img_key}")
            if len(img.shape) == 2:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            cv2.imwrite(output_kp_path, img)
            print(f"Keypoints drawn on {img_key}, saved to {output_kp_path}")
        else:
            print(f"No keypoints found for {img_key} in unified keypoints file.")

    # --- Drawing Matches ---
    print("\n--- Drawing Matches ---")
    # Iterate through the (img1_key, img2_key, matches) tuples directly
    for img_name1, img_name2, matches in matches_data_pairs:
        # We no longer need to split img_pair_key, as we have img_name1 and img_name2 directly

        # Find the actual image objects and their keypoints using image_keys list
        try:
            img1_idx = image_keys.index(img_name1)
            img2_idx = image_keys.index(img_name2)
        except ValueError:
            print(f"Skipping matches for {img_name1}-{img_name2}: One or both image names not found in the provided 'images' list/keys.")
            continue

        img1 = loaded_images[img1_idx].copy()
        img2 = loaded_images[img2_idx].copy()

        kpts1 = keypoints_data.get(img_name1)
        kpts2 = keypoints_data.get(img_name2)

        if kpts1 is None or kpts2 is None:
            print(f"Skipping matches for {img_name1}-{img_name2}: keypoints not found for one or both images in unified keypoints.")
            continue
        if len(matches) == 0:
            print(f"No matches to draw for {img_name1}-{img_name2}.")
            continue

        # Ensure images are 3 channels for drawing lines
        if len(img1.shape) == 2:
            img1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
        if len(img2.shape) == 2:
            img2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)

        # Create a concatenated image for drawing matches
        h1, w1 = img1.shape[:2]
        h2, w2 = img2.shape[:2]
        max_h = max(h1, h2)
        matched_img = np.zeros((max_h, w1 + w2, 3), dtype=np.uint8)
        matched_img[0:h1, 0:w1] = img1
        matched_img[0:h2, w1:w1+w2] = img2

        num_matches_to_draw = min(len(matches), 200) # Draw up to 200 matches to avoid clutter, adjust as needed

        for i in range(num_matches_to_draw):
            match = matches[i]
            kp1_idx, kp2_idx = int(match[0]), int(match[1])

            # Bounds check for keypoint indices
            if kp1_idx >= len(kpts1) or kp2_idx >= len(kpts2):
                # print(f"Warning: Match index out of bounds for {img_name1}-{img_name2}. Skipping match {kp1_idx}-{kp2_idx}.")
                continue

            pt1 = tuple(map(int, kpts1[kp1_idx][:2]))
            pt2 = tuple(map(int, kpts2[kp2_idx][:2]))

            # Draw circles on the concatenated image
            cv2.circle(matched_img, pt1, 5, (0, 0, 255), 2) # Red circle on img1 side
            cv2.circle(matched_img, (pt2[0] + w1, pt2[1]), 5, (255, 0, 0), 2) # Blue circle on img2 side

            # Draw a line connecting the matched keypoints
            color = tuple(np.random.randint(0, 255, 3).tolist())
            cv2.line(matched_img, pt1, (pt2[0] + w1, pt2[1]), color, 1)

        output_match_path = os.path.join(output_dir, f"matches_{img_name1}_{img_name2}.png")
        cv2.imwrite(output_match_path, matched_img)
        print(f"Matches drawn between {img_name1} and {img_name2}, saved to {output_match_path}")


# Example call (replace with your actual 'images' list)
# If your 'images' are file paths:
# images_file_paths = ['path/to/your/image1.jpg', 'path/to/your/image2.jpg', ...]
# draw_keypoints_and_matches(images_file_paths, unified_kp_path, remapped_matches_path)

# If your 'images' are loaded numpy arrays (as in the dummy example above):
# draw_keypoints_and_matches(images, unified_kp_path, remapped_matches_path)

In [54]:
gc.collect()

max_images = None  # Used For debugging only. Set to None to disable.
datasets_to_process = None  # Not the best convention, but None means all datasets.

if is_train:
    # max_images = 5

    # Note: When running on the training dataset, the notebook will hit the time limit and die. Use this filter to run on a few specific datasets.
    datasets_to_process = [
    	# New data.
    	# 'amy_gardens',
    	'ETs',
    	# 'fbk_vineyard',
    	# 'stairs',
    	# Data from IMC 2023 and 2024.
    	# 'imc2024_dioscuri_baalshamin',
    	# 'imc2023_theather_imc2024_church',
    	# 'imc2023_heritage',
    	# 'imc2023_haiper',
    	# 'imc2024_lizard_pond',
    	# Crowdsourced PhotoTourism data.
    	# 'pt_stpeters_stpauls',
    	# 'pt_brandenburg_british_buckingham',
    	# 'pt_piazzasanmarco_grandplace',
    	# 'pt_sacrecoeur_trevi_tajmahal',
    ]

timings = {
    'rotation_detection':[],
    "shortlisting":[],
    "feature_matching":[],
    "RANSAC": [],
    "Reconstruction": [],
}
mapping_result_strs = []

# Load DINOv2 model (for feature extraction, not global descriptor here)
print("Loading DINOv2 model for patch feature extraction...")
dino_processor = AutoImageProcessor.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
dino_model = AutoModel.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
dino_model = dino_model.eval().to(device)
print("DINOv2 model loaded.")

print(f"CONFIG.ROTATION_CORRECTION: {CONFIG.ROTATION_CORRECTION}")

with concurrent.futures.ProcessPoolExecutor(max_workers=CONFIG.NUM_CORES) as executors:
    # print (f"Extracting on device {device}")
    for dataset, predictions in samples.items():
        if datasets_to_process and dataset not in datasets_to_process:
            print(f'Skipping "{dataset}"')
            continue
        
        images_dir = os.path.join(data_dir, 'train' if is_train else 'test', dataset)
        images = [os.path.join(images_dir, p.filename) for p in predictions]
        if max_images is not None:
            images = images[:max_images]
    
        print(f'\nProcessing dataset "{dataset}": {len(images)} images')
    
        filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}
    
        feature_dir = os.path.join(workdir, 'featureout', dataset)
        os.makedirs(feature_dir, exist_ok=True)
    
        # Wrap algos in try-except blocks so we can populate a submission even if one scene crashes.
        try:
            # --- Pipeline Execution ---
            
            #############################################################
            # get image pairs
            #############################################################
            # 1. Detect ALIKED features and combine with DINO patch features
            t = time()
            index_pairs = get_image_pairs_shortlist_org(
                images,
                sim_th = 0.2, # should be strict
                min_pairs = 60, # we should select at least min_pairs PER IMAGE with biggest similarity
                exhaustive_if_less = 20,
                device=device
            )
            timings['shortlisting'].append(time() - t)
            print (f'Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec')
            gc.collect()

            #############################################################
            # get image rotations
            #############################################################
            t = time()

            rotation_estimator = RotationEstimator(device=device)
            rot_dict = rotation_estimator.run(images, index_pairs)
            # print(rot_dict)

            t = time()-t
            timings['rotation_detection'].append(t)
            print(f'rotation_detection for {len(images)} images : {t:.4f} sec')
            gc.collect()

            #############################################################
            # get keypoints
            #############################################################    
            t=time()

            lightglue_25groups_matcher_pipeline = LightGlueMatcherPipeline_sep(
                extractor_cfg = {"max_num_keypoints": 4096, "detection_threshold":0.1}, 
                device= device)
            # print("lightglue_25groups_matcher_pipeline initialization done")            
            unified_kp_path, remapped_matches_path = save_keypoints_and_matches_split(
                img_fnames = images,
                rot_dict = rot_dict,
                matcher_pip = lightglue_25groups_matcher_pipeline,
                feature_dir = feature_dir,
                device = device)
            
            timings['feature_matching'].append(time() - t)
            gc.collect()
            print (f'Local feature extracting and matching. Done in {time() - t:.4f} sec')
            draw_keypoints_and_matches(images, unified_kp_path, remapped_matches_path)
            #############################################################
            # kick COLMAP reconstruction
            #############################################################            
            future = executors.submit(
                reconstruct_from_db, 
                feature_dir, images_dir)
            maps, local_timings = future.result()
            # 合并 timings（主进程里）
            for k in local_timings:
                timings[k].extend(local_timings[k])
            # clear_output(wait=False)
            print(maps)
            registered = 0
            for map_index, cur_map in maps.items():  # cur_map: image_name → {'R': list, 't': list}
                for image_name, pose in cur_map.items():
                    idx = filename_to_index[image_name]
                    pred = predictions[idx]
                    pred.cluster_index = map_index
                    pred.rotation = np.array(pose['R'])  # convert back to np.ndarray
                    pred.translation = np.array(pose['t'])
                    registered += 1
            mapping_result_str = f"Dataset  {dataset} -> Registered {registered} / {len(images)} images with {len(maps)} clusters"
            mapping_result_strs.append(mapping_result_str)
            print(mapping_result_str)

            gc.collect()
        except Exception as e:
            print(e)
            # raise e
            mapping_result_str = f'Dataset "{dataset}" -> Failed!'
            mapping_result_strs.append(mapping_result_str)
            print(mapping_result_str)

print('\nResults')
for s in mapping_result_strs:
    print(s)

print('\nTimings')
for k, v in timings.items():
    print(f'{k} -> total={sum(v):.02f} sec.')

Loading DINOv2 model for patch feature extraction...
DINOv2 model loaded.
CONFIG.ROTATION_CORRECTION: True
Skipping "imc2023_haiper"
Skipping "imc2023_heritage"
Skipping "imc2023_theather_imc2024_church"
Skipping "imc2024_dioscuri_baalshamin"
Skipping "imc2024_lizard_pond"
Skipping "pt_brandenburg_british_buckingham"
Skipping "pt_piazzasanmarco_grandplace"
Skipping "pt_sacrecoeur_trevi_tajmahal"
Skipping "pt_stpeters_stpauls"
Skipping "amy_gardens"
Skipping "fbk_vineyard"

Processing dataset "ETs": 22 images


100%|██████████| 22/22 [00:01<00:00, 18.84it/s]


Shortlisting. Number of pairs to match: 231. Done in 1.4056 sec


Finding valid rotations: 100%|██████████| 231/231 [00:46<00:00,  5.02it/s]


rotation_detection for 22 images : 46.7220 sec


Matching and storing:   0%|          | 0/19 [00:00<?, ?it/s]

outliers_out_et001.png_0-outliers_out_et002.png:37
outliers_out_et001.png_0-another_et_another_et002.png:9
outliers_out_et001.png_0-another_et_another_et003.png:19


Matching and storing:   5%|▌         | 1/19 [00:03<00:54,  3.02s/it]

outliers_out_et001.png_2-another_et_another_et001.png:12


Matching and storing:  11%|█         | 2/19 [00:03<00:28,  1.69s/it]

outliers_out_et002.png_3-another_et_another_et004.png:7
et_et007.png_0-et_et003.png:23
et_et007.png_0-et_et006.png:185
et_et007.png_0-et_et001.png:45
et_et007.png_0-et_et004.png:0
et_et007.png_0-et_et002.png:86
et_et007.png_0-et_et008.png:494
et_et007.png_0-et_et005.png:769
et_et007.png_0-et_et000.png:4


Matching and storing:  16%|█▌        | 3/19 [00:10<01:07,  4.20s/it]

et_et007.png_0-another_et_another_et002.png:11
et_et003.png_0-et_et006.png:17
et_et003.png_0-et_et001.png:1101
et_et003.png_0-et_et004.png:659
et_et003.png_0-et_et002.png:79
et_et003.png_1-et_et008.png:23
et_et003.png_0-et_et005.png:16


Matching and storing:  21%|██        | 4/19 [00:16<01:08,  4.57s/it]

et_et003.png_0-et_et000.png:2144
et_et006.png_0-et_et001.png:65
et_et006.png_0-et_et004.png:0
et_et006.png_0-et_et002.png:66
et_et006.png_0-et_et008.png:193
et_et006.png_0-et_et005.png:384
et_et006.png_0-et_et000.png:43


Matching and storing:  26%|██▋       | 5/19 [00:21<01:08,  4.87s/it]

et_et006.png_0-another_et_another_et004.png:24
et_et001.png_0-et_et004.png:1023
et_et001.png_0-et_et002.png:1231
et_et001.png_0-et_et008.png:0
et_et001.png_0-et_et005.png:12


Matching and storing:  32%|███▏      | 6/19 [00:25<00:58,  4.48s/it]

et_et001.png_0-et_et000.png:1155
et_et004.png_0-et_et002.png:628
et_et004.png_0-et_et008.png:6
et_et004.png_0-et_et005.png:9
et_et004.png_0-et_et000.png:765


Matching and storing:  37%|███▋      | 7/19 [00:29<00:51,  4.25s/it]

et_et004.png_2-another_et_another_et004.png:6
et_et002.png_0-et_et008.png:0
et_et002.png_0-et_et005.png:9


Matching and storing:  42%|████▏     | 8/19 [00:31<00:39,  3.58s/it]

et_et002.png_0-et_et000.png:479
et_et008.png_0-et_et005.png:521
et_et008.png_0-et_et000.png:11


Matching and storing:  47%|████▋     | 9/19 [00:33<00:31,  3.15s/it]

et_et008.png_0-another_et_another_et004.png:12
et_et005.png_0-et_et000.png:4
et_et005.png_0-another_et_another_et006.png:19
et_et005.png_0-another_et_another_et002.png:20


Matching and storing:  53%|█████▎    | 10/19 [00:36<00:27,  3.05s/it]

et_et005.png_0-another_et_another_et004.png:30


Matching and storing:  58%|█████▊    | 11/19 [00:36<00:18,  2.35s/it]

et_et000.png_0-another_et_another_et007.png:9
another_et_another_et006.png_0-another_et_another_et002.png:168
another_et_another_et006.png_0-another_et_another_et004.png:139
another_et_another_et006.png_0-another_et_another_et007.png:197
another_et_another_et006.png_0-another_et_another_et008.png:143
another_et_another_et006.png_0-another_et_another_et003.png:174
another_et_another_et006.png_0-another_et_another_et005.png:162
another_et_another_et006.png_0-another_et_another_et001.png:210


Matching and storing:  63%|██████▎   | 12/19 [00:42<00:22,  3.26s/it]

another_et_another_et006.png_0-another_et_another_et009.png:55
another_et_another_et002.png_0-another_et_another_et004.png:851
another_et_another_et002.png_0-another_et_another_et007.png:127
another_et_another_et002.png_0-another_et_another_et008.png:66
another_et_another_et002.png_0-another_et_another_et003.png:518
another_et_another_et002.png_0-another_et_another_et005.png:703


Matching and storing:  68%|██████▊   | 13/19 [00:46<00:20,  3.48s/it]

another_et_another_et002.png_0-another_et_another_et001.png:1222
another_et_another_et010.png_0-another_et_another_et007.png:43
another_et_another_et010.png_0-another_et_another_et008.png:127


Matching and storing:  74%|███████▎  | 14/19 [00:48<00:14,  3.00s/it]

another_et_another_et010.png_0-another_et_another_et009.png:47
another_et_another_et004.png_0-another_et_another_et007.png:82
another_et_another_et004.png_0-another_et_another_et008.png:72
another_et_another_et004.png_0-another_et_another_et003.png:472
another_et_another_et004.png_0-another_et_another_et005.png:820
another_et_another_et004.png_0-another_et_another_et001.png:636


Matching and storing:  79%|███████▉  | 15/19 [00:51<00:12,  3.23s/it]

another_et_another_et004.png_0-another_et_another_et009.png:26
another_et_another_et007.png_0-another_et_another_et008.png:192
another_et_another_et007.png_0-another_et_another_et003.png:0
another_et_another_et007.png_0-another_et_another_et005.png:73
another_et_another_et007.png_0-another_et_another_et001.png:91


Matching and storing:  84%|████████▍ | 16/19 [00:55<00:09,  3.21s/it]

another_et_another_et007.png_0-another_et_another_et009.png:56
another_et_another_et008.png_0-another_et_another_et003.png:42
another_et_another_et008.png_0-another_et_another_et005.png:0
another_et_another_et008.png_0-another_et_another_et001.png:54


Matching and storing:  89%|████████▉ | 17/19 [00:57<00:06,  3.02s/it]

another_et_another_et008.png_0-another_et_another_et009.png:120
another_et_another_et003.png_0-another_et_another_et005.png:445
another_et_another_et003.png_0-another_et_another_et001.png:561


Matching and storing:  95%|█████████▍| 18/19 [00:59<00:02,  2.67s/it]

another_et_another_et003.png_0-another_et_another_et009.png:32


Matching and storing: 100%|██████████| 19/19 [01:00<00:00,  3.17s/it]

another_et_another_et005.png_0-another_et_another_et001.png:518





Local feature extracting and matching. Done in 60.6398 sec

--- Loading remapped matches from HDF5 ---


Loading matches: 100%|██████████| 19/19 [00:00<00:00, 1589.00it/s]


--- Drawing Keypoints ---
Keypoints drawn on outliers_out_et001.png, saved to visualization_output/visualization_output/keypoints_outliers_out_et001.png
No keypoints found for outliers_out_et003.png in unified keypoints file.
Keypoints drawn on outliers_out_et002.png, saved to visualization_output/visualization_output/keypoints_outliers_out_et002.png
Keypoints drawn on et_et007.png, saved to visualization_output/visualization_output/keypoints_et_et007.png





Keypoints drawn on et_et003.png, saved to visualization_output/visualization_output/keypoints_et_et003.png
Keypoints drawn on et_et006.png, saved to visualization_output/visualization_output/keypoints_et_et006.png
Keypoints drawn on et_et001.png, saved to visualization_output/visualization_output/keypoints_et_et001.png
Keypoints drawn on et_et004.png, saved to visualization_output/visualization_output/keypoints_et_et004.png
Keypoints drawn on et_et002.png, saved to visualization_output/visualization_output/keypoints_et_et002.png
Keypoints drawn on et_et008.png, saved to visualization_output/visualization_output/keypoints_et_et008.png
Keypoints drawn on et_et005.png, saved to visualization_output/visualization_output/keypoints_et_et005.png
Keypoints drawn on et_et000.png, saved to visualization_output/visualization_output/keypoints_et_et000.png
Keypoints drawn on another_et_another_et006.png, saved to visualization_output/visualization_output/keypoints_another_et_another_et006.png
Keypo

100%|██████████| 21/21 [00:00<00:00, 83.39it/s]
 46%|████▌     | 79/171 [00:00<00:00, 3608.10it/s]


colmap database
RANSAC in 1.8554 sec
{0: Reconstruction(num_reg_images=10, num_cameras=10, num_points3D=864, num_observations=3721), 1: Reconstruction(num_reg_images=9, num_cameras=9, num_points3D=1823, num_observations=6630)}
Reconstruction done in 4.0681 sec
{0: {'another_et_another_et001.png': {'R': [[0.9979197979958444, -0.04841849398394174, 0.04256437722157083], [0.0468183942330182, 0.9981872562701275, 0.037818505805445615], [-0.044318334009469135, -0.035747039880832554, 0.9983777012786209]], 't': [-2.8449509159940405, -1.950235503918883, 2.763808234303306]}, 'another_et_another_et002.png': {'R': [[0.9985505841613178, -0.04611294642655011, 0.027754766130398026], [0.04575075837917251, 0.9988610321750504, 0.013546457467672248], [-0.02834782141226281, -0.012257021418558287, 0.9995229694444863]], 't': [-2.6882196727066243, -1.162810273027691, 1.2168281815590467]}, 'another_et_another_et003.png': {'R': [[0.9932250900634779, -0.08664963701617243, 0.07743229864444005], [0.090220489771372

In [55]:
# Helpers
array_to_str = lambda array: ';'.join([f"{x:.09f}" for x in array])
none_to_str = lambda n: ';'.join(['nan'] * n)

submission_file = '/kaggle/working/submission.csv'
with open(submission_file, 'w') as f:
    if is_train:
        f.write('dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset, predictions in samples.items():
            for prediction in predictions:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'

                # ✅ `rotation` is a list of lists, flatten it
                if prediction.rotation is None:
                    rotation_str = none_to_str(9)
                else:
                    rotation_flat =  prediction.rotation.flatten()  # flatten 3x3 list -> 9 elems
                    rotation_str = array_to_str(rotation_flat)

                # ✅ `translation` is a flat list
                if prediction.translation is None:
                    translation_str = none_to_str(3)
                else:
                    translation_str = array_to_str(prediction.translation)

                f.write(f'{prediction.dataset},{cluster_name},{prediction.filename},{rotation_str},{translation_str}\n')
    else:
        f.write('image_id,dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset, predictions in samples.items():
            for prediction in predictions:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'

                if prediction.rotation is None:
                    rotation_str = none_to_str(9)
                else:
                    rotation_flat =  prediction.rotation.flatten()
                    rotation_str = array_to_str(rotation_flat)

                if prediction.translation is None:
                    translation_str = none_to_str(3)
                else:
                    translation_str = array_to_str(prediction.translation)

                f.write(f'{prediction.image_id},{prediction.dataset},{cluster_name},{prediction.filename},{rotation_str},{translation_str}\n')

# Preview the output
!head {submission_file}


dataset,scene,image,rotation_matrix,translation_vector
imc2023_haiper,outliers,fountain_image_116.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_108.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_101.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_082.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_071.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_025.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_000.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_007.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_012.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan


In [56]:
# Definitely Compute results if running on the training set.
# Do not do this when submitting a notebook for scoring. All you have to do is save your submission to /kaggle/working/submission.csv.

if is_train:
    t = time()
    final_score, dataset_scores = metric.score(
        gt_csv='/kaggle/input/image-matching-challenge-2025/train_labels.csv',
        user_csv=submission_file,
        thresholds_csv='/kaggle/input/image-matching-challenge-2025/train_thresholds.csv',
        mask_csv=None if is_train else os.path.join(data_dir, 'mask.csv'),
        inl_cf=0,
        strict_cf=-1,
        verbose=True,
    )
    print(f'Computed metric in: {time() - t:.02f} sec.')

imc2023_haiper: score=0.00% (mAA=0.00%, clusterness=100.00%)
imc2023_heritage: score=0.00% (mAA=0.00%, clusterness=100.00%)
imc2023_theather_imc2024_church: score=0.00% (mAA=0.00%, clusterness=100.00%)
imc2024_dioscuri_baalshamin: score=0.00% (mAA=0.00%, clusterness=100.00%)
imc2024_lizard_pond: score=0.00% (mAA=0.00%, clusterness=100.00%)
pt_brandenburg_british_buckingham: score=0.00% (mAA=0.00%, clusterness=100.00%)
pt_piazzasanmarco_grandplace: score=0.00% (mAA=0.00%, clusterness=100.00%)
pt_sacrecoeur_trevi_tajmahal: score=0.00% (mAA=0.00%, clusterness=100.00%)
pt_stpeters_stpauls: score=0.00% (mAA=0.00%, clusterness=100.00%)
amy_gardens: score=0.00% (mAA=0.00%, clusterness=100.00%)
fbk_vineyard: score=0.00% (mAA=0.00%, clusterness=100.00%)
ETs: score=32.26% (mAA=19.23%, clusterness=100.00%)
stairs: score=0.00% (mAA=0.00%, clusterness=100.00%)
Average over all datasets: score=2.48% (mAA=1.48%, clusterness=100.00%)
Computed metric in: 0.25 sec.
