## Example submission

Image Matching Challenge 2025: https://www.kaggle.com/competitions/image-matching-challenge-2025

This notebook creates a simple submission using ALIKED and LightGlue, plus DINO for shortlisting, on GPU. Adapted from [last year](https://www.kaggle.com/code/oldufo/imc-2024-submission-example).

Remember to select an accelerator on the sidebar to the right, and to disable internet access when submitting a notebook to the competition.

In [1]:
# IMPORTANT 
#Install dependencies and copy model weights to run the notebook without internet access when submitting to the competition.

!pip install --no-index /kaggle/input/imc2024-packages-lightglue-rerun-kornia/* --no-deps
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp /kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv-pth

Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia-0.7.2-py2.py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_moons-0.2.9-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_rs-0.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/lightglue-0.0-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/pycolmap-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/rerun_sdk-0.15.0a2-cp38-abi3-manylinux_2_31_x86_64.whl
Installing collected packages: rerun-sdk, pycolmap, lightglue, kornia-rs, kornia-moons, kornia
  Attempting uninstall: kornia-rs
    Found existing installation: kornia_rs 0.1.8
    Uninstalling kornia_rs-0.1.8:
      Successfully uninstalled kornia_rs-0.1.8
  Attempting uninstall: kornia
   

In [2]:
import sys
import os
from tqdm import tqdm
from time import time, sleep
import gc
import numpy as np
import h5py
import dataclasses
import pandas as pd
from IPython.display import clear_output
from collections import defaultdict
from copy import deepcopy
from PIL import Image

import cv2
import torch
import torch.nn.functional as F
import kornia as K
import kornia.feature as KF

import torch
from lightglue import match_pair
from lightglue import ALIKED, LightGlue
from lightglue.utils import load_image, rbd
from transformers import AutoImageProcessor, AutoModel

# IMPORTANT Utilities: importing data into colmap and competition metric
import pycolmap
sys.path.append('/kaggle/input/imc25-utils')
from database import *
from h5_to_db import *
import metric

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [3]:

# print("PyTorch version:", torch.__version__)
# import sys
# print("Python version:", sys.version)

# print("CUDA available:", torch.cuda.is_available())
# print("CUDA version:", torch.version.cuda)
# print("Device count:", torch.cuda.device_count())
# print("Current device:", torch.cuda.current_device())
# print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))


In [4]:
# Do not forget to select an accelerator on the sidebar to the right.
device = K.utils.get_cuda_device_if_available(0)
print(f'{device=}')

device=device(type='cuda', index=0)


In [5]:
# !zip -r /kaggle/working/result/featureout/ETs/featurept.zip /kaggle/working/result/featureout/ETs/featurept


In [6]:
from pathlib import Path

def draw_and_save_feature_points(image_path, keypoints, result_folder):
    """
    Draw feature points on the image and save to result folder.

    Args:
        image_path (str or Path): Path to the input image.
        keypoints (np.ndarray): (N, 2) array of (x, y) coordinates.
        result_folder (str or Path): Folder to save the output image.
    """
    # Load image in BGR
    return
    image = cv2.imread(str(image_path))
    if image is None:
        raise ValueError(f"Cannot read image from {image_path}")

    # Draw keypoints
    for (x, y) in keypoints.astype(int):
        cv2.circle(image, (x, y), radius=2, color=(0, 255, 0), thickness=-1)  # Green dots

    img_fname = image_path.split('/')[-1]

    result_folder = Path(result_folder)
    img_fname = Path(image_path).stem  # no extension
    output_path = result_folder / f"{img_fname}_fe.png"

    cv2.imwrite(str(output_path), image)
    print(f"Saved: {output_path}")

In [7]:
def load_torch_image(fname, device=torch.device('cpu')):
    img = K.io.load_image(fname, K.io.ImageLoadType.RGB32, device=device)[None, ...]
    return img

def gem(x, p=3, eps=1e-6):
    return F.normalize(torch.mean(x.clamp(min=eps).pow(p), dim=1).pow(1/p), p=2, dim=1)



# Must Use efficientnet global descriptor to get matching shortlists.
def get_global_desc(fnames, device = torch.device('cpu'), is_max = True):
    processor = AutoImageProcessor.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
    model = AutoModel.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
    model = model.eval()
    model = model.to(device)
    global_descs_dinov2 = []
    for i, img_fname_full in tqdm(enumerate(fnames),total= len(fnames)):
        key = os.path.splitext(os.path.basename(img_fname_full))[0]
        timg = load_torch_image(img_fname_full)
        with torch.inference_mode():
            inputs = processor(images=timg, return_tensors="pt", do_rescale=False).to(device)
            outputs = model(**inputs)
            if is_max:
                dino_fea = gem(outputs.last_hidden_state[:,1:], p=3)
            else:
                dino_fea = F.normalize(outputs.last_hidden_state[:,1:].max(dim=1)[0], dim=1, p=2)
        global_descs_dinov2.append(dino_fea.detach().cpu())
    global_descs_dinov2 = torch.cat(global_descs_dinov2, dim=0)
    return global_descs_dinov2


def get_img_pairs_exhaustive(img_fnames):
    index_pairs = []
    for i in range(len(img_fnames)):
        for j in range(i+1, len(img_fnames)):
            index_pairs.append((i,j))
    return index_pairs


def get_image_pairs_shortlist(fnames,
                              sim_th = 0.6, # should be strict
                              min_pairs = 10,
                              exhaustive_if_less = 20,
                              device=torch.device('cpu'),
                              max_pairs = 30):
    num_imgs = len(fnames)
    if num_imgs <= exhaustive_if_less:
        return get_img_pairs_exhaustive(fnames)
    descs = get_global_desc(fnames, device=device, is_max = False)
    dm = torch.cdist(descs, descs, p=2).detach().cpu().numpy()
    # print(dm)
    # 只分析上三角（去掉对角线），避免重复
    triu_indices = np.triu_indices_from(dm, k=1)
    dm_flat = dm[triu_indices]
    
    # 打印统计信息
    print("Distance Matrix Statistics:")
    print(f"Min:  {dm_flat.min():.4f}")
    print(f"Max:  {dm_flat.max():.4f}")
    print(f"Mean: {dm_flat.mean():.4f}")
    print(f"Std:  {dm_flat.std():.4f}")
    print(f"20%:  {np.percentile(dm_flat, 20):.4f}")
    print(f"30%:  {np.percentile(dm_flat, 30):.4f}")
    print(f"USED 50%:  {np.percentile(dm_flat, 50):.4f}")
    print(f"75%:  {np.percentile(dm_flat, 75):.4f}")
    
    thr = np.percentile(dm_flat, 50)
    threshold = max(dm_flat.mean() + np.sqrt(3) * dm_flat.std(), thr)
    # removing half
    mask = dm <= thr
    total = 0
    matching_list = []
    ar = np.arange(num_imgs)
    already_there_set = []
    for st_idx in range(num_imgs-1):
        mask_idx = mask[st_idx]
        to_match = ar[mask_idx]
        if len(to_match) < min_pairs :
            to_match = np.argsort(dm[st_idx])[:min_pairs]  
        if len(to_match) >= max_pairs:
            to_match = np.argsort(dm[st_idx])[:max_pairs]  
        for idx in to_match:
            if st_idx == idx:
                continue
            if dm[st_idx, idx] < threshold:
                matching_list.append(tuple(sorted((st_idx, idx.item()))))
                total+=1
    matching_list = sorted(list(set(matching_list)))
    return matching_list

def detect_aliked(img_fnames,
                  feature_dir = '.featureout',
                  num_features = 4096,
                  resize_to = 2048,
                  device=torch.device('cpu')):
    dtype = torch.float32 # ALIKED has issues with float16
    extractor = ALIKED(max_num_keypoints=num_features, detection_threshold=0.15).eval().to(device, dtype)
    extractor.preprocess_conf["resize"] = resize_to
    if not os.path.isdir(feature_dir):
        os.makedirs(feature_dir)

    # Calculate the expected scale factor ALIKED will apply
    # ALIKED uses preprocess_conf["resize"] on the *input image tensor*
    # Input image tensor size will be (H, W) after Kornia loading/conversion
    
    draw_feature_dir = os.path.join(feature_dir, 'featurept')
    os.makedirs(draw_feature_dir, exist_ok=True)
    
    with h5py.File(f'{feature_dir}/keypoints.h5', mode='w') as f_kp, \
         h5py.File(f'{feature_dir}/descriptors.h5', mode='w') as f_desc:
        for img_path in tqdm(img_fnames):
            img_fname = img_path.split('/')[-1]
            key = img_fname
            with torch.inference_mode():
                image0 = load_torch_image(img_path, device=device).to(dtype)
                feats0 = extractor.extract(image0)  # auto-resize the image, disable with resize=None
                kpts = feats0['keypoints'].reshape(-1, 2).detach().cpu().numpy()
                descs = feats0['descriptors'].reshape(len(kpts), -1).detach().cpu().numpy()
                f_kp[key] = kpts
                f_desc[key] = descs
                draw_and_save_feature_points(img_path, kpts, draw_feature_dir)
    return

def match_with_lightglue(img_fnames,
                   index_pairs,
                   feature_dir = '.featureout',
                   device=torch.device('cpu'),
                   min_matches=30,
                   verbose=False,
                   match_score_thresh = 0.25):
    lg_matcher = KF.LightGlueMatcher("aliked", {"width_confidence": -1,
                                                "depth_confidence": -1,
                                                 "mp": True if 'cuda' in str(device) else False}).eval().to(device)
    with h5py.File(f'{feature_dir}/keypoints.h5', mode='r') as f_kp, \
        h5py.File(f'{feature_dir}/descriptors.h5', mode='r') as f_desc, \
        h5py.File(f'{feature_dir}/matches.h5', mode='w') as f_match:
        for pair_idx in tqdm(index_pairs):
            idx1, idx2 = pair_idx
            fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
            key1, key2 = fname1.split('/')[-1], fname2.split('/')[-1]
            kp1 = torch.from_numpy(f_kp[key1][...]).to(device)
            kp2 = torch.from_numpy(f_kp[key2][...]).to(device)
            desc1 = torch.from_numpy(f_desc[key1][...]).to(device)
            desc2 = torch.from_numpy(f_desc[key2][...]).to(device)
            with torch.inference_mode():
                dists, idxs = lg_matcher(desc1,
                                         desc2,
                                         KF.laf_from_center_scale_ori(kp1[None]),
                                         KF.laf_from_center_scale_ori(kp2[None]))
            if len(idxs)  == 0:
                continue
            n_matches = len(idxs)
            # if verbose:
            #     print (f'{key1}-{key2}: {n_matches} matches')
            # group  = f_match.require_group(key1)
            # if n_matches >= min_matches:
            #      group.create_dataset(key2, data=idxs.detach().cpu().numpy().reshape(-1, 2))
            # Filter by match score (distance)
            
            mask = dists > match_score_thresh
            idxs_filtered = idxs[mask.squeeze(1)]
    
            n_matches = len(idxs_filtered)
            if n_matches == 0:
                continue
    
            if verbose:
                print(f'{key1}-{key2}: {n_matches} matches (filtered from {len(idxs)})')
    
            group = f_match.require_group(key1)
            if n_matches >= min_matches:
                group.create_dataset(key2, data=idxs_filtered.detach().cpu().numpy().reshape(-1, 2))

    return



def import_into_colmap(img_dir, feature_dir ='.featureout', database_path = 'colmap.db'):
    db = COLMAPDatabase.connect(database_path)
    db.create_tables()
    single_camera = False
    fname_to_id = add_keypoints(db, feature_dir, img_dir, '', 'simple-pinhole', single_camera)
    add_matches(
        db,
        feature_dir,
        fname_to_id,
    )
    db.commit()
    return

In [8]:
# Collect vital info from the dataset

@dataclasses.dataclass
class Prediction:
    image_id: str | None  # A unique identifier for the row -- unused otherwise. Used only on the hidden test set.
    dataset: str
    filename: str
    cluster_index: int | None = None
    rotation: np.ndarray | None = None
    translation: np.ndarray | None = None

# Set is_train=True to run the notebook on the training data.
# Set is_train=False if submitting an entry to the competition (test data is hidden, and different from what you see on the "test" folder).
is_train = True

data_dir = '/kaggle/input/image-matching-challenge-2025'
workdir = '/kaggle/working/result/'
os.makedirs(workdir, exist_ok=True)

if is_train:
    sample_submission_csv = os.path.join(data_dir, 'train_labels.csv')
else:
    sample_submission_csv = os.path.join(data_dir, 'sample_submission.csv')

samples = {}
competition_data = pd.read_csv(sample_submission_csv)
for _, row in competition_data.iterrows():
    # Note: For the test data, the "scene" column has no meaning, and the rotation_matrix and translation_vector columns are random.
    if row.dataset not in samples:
        samples[row.dataset] = []
    samples[row.dataset].append(
        Prediction(
            image_id=None if is_train else row.image_id,
            dataset=row.dataset,
            filename=row.image
        )
    )

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [9]:

gc.collect()

max_images = None  # Used For debugging only. Set to None to disable.
datasets_to_process = None  # Not the best convention, but None means all datasets.

if is_train:
    # max_images = 5

    # Note: When running on the training dataset, the notebook will hit the time limit and die. Use this filter to run on a few specific datasets.
    datasets_to_process = [
    	# New data.
    	'amy_gardens',
    	'ETs',
    	'fbk_vineyard',
    	'stairs', 
    	# Data from IMC 2023 and 2024.
    	# 'imc2024_dioscuri_baalshamin',
    	# 'imc2023_theather_imc2024_church',
    	'imc2023_heritage',
    	'imc2023_haiper',
    	# 'imc2024_lizard_pond',
    	# Crowdsourced PhotoTourism data.
    	# 'pt_stpeters_stpauls',
    	# 'pt_brandenburg_british_buckingham',
    	# 'pt_piazzasanmarco_grandplace',
    	# 'pt_sacrecoeur_trevi_tajmahal',
    ]

timings = {
    "shortlisting":[],
    "feature_detection": [],
    "feature_matching":[],
    "RANSAC": [],
    "Reconstruction": [],
}
mapping_result_strs = []


print (f"Extracting on device {device}")
for dataset, predictions in samples.items():
    if datasets_to_process and dataset not in datasets_to_process:
        print(f'Skipping "{dataset}"')
        continue
    
    images_dir = os.path.join(data_dir, 'train' if is_train else 'test', dataset)
    images = [os.path.join(images_dir, p.filename) for p in predictions]
    if max_images is not None:
        images = images[:max_images]

    print(f'\nProcessing dataset "{dataset}": {len(images)} images')

    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    feature_dir = os.path.join(workdir, 'featureout', dataset)
    os.makedirs(feature_dir, exist_ok=True)

    # Wrap algos in try-except blocks so we can populate a submission even if one scene crashes.
    try:
        t = time()
        index_pairs = get_image_pairs_shortlist(
            images,
            sim_th = 0.5, # should be strict
            min_pairs = 10, # we should select at least min_pairs PER IMAGE with biggest similarity
            exhaustive_if_less = 20,
            device=device
        )
        timings['shortlisting'].append(time() - t)
        print (f'Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec')
        gc.collect()
    
        t = time()

        detect_aliked(images, feature_dir, 8192, device=device)
        gc.collect()
        timings['feature_detection'].append(time() - t)
        print(f'Features detected in {time() - t:.4f} sec')
        
        t = time()
        match_with_lightglue(images, index_pairs, feature_dir=feature_dir, device=device, verbose=False)
        # match_with_lightglue_and_cluster(images, index_pairs, feature_dir=feature_dir, device=device, verbose=False)
        timings['feature_matching'].append(time() - t)
        print(f'Features matched in {time() - t:.4f} sec')

        database_path = os.path.join(feature_dir, 'colmap.db')
        if os.path.isfile(database_path):
            os.remove(database_path)
        gc.collect()
        sleep(1)
        import_into_colmap(images_dir, feature_dir=feature_dir, database_path=database_path)
        output_path = f'{feature_dir}/colmap_rec_aliked'
        
        t = time()
        pycolmap.match_exhaustive(database_path)
        timings['RANSAC'].append(time() - t)
        print(f'Ran RANSAC in {time() - t:.4f} sec')
        
        # By default colmap does not generate a reconstruction if less than 10 images are registered.
        # Lower it to 3.
        mapper_options = pycolmap.IncrementalPipelineOptions()
        mapper_options.min_model_size = 5
        mapper_options.max_num_models = 25
        mapper_options.mapper.filter_max_reproj_error	 = 6.0
        # mapper_options.min_num_matches	 = 50
        # mapper_options.ba_local_max_num_iterations = 100
        # mapper_options.ba_local_num_images = 10
        mapper_options.ba_global_images_freq = 5
        

        os.makedirs(output_path, exist_ok=True)
        t = time()
        maps = pycolmap.incremental_mapping(
            database_path=database_path, 
            image_path=images_dir,
            output_path=output_path,
            options=mapper_options)
        sleep(1)
        timings['Reconstruction'].append(time() - t)
        print(f'Reconstruction done in  {time() - t:.4f} sec')
        print(maps)

        # clear_output(wait=False)
    
        registered = 0
        for map_index, cur_map in maps.items():
            img_list =[]
            for index, image in cur_map.images.items():
                prediction_index = filename_to_index[image.name]
                predictions[prediction_index].cluster_index = map_index
                predictions[prediction_index].rotation = deepcopy(image.cam_from_world.rotation.matrix())
                predictions[prediction_index].translation = deepcopy(image.cam_from_world.translation)
                img_list.append(image.name)
                registered += 1
            img_list_str = ' '.join(img_list) 
            print(f"map_index = {map_index}", img_list_str)
        mapping_result_str = f'Dataset "{dataset}" -> Registered {registered} / {len(images)} images with {len(maps)} clusters'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)
        gc.collect()
    except Exception as e:
        print(e)
        # raise e
        mapping_result_str = f'Dataset "{dataset}" -> Failed!'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

print('\nResults')
for s in mapping_result_strs:
    print(s)

print('\nTimings')
for k, v in timings.items():
    print(f'{k} -> total={sum(v):.02f} sec.')

Extracting on device cuda:0

Processing dataset "imc2023_haiper": 54 images


100%|██████████| 54/54 [00:19<00:00,  2.81it/s]


Distance Matrix Statistics:
Min:  0.1691
Max:  0.4170
Mean: 0.3158
Std:  0.0524
20%:  0.2605
30%:  0.2852
USED 50%:  0.3285
75%:  0.3588
Shortlisting. Number of pairs to match: 710. Done in 24.1036 sec


100%|██████████| 54/54 [00:09<00:00,  5.96it/s]


Features detected in 9.4312 sec
Loaded LightGlue model


100%|██████████| 710/710 [08:29<00:00,  1.39it/s]


Features matched in 509.9425 sec


100%|██████████| 54/54 [00:04<00:00, 11.50it/s]
 18%|█▊        | 244/1326 [00:00<00:00, 4341.58it/s]


Ran RANSAC in 6.7178 sec
Reconstruction done in  82.1359 sec
{0: Reconstruction(num_reg_images=23, num_cameras=23, num_points3D=20460, num_observations=82430), 1: Reconstruction(num_reg_images=31, num_cameras=31, num_points3D=12500, num_observations=41758)}
map_index = 0 fountain_image_000.png fountain_image_007.png fountain_image_012.png fountain_image_025.png fountain_image_033.png fountain_image_041.png fountain_image_056.png fountain_image_071.png fountain_image_082.png fountain_image_101.png fountain_image_108.png fountain_image_116.png fountain_image_129.png fountain_image_136.png fountain_image_143.png fountain_image_155.png fountain_image_163.png fountain_image_166.png fountain_image_173.png fountain_image_186.png fountain_image_199.png fountain_image_214.png fountain_image_230.png
map_index = 1 bike_image_004.png bike_image_029.png bike_image_038.png bike_image_049.png bike_image_062.png bike_image_076.png bike_image_088.png bike_image_094.png bike_image_101.png bike_image_115

100%|██████████| 209/209 [04:24<00:00,  1.27s/it]


Distance Matrix Statistics:
Min:  0.1159
Max:  0.5188
Mean: 0.3667
Std:  0.0539
20%:  0.3260
30%:  0.3430
USED 50%:  0.3727
75%:  0.4056
Shortlisting. Number of pairs to match: 3815. Done in 264.6938 sec


100%|██████████| 209/209 [01:13<00:00,  2.85it/s]


Features detected in 73.5992 sec
Loaded LightGlue model


100%|██████████| 3815/3815 [42:22<00:00,  1.50it/s]


Features matched in 2542.7445 sec


100%|██████████| 209/209 [01:06<00:00,  3.14it/s]
  2%|▏         | 480/20910 [00:00<00:05, 3611.73it/s]


Ran RANSAC in 8.4097 sec
Reconstruction done in  296.2004 sec
{0: Reconstruction(num_reg_images=43, num_cameras=43, num_points3D=61495, num_observations=242754), 1: Reconstruction(num_reg_images=58, num_cameras=58, num_points3D=31591, num_observations=134553), 2: Reconstruction(num_reg_images=8, num_cameras=8, num_points3D=4642, num_observations=15623), 3: Reconstruction(num_reg_images=13, num_cameras=13, num_points3D=4988, num_observations=13746), 4: Reconstruction(num_reg_images=6, num_cameras=6, num_points3D=4088, num_observations=9944), 5: Reconstruction(num_reg_images=5, num_cameras=5, num_points3D=902, num_observations=2522)}
map_index = 0 wall_dsc_4973_acr.png wall_dsc_4976_acr.png wall_dsc_4979_acr.png wall_dsc_4982_acr.png wall_dsc_4991_acr.png wall_dsc_4994_acr.png wall_dsc_4997_acr.png wall_dsc_5000_acr.png wall_dsc_5003_acr.png wall_dsc_5006_acr.png wall_dsc_5009_acr.png wall_dsc_5012_acr.png wall_dsc_5015_acr.png wall_dsc_5018_acr.png wall_dsc_5021_acr.png wall_dsc_5024_ac

100%|██████████| 200/200 [00:21<00:00,  9.21it/s]


Distance Matrix Statistics:
Min:  0.1448
Max:  0.4239
Mean: 0.2737
Std:  0.0388
20%:  0.2375
30%:  0.2505
USED 50%:  0.2754
75%:  0.3017
Shortlisting. Number of pairs to match: 3894. Done in 21.9896 sec


100%|██████████| 200/200 [00:19<00:00, 10.10it/s]


Features detected in 20.1068 sec
Loaded LightGlue model


100%|██████████| 3894/3894 [45:50<00:00,  1.42it/s]


Features matched in 2750.8123 sec


100%|██████████| 200/200 [00:05<00:00, 38.21it/s]
  3%|▎         | 519/18721 [00:00<00:04, 3851.84it/s]


Ran RANSAC in 6.4977 sec
Reconstruction done in  265.6455 sec
{0: Reconstruction(num_reg_images=3, num_cameras=3, num_points3D=0, num_observations=0), 1: Reconstruction(num_reg_images=68, num_cameras=68, num_points3D=37888, num_observations=179412), 2: Reconstruction(num_reg_images=14, num_cameras=14, num_points3D=9286, num_observations=38168), 3: Reconstruction(num_reg_images=15, num_cameras=15, num_points3D=5482, num_observations=20262), 4: Reconstruction(num_reg_images=11, num_cameras=11, num_points3D=5801, num_observations=22178), 5: Reconstruction(num_reg_images=10, num_cameras=10, num_points3D=5031, num_observations=17732), 6: Reconstruction(num_reg_images=11, num_cameras=11, num_points3D=5645, num_observations=17788), 7: Reconstruction(num_reg_images=11, num_cameras=11, num_points3D=3416, num_observations=10938), 8: Reconstruction(num_reg_images=6, num_cameras=6, num_points3D=678, num_observations=2107), 9: Reconstruction(num_reg_images=6, num_cameras=6, num_points3D=2090, num_o

100%|██████████| 163/163 [00:09<00:00, 18.02it/s]


Distance Matrix Statistics:
Min:  0.1510
Max:  0.3338
Mean: 0.2232
Std:  0.0303
20%:  0.1980
30%:  0.2047
USED 50%:  0.2173
75%:  0.2396
Shortlisting. Number of pairs to match: 3113. Done in 9.3067 sec


100%|██████████| 163/163 [00:13<00:00, 11.82it/s]


Features detected in 14.0948 sec
Loaded LightGlue model


100%|██████████| 3113/3113 [37:06<00:00,  1.40it/s]


Features matched in 2227.0568 sec


100%|██████████| 163/163 [00:01<00:00, 96.19it/s]
  2%|▏         | 281/12561 [00:00<00:03, 3556.47it/s]


Ran RANSAC in 1.9856 sec
Reconstruction done in  117.2586 sec
{0: Reconstruction(num_reg_images=42, num_cameras=42, num_points3D=28600, num_observations=106004), 1: Reconstruction(num_reg_images=29, num_cameras=29, num_points3D=14705, num_observations=49602), 2: Reconstruction(num_reg_images=15, num_cameras=15, num_points3D=7119, num_observations=24401), 3: Reconstruction(num_reg_images=6, num_cameras=6, num_points3D=3630, num_observations=12649), 4: Reconstruction(num_reg_images=13, num_cameras=13, num_points3D=5003, num_observations=16088), 5: Reconstruction(num_reg_images=15, num_cameras=15, num_points3D=6046, num_observations=19391), 6: Reconstruction(num_reg_images=13, num_cameras=13, num_points3D=7462, num_observations=25422)}
map_index = 0 vineyard_split_3_frame_0070.png vineyard_split_3_frame_0075.png vineyard_split_3_frame_0080.png vineyard_split_3_frame_0085.png vineyard_split_3_frame_0090.png vineyard_split_3_frame_0095.png vineyard_split_3_frame_0100.png vineyard_split_3_fr

100%|██████████| 22/22 [00:01<00:00, 16.40it/s]


Distance Matrix Statistics:
Min:  0.1504
Max:  0.4104
Mean: 0.2817
Std:  0.0495
20%:  0.2356
30%:  0.2647
USED 50%:  0.2804
75%:  0.3260
Shortlisting. Number of pairs to match: 150. Done in 1.5901 sec


100%|██████████| 22/22 [00:01<00:00, 12.16it/s]


Features detected in 2.1152 sec
Loaded LightGlue model


100%|██████████| 150/150 [00:15<00:00,  9.42it/s]


Features matched in 16.0994 sec


100%|██████████| 22/22 [00:00<00:00, 90.80it/s]
 30%|██▉       | 62/210 [00:00<00:00, 4127.47it/s]


Ran RANSAC in 0.8544 sec
Reconstruction done in  7.3126 sec
{0: Reconstruction(num_reg_images=9, num_cameras=9, num_points3D=3400, num_observations=15654), 1: Reconstruction(num_reg_images=11, num_cameras=11, num_points3D=1078, num_observations=5775)}
map_index = 0 et_et000.png et_et001.png et_et002.png et_et003.png et_et004.png et_et005.png et_et006.png et_et007.png et_et008.png
map_index = 1 another_et_another_et001.png another_et_another_et002.png another_et_another_et003.png another_et_another_et004.png another_et_another_et005.png another_et_another_et006.png another_et_another_et007.png another_et_another_et008.png another_et_another_et009.png another_et_another_et010.png outliers_out_et001.png
Dataset "ETs" -> Registered 20 / 22 images with 2 clusters

Processing dataset "stairs": 51 images


100%|██████████| 51/51 [00:08<00:00,  5.85it/s]


Distance Matrix Statistics:
Min:  0.1598
Max:  0.4240
Mean: 0.2807
Std:  0.0451
20%:  0.2433
30%:  0.2557
USED 50%:  0.2767
75%:  0.3089
Shortlisting. Number of pairs to match: 647. Done in 8.9654 sec


100%|██████████| 51/51 [00:06<00:00,  8.26it/s]


Features detected in 6.4830 sec
Loaded LightGlue model


100%|██████████| 647/647 [01:25<00:00,  7.54it/s]


Features matched in 85.9812 sec


100%|██████████| 51/51 [00:02<00:00, 22.36it/s]
  7%|▋         | 80/1176 [00:00<00:00, 4075.65it/s]


Ran RANSAC in 0.5918 sec
Reconstruction done in  9.0782 sec
{0: Reconstruction(num_reg_images=11, num_cameras=11, num_points3D=1070, num_observations=3051), 1: Reconstruction(num_reg_images=5, num_cameras=5, num_points3D=312, num_observations=874)}
map_index = 0 stairs_split_2_1710453805788.png stairs_split_2_1710453871430.png stairs_split_1_1710453689727.png stairs_split_2_1710453720741.png stairs_split_2_1710453736752.png stairs_split_2_1710453739354.png stairs_split_2_1710453740954.png stairs_split_2_1710453756762.png stairs_split_2_1710453759963.png stairs_split_2_1710453783374.png stairs_split_2_1710453786375.png
map_index = 1 stairs_split_1_1710453704934.png stairs_split_1_1710453901046.png stairs_split_2_1710453745156.png stairs_split_2_1710453790978.png stairs_split_2_1710453793579.png
Dataset "stairs" -> Registered 16 / 51 images with 2 clusters

Results
Dataset "imc2023_haiper" -> Registered 54 / 54 images with 2 clusters
Dataset "imc2023_heritage" -> Registered 133 / 209 ima

In [10]:
# Must Create a submission file.

array_to_str = lambda array: ';'.join([f"{x:.09f}" for x in array])
none_to_str = lambda n: ';'.join(['nan'] * n)

submission_file = '/kaggle/working/submission.csv'
with open(submission_file, 'w') as f:
    if is_train:
        f.write('dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')
    else:
        f.write('image_id,dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.image_id},{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')

!head {submission_file}

dataset,scene,image,rotation_matrix,translation_vector
imc2023_haiper,cluster0,fountain_image_116.png,0.869129317;0.232353614;-0.436607408;-0.403378095;0.843817073;-0.353919284;0.286182361;0.483719490;0.827112514,0.358965655;-0.843690342;1.845886880
imc2023_haiper,cluster0,fountain_image_108.png,0.933615065;-0.135776390;0.331553437;0.282382009;0.848417909;-0.447713583;-0.220506940;0.511616872;0.830436551,0.330236871;-0.729795670;1.630062740
imc2023_haiper,cluster0,fountain_image_101.png,0.639016636;-0.278117785;0.717152869;0.592606595;0.772403491;-0.228495670;-0.490382669;0.571002054;0.658393113,-0.092049249;-1.001903594;1.791716316
imc2023_haiper,cluster0,fountain_image_082.png,-0.981584690;-0.123141475;0.146039973;0.029802770;0.656417776;0.753808661;-0.188688345;0.744279437;-0.640659682,0.099493123;-1.709364600;4.183197031
imc2023_haiper,cluster0,fountain_image_071.png,-0.805769103;0.188254240;-0.561512683;-0.314870977;0.666844519;0.675407029;0.501589892;0.721026164;-0.478046913

In [11]:
# Definitely Compute results if running on the training set.
# Do not do this when submitting a notebook for scoring. All you have to do is save your submission to /kaggle/working/submission.csv.

if is_train:
    t = time()
    final_score, dataset_scores = metric.score(
        gt_csv='/kaggle/input/image-matching-challenge-2025/train_labels.csv',
        user_csv=submission_file,
        thresholds_csv='/kaggle/input/image-matching-challenge-2025/train_thresholds.csv',
        mask_csv=None if is_train else os.path.join(data_dir, 'mask.csv'),
        inl_cf=0,
        strict_cf=-1,
        verbose=True,
    )
    print(f'Computed metric in: {time() - t:.02f} sec.')

imc2023_haiper: score=63.15% (mAA=62.78%, clusterness=63.53%)
imc2023_heritage: score=56.89% (mAA=39.75%, clusterness=100.00%)
imc2023_theather_imc2024_church: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_dioscuri_baalshamin: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_lizard_pond: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_brandenburg_british_buckingham: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_piazzasanmarco_grandplace: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_sacrecoeur_trevi_tajmahal: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_stpeters_stpauls: score=0.00% (mAA=0.00%, clusterness=0.00%)
amy_gardens: score=25.47% (mAA=14.59%, clusterness=100.00%)
fbk_vineyard: score=43.65% (mAA=27.92%, clusterness=100.00%)
ETs: score=44.25% (mAA=28.85%, clusterness=95.00%)
stairs: score=4.32% (mAA=2.22%, clusterness=75.00%)
Average over all datasets: score=18.29% (mAA=13.55%, clusterness=41.04%)
Computed metric in: 100.05 sec.
