## Example submission

Image Matching Challenge 2025: https://www.kaggle.com/competitions/image-matching-challenge-2025

This notebook creates a simple submission using ALIKED and LightGlue, plus DINO for shortlisting, on GPU. Adapted from [last year](https://www.kaggle.com/code/oldufo/imc-2024-submission-example).

Remember to select an accelerator on the sidebar to the right, and to disable internet access when submitting a notebook to the competition.

In [2]:
# IMPORTANT
# Install dependencies and copy model weights so the notebook can run without internet access when submitting to the competition.

# Install every wheel shipped in the attached dataset. `--no-index` stops pip from contacting PyPI
# and `--no-deps` skips dependency resolution, so only the listed wheels are installed.
!pip install --no-index /kaggle/input/imc2024-packages-lightglue-rerun-kornia/* --no-deps
# Create the torch hub checkpoint cache directory and copy the ALIKED / LightGlue weights into it
# (presumably so the model loaders find them locally instead of downloading — TODO confirm).
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp /kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/
# NOTE(review): the same LightGlue weights are copied a second time under a different file name
# (ending in "-pth", not ".pth"); presumably another loader expects exactly this name — confirm before changing.
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv-pth

Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia-0.7.2-py2.py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_moons-0.2.9-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_rs-0.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/lightglue-0.0-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/pycolmap-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/rerun_sdk-0.15.0a2-cp38-abi3-manylinux_2_31_x86_64.whl
Installing collected packages: rerun-sdk, pycolmap, lightglue, kornia-rs, kornia-moons, kornia
  Attempting uninstall: kornia-rs
    Found existing installation: kornia_rs 0.1.8
    Uninstalling kornia_rs-0.1.8:
      Successfully uninstalled kornia_rs-0.1.8
  Attempting uninstall: kornia
    Found exist

In [3]:
import sys
import os
from tqdm import tqdm
from time import time, sleep
import gc
import numpy as np
import h5py
import dataclasses
import pandas as pd
from IPython.display import clear_output
from collections import defaultdict
from copy import deepcopy
from PIL import Image

import cv2
import torch
import torch.nn.functional as F
import kornia as K
import kornia.feature as KF

import torch
from lightglue import match_pair
from lightglue import ALIKED, LightGlue
from lightglue.utils import load_image, rbd
from transformers import AutoImageProcessor, AutoModel

# IMPORTANT Utilities: importing data into colmap and competition metric
import pycolmap
sys.path.append('/kaggle/input/imc25-utils')
from database import *
from h5_to_db import *
import metric
from sklearn.cluster import DBSCAN
from PIL import Image, ImageDraw # Import ImageDraw for drawing keypoints

# ... other imports

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [24]:

# Report the runtime environment (library, interpreter and GPU details) for reproducibility.
print("PyTorch version:", torch.__version__)
print("Python version:", sys.version)

print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Device count:", torch.cuda.device_count())
# Querying the current device raises when no CUDA device is usable, so only do it on GPU
# runtimes; the rest of the notebook supports a CPU fallback.
if torch.cuda.is_available():
    current_cuda_device = torch.cuda.current_device()
    print("Current device:", current_cuda_device)
    print("Device name:", torch.cuda.get_device_name(current_cuda_device))
else:
    print("Current device: none (CUDA unavailable, running on CPU)")


PyTorch version: 2.5.1+cu121
Python version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
CUDA available: True
CUDA version: 12.1
Device count: 1
Current device: 0
Device name: Tesla P100-PCIE-16GB


In [25]:
# Recursively delete /kaggle/working/result (no error if it does not exist);
# presumably clears outputs left over from a previous run — TODO confirm.
!rm -rf /kaggle/working/result

In [26]:
# Do not forget to select an accelerator on the sidebar to the right.
# Ask Kornia for CUDA device 0; presumably this falls back to the CPU when no GPU is present — TODO confirm.
# NOTE(review): the configuration cell defines a separate `DEVICE` via torch.device(...);
# both names coexist in this notebook.
device = K.utils.get_cuda_device_if_available(0)
print(f'{device=}')

device=device(type='cuda', index=0)


In [27]:
def load_pil_image(fname):
    """Load an image file with PIL and return it converted to RGB.

    Args:
        fname: Path of the image file to open.

    Returns:
        A new PIL image in 'RGB' mode.
    """
    # Open through a context manager so the underlying file handle is released
    # deterministically; convert() returns a new image, so the result stays valid
    # after the source image is closed.
    with Image.open(fname) as img:
        return img.convert('RGB')

def get_image_size(fname):
    """Return the (width, height) of an image file as reported by PIL.

    Args:
        fname: Path of the image file to inspect.

    Returns:
        tuple: (width, height) taken from the opened image's ``size``.
    """
    with Image.open(fname) as pil_img:
        width, height = pil_img.size
    return (width, height)

def get_original_coords(kp_coords, img_orig_size, variation_info):
    """
    Transforms keypoint coordinates from variation space back to original image space.

    The input is never modified. Integer (or list) inputs are promoted to float64 so
    that the division by the scale factor is well defined; float32/float64 arrays
    keep their dtype.

    Args:
        kp_coords (array-like): Keypoint coordinates [N, 2] in the variation space.
        img_orig_size (tuple): Original image size (width, height). Currently unused;
                               kept so existing callers keep working.
        variation_info (dict): Dictionary containing 'type' ('orig' or 'crop'),
                               'scale_factor' (scale used for resize),
                               'crop_box' ([x, y, ...] in original coords; only the
                               first two entries are read, None if type is 'orig').

    Returns:
        np.ndarray: Keypoint coordinates [N, 2] in the original image space.
    """
    if len(kp_coords) == 0:
        return np.empty((0, 2))

    coords = np.asarray(kp_coords)
    if np.issubdtype(coords.dtype, np.floating):
        coords = coords.copy()  # Work on a copy, keep the caller's float dtype
    else:
        # In-place true division on an integer array raises a casting error,
        # so promote to float first (astype already returns a copy).
        coords = coords.astype(np.float64)

    # 1. Reverse scaling: back to the space of the original/cropped image (before resize)
    coords /= variation_info['scale_factor']

    # 2. Reverse cropping offset
    if variation_info['type'] == 'crop' and variation_info['crop_box'] is not None:
        x_crop, y_crop = variation_info['crop_box'][0], variation_info['crop_box'][1]
        coords[:, 0] += x_crop
        coords[:, 1] += y_crop

    # Coordinates are intentionally not clipped to the original image bounds.
    return coords

In [28]:
# --- Configuration ---
# Central place for tunable constants. NOTE(review): CROP_DATA, NMS_SIZE_PIXELS and
# NMS_SIZE_PIXELS_ratio are assigned again in later cells of this notebook, so the values
# in effect depend on which cells have been executed.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, otherwise CPU
GLOBAL_DESC_MODEL = '/kaggle/input/dinov2/pytorch/base/1' # Path to your DINOv2 model
DATA_DIR = '.' # Base directory for outputs
# FEATURE_DIR = os.path.join(DATA_DIR, 'features_combined')
# MATCH_DIR = os.path.join(DATA_DIR, 'matches_global')

# Initial detection parameters for cropping data collection
INITIAL_DETECTION_RESIZE = 1280
INITIAL_DETECTION_NUM_FEATURES = 2048

# Parameters for TTA detection and combination
TTA_SCALES = [2048]
TTA_NUM_FEATURES = 4096
USE_CROPPED_IMAGES = True

# Parameters for New Cropping Method
MIN_PAIRS_FOR_CROPPING = 10
CROP_PADDING = 50
DBSCAN_EPS = 20 # Fixed EPS fallback or base value
DBSCAN_MIN_SAMPLES = 5 # DBSCAN min_samples parameter
# New config for adaptive EPS based on resolution
DBSCAN_EPS_RESOLUTION_RATIO = 0.02 # Ratio of max image dimension for EPS (e.g., 0.02 -> 2% of longer side)


# Coordinate precision for deduplication (rounding float coordinates)
COORD_PRECISION = 1 # Number of decimal places to round coordinates for uniqueness check

# Matching parameters
MIN_MATCHES_PER_VARIATION = 5 # Lowered this threshold slightly, as combining might filter some
MIN_TOTAL_MATCHES_PER_PAIR = 20 # Minimum unique matches for a pair to be saved in global list

# Output file names (within FEATURE_DIR and MATCH_DIR)
KEYPOINTS_SUBDIR = 'keypoints'
DESCRIPTORS_H5 = 'descriptors.h5'
MATCHES_PT = 'matches.pt'
CROP_DATA = 'crop_data.h5'
CROP_INFO = 'crop_info.h5'

# Parameters for graph building and clustering thresholds
# These are the internal names used in the function; map external arguments to these if needed
MIN_MATCHES_FOR_GRAPH_EDGE = 20  # Min matches for adding edge to graph
MIN_MATCHES_FOR_FILTERED_GRAPH = 75 # Min matches for filtering graph (your aliked_dis_min)
MIN_IMAGES_PER_CLUSTER = 5 # Min images in a final cluster


# Config for NMS
# NOTE(review): both NMS values below are overwritten by a later cell (8 and 0.005).
NMS_SIZE_PIXELS = 3 # Radius in pixels for NMS suppression
NMS_SIZE_PIXELS_ratio = 0.003

# --- Keypoint visualization ---
VISUALIZE_KEYPOINTS = False # Set to True to enable saving visualized keypoints
KEYPOINT_VIS_SUBDIR = 'keypoint_visualizations' # Subdirectory for saving visualized images


In [29]:
def load_torch_image(fname, device=torch.device('cpu')):
    """Load an image with Kornia (RGB32 load type) and prepend a batch axis.

    Args:
        fname: Path of the image file.
        device: Device passed through to ``K.io.load_image``.

    Returns:
        The loaded image tensor with one extra leading dimension.
    """
    loaded = K.io.load_image(fname, K.io.ImageLoadType.RGB32, device=device)
    return loaded.unsqueeze(0)


# Global descriptors (DINOv2) used to build the matching shortlist.
def get_global_desc(fnames, device = torch.device('cpu')):
    """Compute one L2-normalised global descriptor per image with the DINOv2 model.

    The processor and model are loaded from ``GLOBAL_DESC_MODEL``. For every image the
    descriptor is the element-wise maximum over the tokens of ``last_hidden_state``,
    skipping token 0 (presumably the CLS token — TODO confirm), followed by L2
    normalisation.

    Args:
        fnames: Iterable of image file paths (must be non-empty; ``torch.cat`` fails
                on an empty list).
        device: Device on which the model is run; images are loaded on CPU and the
                processed inputs are moved to this device.

    Returns:
        torch.Tensor: CPU tensor with the descriptors concatenated along dim 0,
        one row per entry of ``fnames`` in the same order.
    """
    processor = AutoImageProcessor.from_pretrained(GLOBAL_DESC_MODEL)
    model = AutoModel.from_pretrained(GLOBAL_DESC_MODEL).eval().to(device)
    global_descs_dinov2 = []
    for img_fname_full in tqdm(fnames, total=len(fnames)):
        timg = load_torch_image(img_fname_full)
        with torch.inference_mode():
            # do_rescale=False: the image is loaded with the RGB32 load type, presumably
            # already a float image in [0, 1] — TODO confirm.
            inputs = processor(images=timg, return_tensors="pt", do_rescale=False).to(device)
            outputs = model(**inputs)
            dino_mac = F.normalize(outputs.last_hidden_state[:, 1:].max(dim=1)[0], dim=1, p=2)
        global_descs_dinov2.append(dino_mac.detach().cpu())
    return torch.cat(global_descs_dinov2, dim=0)


def get_img_pairs_exhaustive(img_fnames):
    """Enumerate every unordered pair of image indices.

    Args:
        img_fnames: Sequence of images; only its length is used.

    Returns:
        list[tuple[int, int]]: All (i, j) with i < j, ordered by i then j.
    """
    count = len(img_fnames)
    return [(first, second)
            for first in range(count)
            for second in range(first + 1, count)]


def get_image_pairs_shortlist(fnames,
                              sim_th = 0.6, # should be strict
                              min_pairs = 30,
                              exhaustive_if_less = 20,
                              device=torch.device('cpu')):
    """Select the image pairs worth matching, based on global-descriptor distances.

    With ``exhaustive_if_less`` images or fewer, every pair is returned. Otherwise the
    pairwise L2 distances between global descriptors are computed and, for each image
    except the last, the candidates are the images whose distance is at most the median
    of all pairwise distances. If there are fewer than ``min_pairs`` such candidates,
    the ``min_pairs`` nearest images are used instead. A candidate pair is kept only if
    its distance is below ``mean + sqrt(3) * std`` of all pairwise distances.

    Args:
        fnames: Sequence of image file paths.
        sim_th: Currently unused; kept for backward compatibility with callers.
        min_pairs: Minimum number of candidates considered per image.
        exhaustive_if_less: Use exhaustive pairing when ``len(fnames)`` is at most this.
        device: Device used to compute the global descriptors.

    Returns:
        list[tuple[int, int]]: Sorted, de-duplicated (i, j) index pairs with i < j.
    """
    num_imgs = len(fnames)
    if num_imgs <= exhaustive_if_less:
        return get_img_pairs_exhaustive(fnames)
    descs = get_global_desc(fnames, device=device)
    dm = torch.cdist(descs, descs, p=2).detach().cpu().numpy()

    # Only analyse the strict upper triangle (no diagonal) so each pair is counted once.
    dm_flat = dm[np.triu_indices_from(dm, k=1)]
    median_dist = np.percentile(dm_flat, 50)

    # Print summary statistics of the pairwise distances.
    print("Distance Matrix Statistics:")
    print(f"Min:  {dm_flat.min():.4f}")
    print(f"Max:  {dm_flat.max():.4f}")
    print(f"Mean: {dm_flat.mean():.4f}")
    print(f"Std:  {dm_flat.std():.4f}")
    print(f"20%:  {np.percentile(dm_flat, 20):.4f}")
    print(f"25%:  {np.percentile(dm_flat, 25):.4f}")
    print(f"USED 50%:  {median_dist:.4f}")
    print(f"75%:  {np.percentile(dm_flat, 75):.4f}")

    # Upper bound on the distance of any accepted pair.
    threshold = dm_flat.mean() + np.sqrt(3) * dm_flat.std()
    # Candidate mask: keep the closer half of all pairs (distance <= median).
    mask = dm <= median_dist

    all_indices = np.arange(num_imgs)
    matching_pairs = set()
    for st_idx in range(num_imgs - 1):
        to_match = all_indices[mask[st_idx]]
        if len(to_match) < min_pairs:
            # Too few candidates under the median: fall back to the nearest images.
            to_match = np.argsort(dm[st_idx])[:min_pairs]
        for idx in to_match:
            if st_idx == idx:
                continue
            if dm[st_idx, idx] < threshold:
                matching_pairs.add(tuple(sorted((st_idx, idx.item()))))
    return sorted(matching_pairs)


def import_into_colmap(img_dir, feature_dir ='.featureout', database_path = 'colmap.db'):
    """Create the COLMAP database tables and load keypoints and matches into them.

    Args:
        img_dir: Image directory forwarded to ``add_keypoints``.
        feature_dir: Feature directory forwarded to ``add_keypoints`` and ``add_matches``.
        database_path: Path of the COLMAP database to connect to.

    Returns:
        None. The changes are committed to the database before returning.
    """
    colmap_db = COLMAPDatabase.connect(database_path)
    colmap_db.create_tables()
    # The final argument disables the shared single-camera mode of add_keypoints.
    use_single_camera = False
    image_ids_by_name = add_keypoints(
        colmap_db, feature_dir, img_dir, '', 'simple-pinhole', use_single_camera
    )
    add_matches(colmap_db, feature_dir, image_ids_by_name)
    colmap_db.commit()

In [30]:
# --- Custom NMS function ---
def custom_nms_2d_keypoints(keypoints_np, scores_np, nms_radius):
    """
    Greedy score-ordered Non-Maximum Suppression for 2D keypoints.

    Keypoints are visited from highest to lowest score. Each keypoint that has not
    been suppressed yet is kept, and every keypoint strictly closer than
    ``nms_radius`` to it is marked as suppressed.

    Args:
        keypoints_np (np.ndarray): Keypoint coordinates (N, 2) in pixel space.
        scores_np (np.ndarray): Scores for each keypoint (N,).
        nms_radius (float): Radius for suppression in pixel units.

    Returns:
        np.ndarray: Original indices of the surviving keypoints, in descending
        score order.
    """
    num_points = len(keypoints_np)
    if num_points == 0:
        return np.array([], dtype=int)

    # Compare squared distances against the squared radius to avoid a sqrt.
    radius_sq = nms_radius**2
    is_suppressed = np.zeros(num_points, dtype=bool)
    survivors = []

    for candidate in scores_np.argsort()[::-1]:
        if not is_suppressed[candidate]:
            survivors.append(candidate)
            offsets = keypoints_np - keypoints_np[candidate]
            # Everything inside the radius (the candidate included) is now off-limits.
            is_suppressed |= np.sum(offsets**2, axis=1) < radius_sq

    return np.array(survivors, dtype=int)

In [31]:
import os
import h5py
import numpy as np

# Assuming these constants are defined elsewhere in your setup
# from .config import CROP_DATA, CROP_BOXES_FILE, CROP_PADDING_FACTOR, MIN_CROP_DIMENSION

# Placeholder constants for demonstration if not imported
# NOTE(review): CROP_DATA is also assigned in the configuration cell with the same value;
# this reassignment is redundant as long as the two stay in sync.
CROP_DATA = 'crop_data.h5'
CROP_BOXES_FILE = 'crop_boxes.h5' # File to save the final crop boxes
CROP_PADDING_FACTOR = 0.1 # e.g., 10% padding
MIN_CROP_DIMENSION = 512 # Minimum width/height for a crop box

def _enforce_min_extent(low, high, limit, min_size):
    """Widen the 1-D interval [low, high] to ``min_size`` where the image allows it.

    The interval is re-centred on its midpoint and clamped to [0, limit]. If the
    clamping made it shorter than ``min_size`` it is pushed against the border it
    touches. Intervals that are already long enough are returned unchanged.
    """
    if not (high - low < min_size):
        return low, high

    center = (low + high) / 2
    low = max(0, center - min_size / 2)
    high = min(limit, center + min_size / 2)
    # Adjust if clipping occurred
    if high - low < min_size:
        if low == 0:
            high = min(limit, min_size)
        elif high == limit:
            low = max(0, limit - min_size)
    return low, high


def calculate_crop_boxes(data_dir: str):
    """
    Calculates the crop boxes for each image based on the frequent keypoints
    stored in the crop data file (``CROP_DATA`` inside ``data_dir``).

    For every image group the bounding box of its keypoints is padded by
    ``CROP_PADDING_FACTOR`` of the box size on each side, clamped to the image, and
    widened to at least ``MIN_CROP_DIMENSION`` per axis where the image is large
    enough. Images without keypoints are skipped with a warning.

    Assumes all input data (HDF5 files, datasets, attributes) are valid and exist.

    Args:
        data_dir (str): Directory where crop_data.h5 is located and
                        where crop_boxes.h5 will be saved.

    Returns:
        str: Path to the generated crop_boxes.h5 file. Each dataset in it is an
        int32 array (x_min, y_min, x_max, y_max) keyed by the image key.
    """
    crop_data_file = os.path.join(data_dir, CROP_DATA)
    crop_boxes_output_file = os.path.join(data_dir, CROP_BOXES_FILE)

    print(f"Calculating crop boxes from {crop_data_file}...")

    image_crop_boxes = {}

    with h5py.File(crop_data_file, 'r') as f_crop_data:
        for img_key in f_crop_data.keys():
            kps = f_crop_data[img_key]['keypoints'][...]
            original_width, original_height = f_crop_data[img_key].attrs['original_pil_size']  # (W, H)

            if len(kps) == 0:
                print(f"Warning: No frequent keypoints for {img_key}. Skipping crop box calculation.")
                continue

            # Bounding box of the keypoints
            min_x, min_y = np.min(kps, axis=0)
            max_x, max_y = np.max(kps, axis=0)

            # Padding proportional to the bounding-box size, clamped to the image
            padding_w = (max_x - min_x) * CROP_PADDING_FACTOR
            padding_h = (max_y - min_y) * CROP_PADDING_FACTOR

            crop_min_x = max(0, min_x - padding_w)
            crop_min_y = max(0, min_y - padding_h)
            crop_max_x = min(original_width, max_x + padding_w)
            crop_max_y = min(original_height, max_y + padding_h)

            # Ensure minimum crop dimensions; the two axes are independent
            crop_min_x, crop_max_x = _enforce_min_extent(
                crop_min_x, crop_max_x, original_width, MIN_CROP_DIMENSION)
            crop_min_y, crop_max_y = _enforce_min_extent(
                crop_min_y, crop_max_y, original_height, MIN_CROP_DIMENSION)

            # Integer crop box (x_min, y_min, x_max, y_max)
            image_crop_boxes[img_key] = np.array([
                int(round(crop_min_x)),
                int(round(crop_min_y)),
                int(round(crop_max_x)),
                int(round(crop_max_y))
            ], dtype=np.int32)

    # Save the calculated crop boxes to a new HDF5 file
    with h5py.File(crop_boxes_output_file, 'w') as f_crop_boxes:
        for img_key, crop_box in image_crop_boxes.items():
            f_crop_boxes.create_dataset(img_key, data=crop_box)

    print(f"Crop boxes calculated and saved to {crop_boxes_output_file}")
    return crop_boxes_output_file

In [32]:
# ... (imports, configs, utility functions like load_torch_image, load_pil_image, get_image_size)

def perform_initial_detection_and_matching(img_fnames, index_pairs, data_dir, device=DEVICE):
    """
    Runs a first ALIKED detection + LightGlue matching pass and derives, per image,
    the "frequent" keypoints that later drive cropping.

    Steps:
      1. Extract ALIKED features for every image (resize set to
         INITIAL_DETECTION_RESIZE) and store keypoints/descriptors in a temporary
         HDF5 file under ``<data_dir>/.temp_crop_features``.
      2. Match the requested image pairs with LightGlue and store the match indices
         in a second temporary HDF5 file.
      3. For every keypoint, collect the set of partner images it was matched in;
         keypoints matched in at least MIN_PAIRS_FOR_CROPPING distinct images are
         "frequent".
      4. Write the frequent keypoints (and the image size) to the crop data file.

    An existing crop data file is deleted and always recomputed.
    Assumes all image inputs are valid and exist.

    Args:
        img_fnames: Sequence of image paths; the basename of each path is used as its key.
        index_pairs: Iterable of (idx1, idx2) indices into ``img_fnames`` to match.
        data_dir: Directory for the temporary files and the crop data file.
        device: Torch device used for extraction and matching.

    Returns:
        str: Path of the written crop data file (``CROP_DATA`` inside ``data_dir``).
    """
    temp_feature_dir = os.path.join(data_dir, '.temp_crop_features')
    os.makedirs(temp_feature_dir, exist_ok=True)

    initial_feature_file = os.path.join(temp_feature_dir, f'initial_features_{INITIAL_DETECTION_RESIZE}.h5')
    initial_match_file = os.path.join(temp_feature_dir, f'initial_matches_{INITIAL_DETECTION_RESIZE}.h5')
    crop_data_file = os.path.join(data_dir, CROP_DATA)

    # A previous result is never reused: it is removed and rebuilt below.
    if os.path.exists(crop_data_file):
        print(f"Initial detection and matching data for cropping exists: {crop_data_file}. deleting.")
        os.remove(crop_data_file)

    print(f"Performing initial ALIKED detection ({INITIAL_DETECTION_RESIZE}) and LightGlue matching for cropping data...")

    # 1. Initial Detection
    print("Running initial ALIKED detection...")
    extractor = ALIKED(max_num_keypoints=INITIAL_DETECTION_NUM_FEATURES, detection_threshold=0.2).eval().to(device, dtype=torch.float32)
    # NOTE(review): if preprocess_conf is a class-level dict shared by all ALIKED instances,
    # this assignment also changes the resize used by extractors created later — confirm.
    extractor.preprocess_conf["resize"] = INITIAL_DETECTION_RESIZE

    with h5py.File(initial_feature_file, mode='w') as f_kp_desc:
        for img_path in tqdm(img_fnames, desc="Initial ALIKED Detection"):
            # Basename is the HDF5 key; two images sharing a basename would collide.
            img_key = os.path.basename(img_path)

            img_orig_pil = load_pil_image(img_path)
            original_pil_size = img_orig_pil.size # (W, H)

            # Convert to a float tensor scaled to [0, 1]
            timg = K.image_to_tensor(np.array(img_orig_pil), keepdim=True).to(device, torch.float32) / 255.0 # Normalize
            if timg.ndim == 3: timg = timg[None, ...] # Ensure BxCxHxW

            with torch.inference_mode():
                feats = extractor.extract(timg)

                # Presumably extract() returns keypoints in the pixel coordinates of the
                # input image (i.e. the resize is undone internally) — TODO confirm.
                kps = feats['keypoints'].reshape(-1, 2).detach().cpu().numpy()
                desc = feats['descriptors'].reshape(len(kps), -1).detach().cpu().numpy()

            # Save keypoints and descriptors, plus image size and path as attributes
            img_group = f_kp_desc.create_group(img_key)
            img_group.create_dataset('keypoints', data=kps.astype(np.float32)) # Stored simply as 'keypoints'
            img_group.create_dataset('descriptors', data=desc.astype(np.float32))
            img_group.attrs['original_pil_size'] = original_pil_size # (W, H) tuple
            img_group.attrs['original_path'] = img_path # Store original path

    # 2. Initial Matching
    print("Running initial LightGlue matching...")
    # "mp" is enabled only when running on a CUDA device.
    lg_matcher = KF.LightGlueMatcher("aliked", {"width_confidence": -1,
                                                 "depth_confidence": -1,
                                                 "mp": True if 'cuda' in str(device) else False}).eval().to(device)
    # Redundant with the .to(device) above when device is the CPU; kept as-is.
    if device == torch.device('cpu'):
        lg_matcher.to('cpu')

    # Ensure initial_match_file is created (and truncated) even if no pair gets matched
    with h5py.File(initial_match_file, mode='w') as f:
        pass # Create an empty file

    # Read keys from the initial_feature_file HDF5
    extracted_image_keys = []
    with h5py.File(initial_feature_file, mode='r') as f_kp_desc_read:
        extracted_image_keys = list(f_kp_desc_read.keys())

    # Filter index_pairs to include only pairs where both images had features extracted
    filtered_index_pairs = []
    for idx1, idx2 in index_pairs:
        key1, key2 = os.path.basename(img_fnames[idx1]), os.path.basename(img_fnames[idx2])
        if key1 in extracted_image_keys and key2 in extracted_image_keys:
            filtered_index_pairs.append((idx1, idx2))

    if not filtered_index_pairs:
        print("No image pairs with extracted features to perform initial matching.")
    else:
        with h5py.File(initial_feature_file, mode='r') as f_kp_desc_read, \
             h5py.File(initial_match_file, mode='a') as f_match:

            for idx1, idx2 in tqdm(filtered_index_pairs, desc="Initial LightGlue Matching"):
                fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
                key1, key2 = os.path.basename(fname1), os.path.basename(fname2)

                # Load keypoints and descriptors written by the detection step above
                kp1 = torch.from_numpy(f_kp_desc_read[key1]['keypoints'][...]).to(device)
                kp2 = torch.from_numpy(f_kp_desc_read[key2]['keypoints'][...]).to(device)
                desc1 = torch.from_numpy(f_kp_desc_read[key1]['descriptors'][...]).to(device)
                desc2 = torch.from_numpy(f_kp_desc_read[key2]['descriptors'][...]).to(device)

                # Nothing to match if either image has no keypoints
                if len(kp1) == 0 or len(kp2) == 0:
                    continue

                # The matcher takes local affine frames; build them from the keypoint centres
                laf1 = KF.laf_from_center_scale_ori(kp1[None])
                laf2 = KF.laf_from_center_scale_ori(kp2[None])

                with torch.inference_mode():
                    dists, idxs = lg_matcher(desc1, desc2, laf1, laf2)
                # Matches are stored as f_match[key1][key2]; pairs without matches are omitted
                if len(idxs) > 0:
                    group = f_match.require_group(key1)
                    group.create_dataset(key2, data=idxs.detach().cpu().numpy().astype(np.int32))

    # --- Analyze Initial Matches for Cropping Data ---
    print("Analyzing initial matches for cropping data...")

    # image key -> keypoint index -> set of partner image keys it was matched in
    kp_match_pairs = {}

    with h5py.File(initial_match_file, mode='r') as f_match:
        for img_key1 in f_match.keys():
            for img_key2 in f_match[img_key1].keys():
                # Presumably an (M, 2) array of (kp1_idx, kp2_idx) rows — TODO confirm
                matches = f_match[img_key1][img_key2][...] # Indices (kp1_idx, kp2_idx)

                for kp1_idx, kp2_idx in matches:
                    # Record the match symmetrically for both images of the pair
                    if img_key1 not in kp_match_pairs: kp_match_pairs[img_key1] = {}
                    if kp1_idx not in kp_match_pairs[img_key1]: kp_match_pairs[img_key1][kp1_idx] = set()
                    kp_match_pairs[img_key1][kp1_idx].add(img_key2)

                    if img_key2 not in kp_match_pairs: kp_match_pairs[img_key2] = {}
                    if kp2_idx not in kp_match_pairs[img_key2]: kp_match_pairs[img_key2][kp2_idx] = set()
                    kp_match_pairs[img_key2][kp2_idx].add(img_key1)


    # image key -> {'keypoints': frequent keypoint coordinates, 'original_pil_size': (W, H)}
    frequent_kp_data = {}

    with h5py.File(initial_feature_file, mode='r') as f_kp_desc:
        for img_key in kp_match_pairs.keys():
            # Keypoints exactly as stored by the detection step
            original_scale_kps = f_kp_desc[img_key]['keypoints'][...]
            original_pil_size = f_kp_desc[img_key].attrs['original_pil_size'] # (W, H)

            kp_data_dict = kp_match_pairs[img_key]

            # A keypoint is "frequent" when it was matched in at least MIN_PAIRS_FOR_CROPPING
            # distinct partner images; out-of-range indices are dropped defensively.
            frequent_indices = [kp_idx for kp_idx, matched_pairs in kp_data_dict.items()
                                if len(matched_pairs) >= MIN_PAIRS_FOR_CROPPING and kp_idx < len(original_scale_kps)]


            if frequent_indices:
                frequent_kps = original_scale_kps[frequent_indices]

                # Drop keypoints with any negative coordinate
                valid_frequent_kps = frequent_kps[~np.any(frequent_kps < 0, axis=1)]


                if len(valid_frequent_kps) > 0:
                    frequent_kp_data[img_key] = {
                        'keypoints': valid_frequent_kps,
                        'original_pil_size': original_pil_size # Needed later to clamp crop boxes
                    }


    # Save frequent_kp_data to crop_data_file; the file is created even when there is nothing to store
    with h5py.File(crop_data_file, mode='w') as f_crop_data:
        if frequent_kp_data:
            for img_key, data in frequent_kp_data.items():
                group = f_crop_data.create_group(img_key)
                group.create_dataset('keypoints', data=data['keypoints'])
                group.attrs['original_pil_size'] = data['original_pil_size'] # Stored as a group attribute


    print(f"Initial detection and matching complete. Cropping data saved to {crop_data_file}")

    return crop_data_file

In [33]:
# Assume this is defined globally or imported
def transform_points_from_processed(points, crop_box=None):
    """
    Map keypoints from a cropped image back to the full image by adding the
    crop's top-left offset. No scaling is applied here.

    Args:
        points (np.ndarray): Nx2 array of (x, y) keypoint coordinates relative to
                             the (possibly cropped) image. It is not modified.
        crop_box (sequence or None): Crop region in original image coordinates;
                                     only its first two entries (x, y of the
                                     top-left corner) are read. If None, no
                                     offset is applied.

    Returns:
        np.ndarray: A copy of ``points`` shifted into the original full image's
                    coordinate system.
    """
    shifted = points.copy()
    if crop_box is None:
        # Full (uncropped) image: coordinates are already in the original frame.
        return shifted

    dx, dy = crop_box[0], crop_box[1]
    shifted[:, 0] += dx
    shifted[:, 1] += dy
    return shifted

# calculate_kornia_resize_scale will no longer be used for keypoint handling.
# You can remove its definition if it's not used anywhere else in your project.
# If it's used for other non-keypoint-related resizing calculations, keep it.
# For the scope of this request, it's not needed for feature extraction or transformation.

In [34]:
def load_image_names_from_json(cluster_path):
    """Read ``images.json`` from ``cluster_path`` and return the image base names.

    Args:
        cluster_path: Directory containing an ``images.json`` file that holds a JSON
                      list of image paths.

    Returns:
        list[str]: The final path component of every entry, in file order.
    """
    # Imported locally because the notebook's import cells do not import json.
    import json

    with open(os.path.join(cluster_path, 'images.json'), 'r') as f:
        full_paths = json.load(f)  # e.g. ['/path/to/images/img001.jpg', ...]
    # Keep only the file name, e.g. 'img001.jpg'
    return [os.path.basename(p) for p in full_paths]

In [59]:
# ... (imports like kornia.feature as KF, torch, numpy, os, h5py, tqdm)

# NMS constants. NOTE(review): these assignments overwrite the values set in the
# configuration cell (NMS_SIZE_PIXELS = 3, NMS_SIZE_PIXELS_ratio = 0.003); whichever
# cell ran last wins, so keep a single definition once the values are settled.
NMS_SIZE_PIXELS_ratio = 0.005 # Example: 0.5% of the max original dimension
NMS_SIZE_PIXELS = 8 # Example: absolute max NMS radius in pixels

# Assume transform_points_from_processed and load_pil_image are defined as previously discussed.


def detect_and_combine_features(img_fnames, crop_info_file, feature_dir, device=DEVICE):
    """
    Detect ALIKED features on several variations of every image and merge them.

    For each image the extractor is run once per entry of ``TTA_SCALES`` on the
    full image and, when ``USE_CROPPED_IMAGES`` is set and a valid crop box is
    stored for the image in ``crop_info_file``, once more per scale on the cropped
    region. Keypoints found on a crop are shifted back into full-image coordinates
    with ``transform_points_from_processed``; no rescaling is applied here.
    NOTE(review): this relies on the extractor returning keypoints in the
    coordinate system of the image it was given -- confirm against the ALIKED
    wrapper, which is not defined in this cell.

    The merged keypoints are filtered with ``custom_nms_2d_keypoints`` (size =
    ``min(max(width, height) * NMS_SIZE_PIXELS_ratio, NMS_SIZE_PIXELS)``) and then
    de-duplicated on coordinates rounded to ``COORD_PRECISION`` decimals, keeping
    the first occurrence.

    Files written under ``feature_dir``:
        * ``<KEYPOINTS_SUBDIR>/<image basename>.pt``: keypoints tensor (float32).
        * ``<DESCRIPTORS_H5>``: one group per image basename containing a ``data``
          dataset with the descriptors (row i belongs to keypoint i). Images
          without any feature get an empty ``data`` dataset so readers can index
          ``group['data']`` unconditionally.
        * ``<KEYPOINT_VIS_SUBDIR>/<image basename>_kps.jpg`` when
          ``VISUALIZE_KEYPOINTS`` is set.

    Args:
        img_fnames (list): Paths of the images to process.
        crop_info_file (str): HDF5 file with per-image groups whose attributes
            ``has_crop`` and ``crop_box`` ([x, y, w, h]) describe the crop.
        feature_dir (str): Output directory (created if missing).
        device: Device used for BOTH the extractor and the image tensors.

    No try/except is used: invalid inputs raise.
    """
    print("Running multi-variation ALIKED detection and combining features (with NMS)...")
    os.makedirs(feature_dir, exist_ok=True)
    keypoints_subdir_path = os.path.join(feature_dir, KEYPOINTS_SUBDIR)
    os.makedirs(keypoints_subdir_path, exist_ok=True)
    descriptors_h5_path = os.path.join(feature_dir, DESCRIPTORS_H5)
    # Directory for keypoint visualizations, only needed when enabled
    if VISUALIZE_KEYPOINTS:
        keypoint_vis_dir = os.path.join(feature_dir, KEYPOINT_VIS_SUBDIR)
        os.makedirs(keypoint_vis_dir, exist_ok=True)

    with h5py.File(descriptors_h5_path, mode='w') as f_descriptors, \
         h5py.File(crop_info_file, mode='r') as f_crop_info:

        # The model must live on the same device as the tensors built below,
        # i.e. the `device` argument rather than the global DEVICE.
        extractor = ALIKED(max_num_keypoints=TTA_NUM_FEATURES, detection_threshold=0.25).eval().to(device, dtype=torch.float32)

        for img_path in tqdm(img_fnames, desc="Detecting & Combining Features"):
            img_key = os.path.basename(img_path)
            kp_pt_path = os.path.join(keypoints_subdir_path, f'{img_key}.pt')

            # The descriptors file was just opened in 'w' mode, so this only
            # triggers when the same basename was already handled in this run.
            if os.path.exists(kp_pt_path) and img_key in f_descriptors:
                continue

            img_orig_pil = load_pil_image(img_path)
            img_orig_w, img_orig_h = img_orig_pil.size  # needed for the adaptive NMS size

            # Read the crop box; it is only used when flagged and non-degenerate.
            crop_box = None
            if img_key in f_crop_info:
                crop_attrs = f_crop_info[img_key].attrs
                if crop_attrs.get('has_crop', False):
                    # np.asarray makes the fallback list and a stored array behave alike.
                    candidate_box = np.asarray(crop_attrs.get('crop_box', [0, 0, 0, 0])).tolist()
                    if candidate_box[2] > 0 and candidate_box[3] > 0:
                        crop_box = candidate_box

            # The crop does not depend on the scale, so build it once per image.
            img_cropped_pil = None
            if USE_CROPPED_IMAGES and crop_box is not None:
                x, y, w, h = crop_box
                img_cropped_pil = img_orig_pil.crop((x, y, x + w, y + h))

            # (image, resize target, crop offset) per variation: full image first,
            # then the crop, for every scale.
            variations_to_process = []
            for scale in TTA_SCALES:
                variations_to_process.append((img_orig_pil, scale, None))
                if img_cropped_pil is not None:
                    variations_to_process.append((img_cropped_pil, scale, crop_box))

            all_kps = []
            all_descriptors = []
            all_scores = []

            for var_pil_img, var_scale_target, input_crop_box_offset in variations_to_process:
                timg = K.image_to_tensor(np.array(var_pil_img), keepdim=True).to(device, torch.float32) / 255.0
                if timg.ndim == 3:
                    timg = timg[None, ...]

                # A zero-sized image cannot yield features; skip the variation.
                if timg.shape[2] == 0 or timg.shape[3] == 0:
                    continue

                with torch.inference_mode():
                    extractor.preprocess_conf["resize"] = var_scale_target
                    feats = extractor.extract(timg, resize=var_scale_target, return_processed_size=False, return_scores=True)

                kp_variation = feats['keypoints'].reshape(-1, 2).detach().cpu().numpy()
                desc_variation = feats['descriptors'].reshape(len(kp_variation), -1).detach().cpu().numpy()
                score_variation = feats['keypoint_scores'].reshape(-1).detach().cpu().numpy()

                # Only the crop offset is applied here (None for full images).
                kps_in_original_img_coords = transform_points_from_processed(
                    kp_variation, input_crop_box_offset
                )

                all_kps.append(kps_in_original_img_coords)
                all_descriptors.append(desc_variation)
                all_scores.append(score_variation)

            # Drop variations that produced nothing before concatenating.
            valid_kps = [k for k in all_kps if len(k) > 0]
            valid_descriptors = [d for d in all_descriptors if len(d) > 0]
            valid_scores = [s for s in all_scores if len(s) > 0]

            if not valid_kps:
                torch.save(torch.empty(0, 2, dtype=torch.float32), kp_pt_path)
                if img_key not in f_descriptors:
                    # Write an empty 'data' dataset (uncompressed: zero-sized data
                    # cannot be chunked) so readers of group['data'] do not fail.
                    empty_group = f_descriptors.create_group(img_key)
                    empty_group.create_dataset('data', data=np.empty((0, 0), dtype=np.float32))
                continue

            combined_kps = np.concatenate(valid_kps, axis=0)
            combined_descriptors = np.concatenate(valid_descriptors, axis=0)
            combined_scores = np.concatenate(valid_scores, axis=0) if valid_scores else np.empty(0, dtype=np.float32)

            # --- NMS (Non-Maximum Suppression) ---
            combined_kps_for_dedup = combined_kps
            combined_descs_for_dedup = combined_descriptors

            # NMS needs one score per keypoint; otherwise it is skipped.
            if len(combined_kps) > 0 and len(combined_scores) > 0 and len(combined_kps) == len(combined_scores):
                max_orig_dim = max(img_orig_w, img_orig_h)
                adaptive_nms_size = min(max_orig_dim * NMS_SIZE_PIXELS_ratio, NMS_SIZE_PIXELS)

                indices_after_nms_np = custom_nms_2d_keypoints(
                    combined_kps,
                    combined_scores,
                    adaptive_nms_size
                )

                # Scores are not saved, so only keypoints/descriptors are filtered.
                combined_kps_for_dedup = combined_kps[indices_after_nms_np]
                combined_descs_for_dedup = combined_descriptors[indices_after_nms_np]

            # --- Coordinate-based deduplication (first occurrence wins) ---
            seen_coords = set()
            unique_kps = []
            unique_descriptors = []

            for kp_coord, descriptor in zip(combined_kps_for_dedup, combined_descs_for_dedup):
                rounded_coord = tuple(np.round(kp_coord + 1e-6, COORD_PRECISION).astype(float))

                if rounded_coord not in seen_coords:
                    seen_coords.add(rounded_coord)
                    unique_kps.append(kp_coord)
                    unique_descriptors.append(descriptor)

            unique_kps_np = np.array(unique_kps, dtype=np.float32)
            unique_descriptors_np = np.array(unique_descriptors, dtype=np.float32)

            # Save unique keypoints to .pt
            torch.save(torch.from_numpy(unique_kps_np), kp_pt_path)

            # Save unique descriptors to descriptors.h5
            img_desc_group = f_descriptors.require_group(img_key)
            img_desc_group.create_dataset('data', data=unique_descriptors_np, compression="gzip")

            # --- Optional keypoint visualization ---
            if VISUALIZE_KEYPOINTS:
                if len(unique_kps_np) > 0:
                    vis_img = img_orig_pil.copy()
                    draw = ImageDraw.Draw(vis_img)
                    radius = 3  # Radius of the drawn keypoint marker, in pixels

                    for kp_x, kp_y in unique_kps_np:
                        draw.ellipse((kp_x - radius, kp_y - radius, kp_x + radius, kp_y + radius),
                                     fill='red', outline='red')

                    vis_output_path = os.path.join(keypoint_vis_dir, f'{img_key}_kps.jpg')
                    vis_img.save(vis_output_path)

    print("Multi-variation detection, combination, and deduplication complete.")

In [60]:
def import_into_colmap_cluster(
    img_dir,
    cluster_path='.featureout/cluster_0',
    database_path = '.featureout/cluster_0/colmap.db',
    image_names = None
):
    """
    Import keypoints and matches into a COLMAP database using helper functions.

    The database is created at ``database_path``, its tables are initialised,
    ``add_keypoints`` registers images/keypoints and ``add_matches`` registers
    the matches for the (optionally filtered) images. The connection is always
    closed, even when one of the helpers raises; changes are committed only when
    every step succeeded.

    Args:
        img_dir (str): Directory containing image files
        cluster_path (str): Directory passed to the helpers as ``h5_path``
            (presumably where they look for keypoints.h5 / matches.h5 --
            verify against ``add_keypoints`` / ``add_matches``).
        database_path (str): Output database location
        image_names (list[str]): Optional subset of image names; when given,
            only these names are kept in the mapping handed to ``add_matches``.
            Keypoints are still added for every image ``add_keypoints`` returns.
    """
    db = COLMAPDatabase.connect(database_path)
    try:
        db.create_tables()
        single_camera = False
        # Add keypoints and images
        fname_to_id = add_keypoints(
            db=db,
            h5_path=cluster_path,
            image_path=img_dir,
            img_ext='',
            camera_model='simple-pinhole',
            single_camera=single_camera
        )
        # Restrict the mapping to the selected subset (if provided). A set makes
        # each membership test O(1) instead of scanning the list per image.
        if image_names is not None:
            selected_names = set(image_names)
            fname_to_id = {k: v for k, v in fname_to_id.items() if k in selected_names}

        # Add matches between selected image pairs
        add_matches(
            db=db,
            h5_path=cluster_path,
            fname_to_id=fname_to_id
        )
        db.commit()
    finally:
        # Release the database handle even if a helper failed.
        db.close()

In [61]:
# ... (imports)
import kornia.feature as KF
import torch
import numpy as np
import os
import h5py
import json # Ensure json is imported
from tqdm import tqdm


def match_images_global(
    img_fnames,
    index_pairs,
    data_dir='.',
    device=torch.device('cpu'),
    min_matches=MIN_MATCHES_FOR_GRAPH_EDGE,
    verbose=False
):
    """
    Performs LightGlue matching on combined features for all image pairs
    in index_pairs and saves global files (images.json, keypoints.h5,
    matches.h5) for the entire dataset under
    ``<data_dir>/features_combined/global``.

    Expects ``<data_dir>/features_combined/<KEYPOINTS_SUBDIR>/<basename>.pt`` and
    ``<data_dir>/features_combined/<DESCRIPTORS_H5>`` (one group per image
    basename with a ``data`` dataset) to exist already.

    Args:
        img_fnames (list): List of full paths to image files.
        index_pairs (list): List of (idx1, idx2) tuples for image pairs to match.
        data_dir (str): Base directory where 'features_combined' is located and
                        where the global output will be created.
        device (torch.device): Device to use for matching.
        min_matches (int): Min matches for considering a pair and saving its matches.
        verbose (bool): Whether to print detailed match info.

    Returns:
        list: A list containing a single list with the global indices of all images.
              This is to maintain a similar structure to the clustering output,
              indicating a single group.

    Notes:
        * Match indices are stored as int32. int16 wraps around above 32767,
          which would corrupt the indices of images with more keypoints than that.
        * matches.h5 is always rewritten (possibly empty), so results from an
          earlier run can never be picked up by mistake.
        * A pair is skipped when either image has no keypoints or no stored
          descriptors.
    """
    # Define paths based on data_dir and configuration
    feature_dir_combined = os.path.join(data_dir, 'features_combined')
    keypoints_subdir_path = os.path.join(feature_dir_combined, KEYPOINTS_SUBDIR)  # per-image .pt files
    descriptors_h5_path = os.path.join(feature_dir_combined, DESCRIPTORS_H5)  # per-image descriptors

    # Directory where the global output files will be created
    global_output_dir = os.path.join(feature_dir_combined, 'global')
    os.makedirs(global_output_dir, exist_ok=True)

    lg_matcher = KF.LightGlueMatcher(
        "aliked", {
            "width_confidence": -1,
            "depth_confidence": -1,
            "mp": 'cuda' in str(device)
        }
    ).eval().to(device)

    # key1 -> {key2 -> (n_matches, 2) index array into the per-image features}
    all_matches = {}

    print("Performing LightGlue matching on combined features...")

    # The context manager closes the descriptors file even if matching fails.
    with h5py.File(descriptors_h5_path, mode='r') as f_descriptors:
        for idx1, idx2 in tqdm(index_pairs, desc="LightGlue Matching"):
            fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
            key1 = os.path.basename(fname1)
            key2 = os.path.basename(fname2)

            kp1_pt_path = os.path.join(keypoints_subdir_path, f'{key1}.pt')
            kp2_pt_path = os.path.join(keypoints_subdir_path, f'{key2}.pt')

            # Load combined keypoints (original coordinates)
            kp1_combined_orig = torch.load(kp1_pt_path, weights_only=False).to(device)
            kp2_combined_orig = torch.load(kp2_pt_path, weights_only=False).to(device)

            desc_group1 = f_descriptors[key1]
            desc_group2 = f_descriptors[key2]

            # Check keypoints and the presence of 'data' before reading descriptors:
            # a feature-less image may have a group without a 'data' dataset.
            pair_has_features = (
                len(kp1_combined_orig) > 0 and len(kp2_combined_orig) > 0
                and 'data' in desc_group1 and 'data' in desc_group2
            )
            if pair_has_features:
                desc1_combined = torch.from_numpy(desc_group1['data'][...]).to(device)
                desc2_combined = torch.from_numpy(desc_group2['data'][...]).to(device)
                pair_has_features = len(desc1_combined) > 0 and len(desc2_combined) > 0

            if not pair_has_features:
                if verbose:
                    tqdm.write(f"Skipping {key1}-{key2}: Zero features found.")
                continue

            # Create dummy LAFs centered at keypoints (using original coordinates)
            kp1_tensor = kp1_combined_orig.float()[None]  # add batch dim
            kp2_tensor = kp2_combined_orig.float()[None]  # add batch dim
            laf1 = KF.laf_from_center_scale_ori(kp1_tensor)
            laf2 = KF.laf_from_center_scale_ori(kp2_tensor)

            with torch.inference_mode():
                scores, matches = lg_matcher(desc1_combined, desc2_combined, laf1, laf2)

            n_matches = len(matches)

            if verbose:
                tqdm.write(f'{key1}-{key2}: {n_matches} matches')

            # Store matches if enough are found
            if n_matches >= min_matches:
                matches_indices = matches.cpu().detach().numpy().astype('int32')
                all_matches.setdefault(key1, {})[key2] = matches_indices

    print("Matching complete. Saving global files...")

    # --- Save Global Files ---

    # 1. Save images.json (list of full filenames for all images)
    images_json_path = os.path.join(global_output_dir, 'images.json')
    with open(images_json_path, 'w') as f_json:
        json.dump(img_fnames, f_json, indent=2)
    print(f"Saved global images list to {images_json_path}")

    # 2. Save keypoints.h5 for all images (using combined keypoints per image)
    keypoints_h5_path = os.path.join(global_output_dir, 'keypoints.h5')
    with h5py.File(keypoints_h5_path, 'w') as f_out_kp:
        for img_fname in img_fnames:
            img_key = os.path.basename(img_fname)
            kp_pt_path = os.path.join(keypoints_subdir_path, f'{img_key}.pt')

            # Load combined KPs for this image (original coords)
            kp_combined_np = torch.load(kp_pt_path, weights_only=False).cpu().numpy()
            f_out_kp.create_dataset(img_key, data=kp_combined_np.astype(np.float32))
            if verbose:
                print(f"Saved combined KPs for {img_key} to global H5")

    print(f"Saved global keypoints to {keypoints_h5_path}")

    # 3. Save matches.h5. The file is always (re)written so that a previous
    #    run's matches never survive when this run found none.
    matches_h5_path = os.path.join(global_output_dir, 'matches.h5')
    with h5py.File(matches_h5_path, 'w') as f_match:
        for key1, matches_dict in all_matches.items():
            group = f_match.create_group(key1)
            for key2, match_data in matches_dict.items():
                group.create_dataset(key2, data=match_data, dtype='int32')
    if all_matches:
        print(f"Saved global matches to {matches_h5_path}")
    else:
        print(f"No matches found above threshold {min_matches}; wrote empty {matches_h5_path}")

    # Return a list containing a single list of all image indices
    all_image_indices = list(range(len(img_fnames)))
    return [all_image_indices]

# Note: This function assumes that detect_and_combine_features
# has already been run and created the combined features in
# data_dir/features_combined/keypoints/ and data_dir/features_combined/descriptors.h5.
# It saves the global feature and match files into data_dir/features_combined/global/.

In [62]:
import json
import os
import gc
from time import time, sleep
import pycolmap
import h5py
import numpy as np

# import_into_colmap_cluster (defined in an earlier cell of this notebook) imports
# keypoints and matches from the .h5 files into a COLMAP database.
# It is given the directory that holds keypoints.h5 and matches.h5.
# If the helper is moved into a module, import it here instead, e.g.:
# from your_colmap_utils import import_into_colmap_cluster


def run_colmap_global(
    feature_dir,
    images_dir,
    timings
):
    """
    Build a COLMAP database from the global feature/match files and run
    geometric verification plus incremental mapping on it.

    No try/except is used: a failing COLMAP step raises to the caller.

    Parameters:
    - feature_dir: Base directory containing the 'features_combined/global' folder
      (with images.json and the .h5 files consumed by import_into_colmap_cluster).
    - images_dir: Path to raw image files.
    - timings: dict to record durations; the keys 'RANSAC_Global' and
      'Reconstruction_Global' are overwritten with the durations of this call.

    Returns:
    - (timings, maps): the same timings dict and the value returned by
      pycolmap.incremental_mapping.
    """
    global_dir = os.path.join(feature_dir, 'features_combined', 'global')
    db_file = os.path.join(global_dir, 'colmap.db')

    # images.json stores full paths; COLMAP works with basenames.
    with open(os.path.join(global_dir, 'images.json'), 'r') as listing:
        basenames = list(map(os.path.basename, json.load(listing)))

    # Always start from a fresh database.
    if os.path.isfile(db_file):
        os.remove(db_file)
        print(f"[Global Reconstruction] Removed existing database at {db_file}")

    gc.collect()
    sleep(1)

    # Step 1: populate the database with keypoints and matches.
    import_into_colmap_cluster(
        img_dir=images_dir,
        cluster_path=global_dir,
        database_path=db_file,
        image_names=basenames
    )

    # Step 2: geometric verification (RANSAC).
    ransac_started = time()
    pycolmap.match_exhaustive(db_file)
    ransac_elapsed = time() - ransac_started
    timings['RANSAC_Global'] = ransac_elapsed
    print(f'[Global Reconstruction] Ran RANSAC in {ransac_elapsed:.4f} sec')

    # Step 3: incremental mapping.
    rec_dir = os.path.join(global_dir, 'colmap_rec_aliked')
    os.makedirs(rec_dir, exist_ok=True)

    pipeline_options = pycolmap.IncrementalPipelineOptions()
    pipeline_options.min_model_size = 8    # minimum number of registered images per model
    pipeline_options.max_num_models = 25   # upper bound on the number of models produced
    pipeline_options.mapper.filter_max_reproj_error = 10.0

    mapping_started = time()
    # image_path must be the directory that holds the actual image files.
    maps = pycolmap.incremental_mapping(
        database_path=db_file,
        image_path=images_dir,
        output_path=rec_dir,
        options=pipeline_options
    )

    model_count = len(maps)
    print(f'[Global Reconstruction] Found {model_count} successful reconstruction(s).')

    mapping_elapsed = time() - mapping_started
    timings['Reconstruction_Global'] = mapping_elapsed
    print(f'[Global Reconstruction] Reconstruction done in {mapping_elapsed:.4f} sec')
    return timings, maps

# Note: This function now expects the output of match_images_global to be in
# feature_dir/features_combined/global/.

In [63]:
# Collect vital info from the dataset

@dataclasses.dataclass
class Prediction:
    """One image of a dataset together with the pose predicted for it.

    Instances are created from the rows of the labels / sample-submission CSV.
    The last three fields stay ``None`` until a reconstruction registers the image.
    """
    image_id: str | None  # A unique identifier for the row -- unused otherwise. Used only on the hidden test set.
    dataset: str  # Name of the dataset the image belongs to
    filename: str  # Image file name, relative to the dataset's image directory
    cluster_index: int | None = None  # Index of the reconstruction (map) the image was registered in
    rotation: np.ndarray | None = None  # Rotation matrix of the image's cam_from_world transform
    translation: np.ndarray | None = None  # Translation of the image's cam_from_world transform

# is_train=True runs the notebook on the training data.
# is_train=False is required when submitting to the competition (the real test data
# is hidden and differs from what is visible in the "test" folder).
is_train = True
data_dir = '/kaggle/input/image-matching-challenge-2025'
workdir = '/kaggle/working/result/'
os.makedirs(workdir, exist_ok=True)

# The training labels and the sample submission share the columns read below.
sample_submission_csv = os.path.join(
    data_dir,
    'train_labels.csv' if is_train else 'sample_submission.csv',
)

# dataset name -> list of Prediction objects, in CSV row order
samples = {}
competition_data = pd.read_csv(sample_submission_csv)
for _, row in competition_data.iterrows():
    # Note: For the test data, the "scene" column has no meaning, and the
    # rotation_matrix and translation_vector columns are random.
    entry = Prediction(
        image_id=None if is_train else row.image_id,
        dataset=row.dataset,
        filename=row.image
    )
    samples.setdefault(row.dataset, []).append(entry)

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [64]:
import os
import shutil

def delete_cluster_folders(base_dir):
    """Recursively delete every direct sub-directory of ``base_dir`` whose name
    starts with "cluster". Files and other directories are left untouched."""
    for entry in os.listdir(base_dir):
        if not entry.startswith("cluster"):
            continue
        candidate = os.path.join(base_dir, entry)
        if not os.path.isdir(candidate):
            continue
        print(f"Deleting: {candidate}")
        shutil.rmtree(candidate)


In [68]:
import json
import os
import gc
from time import time, sleep
import pycolmap
import h5py
import numpy as np
from tqdm import tqdm
from copy import deepcopy # For deepcopy(image.cam_from_world...)
# Assuming `clear_output` is from IPython.display
# from IPython.display import clear_output


# Placeholder definitions for constants and functions
# Ensure these are correctly imported or defined in your actual script
# For example:
# from .config import (
#     MIN_MATCHES_FOR_GRAPH_EDGE, KEYPOINTS_SUBDIR, DESCRIPTORS_H5,
#     CROP_DATA, CROP_BOXES_FILE, CROP_PADDING_FACTOR, MIN_CROP_DIMENSION
# )
# from .feature_extraction_utils import (
#     load_pil_image, transform_points_from_processed,
#     perform_initial_detection_and_matching, detect_and_combine_features
# )
# from .matching_utils import get_image_pairs_shortlist, match_images_global
# from .colmap_utils import run_colmap_global, import_into_colmap_cluster

# If not imported, define placeholders here to avoid NameErrors:
# (These values should match your actual configurations)
# MIN_MATCHES_FOR_GRAPH_EDGE = 100
# KEYPOINTS_SUBDIR = 'keypoints'
# DESCRIPTORS_H5 = 'descriptors.h5'
# CROP_DATA = 'crop_data.h5'
# CROP_BOXES_FILE = 'crop_boxes.h5'
# CROP_PADDING_FACTOR = 0.1
# MIN_CROP_DIMENSION = 512
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


import traceback  # full stack traces for datasets that fail inside the pipeline

gc.collect()

max_images = None  # Used For debugging only. Set to None to disable.
datasets_to_process = None  # Not the best convention, but None means all datasets.

# is_train, samples, data_dir, workdir and DEVICE must have been defined by
# earlier cells of this notebook.

if is_train:
    # Note: When running on the training dataset, the notebook will hit the time limit and die. Use this filter to run on a few specific datasets.
    datasets_to_process = [
        # New data.
        'amy_gardens',
        'ETs',
        'fbk_vineyard',
        'stairs',
        # Data from IMC 2023 and 2024.
        # 'imc2024_dioscuri_baalshamin',
        # 'imc2023_theather_imc2024_church',
        # 'imc2023_heritage',
        # 'imc2023_haiper',
        # 'imc2024_lizard_pond',
        # Crowdsourced PhotoTourism data.
        # 'pt_stpeters_stpauls',
        # 'pt_brandenburg_british_buckingham',
        # 'pt_piazzasanmarco_grandplace',
        # 'pt_sacrecoeur_trevi_tajmahal',
    ]

# One list entry per processed dataset for each pipeline stage.
timings = {
    "shortlisting":[],
    "feature_detection": [],
    "feature_augmentation":[],
    "feature_merge":[],
    "feature_matching":[],
    "RANSAC": [],
    "Reconstruction": [],
}
mapping_result_strs = []


print (f"Extracting on device {DEVICE}")
for dataset, predictions in samples.items():
    if datasets_to_process and dataset not in datasets_to_process:
        print(f'Skipping "{dataset}"')
        continue

    images_dir = os.path.join(data_dir, 'train' if is_train else 'test', dataset)
    images = [os.path.join(images_dir, p.filename) for p in predictions]
    if max_images is not None:
        images = images[:max_images]

    print(f'\nProcessing dataset "{dataset}": {len(images)} images')

    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    feature_dir = os.path.join(workdir, 'featureout', dataset)
    os.makedirs(feature_dir, exist_ok=True)

    # The individual steps do no error handling of their own. A failure in any
    # step is caught below so that the remaining datasets are still processed;
    # the failed dataset is reported with its full traceback.
    try:

        t_start_pipeline = time()  # whole-pipeline timer for this dataset

        # 1. Image Pair Shortlisting
        t = time()
        index_pairs = get_image_pairs_shortlist(
            images,
            sim_th = 0.5,
            min_pairs = 10,
            exhaustive_if_less = 20,
            device=DEVICE
        )
        timings['shortlisting'].append(time() - t)
        print (f'Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec')
        gc.collect()

        # 2. Initial detection and matching, used only to derive cropping data.
        #    feature_dir is passed as data_dir so temporary files end up there.
        t = time()
        crop_data_file = perform_initial_detection_and_matching(images, index_pairs, data_dir=feature_dir, device=DEVICE)
        timings['feature_detection'].append(time() - t)
        print(f'Initial detection for cropping done in {time() - t:.4f} sec')
        gc.collect()

        # 3. Calculate crop boxes from the initial match analysis.
        #    The returned path is handed to detect_and_combine_features below.
        t = time()
        crop_info_file = calculate_crop_boxes(data_dir=feature_dir)
        timings['feature_augmentation'].append(time() - t)
        print(f'Crop boxes calculated in {time() - t:.4f} sec')
        gc.collect()

        # 4. Multi-variation ALIKED detection; combined features are saved per
        #    image below feature_dir/features_combined.
        t = time()
        detect_and_combine_features(images, crop_info_file, os.path.join(feature_dir, 'features_combined'), device=DEVICE)
        timings['feature_merge'].append(time() - t)
        print(f'Features combined in {time() - t:.4f} sec')
        gc.collect()

        # 5. LightGlue matching on the combined features; global files are saved
        #    below feature_dir/features_combined/global.
        t = time()
        match_images_global(images, index_pairs, data_dir=feature_dir, device=DEVICE)
        timings['feature_matching'].append(time() - t)
        print(f'Features matched globally in {time() - t:.4f} sec')
        gc.collect()

        # 6. Run COLMAP global reconstruction.
        #    run_colmap_global stores the durations of its latest call under
        #    'RANSAC_Global' / 'Reconstruction_Global'; they are folded into the
        #    per-dataset lists here so each list holds what its key names.
        t = time()
        timings, maps = run_colmap_global(feature_dir, images_dir, timings)
        t_colmap = time() - t
        if 'RANSAC_Global' in timings:
            timings['RANSAC'].append(timings['RANSAC_Global'])
        timings['Reconstruction'].append(timings.get('Reconstruction_Global', t_colmap))
        print(f'Global reconstruction done in {t_colmap:.4f} sec '
              f'(whole pipeline: {time() - t_start_pipeline:.4f} sec)')
        print(maps)

        # Copy the pose of every registered image into its Prediction.
        registered = 0
        for map_index, cur_map in maps.items():
            for index, image in cur_map.images.items():
                prediction_index = filename_to_index[image.name]
                predictions[prediction_index].cluster_index = map_index
                predictions[prediction_index].rotation = deepcopy(image.cam_from_world.rotation.matrix())
                predictions[prediction_index].translation = deepcopy(image.cam_from_world.translation)
                registered += 1
        mapping_result_str = f'Dataset "{dataset}" -> Registered {registered} / {len(images)} images with {len(maps)} clusters'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)
        gc.collect()
    except Exception:
        # Best effort: keep going with the next dataset, but show where and why
        # this one failed instead of only the bare exception message.
        traceback.print_exc()
        mapping_result_str = f'Dataset "{dataset}" -> Failed!'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)
print('\nResults')
for s in mapping_result_strs:
    print(s)

# print('\nTimings')
# for k, v in timings.items():
#      print(f'{k} -> total={sum(v):.02f} sec.')

Extracting on device cuda
Skipping "imc2023_haiper"
Skipping "imc2023_heritage"
Skipping "imc2023_theather_imc2024_church"
Skipping "imc2024_dioscuri_baalshamin"
Skipping "imc2024_lizard_pond"
Skipping "pt_brandenburg_british_buckingham"
Skipping "pt_piazzasanmarco_grandplace"
Skipping "pt_sacrecoeur_trevi_tajmahal"
Skipping "pt_stpeters_stpauls"

Processing dataset "amy_gardens": 200 images


100%|██████████| 200/200 [00:23<00:00,  8.56it/s]


Distance Matrix Statistics:
Min:  0.1448
Max:  0.4239
Mean: 0.2737
Std:  0.0388
20%:  0.2375
25%:  0.2439
USED 50%:  0.2754
75%:  0.3017
Shortlisting. Number of pairs to match: 9954. Done in 23.6682 sec
Performing initial ALIKED detection (1280) and LightGlue matching for cropping data...
Running initial ALIKED detection...


Initial ALIKED Detection: 100%|██████████| 200/200 [00:13<00:00, 14.93it/s]


Running initial LightGlue matching...
Loaded LightGlue model


Initial LightGlue Matching: 100%|██████████| 9954/9954 [12:50<00:00, 12.93it/s]


Analyzing initial matches for cropping data...
Initial detection and matching complete. Cropping data saved to /kaggle/working/result/featureout/amy_gardens/crop_data.h5
Initial detection for cropping done in 786.7096 sec
Calculating crop boxes from /kaggle/working/result/featureout/amy_gardens/crop_data.h5...
Crop boxes calculated and saved to /kaggle/working/result/featureout/amy_gardens/crop_boxes.h5
Crop boxes calculated in 0.0878 sec
Running multi-variation ALIKED detection and combining features (with NMS)...


Detecting & Combining Features: 100%|██████████| 200/200 [01:56<00:00,  1.71it/s]


Multi-variation detection, combination, and deduplication complete.
Features combined in 116.9322 sec
Loaded LightGlue model
Performing LightGlue matching on combined features...


LightGlue Matching:   8%|▊         | 776/9954 [02:26<28:52,  5.30it/s]


KeyboardInterrupt: 

In [None]:
# Write the submission CSV (a submission file must be created).

def array_to_str(array):
    """Join the values of `array` with ';', each rendered with the `.09f` format."""
    return ';'.join(f"{x:.09f}" for x in array)


def none_to_str(n):
    """Return `n` copies of 'nan' joined with ';' (placeholder for a missing pose)."""
    return ';'.join('nan' for _ in range(n))


submission_file = '/kaggle/working/submission.csv'
with open(submission_file, 'w') as f:
    # The two layouts differ only by a leading `image_id` column, which is
    # written when not running on the training set.
    if is_train:
        header = 'dataset,scene,image,rotation_matrix,translation_vector\n'
    else:
        header = 'image_id,dataset,scene,image,rotation_matrix,translation_vector\n'
    f.write(header)

    for dataset in samples:
        for prediction in samples[dataset]:
            # Predictions without a cluster index go to the 'outliers' scene.
            if prediction.cluster_index is None:
                cluster_name = 'outliers'
            else:
                cluster_name = f'cluster{prediction.cluster_index}'

            # A missing rotation / translation is written as nan placeholders
            # (9 and 3 entries respectively).
            if prediction.rotation is None:
                rotation = none_to_str(9)
            else:
                rotation = array_to_str(prediction.rotation.flatten())

            if prediction.translation is None:
                translation = none_to_str(3)
            else:
                translation = array_to_str(prediction.translation)

            row = f'{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n'
            if not is_train:
                row = f'{prediction.image_id},{row}'
            f.write(row)

!head {submission_file}

In [None]:
# Compute the metric locally, but only when running on the training set.
# Do not do this when submitting a notebook for scoring: all that is needed
# there is to save the submission to /kaggle/working/submission.csv.

if is_train:
    t = time()
    final_score, dataset_scores = metric.score(
        gt_csv='/kaggle/input/image-matching-challenge-2025/train_labels.csv',
        user_csv=submission_file,
        thresholds_csv='/kaggle/input/image-matching-challenge-2025/train_thresholds.csv',
        # Inside this `if is_train:` branch a `None if is_train else ...`
        # expression can only ever yield None, so pass None directly.
        mask_csv=None,
        inl_cf=0,
        strict_cf=-1,
        verbose=True,
    )
    print(f'Computed metric in: {time() - t:.02f} sec.')