## Example submission

Image Matching Challenge 2025: https://www.kaggle.com/competitions/image-matching-challenge-2025

This notebook creates a simple submission using ALIKED and LightGlue, plus DINO for shortlisting, on GPU. Adapted from [last year](https://www.kaggle.com/code/oldufo/imc-2024-submission-example).

Remember to select an accelerator on the sidebar to the right, and to disable internet access when submitting a notebook to the competition.

In [1]:
# IMPORTANT 
#Install dependencies and copy model weights to run the notebook without internet access when submitting to the competition.

!pip install --no-index /kaggle/input/imc2024-packages-lightglue-rerun-kornia/* --no-deps
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp /kaggle/input/aliked/pytorch/aliked-n16/1/aliked-n16.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/lightglue/pytorch/aliked/1/aliked_lightglue.pth /root/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv-pth

Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia-0.7.2-py2.py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_moons-0.2.9-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/kornia_rs-0.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/lightglue-0.0-py3-none-any.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/pycolmap-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Processing /kaggle/input/imc2024-packages-lightglue-rerun-kornia/rerun_sdk-0.15.0a2-cp38-abi3-manylinux_2_31_x86_64.whl
Installing collected packages: rerun-sdk, pycolmap, lightglue, kornia-rs, kornia-moons, kornia
  Attempting uninstall: kornia-rs
    Found existing installation: kornia_rs 0.1.8
    Uninstalling kornia_rs-0.1.8:
      Successfully uninstalled kornia_rs-0.1.8
  Attempting uninstall: kornia
   

In [2]:
import sys
import os
from tqdm import tqdm
from time import time, sleep
import gc
import numpy as np
import h5py
import dataclasses
import pandas as pd
from IPython.display import clear_output
from collections import defaultdict
from copy import deepcopy
from PIL import Image

import cv2
import torch
import torch.nn.functional as F
import kornia as K
import kornia.feature as KF

import torch
from lightglue import match_pair
from lightglue import ALIKED, LightGlue
from lightglue.utils import load_image, rbd
from transformers import AutoImageProcessor, AutoModel

# IMPORTANT Utilities: importing data into colmap and competition metric
import pycolmap
sys.path.append('/kaggle/input/imc25-utils')
from database import *
from h5_to_db import *
import metric
from sklearn.cluster import DBSCAN
# ... other imports

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [3]:

print("PyTorch version:", torch.__version__)
import sys
print("Python version:", sys.version)

print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Device count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device())
print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))


PyTorch version: 2.5.1+cu121
Python version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
CUDA available: True
CUDA version: 12.1
Device count: 1
Current device: 0
Device name: Tesla P100-PCIE-16GB


In [4]:
# Do not forget to select an accelerator on the sidebar to the right.
device = K.utils.get_cuda_device_if_available(0)
print(f'{device=}')

device=device(type='cuda', index=0)


In [5]:
def load_pil_image(fname):
    """Loads an image using PIL."""
    return Image.open(fname).convert('RGB')

def get_image_size(fname):
    """Gets image size (width, height) using PIL."""
    with Image.open(fname) as img:
        return img.size # (width, height)

def get_original_coords(kp_coords, img_orig_size, variation_info):
    """
    Transforms keypoint coordinates from variation space back to original image space.

    Args:
        kp_coords (np.ndarray): Keypoint coordinates [N, 2] in the variation space.
        img_orig_size (tuple): Original image size (width, height).
        variation_info (dict): Dictionary containing 'type' ('orig' or 'crop'),
                               'scale_factor' (scale used for resize),
                               'crop_box' ([x, y, w, h] in original coords, None if type is 'orig').

    Returns:
        np.ndarray: Keypoint coordinates [N, 2] in the original image space.
    """
    if len(kp_coords) == 0:
        return np.empty((0, 2))

    coords = kp_coords.copy() # Work on a copy

    # 1. Reverse scaling
    scale_factor = variation_info['scale_factor']
    coords /= scale_factor # Now coords are in the space of the original/cropped image (before resize)

    # 2. Reverse cropping offset
    if variation_info['type'] == 'crop' and variation_info['crop_box'] is not None:
        x_crop, y_crop, _, _ = variation_info['crop_box']
        coords[:, 0] += x_crop
        coords[:, 1] += y_crop

    # Ensure points are within original image bounds (optional, but good practice)
    # coords[:, 0] = np.clip(coords[:, 0], 0, img_orig_size[0] - 1)
    # coords[:, 1] = np.clip(coords[:, 1], 0, img_orig_size[1] - 1)

    return coords

In [6]:
# --- Configuration ---
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
GLOBAL_DESC_MODEL = '/kaggle/input/dinov2/pytorch/base/1' # Path to your DINOv2 model
DATA_DIR = '.' # Base directory for outputs
# FEATURE_DIR = os.path.join(DATA_DIR, 'features_combined')
# MATCH_DIR = os.path.join(DATA_DIR, 'matches_global')

# Initial detection parameters for cropping data collection
INITIAL_DETECTION_RESIZE = 1280
INITIAL_DETECTION_NUM_FEATURES = 3072

# Parameters for TTA detection and combination
TTA_SCALES = [1280, 2048]
TTA_NUM_FEATURES = 2048
USE_CROPPED_IMAGES = True

ALIKED_TH = 0.4
ALIKED_TTA_TH = 0.6

# Parameters for New Cropping Method
MIN_PAIRS_FOR_CROPPING = 3
CROP_PADDING = 50
DBSCAN_EPS = 20 # Fixed EPS fallback or base value
DBSCAN_MIN_SAMPLES = 5 # DBSCAN min_samples parameter
# New config for adaptive EPS based on resolution
DBSCAN_EPS_RESOLUTION_RATIO = 0.02 # Ratio of max image dimension for EPS (e.g., 0.01 -> 1% of longer side)


# Coordinate precision for deduplication (rounding float coordinates)
COORD_PRECISION = 1 # Number of decimal places to round coordinates for uniqueness check

# Matching parameters
MIN_MATCHES_PER_VARIATION = 5 # Lowered this threshold slightly, as combining might filter some
MIN_TOTAL_MATCHES_PER_PAIR = 20 # Minimum unique matches for a pair to be saved in global list

# Output file names (within FEATURE_DIR and MATCH_DIR)
KEYPOINTS_SUBDIR = 'keypoints'
DESCRIPTORS_H5 = 'descriptors.h5'
MATCHES_PT = 'matches.pt'
CROP_DATA = 'crop_data.h5'
CROP_INFO = 'crop_info.h5'

# Parameters for graph building and clustering thresholds
# These are the internal names used in the function; map external arguments to these if needed
MIN_MATCHES_FOR_GRAPH_EDGE = 20  # Min matches for adding edge to graph
MIN_MATCHES_FOR_FILTERED_GRAPH = 100 # Min matches for filtering graph (your aliked_dis_min)
MIN_IMAGES_PER_CLUSTER = 5 # Min images in a final cluster



In [7]:
def load_torch_image(fname, device=torch.device('cpu')):
    img = K.io.load_image(fname, K.io.ImageLoadType.RGB32, device=device)[None, ...]
    return img


# # Must Use efficientnet global descriptor to get matching shortlists.
# def get_global_desc(fnames, device = torch.device('cpu')):
#     processor = AutoImageProcessor.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
#     model = AutoModel.from_pretrained('/kaggle/input/dinov2/pytorch/base/1')
#     model = model.eval()
#     model = model.to(device)
#     global_descs_dinov2 = []
#     for i, img_fname_full in tqdm(enumerate(fnames),total= len(fnames)):
#         key = os.path.splitext(os.path.basename(img_fname_full))[0]
#         timg = load_torch_image(img_fname_full)
#         with torch.inference_mode():
#             inputs = processor(images=timg, return_tensors="pt", do_rescale=False).to(device)
#             outputs = model(**inputs)
#             dino_mac = F.normalize(outputs.last_hidden_state[:,1:].max(dim=1)[0], dim=1, p=2)
#         global_descs_dinov2.append(dino_mac.detach().cpu())
#     global_descs_dinov2 = torch.cat(global_descs_dinov2, dim=0)
#     return global_descs_dinov2


def get_global_desc(fnames, device=torch.device('cpu')):
    """Computes global descriptors for images."""
    print(f"Computing global descriptors with DINOv2 on {device}...")
    processor = AutoImageProcessor.from_pretrained(GLOBAL_DESC_MODEL)
    model = AutoModel.from_pretrained(GLOBAL_DESC_MODEL)
    model = model.eval().to(device)

    global_descs_dinov2 = []
    for img_fname_full in tqdm(fnames, desc="DINOv2 Descriptors"):
        # Need error handling here for potentially problematic images
        try:
            timg = load_torch_image(img_fname_full, device=device)
            with torch.inference_mode():
                inputs = processor(images=timg, return_tensors="pt", do_rescale=False).to(device)
                outputs = model(**inputs)
                # Using CLS token or pooling as descriptor
                # descriptor = outputs.last_hidden_state[:, 0].mean(dim=1) # CLS token
                descriptor = F.normalize(outputs.last_hidden_state[:, 1:].max(dim=1)[0], dim=1, p=2) # Pool spatial tokens
                global_descs_dinov2.append(descriptor.detach().cpu())
        except Exception as e:
            print(f"Error processing {img_fname_full} for global descriptor: {e}")
            # Append a zero vector or handle missing descriptor later
            global_descs_dinov2.append(torch.zeros(1, model.config.hidden_size))


    # Pad or handle missing descriptors if errors occurred
    max_dim = max(d.shape[1] for d in global_descs_dinov2)
    global_descs_dinov2 = [
        F.pad(d, (0, max_dim - d.shape[1])) for d in global_descs_dinov2
    ]
    global_descs_dinov2 = torch.cat(global_descs_dinov2, dim=0)

    return global_descs_dinov2


def get_img_pairs_exhaustive(img_fnames):
    index_pairs = []
    for i in range(len(img_fnames)):
        for j in range(i+1, len(img_fnames)):
            index_pairs.append((i,j))
    return index_pairs


def get_image_pairs_shortlist(fnames,
                              sim_th = 0.6, # should be strict
                              min_pairs = 30,
                              exhaustive_if_less = 20,
                              device=torch.device('cpu')):
    num_imgs = len(fnames)
    if num_imgs <= exhaustive_if_less:
        return get_img_pairs_exhaustive(fnames)
    descs = get_global_desc(fnames, device=device)
    dm = torch.cdist(descs, descs, p=2).detach().cpu().numpy()

    # 只分析上三角（去掉对角线），避免重复
    triu_indices = np.triu_indices_from(dm, k=1)
    dm_flat = dm[triu_indices]
    
    # 打印统计信息
    print("Distance Matrix Statistics:")
    print(f"Min:  {dm_flat.min():.4f}")
    print(f"Max:  {dm_flat.max():.4f}")
    print(f"Mean: {dm_flat.mean():.4f}")
    print(f"Std:  {dm_flat.std():.4f}")
    print(f"20%:  {np.percentile(dm_flat, 20):.4f}")
    print(f"25%:  {np.percentile(dm_flat, 25):.4f}")
    print(f"USED 50%:  {np.percentile(dm_flat, 50):.4f}")
    print(f"75%:  {np.percentile(dm_flat, 75):.4f}")
    threshold = dm_flat.mean() + np.sqrt(3) * dm_flat.std()
    # removing half
    # thr = min(np.percentile(dm_flat, 50), sim_th)
    mask = dm <= np.percentile(dm_flat, 50)
    # print("thr :", thr)
    # mask = dm<=threshold
    total = 0
    matching_list = []
    ar = np.arange(num_imgs)
    already_there_set = []
    for st_idx in range(num_imgs-1):
        mask_idx = mask[st_idx]
        to_match = ar[mask_idx]
        if len(to_match) < min_pairs:
            to_match = np.argsort(dm[st_idx])[:min_pairs]  
        for idx in to_match:
            if st_idx == idx:
                continue
            if dm[st_idx, idx] < threshold:
                matching_list.append(tuple(sorted((st_idx, idx.item()))))
                total+=1
    matching_list = sorted(list(set(matching_list)))
    return matching_list

# def detect_aliked(img_fnames,
#                   feature_dir = '.featureout',
#                   num_features = 4096,
#                   resize_to = 1024,
#                   device=torch.device('cpu')):
#     dtype = torch.float32 # ALIKED has issues with float16
#     extractor = ALIKED(max_num_keypoints=num_features, detection_threshold=0.1).eval().to(device, dtype)
#     extractor.preprocess_conf["resize"] = resize_to
#     if not os.path.isdir(feature_dir):
#         os.makedirs(feature_dir)
#     with h5py.File(f'{feature_dir}/keypoints.h5', mode='w') as f_kp, \
#          h5py.File(f'{feature_dir}/descriptors.h5', mode='w') as f_desc:
#         for img_path in tqdm(img_fnames):
#             img_fname = img_path.split('/')[-1]
#             key = img_fname
#             with torch.inference_mode():
#                 image0 = load_torch_image(img_path, device=device).to(dtype)
#                 feats0 = extractor.extract(image0)  # auto-resize the image, disable with resize=None
#                 kpts = feats0['keypoints'].reshape(-1, 2).detach().cpu().numpy()
#                 descs = feats0['descriptors'].reshape(len(kpts), -1).detach().cpu().numpy()
#                 f_kp[key] = kpts
#                 f_desc[key] = descs
#     return

# def match_with_lightglue(img_fnames,
#                    index_pairs,
#                    feature_dir = '.featureout',
#                    device=torch.device('cpu'),
#                    min_matches=20,verbose=True):
#     lg_matcher = KF.LightGlueMatcher("aliked", {"width_confidence": -1,
#                                                 "depth_confidence": -1,
#                                                  "mp": True if 'cuda' in str(device) else False}).eval().to(device)
#     with h5py.File(f'{feature_dir}/keypoints.h5', mode='r') as f_kp, \
#         h5py.File(f'{feature_dir}/descriptors.h5', mode='r') as f_desc, \
#         h5py.File(f'{feature_dir}/matches.h5', mode='w') as f_match:
#         for pair_idx in tqdm(index_pairs):
#             idx1, idx2 = pair_idx
#             fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
#             key1, key2 = fname1.split('/')[-1], fname2.split('/')[-1]
#             kp1 = torch.from_numpy(f_kp[key1][...]).to(device)
#             kp2 = torch.from_numpy(f_kp[key2][...]).to(device)
#             desc1 = torch.from_numpy(f_desc[key1][...]).to(device)
#             desc2 = torch.from_numpy(f_desc[key2][...]).to(device)
#             with torch.inference_mode():
#                 dists, idxs = lg_matcher(desc1,
#                                          desc2,
#                                          KF.laf_from_center_scale_ori(kp1[None]),
#                                          KF.laf_from_center_scale_ori(kp2[None]))
#             if len(idxs)  == 0:
#                 continue
#             n_matches = len(idxs)
#             if verbose:
#                 print (f'{key1}-{key2}: {n_matches} matches')
#             group  = f_match.require_group(key1)
#             if n_matches >= min_matches:
#                  group.create_dataset(key2, data=idxs.detach().cpu().numpy().reshape(-1, 2))
#     return



def import_into_colmap(img_dir, feature_dir ='.featureout', database_path = 'colmap.db'):
    db = COLMAPDatabase.connect(database_path)
    db.create_tables()
    single_camera = False
    fname_to_id = add_keypoints(db, feature_dir, img_dir, '', 'simple-pinhole', single_camera)
    add_matches(
        db,
        feature_dir,
        fname_to_id,
    )
    db.commit()
    return

In [8]:
def calculate_kornia_resize_scale(original_size_hw, target_resize):
    """
    Calculates the scale factor Kornia's default resize applies.
    Assumes aspect ratio is maintained and longer side is scaled to target_resize,
    only if longer side > target_resize.

    Args:
        original_size_hw (tuple): Original image size (H, W).
        target_resize (int): The target size for the longer side.

    Returns:
        float: The scale factor applied (processed_size / original_size).
    """
    h_orig, w_orig = original_size_hw
    max_orig_dim = max(h_orig, w_orig)

    if target_resize is None or target_resize <= 0 or max_orig_dim <= target_resize:
        # No resizing or scaling up is needed based on default logic
        return 1.0
    else:
        # Scale down the longer side to target_resize
        return target_resize / max_orig_dim

def transform_points_from_processed(kp_processed, processed_scale_factor, crop_box=None):
    """
    Transforms keypoint coordinates from the 'processed' scale space
    back to the original image space, accounting for scaling and cropping.

    Args:
        kp_processed (np.ndarray): Keypoint coordinates [N, 2] in the processed space (after scaling by ALIKED).
        processed_scale_factor (float): The calculated scale factor applied by ALIKED (processed_size / original_or_cropped_size).
        crop_box (list): [x, y, w, h] of the crop in original image coords, or None.

    Returns:
        np.ndarray: Keypoint coordinates [N, 2] in the original image space.
    """
    if len(kp_processed) == 0:
        return np.empty((0, 2), dtype=np.float32)

    coords = kp_processed.copy().astype(np.float32) # Ensure float32

    # 1. Reverse scaling (from processed scale back to original or cropped scale)
    if processed_scale_factor > 0:
        coords /= processed_scale_factor
    # else: scale_factor is 1.0, no change needed here

    # 2. Add cropping offset (from cropped coordinates back to original coordinates)
    if crop_box is not None and len(crop_box) == 4 and crop_box[2] > 0 and crop_box[3] > 0:
        x_crop, y_crop, _, _ = crop_box
        coords[:, 0] += x_crop
        coords[:, 1] += y_crop

    # Note: We don't clip to original bounds here, as that might discard valid points near edges.
    # Downstream steps should handle points outside bounds if necessary.

    return coords

# Remove the old get_keypoint_original_coords function entirely

In [9]:
# ... (imports, configs, utility functions like load_torch_image, load_pil_image, get_image_size)
# Add the new calculate_kornia_resize_scale and transform_points_from_processed functions here

def perform_initial_detection_and_matching(img_fnames, index_pairs, data_dir, device=DEVICE):
    """
    Performs detection (ALIKED) and matching (LightGlue) on original images
    at a base resolution (e.g., 1024) to collect data for the cropping step.
    Stores keypoints (at detection scale) and matches in temporary HDF5.
    Analyzes matches to create crop data file.
    """
    temp_feature_dir = os.path.join(data_dir, '.temp_crop_features')
    os.makedirs(temp_feature_dir, exist_ok=True)

    initial_feature_file = os.path.join(temp_feature_dir, f'initial_features_{INITIAL_DETECTION_RESIZE}.h5')
    initial_match_file = os.path.join(temp_feature_dir, f'initial_matches_{INITIAL_DETECTION_RESIZE}.h5')
    crop_data_file = os.path.join(data_dir, CROP_DATA)

    if os.path.exists(crop_data_file):
         print(f"Initial detection and matching data for cropping exists: {crop_data_file}. deleting.")
         os.remove
         # return crop_data_file

    print(f"Performing initial ALIKED detection ({INITIAL_DETECTION_RESIZE}) and LightGlue matching for cropping data...")

    # 1. Initial Detection
    print("Running initial ALIKED detection...")
    try:
        extractor = ALIKED(max_num_keypoints=INITIAL_DETECTION_NUM_FEATURES, detection_threshold=ALIKED_TH).eval().to(device, dtype=torch.float32)
        # Set resize parameter here
        extractor.preprocess_conf["resize"] = INITIAL_DETECTION_RESIZE
        # Ensure ALIKED is on the correct device/dtype
        # extractor.to(device, dtype=torch.float32)

    except Exception as e:
        print(f"Error loading ALIKED extractor: {e}")
        return None

    with h5py.File(initial_feature_file, mode='w') as f_kp_desc:
        for img_path in tqdm(img_fnames, desc="Initial ALIKED Detection"):
            img_key = os.path.basename(img_path)
            kp = None # Initialize features to None
            desc = None
            calculated_scale = None # Will store the calculated scale factor
            original_pil_size = None

            try:
                # Load original PIL image to get its size
                img_orig_pil = load_pil_image(img_path)
                if img_orig_pil is None:
                     print(f"Skipping ALIKED for {img_path}: PIL image loading failed.")
                     continue
                original_pil_size = img_orig_pil.size # (W, H)

                # Calculate the expected scale factor ALIKED will apply
                # ALIKED uses preprocess_conf["resize"] on the *input image tensor*
                # Input image tensor size will be (H, W) after Kornia loading/conversion
                input_tensor_size_hw = (original_pil_size[1], original_pil_size[0]) # Convert (W, H) to (H, W)
                calculated_scale = calculate_kornia_resize_scale(input_tensor_size_hw, INITIAL_DETECTION_RESIZE)


                # Load image as Kornia tensor for ALIKED
                timg = K.image_to_tensor(np.array(img_orig_pil), keepdim=True).to(device, torch.float32) / 255.0 # Normalize
                if timg.ndim == 3: timg = timg[None, ...] # Ensure BxCxHxW


                with torch.inference_mode():
                    # Pass resize to extractor, but we don't expect processed_size back
                    # The scale is calculated based on preprocess_conf["resize"]
                    feats = extractor.extract(timg)


                    # --- Add Error Handling for accessing feats dictionary ---
                    try:
                        # Check for expected keys ('keypoints', 'descriptors')
                        if 'keypoints' in feats and 'descriptors' in feats and \
                           len(feats.get('keypoints', [])) > 0 and len(feats.get('descriptors', [])) > 0: # Use .get with default for safety

                            kp = feats['keypoints'].reshape(-1, 2).detach().cpu().numpy()
                            desc = feats['descriptors'].reshape(len(kp), -1).detach().cpu().numpy()

                            # Check length after reshaping just in case
                            if len(kp) == 0 or len(desc) == 0:
                                 print(f"Warning: Extracted features are empty for {img_path} after reshape.")
                                 kp = None; desc = None # Invalidate data


                        else:
                             missing_keys = [k for k in ['keypoints', 'descriptors'] if k not in feats]
                             empty_data_keys = [k for k in ['keypoints', 'descriptors'] if k in feats and len(feats[k]) == 0]

                             if missing_keys or empty_data_keys:
                                 print(f"Warning: Missing or empty required data in ALIKED output for {img_path}. (Missing Keys: {missing_keys}, Empty Data Keys: {empty_data_keys})")

                             kp = None; desc = None # Ensure invalid data is cleared


                    except KeyError as ke:
                         # This catches if the keys 'keypoints' or 'descriptors' are unexpectedly missing entirely
                         print(f"Error: Expected key '{ke}' not found in ALIKED features for {img_path}.")
                         kp = None; desc = None
                    except Exception as e:
                         print(f"Unexpected error processing ALIKED features result for {img_path}: {e}")
                         kp = None; desc = None


            except Exception as e:
                 print(f"Error during initial ALIKED feature extraction for {img_path}: {e}")


            # --- Check if features were successfully obtained and save ---
            if kp is not None and desc is not None and calculated_scale is not None and original_pil_size is not None:
                try:
                    img_group = f_kp_desc.create_group(img_key)
                    # Save keypoints AS IS (in the processed scale space)
                    img_group.create_dataset('keypoints', data=kp.astype(np.float32))
                    img_group.create_dataset('descriptors', data=desc.astype(np.float32))
                    # Store the calculated scale factor and original size for later transformation
                    img_group.attrs['calculated_scale_factor'] = float(calculated_scale)
                    img_group.attrs['original_pil_size'] = original_pil_size # (W, H) tuple
                    img_group.attrs['original_path'] = img_path # Store original path
                except Exception as e:
                     print(f"Error saving initial features for {img_path} to HDF5: {e}")
            else:
                print(f"Skipping saving initial features for {img_path} due to extraction failure or empty results.")


    # ... (rest of the perform_initial_detection_and_matching function: Initial Matching, Analyze Matches, Save crop_data.h5)
    # Ensure the rest of the function correctly handles cases where some images might not have initial features saved in f_kp_desc

    # 2. Initial Matching (Keep this part as it's needed for the current cropping method)
    print("Running initial LightGlue matching...")
    try:
        lg_matcher = KF.LightGlueMatcher("aliked", {"width_confidence": -1,
                                                    "depth_confidence": -1,
                                                    "mp": True if 'cuda' in str(device) else False}).eval().to(device)
        if device == torch.device('cpu'):
             lg_matcher.to('cpu')

    except Exception as e:
         print(f"Error loading LightGlue matcher: {e}")
         pass # Allow to continue to analysis if possible


    # Ensure initial_match_file is created even if empty, if matching failed entirely
    # This prevents subsequent H5 read errors
    if not os.path.exists(initial_match_file): # Corrected typo here
        try:
            with h5py.File(initial_match_file, mode='w') as f:
                pass # Create an empty file
        except Exception as e:
            print(f"Error creating empty initial match file {initial_match_file}: {e}")


    # Match only images that had features successfully extracted
    # Read keys from the initial_feature_file HDF5
    extracted_image_keys = []
    if os.path.exists(initial_feature_file): # Corrected typo here
        try:
            with h5py.File(initial_feature_file, mode='r') as f_kp_desc_read:
                 extracted_image_keys = list(f_kp_desc_read.keys())
        except Exception as e:
            print(f"Error reading keys from initial feature file {initial_feature_file}: {e}")


    # Create a mapping from image key to its original index
    # key_to_idx = {os.path.basename(fname): i for i, fname in enumerate(img_fnames)} # Not used in this block directly

    # Filter index_pairs to include only pairs where both images had features extracted
    filtered_index_pairs = []
    for idx1, idx2 in index_pairs:
         key1, key2 = os.path.basename(img_fnames[idx1]), os.path.basename(img_fnames[idx2])
         if key1 in extracted_image_keys and key2 in extracted_image_keys:
              filtered_index_pairs.append((idx1, idx2))
         # else: print(f"Skipping initial match for {key1}-{key2}: features not extracted for one or both.")


    if not filtered_index_pairs:
         print("No image pairs with extracted features to perform initial matching.")
         # Proceed to analysis, crop_data.h5 might be empty

    else:
        with h5py.File(initial_feature_file, mode='r') as f_kp_desc_read, \
             h5py.File(initial_match_file, mode='a') as f_match: # Use append mode if file might exist but be incomplete

            for idx1, idx2 in tqdm(filtered_index_pairs, desc="Initial LightGlue Matching"):
                fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
                key1, key2 = os.path.basename(fname1), os.path.basename(fname2)

                # Check if pair already matched (useful if appending)
                if key1 in f_match and key2 in f_match[key1]:
                     continue # Skip if already matched

                try:
                    # Load keypoints and descriptors from the initial detection file
                    # These KPs are at the processed scale
                    # Check for dataset existence within the group
                    if 'keypoints' not in f_kp_desc_read[key1] or 'descriptors' not in f_kp_desc_read[key1] or \
                       'keypoints' not in f_kp_desc_read[key2] or 'descriptors' not in f_kp_desc_read[key2]:
                         print(f"Warning: Missing keypoint/descriptor datasets for {key1}-{key2} in initial feature file. Skipping match.")
                         continue

                    kp1 = torch.from_numpy(f_kp_desc_read[key1]['keypoints'][...]).to(device)
                    kp2 = torch.from_numpy(f_kp_desc_read[key2]['keypoints'][...]).to(device)
                    desc1 = torch.from_numpy(f_kp_desc_read[key1]['descriptors'][...]).to(device)
                    desc2 = torch.from_numpy(f_kp_desc_read[key2]['descriptors'][...]).to(device)

                    if len(kp1) == 0 or len(kp2) == 0:
                         continue

                    # Create LAFs based on processed scale keypoints
                    # Assuming scale 1.0 relative to processed size is appropriate here
                    laf1 = KF.laf_from_center_scale_ori(kp1[None])
                    laf2 = KF.laf_from_center_scale_ori(kp2[None])

                    with torch.inference_mode():
                        dists, idxs = lg_matcher(desc1, desc2, laf1, laf2)
                    if len(idxs) > 0:
                        group = f_match.require_group(key1)
                        group.create_dataset(key2, data=idxs.detach().cpu().numpy().astype(np.int32))

                except Exception as e:
                    print(f"Error during initial LightGlue matching for {key1}-{key2}: {e}")


    # --- (rest of the Analyze Initial Matches part) ---
    print("Analyzing initial matches for cropping data...")

    kp_match_pairs = {}

    if os.path.exists(initial_match_file):
        try:
            with h5py.File(initial_match_file, mode='r') as f_match:
                for img_key1 in f_match.keys():
                    for img_key2 in f_match[img_key1].keys():
                        try:
                            matches = f_match[img_key1][img_key2][...] # Indices (kp1_idx, kp2_idx)

                            for kp1_idx, kp2_idx in matches:
                                if img_key1 not in kp_match_pairs: kp_match_pairs[img_key1] = {}
                                if kp1_idx not in kp_match_pairs[img_key1]: kp_match_pairs[img_key1][kp1_idx] = set()
                                kp_match_pairs[img_key1][kp1_idx].add(img_key2)

                                if img_key2 not in kp_match_pairs: kp_match_pairs[img_key2] = {}
                                if kp2_idx not in kp_match_pairs[img_key2]: kp_match_pairs[img_key2][kp2_idx] = set()
                                kp_match_pairs[img_key2][kp2_idx].add(img_key1)

                        except Exception as e:
                            print(f"Error processing initial match data for {img_key1}-{img_key2} in analysis: {e}")
        except Exception as e:
             print(f"Error reading initial match file {initial_match_file} for analysis: {e}")


    frequent_kp_data = {}

    if os.path.exists(initial_feature_file):
        try:
            with h5py.File(initial_feature_file, mode='r') as f_kp_desc:
                 for img_key in kp_match_pairs.keys():
                     if img_key not in f_kp_desc:
                          print(f"Warning: Initial features not found for {img_key} (present in matches but not in detection file). Skipping crop data processing for this image.")
                          continue

                     try:
                         # Check for needed data/attrs
                         if 'keypoints' not in f_kp_desc[img_key] or 'calculated_scale_factor' not in f_kp_desc[img_key].attrs or 'original_pil_size' not in f_kp_desc[img_key].attrs:
                              print(f"Warning: Incomplete initial feature data for {img_key}. Skipping crop data processing.")
                              continue

                         all_kp_coords_processed = f_kp_desc[img_key]['keypoints'][...] # KPs at processed scale
                         calculated_scale_factor = f_kp_desc[img_key].attrs['calculated_scale_factor']
                         original_pil_size = f_kp_desc[img_key].attrs['original_pil_size'] # (W, H)

                         kp_data_dict = kp_match_pairs[img_key]

                         frequent_indices = [kp_idx for kp_idx, matched_pairs in kp_data_dict.items()
                                             if len(matched_pairs) >= MIN_PAIRS_FOR_CROPPING and kp_idx < len(all_kp_coords_processed)] # Bounds check


                         if frequent_indices:
                             # Get coordinates at the processed scale
                             frequent_coords_processed = all_kp_coords_processed[frequent_indices]

                             # Transform coordinates back to original image space using the calculated scale
                             # We pass None for crop_box as these points are from the original (non-cropped) image
                             frequent_coords_orig_scale = transform_points_from_processed(
                                 frequent_coords_processed, calculated_scale_factor, crop_box=None
                             )

                             # Check for valid transformed coordinates (non-negative)
                             valid_frequent_coords_orig = frequent_coords_orig_scale[~np.any(frequent_coords_orig_scale < 0, axis=1)]


                             if len(valid_frequent_coords_orig) > 0:
                                 frequent_kp_data[img_key] = {
                                     'kp_coords_original_scale': valid_frequent_coords_orig
                                 }
                             else:
                                  print(f"No valid frequent keypoints in original scale for {img_key}.")

                         else:
                              print(f"No frequent keypoints found for {img_key} (threshold={MIN_PAIRS_FOR_CROPPING} pairs).")

                     except Exception as e:
                         print(f"Error processing frequent keypoints for {img_key}: {e}")

        except Exception as e:
             print(f"Error reading initial feature file {initial_feature_file} for analysis: {e}")


    # ... (rest of the saving frequent_kp_data to crop_data_file) ...
    with h5py.File(crop_data_file, mode='w') as f_crop_data:
        if frequent_kp_data:
            for img_key, data in frequent_kp_data.items():
                 group = f_crop_data.create_group(img_key)
                 group.create_dataset('kp_coords_original_scale', data=data['kp_coords_original_scale'])


    print(f"Initial detection and matching complete. Cropping data saved to {crop_data_file}")

    return crop_data_file

In [10]:
def calculate_crop_boxes(img_fnames, crop_data_file, feature_dir, data_dir):
    """
    Calculates crop bounding boxes for each image based on clustered frequent keypoints.
    Stores crop boxes in a dedicated HDF5 file.
    Uses a resolution-based ratio to determine DBSCAN EPS.
    """
    print("Calculating crop boxes using DBSCAN with resolution-based adaptive EPS...")
    # Store crop info in a separate file
    crop_info_file = os.path.join(data_dir, 'crop_info.h5')
    os.makedirs(data_dir, exist_ok=True)

    with h5py.File(crop_data_file, mode='r') as f_crop_data, \
         h5py.File(crop_info_file, mode='w') as f_crop_info:

        for img_path in tqdm(img_fnames, desc="Calculating Crops"):
            img_key = os.path.basename(img_path)

            if img_key not in f_crop_data:
                 # print(f"No cropping data for {img_key}. No crop will be used.") # Optional verbose
                 img_group = f_crop_info.create_group(img_key)
                 img_group.attrs['has_crop'] = False
                 img_group.attrs['crop_box'] = [0, 0, 0, 0]
                 continue

            try:
                frequent_coords_orig_scale = f_crop_data[img_key]['kp_coords_original_scale'][...]

                img_orig_wh = get_image_size(img_path)
                if img_orig_wh is None:
                    print(f"Could not get original size for {img_key}. Skipping crop calculation.")
                    img_group = f_crop_info.create_group(img_key)
                    img_group.attrs['has_crop'] = False
                    img_group.attrs['crop_box'] = [0, 0, 0, 0]
                    continue
                img_orig_w, img_orig_h = img_orig_wh


                if len(frequent_coords_orig_scale) > 0:
                    n_samples = len(frequent_coords_orig_scale)

                    # --- Adaptive EPS Calculation: Resolution * Ratio ---
                    max_orig_dim = max(img_orig_w, img_orig_h)
                    adaptive_eps = max_orig_dim * DBSCAN_EPS_RESOLUTION_RATIO

                    # Add a safeguard: adaptive_eps should be > 0.
                    # Fallback to fixed DBSCAN_EPS if calculation yields non-positive or ratio is zero/negative
                    if adaptive_eps <= 0:
                         print(f"Warning: Calculated resolution-based EPS is {adaptive_eps} for {img_key} (Max Dim={max_orig_dim}, Ratio={DBSCAN_EPS_RESOLUTION_RATIO}). Using fixed DBSCAN_EPS as fallback.")
                         # Fallback to the original fixed EPS config
                         adaptive_eps = DBSCAN_EPS

                    # DBSCAN requires at least min_samples + 1 points to form any cluster
                    if n_samples >= DBSCAN_MIN_SAMPLES + 1:
                        try:
                             # --- Apply DBSCAN with Resolution-based Adaptive EPS ---
                             # print(f"Image {img_key}: Max Dim = {max_orig_dim}, Ratio = {DBSCAN_EPS_RESOLUTION_RATIO:.4f}, Adaptive EPS = {adaptive_eps:.2f}") # Improved print format
                             db = DBSCAN(eps=adaptive_eps, min_samples=DBSCAN_MIN_SAMPLES).fit(frequent_coords_orig_scale)
                             labels = db.labels_

                        except Exception as e:
                             print(f"Error during DBSCAN for {img_key}: {e}. Skipping crop calculation.")
                             labels = np.array([-1] * n_samples) # Treat all points as noise on error


                    else:
                         print(f"Warning: Not enough frequent points ({n_samples}) for DBSCAN with min_samples={DBSCAN_MIN_SAMPLES} for {img_key}. Skipping DBSCAN.")
                         labels = np.array([-1] * n_samples) # Treat all points as noise if not enough samples

                    # Find the bounding box of all non-noise clustered points
                    clustered_points = frequent_coords_orig_scale[labels != -1] # Exclude noise points (-1)

                    if len(clustered_points) > 0:
                        min_x, min_y = np.min(clustered_points, axis=0)
                        max_x, max_y = np.max(clustered_points, axis=0)

                        # Add padding and ensure bounds are within the original image
                        min_x = max(0.0, min_x - CROP_PADDING)
                        min_y = max(0.0, min_y - CROP_PADDING)
                        max_x = min(float(img_orig_w - 1), max_x + CROP_PADDING)
                        max_y = min(float(img_orig_h - 1), max_y + CROP_PADDING)

                        # Ensure crop has positive dimensions
                        crop_w = max_x - min_x + 1
                        crop_h = max_y - min_y + 1

                        if crop_w > 0 and crop_h > 0:
                            # Crop box format: [x, y, w, h] (integers for simplicity)
                            crop_box = [int(min_x), int(min_y), int(crop_w), int(crop_h)]
                            # print(f"Calculated crop box for {img_key}: {crop_box}")

                            # Store crop box
                            img_group = f_crop_info.create_group(img_key)
                            img_group.attrs['has_crop'] = True
                            img_group.attrs['crop_box'] = crop_box
                        else:
                            # print(f"Calculated crop box for {img_key} has zero dimensions. No crop will be used.")
                            img_group = f_crop_info.create_group(img_key)
                            img_group.attrs['has_crop'] = False
                            img_group.attrs['crop_box'] = [0, 0, 0, 0]


                    else:
                        # print(f"DBSCAN found no significant clusters for {img_key}. No crop will be used.")
                        img_group = f_crop_info.create_group(img_key)
                        img_group.attrs['has_crop'] = False
                        img_group.attrs['crop_box'] = [0, 0, 0, 0]

                else:
                     print(f"No frequent keypoints found for {img_key}. No crop will be used.")
                     img_group = f_crop_info.create_group(img_key)
                     img_group.attrs['has_crop'] = False
                     img_group.attrs['crop_box'] = [0, 0, 0, 0]


            except Exception as e:
                print(f"Error calculating crop box for {img_key}: {e}")
                # Ensure entry is made in crop_info.h5 even on error
                if img_key not in f_crop_info:
                     img_group = f_crop_info.create_group(img_key)
                img_group.attrs['has_crop'] = False
                img_group.attrs['crop_box'] = [0, 0, 0, 0]


    print("Crop box calculation complete.")
    return crop_info_file

In [11]:
# ... (imports, configs, utility functions like calculate_kornia_resize_scale, transform_points_from_processed)
# Note: load_torch_image and load_pil_image are also needed

def detect_and_combine_features(img_fnames, crop_info_file, feature_dir, device=DEVICE, verbose=True):
    """
    Detects ALIKED features for multiple scales and original/cropped images,
    combines unique features per image (deduplicating based on original coords),
    and saves combined features to .pt and .h5 files per image.
    """
    print("Running multi-variation ALIKED detection and combining features...")
    os.makedirs(feature_dir, exist_ok=True)
    keypoints_subdir_path = os.path.join(feature_dir, KEYPOINTS_SUBDIR)
    os.makedirs(keypoints_subdir_path, exist_ok=True)
    descriptors_h5_path = os.path.join(feature_dir, DESCRIPTORS_H5)

    with h5py.File(descriptors_h5_path, mode='w') as f_descriptors, \
         h5py.File(crop_info_file, mode='r') as f_crop_info:

        extractor = None # Initialize extractor outside the loop

        for img_path in tqdm(img_fnames, desc="Detecting & Combining Features"):
            img_key = os.path.basename(img_path)
            kp_pt_path = os.path.join(keypoints_subdir_path, f'{img_key}.pt')

            # Skip if combined features already exist for this image
            if os.path.exists(kp_pt_path) and img_key in f_descriptors:
                 print(f"Combined features for {img_key} already exist. Skipping detection.")
                 continue

            img_orig_pil = load_pil_image(img_path)
            if img_orig_pil is None:
                print(f"Could not load original image {img_path}. Skipping.")
                # Create empty files/datasets to indicate processing happened (and failed)
                try:
                    torch.save(torch.empty(0, 2, dtype=torch.float32), kp_pt_path)
                    f_descriptors.create_group(img_key)
                    if verbose:
                        print(f"Created empty combined feature files for {img_key}.")
                except Exception as e:
                     print(f"Error creating empty combined feature files for {img_key}: {e}")
                continue

            img_orig_w, img_orig_h = img_orig_pil.size
            img_orig_size_hw = (img_orig_h, img_orig_w) # (H, W)

            # Get crop info
            has_crop = False
            # Default dummy crop box, will be [0,0,0,0] if no crop or invalid
            crop_box = [0, 0, 0, 0]
            if img_key in f_crop_info:
                 img_crop_group = f_crop_info[img_key]
                 has_crop = img_crop_group.attrs.get('has_crop', False)
                 crop_box = img_crop_group.attrs.get('crop_box', [0, 0, 0, 0])

            # Ensure crop_box is valid if has_crop is true
            if has_crop and (len(crop_box) != 4 or crop_box[2] <= 0 or crop_box[3] <= 0):
                 print(f"Warning: Invalid crop box {crop_box} for {img_key} despite has_crop=True. Ignoring crop.")
                 has_crop = False
                 crop_box = [0, 0, 0, 0]


            variations_to_process = []
            for scale in TTA_SCALES:
                # Original image variations
                variations_to_process.append({'type': 'orig', 'scale_target': scale, 'crop_box': None, 'pil_img': img_orig_pil})
                # Cropped image variations (if enabled and crop exists and is valid)
                if USE_CROPPED_IMAGES and has_crop:
                    x, y, w, h = crop_box
                    try:
                         img_cropped_pil = img_orig_pil.crop((x, y, x + w, y + h))
                         variations_to_process.append({'type': 'crop', 'scale_target': scale, 'crop_box': crop_box, 'pil_img': img_cropped_pil})
                    except Exception as e:
                         print(f"Error cropping image {img_key} with box {crop_box}: {e}. Skipping cropped variation.")


            all_kps_orig_coords = [] # List to collect all transformed keypoints (in original image space)
            all_descriptors = []     # List to collect corresponding descriptors

            if extractor is None:
                 try:
                    extractor = ALIKED(max_num_keypoints=TTA_NUM_FEATURES, detection_threshold=ALIKED_TTA_TH).eval().to(DEVICE, dtype=torch.float32)
                    if DEVICE == torch.device('cpu'):
                         extractor.to('cpu', torch.float32)
                 except Exception as e:
                    print(f"Error loading ALIKED extractor: {e}")
                    # Create empty files/datasets on extractor failure as well
                    try:
                        torch.save(torch.empty(0, 2, dtype=torch.float32), kp_pt_path)
                        f_descriptors.create_group(img_key)
                        if verbose:
                            print(f"Created empty combined feature files for {img_key} due to extractor failure.")
                    except Exception as e_save:
                         print(f"Error creating empty combined feature files for {img_key} on extractor failure: {e_save}")
                    continue # Move to next image if extractor fails

            for var_info in variations_to_process:
                 var_type = var_info['type']
                 var_scale_target = var_info['scale_target']
                 var_pil_img = var_info['pil_img']
                 var_crop_box = var_info['crop_box']

                 try:
                     # Get the size of the PIL image *for this variation*
                     var_pil_size_wh = var_pil_img.size # (W, H)
                     var_pil_size_hw = (var_pil_size_wh[1], var_pil_size_wh[0]) # (H, W)

                     # Calculate the expected scale factor ALIKED applies to *this variation's* PIL image size
                     calculated_scale_variation = calculate_kornia_resize_scale(var_pil_size_hw, var_scale_target)


                     # Convert PIL image to Kornia tensor
                     timg = K.image_to_tensor(np.array(var_pil_img), keepdim=True).to(device, torch.float32) / 255.0
                     if timg.ndim == 3: timg = timg[None, ...] # Ensure BxCxHxW

                     with torch.inference_mode():
                         # Pass resize to extractor. Keypoints will be in space scaled by calculated_scale_variation
                         feats = extractor.extract(timg, resize=var_scale_target, return_processed_size=False) # Do not request processed_size


                     # --- Add Error Handling for accessing feats dictionary ---
                     try:
                         if 'keypoints' in feats and 'descriptors' in feats and \
                            len(feats.get('keypoints', [])) > 0 and len(feats.get('descriptors', [])) > 0:

                             kp_variation = feats['keypoints'].reshape(-1, 2).detach().cpu().numpy()
                             desc_variation = feats['descriptors'].reshape(len(kp_variation), -1).detach().cpu().numpy()

                             if len(kp_variation) > 0 and calculated_scale_variation is not None: # Check scale is valid too
                                 # Transform keypoints from the variation's processed scale back to original image coordinates
                                 kp_orig_coords = transform_points_from_processed(
                                     kp_variation, calculated_scale_variation, var_crop_box
                                 )

                                 # Filter out any points that ended up outside original bounds or invalid (optional but good)
                                 # This check should ideally be within transform_points_from_processed or after
                                 # Let's trust transform_points_from_processed returns valid numpy array
                                 # kp_orig_coords are already in original coordinates space.

                                 all_kps_orig_coords.append(kp_orig_coords)
                                 all_descriptors.append(desc_variation)
                             else:
                                 print(f"Warning: Extracted features empty or scale invalid for {img_key} ({var_type}, {var_scale_target}).")


                         else:
                             missing_keys = [k for k in ['keypoints', 'descriptors'] if k not in feats]
                             empty_data_keys = [k for k in ['keypoints', 'descriptors'] if k in feats and len(feats[k]) == 0]

                             if missing_keys or empty_data_keys:
                                  print(f"Warning: Missing or empty required data in ALIKED output for {img_key} ({var_type}, {var_scale_target}). (Missing Keys: {missing_keys}, Empty Data Keys: {empty_data_keys})")


                     except KeyError as ke:
                          print(f"Error: Expected key '{ke}' not found in ALIKED features for {img_key} ({var_type}, {var_scale_target}).")
                     except Exception as e:
                          print(f"Unexpected error processing ALIKED features result for {img_key} ({var_type}, {var_scale_target}): {e}")

                 except Exception as e:
                      print(f"Error during ALIKED feature extraction for {img_key} ({var_type}, {var_scale_target}): {e}")


            # Combine all detected points (now all in original coords) and deduplicate
            if not all_kps_orig_coords:
                 print(f"No valid keypoints detected for any variation of {img_key}.")
                 # Create empty files/datasets to indicate processing happened (and failed)
                 try:
                    torch.save(torch.empty(0, 2, dtype=torch.float32), kp_pt_path)
                    f_descriptors.create_group(img_key) # Create group even if no descriptors
                    if verbose:
                        print(f"Created empty combined feature files for {img_key}.")
                 except Exception as e:
                     print(f"Error creating empty combined feature files for {img_key}: {e}")

                 continue # Move to the next image

            combined_kps_orig = np.concatenate(all_kps_orig_coords, axis=0)
            combined_descriptors = np.concatenate(all_descriptors, axis=0)

            # Deduplicate based on rounded coordinates in original image space
            seen_coords = {}
            unique_kps_orig = []
            unique_descriptors = []

            # Using enumerate to get original index for descriptor selection
            # Use a small tolerance for rounding coordinates
            try:
                for i, (kp_coord, descriptor) in enumerate(zip(combined_kps_orig, combined_descriptors)):
                    # Ensure coordinate is a tuple of floats for dictionary key
                    rounded_coord = tuple(np.round(kp_coord, COORD_PRECISION).astype(float)) # Use float for tuple key

                    if rounded_coord not in seen_coords:
                        seen_coords[rounded_coord] = len(unique_kps_orig) # Store index in unique list
                        unique_kps_orig.append(kp_coord)
                        unique_descriptors.append(descriptor) # Keep the descriptor from the first occurrence
                if verbose:
                    print(f"Combined features for {img_key}: Detected {len(combined_kps_orig)}, Unique {len(unique_kps_orig)}")
                # Convert lists to numpy arrays
                unique_kps_orig_np = np.array(unique_kps_orig, dtype=np.float32)
                unique_descriptors_np = np.array(unique_descriptors, dtype=np.float32)

                # Save unique keypoints to .pt
                try:
                    torch.save(torch.from_numpy(unique_kps_orig_np), kp_pt_path)
                    # print(f"Saved unique keypoints for {img_key} to {kp_pt_path}") # Optional verbose
                except Exception as e:
                     print(f"Error saving keypoints .pt for {img_key}: {e}")

                # Save unique descriptors to descriptors.h5
                try:
                    img_desc_group = f_descriptors.create_group(img_key)
                    img_desc_group.create_dataset('data', data=unique_descriptors_np, compression="gzip") # Use compression
                    # print(f"Saved unique descriptors for {img_key} to {descriptors_h5_path}/{img_key}") # Optional verbose
                except Exception as e:
                     print(f"Error saving descriptors .h5 for {img_key}: {e}")

            except Exception as e:
                 print(f"Error during feature combination and deduplication for {img_key}: {e}")
                 # Create empty files/datasets if combination/saving fails
                 try:
                    torch.save(torch.empty(0, 2, dtype=torch.float32), kp_pt_path)
                    if img_key not in f_descriptors: # Only create group if it failed before
                         f_descriptors.create_group(img_key)
                    if verbose:
                        print(f"Created empty combined feature files for {img_key} after combination error.")
                 except Exception as e_save:
                     print(f"Error creating empty combined feature files for {img_key} after combination error: {e_save}")


    print("Multi-variation detection and feature combination complete.")

# ... (rest of the code: match_and_cluster_images, process_images_combined_output, main block)

In [12]:
def load_image_names_from_json(cluster_path):
    with open(os.path.join(cluster_path, 'images.json'), 'r') as f:
        full_paths = json.load(f)  # 可能是 ['/path/to/images/img001.jpg', ...]
        image_names = [os.path.basename(p) for p in full_paths]  # 提取 'img001.jpg'
    return image_names

In [13]:
import kornia as K
import kornia.feature as KF
from tqdm import tqdm
import networkx as nx # Needed for graph clustering
import json # Needed for saving images.json (per cluster, or globally)
import warnings # To manage potential warnings

def match_and_cluster_images(
    img_fnames,
    index_pairs,
    data_dir='.', # Use data_dir to find feature files and save cluster outputs
    device=torch.device('cpu'),
    min_matches=MIN_MATCHES_FOR_GRAPH_EDGE, # Use external arg names that match your snippet
    aliked_dis_min=MIN_MATCHES_FOR_FILTERED_GRAPH, # Use external arg names that match your snippet
    verbose=False
):
    """
    Performs LightGlue matching on combined features for image pairs,
    builds a match graph, clusters images, and saves per-cluster files
    (images.json, keypoints.h5, matches.h5).

    Args:
        img_fnames (list): List of full paths to image files.
        index_pairs (list): List of (idx1, idx2) tuples for image pairs to match.
        data_dir (str): Base directory where 'features_combined' is located and
                        where per-cluster outputs will be created.
        device (torch.device): Device to use for matching.
        min_matches (int): Min matches for adding edge to graph.
        aliked_dis_min (int): Min matches for filtering graph.
        verbose (bool): Whether to print detailed match info.

    Returns:
        list: A list of lists, where each inner list contains the global indices
              of images belonging to a cluster.
    """
    # Define paths based on data_dir and configuration
    feature_dir_combined = os.path.join(data_dir, 'features_combined')
    keypoints_subdir_path = os.path.join(feature_dir_combined, KEYPOINTS_SUBDIR) # Directory holding combined per-image .pt files
    descriptors_h5_path = os.path.join(feature_dir_combined, DESCRIPTORS_H5) # HDF5 holding combined per-image descriptors
    # Global match dir is still used for global summaries, but per-cluster goes into feature_dir_combined/clusters
    match_dir_global = os.path.join(data_dir, 'matches_global')

    # Directory where per-cluster subfolders will be created
    cluster_output_base_dir = os.path.join(feature_dir_combined, 'clusters')
    os.makedirs(cluster_output_base_dir, exist_ok=True) # Ensure base cluster output dir exists

    try:
        lg_matcher = KF.LightGlueMatcher(
            "aliked", {
                "width_confidence": -1,
                "depth_confidence": -1,
                "mp": 'cuda' in str(device)
            }
        ).eval().to(device)
    except Exception as e:
        print(f"Error loading LightGlue matcher: {e}")
        return []

    match_graph = nx.Graph()
    match_graph.add_nodes_from(range(len(img_fnames)))
    # Store match indices (relative to combined per-image features)
    # This is needed to save matches.h5 per cluster later
    all_matches = {}

    # Open combined descriptors file once
    try:
        f_descriptors = h5py.File(descriptors_h5_path, mode='r')
    except Exception as e:
        print(f"Error opening combined descriptors file {descriptors_h5_path}: {e}")
        return []

    print("Performing LightGlue matching on combined features and building graph...")

    # Iterate through shortlisted pairs
    for idx1, idx2 in tqdm(index_pairs, desc="LightGlue Matching & Graph Building"):
        fname1, fname2 = img_fnames[idx1], img_fnames[idx2]
        key1 = os.path.basename(fname1)
        key2 = os.path.basename(fname2)

        kp1_pt_path = os.path.join(keypoints_subdir_path, f'{key1}.pt')
        kp2_pt_path = os.path.join(keypoints_subdir_path, f'{key2}.pt')

        # Check if combined features exist for both images
        if not os.path.exists(kp1_pt_path) or key1 not in f_descriptors or \
           not os.path.exists(kp2_pt_path) or key2 not in f_descriptors:
            continue

        try:
            # Load combined keypoints (original coordinates)
            kp1_combined_orig = torch.load(kp1_pt_path, weights_only=False).to(device)
            kp2_combined_orig = torch.load(kp2_pt_path, weights_only=False).to(device)

            # Load combined descriptors
            desc1_combined = torch.from_numpy(f_descriptors[key1]['data'][...]).to(device)
            desc2_combined = torch.from_numpy(f_descriptors[key2]['data'][...]).to(device)

            if len(kp1_combined_orig) == 0 or len(kp2_combined_orig) == 0 or \
               len(desc1_combined) == 0 or len(desc2_combined) == 0:
                continue

            # Create dummy LAFs centered at keypoints (using original coordinates)
            # Ensure KPs are float tensors for LAF creation
            kp1_tensor = kp1_combined_orig.float()[None] # Add batch dim
            kp2_tensor = kp2_combined_orig.float()[None] # Add batch dim
            laf1 = KF.laf_from_center_scale_ori(kp1_tensor) # Use batch size 1, scale 1.0
            laf2 = KF.laf_from_center_scale_ori(kp2_tensor)


            with torch.inference_mode():
                dists, idxs = lg_matcher(desc1_combined, desc2_combined, laf1, laf2)

            n_matches = len(idxs)

            if verbose:
                 tqdm.write(f'{key1}-{key2}: {n_matches} matches')

            # Add edge to graph if enough matches are found
            if n_matches >= min_matches: # Use the passed min_matches
                 # Store number of matches as edge weight
                 match_graph.add_edge(idx1, idx2, weight=n_matches)
                 # Store match indices (relative to combined KPs)
                 all_matches.setdefault(key1, {})[key2] = idxs.cpu().detach().numpy().astype('int16')


        except Exception as e:
            print(f"Error during matching combined features for {key1}-{key2}: {e}")

    # Close the descriptors file
    f_descriptors.close()

    print("Graph building complete. Performing clustering...")

    # --- Graph Clustering Logic (Same as before) ---

    raw_clusters = list(nx.connected_components(match_graph))
    final_clusters = []
    outliers = set()

    for cluster_indices in raw_clusters:
        subgraph = match_graph.subgraph(cluster_indices)
        filtered_subgraph = nx.Graph()
        for u, v, d in subgraph.edges(data=True):
            # Use the passed aliked_dis_min for filtering
            if d['weight'] >= aliked_dis_min:
                filtered_subgraph.add_edge(u, v)

        for sub_cluster_indices in nx.connected_components(filtered_subgraph):
            # Use the configured MIN_IMAGES_PER_CLUSTER (assuming 2 is desired based on original)
            if len(sub_cluster_indices) >= MIN_IMAGES_PER_CLUSTER:
                final_clusters.append(list(sub_cluster_indices))
            else:
                outliers.update(sub_cluster_indices)

    print(f"Clustering complete. Found {len(final_clusters)} clusters.")


    # --- Save Per-Cluster Files (Updated to match the style from your snippet, using combined data) ---

    print("Saving per-cluster files...")

    for i, cluster_indices in enumerate(final_clusters):
        # Create the cluster subfolder
        cluster_dir = os.path.join(cluster_output_base_dir, f'cluster_{i}')
        os.makedirs(cluster_dir, exist_ok=True)

        cluster_fnames = [img_fnames[j] for j in cluster_indices]
        cluster_keys = [os.path.basename(f) for f in cluster_fnames]

        # 1. Save images.json (list of full filenames)
        images_json_path = os.path.join(cluster_dir, 'images.json')
        try:
            with open(images_json_path, 'w') as f_json:
                json.dump(cluster_fnames, f_json, indent=2)
            # print(f"Saved {images_json_path}") # Optional verbose
        except Exception as e:
             print(f"Error saving {images_json_path}: {e}")


        # 2. Save keypoints.h5 for the cluster (using combined keypoints per image)
        # This replicates the saving style from your snippet but uses the combined KPs
        keypoints_h5_path = os.path.join(cluster_dir, 'keypoints.h5')
        try:
            with h5py.File(keypoints_h5_path, 'w') as f_out_kp:
                 for img_idx in cluster_indices:
                     img_key = os.path.basename(img_fnames[img_idx])
                     kp_pt_path = os.path.join(keypoints_subdir_path, f'{img_key}.pt') # Path to combined KPs for this image
                     try:
                         if os.path.exists(kp_pt_path):
                             # Load combined KPs for this image (original coords)
                             kp_combined_np = torch.load(kp_pt_path, weights_only=False).cpu().numpy() # Load and convert to numpy
                             f_out_kp.create_dataset(img_key, data=kp_combined_np.astype(np.float32))
                             # print(f"Saved combined KPs for {img_key} to {keypoints_h5_path}") # Optional verbose
                         else:
                              print(f"Warning: Combined keypoints not found for {img_key} at {kp_pt_path}. Skipping saving to cluster H5.")

                     except Exception as e:
                          print(f"Error loading/saving combined keypoints for {img_key} to cluster H5: {e}")

        except Exception as e:
             print(f"Error creating or saving to {keypoints_h5_path}: {e}")


        # 3. Save matches.h5 (matches between images *within this cluster*)
        # This replicates the saving style from your snippet
        # The indices in all_matches are already relative to the combined per-image keypoints
        # We just need to filter all_matches to include only pairs within this cluster
        cluster_matches_filtered = {}
        # Create a set of keys in this cluster for quick lookup
        cluster_keys_set = set(cluster_keys)

        for key1 in cluster_keys:
            # Only process if key1 had any matches stored AND key1 is in this cluster
            if key1 in all_matches and key1 in cluster_keys_set:
                cluster_matches_filtered[key1] = {}
                for key2, match_data in all_matches[key1].items():
                    # Only include matches where key2 is also in this cluster
                    if key2 in cluster_keys_set:
                        cluster_matches_filtered[key1][key2] = match_data # Store the original match indices

        matches_h5_path = os.path.join(cluster_dir, 'matches.h5')
        if cluster_matches_filtered:
            try:
                with h5py.File(matches_h5_path, 'w') as f_match:
                    for key1, matches_dict in cluster_matches_filtered.items():
                         if matches_dict: # Ensure key1 actually has matches within the cluster_matches_filtered dict
                            group = f_match.create_group(key1) # Use create_group as this is a new file
                            for key2, match in matches_dict.items():
                                group.create_dataset(key2, data=match, dtype='int16') # Save as int16
                # print(f"Saved {matches_h5_path}") # Optional verbose
            except Exception as e:
                 print(f"Error saving {matches_h5_path}: {e}")
        # else:
             # print(f"No matches to save for cluster {i} at {matches_h5_path}") # Optional verbose


        # 4. Skip saving descriptors.h5 per cluster (as per the commented code in your snippet)


    # --- Save Global Cluster Summaries ---
    # This part saves summaries in the 'matches_global' directory
    os.makedirs(match_dir_global, exist_ok=True) # Ensure global match dir exists

    # Save clusters as a list of image filenames in a JSON file
    clusters_json_path = os.path.join(match_dir_global, 'clusters.json')
    clusters_filename_list = []
    for cluster_indices in final_clusters:
        cluster_fnames = [img_fnames[idx] for idx in cluster_indices]
        clusters_filename_list.append(cluster_fnames)

    try:
        with open(clusters_json_path, 'w') as f_json:
            json.dump(clusters_filename_list, f_json, indent=2)
        print(f"Saved global cluster filenames summary to {clusters_json_path}")
    except Exception as e:
        print(f"Error saving global clusters.json: {e}")


    # Save cluster summary in a text file
    clusters_txt_path = os.path.join(match_dir_global, 'clusters.txt')
    try:
        with open(clusters_txt_path, 'w') as f:
            f.write(f"Found {len(final_clusters)} clusters:\n\n")

            # Loop through the final clusters and write their images
            for i, cluster_indices in enumerate(final_clusters):
                cluster_fnames = [img_fnames[j] for j in cluster_indices]
                cluster_keys = sorted([os.path.basename(f) for f in cluster_fnames])
                f.write(f'Cluster {i} (size={len(cluster_keys)}, indices={sorted(cluster_indices)}):\n')
                for name in cluster_keys: # Write the image names belonging to THIS cluster
                     f.write(f'  {name}\n')
                f.write('\n') # Add a newline after listing images for each cluster


            # --- Correct placement for writing outliers ---
            # Generate outlier filenames and keys AFTER clustering is complete
            if outliers: # Check if the outliers set is not empty
                 outlier_fnames = [img_fnames[idx] for idx in sorted(list(outliers))]
                 outlier_keys = sorted([os.path.basename(f) for f in outlier_fnames])
                 f.write(f'Outliers ({len(outlier_keys)} images, indices={sorted(list(outliers))}):\n')
                 for name in outlier_keys: # <-- This loop is now correctly placed
                     f.write(f'  {name}\n')
                 f.write('\n') # Add a final newline after the outlier list

        print(f"Saved global cluster summary to {clusters_txt_path}")
    except Exception as e:
         print(f"Error saving global clusters.txt: {e}")



    # Return the list of lists of image indices
    return final_clusters

# Note: This function assumes that detect_and_combine_features
# has already been run and created the combined features in
# data_dir/features_combined/keypoints/ and data_dir/features_combined/descriptors.h5.
# It replicates the per-cluster saving structure from your older snippet
# but populates the files using the *new* combined features.

In [14]:
def import_into_colmap_cluster(
    img_dir,
    cluster_path='.featureout/cluster_0',
    database_path = '.featureout/cluster_0/colmap.db',
    image_names = None
):
    """
    Import keypoints and matches into COLMAP database using helper functions.

    Args:
        img_dir (str): Directory containing image files
        cluster_path (str): Path with matches.h5
        database_path (str): Output database location
        image_names (list[str]): Optional subset of image names to include
    """
    db = COLMAPDatabase.connect(database_path)
    db.create_tables()
    single_camera = False
    # Add keypoints and images
    fname_to_id = add_keypoints(
        db=db,
        h5_path=cluster_path,
        image_path=img_dir,
        img_ext='',
        camera_model='simple-pinhole',
        single_camera=single_camera
    )
    # Filter fname_to_id to only use the selected subset (if provided)
    if image_names is not None:
        fname_to_id = {k: v for k, v in fname_to_id.items() if k in image_names}

    # Add matches between selected image pairs
    add_matches(
        db=db,
        h5_path=cluster_path,
        fname_to_id=fname_to_id
    )
    db.commit()
    db.close()

In [15]:
import json


def run_colmap_clusters(
    feature_dir,
    images_dir,
    timings
):
    """
    Run COLMAP reconstruction for each cluster folder in feature_dir.

    Parameters:
    - feature_dir: Directory containing cluster_* folders with images.json, keypoints.h5, matches.h5
    - images_dir: Path to raw image files
    - timings: dict to record durations
    - min_model_size: minimum model size for COLMAP mapping
    - max_num_models: maximum number of maps to attempt
    """
    cluster_folders = sorted([f for f in os.listdir(feature_dir) if f.startswith('cluster_')])
    all_maps = []
    for i, folder in enumerate(cluster_folders):
        cluster_path = os.path.join(feature_dir, folder)
        database_path = os.path.join(cluster_path, 'colmap.db')
        image_list_path = os.path.join(cluster_path, 'images.json')

        if not os.path.isfile(image_list_path):
            print(f"[Cluster {i}] Missing images.json, skipping.")
            continue

        with open(image_list_path, 'r') as f:
            image_names = [os.path.basename(x) for x in json.load(f)]

        if os.path.isfile(database_path):
            os.remove(database_path)

        gc.collect()
        sleep(1)

        # Step 1: import keypoints, matches into COLMAP db
        import_into_colmap_cluster(
            img_dir=images_dir,
            cluster_path=cluster_path,
            database_path=database_path,
            image_names=image_names
        )

        # Step 2: RANSAC
        t = time()
        pycolmap.match_exhaustive(database_path)
        t_ransac = time() - t
        timings['RANSAC'].append(t_ransac)
        print(f'[Cluster {i}] Ran RANSAC in {t_ransac:.4f} sec')

        # Step 3: Incremental mapping
        output_path = os.path.join(cluster_path, 'colmap_rec_aliked')
        os.makedirs(output_path, exist_ok=True)

        mapper_options = pycolmap.IncrementalPipelineOptions()
        mapper_options.min_model_size = 8
        mapper_options.max_num_models = 25
        mapper_options.mapper.filter_max_reproj_error	 = 10.0

        t = time()
        maps = pycolmap.incremental_mapping(
            database_path=database_path,
            image_path=images_dir,
            output_path=output_path,
            options=mapper_options
        )
        t_rec = time() - t
        timings['Reconstruction'].append(t_rec)
        print(f'[Cluster {i}] Reconstruction done in {t_rec:.4f} sec')
        all_maps.append(maps)
    
    return timings, all_maps


In [16]:
# Collect vital info from the dataset

@dataclasses.dataclass
class Prediction:
    image_id: str | None  # A unique identifier for the row -- unused otherwise. Used only on the hidden test set.
    dataset: str
    filename: str
    cluster_index: int | None = None
    rotation: np.ndarray | None = None
    translation: np.ndarray | None = None

# Set is_train=True to run the notebook on the training data.
# Set is_train=False if submitting an entry to the competition (test data is hidden, and different from what you see on the "test" folder).
is_train = True
data_dir = '/kaggle/input/image-matching-challenge-2025'
workdir = '/kaggle/working/result/'
os.makedirs(workdir, exist_ok=True)

if is_train:
    sample_submission_csv = os.path.join(data_dir, 'train_labels.csv')
else:
    sample_submission_csv = os.path.join(data_dir, 'sample_submission.csv')

samples = {}
competition_data = pd.read_csv(sample_submission_csv)
for _, row in competition_data.iterrows():
    # Note: For the test data, the "scene" column has no meaning, and the rotation_matrix and translation_vector columns are random.
    if row.dataset not in samples:
        samples[row.dataset] = []
    samples[row.dataset].append(
        Prediction(
            image_id=None if is_train else row.image_id,
            dataset=row.dataset,
            filename=row.image
        )
    )

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [17]:
import os
import shutil

def delete_cluster_folders(base_dir):
    for name in os.listdir(base_dir):
        path = os.path.join(base_dir, name)
        if os.path.isdir(path) and name.startswith("cluster"):
            print(f"Deleting: {path}")
            shutil.rmtree(path)


In [18]:
gc.collect()

max_images = None  # Used For debugging only. Set to None to disable.
datasets_to_process = None  # Not the best convention, but None means all datasets.

if is_train:
    # max_images = 5

    # Note: When running on the training dataset, the notebook will hit the time limit and die. Use this filter to run on a few specific datasets.
    datasets_to_process = [
    	# New data.
    	# 'amy_gardens',
    	'ETs',
    	# 'fbk_vineyard',
    	# 'stairs',
    	# Data from IMC 2023 and 2024.
    	# 'imc2024_dioscuri_baalshamin',
    	# 'imc2023_theather_imc2024_church',
    	# 'imc2023_heritage',
    	# 'imc2023_haiper',
    	# 'imc2024_lizard_pond',
    	# Crowdsourced PhotoTourism data.
    	# 'pt_stpeters_stpauls',
    	# 'pt_brandenburg_british_buckingham',
    	# 'pt_piazzasanmarco_grandplace',
    	# 'pt_sacrecoeur_trevi_tajmahal',
    ]

timings = {
    "shortlisting":[],
    "feature_detection": [],
    "feature_augmentation":[],
    "feature_merge":[],
    "feature_matching":[],
    "RANSAC": [],
    "Reconstruction": [],
}
mapping_result_strs = []


print (f"Extracting on device {device}")
for dataset, predictions in samples.items():
    if datasets_to_process and dataset not in datasets_to_process:
        print(f'Skipping "{dataset}"')
        continue
    
    images_dir = os.path.join(data_dir, 'train' if is_train else 'test', dataset)
    images = [os.path.join(images_dir, p.filename) for p in predictions]
    if max_images is not None:
        images = images[:max_images]

    print(f'\nProcessing dataset "{dataset}": {len(images)} images')

    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    feature_dir = os.path.join(workdir, 'featureout', dataset)
    os.makedirs(feature_dir, exist_ok=True)

    # Wrap algos in try-except blocks so we can populate a submission even if one scene crashes.
    try:
        t = time()
        index_pairs = get_image_pairs_shortlist(
            images,
            sim_th = 0.5, # should be strict
            min_pairs = 10, # we should select at least min_pairs PER IMAGE with biggest similarity
            exhaustive_if_less = 20,
            device=device
        )
        timings['shortlisting'].append(time() - t)
        print (f'Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec')
        gc.collect()
    
        t = time()

        # detect_aliked(images, feature_dir, 4096, device=device)
        # gc.collect()
        # timings['feature_detection'].append(time() - t)
        # print(f'Features detected in {time() - t:.4f} sec')
        
        # delete_cluster_folders(feature_dir)
        # t = time()
        # # match_with_lightglue(images, index_pairs, feature_dir=feature_dir, device=device, verbose=False)
        # clusternum = match_with_lightglue_and_cluster(images, index_pairs, feature_dir=feature_dir, aliked_dis_min=80, device=device, verbose=False)
        
        # 2. Perform initial detection and matching for cropping data
        # This step will skip if the crop data file already exists
        DATA_DIR = images_dir
        FEATURE_DIR = os.path.join(feature_dir, 'features_combined')
        MATCH_DIR = os.path.join(feature_dir, 'matches_global')
        CLUSTER_DIR = os.path.join(FEATURE_DIR, 'clusters')

        crop_data_file = perform_initial_detection_and_matching(images, index_pairs, data_dir = feature_dir, device=DEVICE)
        gc.collect()
        timings['feature_detection'].append(time() - t)
        print(f'Features detected in {time() - t:.4f} sec')
    
        # 3. Calculate crop boxes based on initial match analysis
        # This step will skip if the crop info file already exists from a previous run
        # However, the logic for skipping is currently inside calculate_crop_boxes itself (mode='w')
        # Let's ensure it writes fresh crop_info based on potentially existing crop_data
        t = time()
        crop_info_file = calculate_crop_boxes(images, crop_data_file, FEATURE_DIR, data_dir = feature_dir)
        gc.collect()
        timings['feature_augmentation'].append(time() - t)
        print(f'Features augmentation in {time() - t:.4f} sec')    
    
        # 4. Perform multi-variation ALIKED detection, combine features per image, save to .pt/.h5
        # This step skips images whose combined features already exist
        t = time()
        detect_and_combine_features(images, crop_info_file, FEATURE_DIR, device=DEVICE)
        gc.collect()
        timings['feature_merge'].append(time() - t)
        print(f'Features merge in {time() - t:.4f} sec')  

        # 5. Load combined features and perform LightGlue matching, save global matches to .pt
        # This step runs matching using the combined features generated in step 4
        t = time()
        clusters = match_and_cluster_images(images, index_pairs, data_dir = feature_dir, device=DEVICE)
        gc.collect()
        print("Processing complete. Combined features saved to", FEATURE_DIR, "Global matches saved to", MATCH_DIR)
        print("generate cluster : ", clusters)
        print("cluster num:", len(clusters))
        timings['feature_matching'].append(time() - t)
        print(f'Features matched in {time() - t:.4f} sec')
        
        
        timings, all_maps = run_colmap_clusters(CLUSTER_DIR, images_dir, timings)
        gc.collect()
        timings['Reconstruction'].append(time() - t)
        print(f'Reconstruction in {time() - t:.4f} sec') 
        clear_output(wait=False)

        registered = 0
        cluster_global_index = 0  # 全局 cluster 索引
        for maps in all_maps:  # 每个 maps 是 Dict[int, Reconstruction]
            for map_index, cur_map in maps.items():
                print(f"cluster : {map_index}")
                print(maps)
                for _, image in cur_map.images.items():
                    prediction_index = filename_to_index[image.name]
                    predictions[prediction_index].cluster_index = cluster_global_index
                    predictions[prediction_index].rotation = deepcopy(image.cam_from_world.rotation.matrix())
                    predictions[prediction_index].translation = deepcopy(image.cam_from_world.translation)
                    registered += 1
                cluster_global_index += 1
        
        mapping_result_str = f'Dataset "{dataset}" -> Registered {registered} / {len(images)} images across {cluster_global_index} clusters'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

        gc.collect()
    except Exception as e:
        print(e)
        # raise e
        mapping_result_str = f'Dataset "{dataset}" -> Failed!'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

print('\nResults')
for s in mapping_result_strs:
    print(s)

print('\nTimings')
for k, v in timings.items():
    print(f'{k} -> total={sum(v):.02f} sec.')

cluster : 0
{0: Reconstruction(num_reg_images=9, num_cameras=9, num_points3D=3033, num_observations=14684)}
cluster : 0
{0: Reconstruction(num_reg_images=10, num_cameras=10, num_points3D=992, num_observations=5893)}
Dataset "ETs" -> Registered 19 / 22 images across 2 clusters
Skipping "stairs"

Results
Dataset "ETs" -> Registered 19 / 22 images across 2 clusters

Timings
shortlisting -> total=6.02 sec.
feature_detection -> total=8.35 sec.
feature_augmentation -> total=0.40 sec.
feature_merge -> total=6.54 sec.
feature_matching -> total=18.36 sec.
RANSAC -> total=0.79 sec.
Reconstruction -> total=35.97 sec.


In [19]:
# Must Create a submission file.

array_to_str = lambda array: ';'.join([f"{x:.09f}" for x in array])
none_to_str = lambda n: ';'.join(['nan'] * n)

submission_file = '/kaggle/working/submission.csv'
with open(submission_file, 'w') as f:
    if is_train:
        f.write('dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')
    else:
        f.write('image_id,dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.image_id},{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')

!head {submission_file}

dataset,scene,image,rotation_matrix,translation_vector
imc2023_haiper,outliers,fountain_image_116.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_108.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_101.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_082.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_071.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_025.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_000.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_007.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,outliers,fountain_image_012.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan


In [20]:
# Definitely Compute results if running on the training set.
# Do not do this when submitting a notebook for scoring. All you have to do is save your submission to /kaggle/working/submission.csv.

if is_train:
    t = time()
    final_score, dataset_scores = metric.score(
        gt_csv='/kaggle/input/image-matching-challenge-2025/train_labels.csv',
        user_csv=submission_file,
        thresholds_csv='/kaggle/input/image-matching-challenge-2025/train_thresholds.csv',
        mask_csv=None if is_train else os.path.join(data_dir, 'mask.csv'),
        inl_cf=0,
        strict_cf=-1,
        verbose=True,
    )
    print(f'Computed metric in: {time() - t:.02f} sec.')

imc2023_haiper: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2023_heritage: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2023_theather_imc2024_church: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_dioscuri_baalshamin: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_lizard_pond: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_brandenburg_british_buckingham: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_piazzasanmarco_grandplace: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_sacrecoeur_trevi_tajmahal: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_stpeters_stpauls: score=0.00% (mAA=0.00%, clusterness=0.00%)
amy_gardens: score=0.00% (mAA=0.00%, clusterness=0.00%)
fbk_vineyard: score=0.00% (mAA=0.00%, clusterness=0.00%)
ETs: score=51.43% (mAA=34.62%, clusterness=100.00%)
stairs: score=0.00% (mAA=0.00%, clusterness=0.00%)
Average over all datasets: score=3.96% (mAA=2.66%, clusterness=7.69%)
Computed metric in: 0.29 sec.
