In [1]:
import os
import cv2
import sys
import h5py
import timm
import torch
import shutil
import sqlite3
import warnings
import pycolmap
import itertools
import numpy as np
import configparser
import pandas as pd
from glob import glob
from tqdm import tqdm
from copy import deepcopy
from PIL import Image, ExifTags
import torch.nn.functional as F
from collections import defaultdict
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

sys.path.append('/kaggle/input')
from ma20230519.matching import Matching

# Root of the competition input data (sample submission CSV and test images are read from here).
INPUT_ROOT = '/kaggle/input/image-matching-challenge-2023'
# Scratch root; per-scene feature directories are created under `featureout/` inside it.
DATA_ROOT = '/kaggle/data'
# Directory the final submission.csv is written to.
OUTPUT_ROOT = '/kaggle/working'
# Label embedded in the names of intermediate match files and reconstruction folders.
matching_name = 'SuperGlue'
# Target length (in pixels) of the longer image side before matching.
image_size = 1460
# When True, candidate pairs are pre-filtered with global descriptors instead of matching every pair.
similarity_filter = True
# Forwarded to matching_pipeline as its `manual_ransac` argument.
manual_ransac = False

In [2]:
# Collect every distinct "dataset/scene" key of the sample submission, keeping
# the order in which the keys first appear (dict keys preserve insertion order).
sample_submission_df = pd.read_csv(f"{INPUT_ROOT}/sample_submission.csv")
datasets_scenes = list(dict.fromkeys(
    f"{row.dataset}/{row.scene}"
    for _, row in sample_submission_df[['dataset', 'scene']].iterrows()
))

In [3]:
# The global-descriptor backbone is only loaded when pair pre-filtering is enabled.
if similarity_filter:
    similarity_model = (
        timm.create_model(
            'tf_efficientnet_b7',
            checkpoint_path='/kaggle/input/ma20230519/weights/tf_efficientnet_b7_ra-6c08e654.pth',
        )
        .cuda()
        .half()
        .eval()
    )

In [4]:
# Settings handed to the Matching wrapper: one sub-dictionary per stage
# (keypoint detection with SuperPoint, matching with SuperGlue).
matching_config = {
    'superpoint': {
        'nms_radius': 3,
        'keypoint_threshold': 0.001,
        # NOTE(review): -1 presumably means "no cap on the number of keypoints" —
        # confirm against the SuperPoint implementation shipped in ma20230519.
        'max_keypoints': -1
    },
    'superglue': {
        'weights': 'outdoor',
        'sinkhorn_iterations': 20,
        'match_threshold': 0.2,
    }
}

# Build the matcher on the GPU and switch it to evaluation mode.
matching_model = Matching(matching_config).cuda().eval()

Loaded SuperPoint model
Loaded SuperGlue model ("outdoor" weights)


In [5]:
def get_img_pairs_all(fnames):
    """
    Enumerate every unordered pair of image indices.

    Args:
        fnames (list): List of image filenames; only its length is used.

    Returns:
        list: Tuples ``(i, j)`` with ``i < j``, in lexicographic order.
    """
    return list(itertools.combinations(range(len(fnames)), 2))


def get_global_desc(model, fnames):
    """
    Compute one L2-normalised global descriptor per image with a similarity model.

    Each image is loaded as RGB, run through the transform that timm derives
    from the model's data config, pushed through ``model.forward_features`` in
    half precision on the GPU, averaged over the last two axes and normalised.

    Args:
        model (torch.nn.Module): Similarity model exposing ``forward_features``.
        fnames (list): List of image filenames.

    Returns:
        torch.Tensor: CPU tensor holding one descriptor row per image, in the
        order of ``fnames``.

    Note:
        ``fnames`` must not be empty; ``torch.cat`` rejects an empty list.
    """
    config = resolve_data_config({}, model=model)
    transform = create_transform(**config)
    global_descs = []
    for fname in tqdm(fnames, desc='Get global features using similarity model'):
        # The context manager releases the file handle as soon as the pixels
        # have been decoded; convert() returns an independent in-memory image.
        with Image.open(fname) as raw_img:
            img = raw_img.convert('RGB')
        timg = transform(img).unsqueeze(0).cuda().half()
        with torch.no_grad():
            # Average over axes 2 and -1 of the feature map, leaving one
            # vector per image.
            desc = model.forward_features(timg).mean(dim=(-1, 2))
            desc = desc.view(1, -1)
            desc_norm = F.normalize(desc, dim=1, p=2)
        global_descs.append(desc_norm.detach().cpu())
    return torch.cat(global_descs, dim=0)


def get_image_pairs_filtered(model, fnames, sim_th=0.5, min_pairs=20, all_if_less=20):
    """
    Generate pairs of image indices based on similarity filtering using global descriptors.

    Args:
        model (torch.nn.Module): Similarity model.
        fnames (list): List of image filenames.
        sim_th (float): Maximum descriptor distance for two images to count as
            similar. Default is 0.5.
        min_pairs (int): If an image has fewer than this many candidates within
            ``sim_th``, its ``min_pairs`` nearest entries in the distance matrix
            are used instead (the image itself is among them and is skipped).
            Default is 20.
        all_if_less (int): If the number of images is less than or equal to
            all_if_less, return all possible pairs. Default is 20.

    Returns:
        tuple: ``(pairs, dm)`` where ``pairs`` is a sorted list of unique
        ``(i, j)`` tuples with ``i < j`` and ``dm`` is the pairwise distance
        matrix as a numpy array. ``dm`` is ``None`` when all pairs are returned
        because of ``all_if_less``.
    """
    num_imgs = len(fnames)

    if num_imgs <= all_if_less:
        return get_img_pairs_all(fnames), None

    descs = get_global_desc(model, fnames).type(torch.FloatTensor)
    dm = torch.cdist(descs, descs, p=2).detach().cpu().numpy()

    mask = dm <= sim_th
    selected_pairs = set()
    ar = np.arange(num_imgs)
    # The last image is never used as the anchor; it only appears in pairs
    # selected from the point of view of an earlier image.
    for st_idx in range(num_imgs-1):
        to_match = ar[mask[st_idx]]
        if len(to_match) < min_pairs:
            to_match = np.argsort(dm[st_idx])[:min_pairs]
        for idx in to_match:
            if st_idx == idx:
                continue
            # The descriptors are L2-normalised, so real distances stay far
            # below 1200; in practice this guard only rejects NaN distances
            # (a NaN comparison is always False).
            if dm[st_idx, idx] < 1200:
                selected_pairs.add(tuple(sorted((st_idx, idx.item()))))

    return sorted(selected_pairs), dm


def get_unique_idxs(A, dim=0):
    """
    Locate, for every unique slice of ``A`` along ``dim``, the position of its first occurrence.

    Args:
        A (torch.Tensor): Input tensor.
        dim (int): Dimension along which unique slices are taken. Default is 0.

    Returns:
        torch.Tensor: First-occurrence indices, ordered by the sorted unique values.
    """
    _, inverse, counts = torch.unique(A, dim=dim, sorted=True, return_inverse=True, return_counts=True)
    # A stable sort groups the positions by unique value while keeping the
    # original order inside each group, so each group starts with its first
    # occurrence.
    positions_by_value = torch.sort(inverse, stable=True)[1]
    group_ends = counts.cumsum(0)
    # Shift the running totals by one slot to obtain where each group starts.
    group_starts = torch.cat((group_ends.new_zeros(1), group_ends[:-1]))
    return positions_by_value[group_starts]

In [6]:
# Default settings
# Exclusive upper bound enforced on image ids by the images table, and the
# multiplier used to pack two image ids into a single pair id.
MAX_IMAGE_ID = 2**31 - 1
# from https://github.com/colmap/colmap/blob/dev/scripts/python/database.py

# One row per camera; `params` stores the intrinsic parameters as a raw blob.
CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
    camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    model INTEGER NOT NULL,
    width INTEGER NOT NULL,
    height INTEGER NOT NULL,
    params BLOB,
    prior_focal_length INTEGER NOT NULL)"""


# Per-image descriptor matrix (rows x cols) stored as a blob.
CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
    image_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""


# One row per image: unique name, owning camera and pose priors.
# The image id range check is filled in from MAX_IMAGE_ID.
CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
    image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE,
    camera_id INTEGER NOT NULL,
    prior_qw REAL,
    prior_qx REAL,
    prior_qy REAL,
    prior_qz REAL,
    prior_tx REAL,
    prior_ty REAL,
    prior_tz REAL,
    CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}),
    FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))
""".format(MAX_IMAGE_ID)


# Per-pair match blob together with a config code and the F, E and H matrices.
CREATE_TWO_VIEW_GEOMETRIES_TABLE = """
CREATE TABLE IF NOT EXISTS two_view_geometries (
    pair_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    config INTEGER NOT NULL,
    F BLOB,
    E BLOB,
    H BLOB)
"""


# Per-image keypoint matrix (rows x cols) stored as a blob.
CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
    image_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)
"""


# Per-pair match matrix (rows x cols) stored as a blob.
CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
    pair_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB)"""


# Unique index on the image name column.
CREATE_NAME_INDEX = \
    "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"


# Every statement above joined into one script, suitable for executescript().
CREATE_ALL = "; ".join([
    CREATE_CAMERAS_TABLE,
    CREATE_IMAGES_TABLE,
    CREATE_KEYPOINTS_TABLE,
    CREATE_DESCRIPTORS_TABLE,
    CREATE_MATCHES_TABLE,
    CREATE_TWO_VIEW_GEOMETRIES_TABLE,
    CREATE_NAME_INDEX
])


def image_ids_to_pair_id(image_id1, image_id2):
    """
    Pack two image ids into one pair id that does not depend on argument order.

    The smaller id is multiplied by ``MAX_IMAGE_ID`` and the larger id is added.
    """
    if image_id1 > image_id2:
        smaller, larger = image_id2, image_id1
    else:
        smaller, larger = image_id1, image_id2
    return smaller * MAX_IMAGE_ID + larger


def array_to_blob(array):
    """
    Serialise a numpy array to raw bytes for storage in a BLOB column.

    Uses ``ndarray.tobytes()``; the previously used ``ndarray.tostring()`` alias
    is deprecated and no longer exists in NumPy 2.0. The bytes are emitted in
    row-major order of the logical array, also for non-contiguous views.

    Args:
        array (np.ndarray): Array to serialise.

    Returns:
        bytes: The array's raw data.
    """
    return array.tobytes()


class COLMAPDatabase(sqlite3.Connection):
    """
    sqlite3 connection with convenience methods for writing COLMAP tables.

    Instances are normally obtained through ``COLMAPDatabase.connect``. The
    ``create_*`` methods run the module-level ``CREATE_*`` SQL scripts; the
    ``add_*`` methods each insert a single row and leave committing to the
    caller.
    """

    @staticmethod
    def connect(database_path):
        """Open ``database_path`` using this class as the connection factory."""
        return sqlite3.connect(database_path, factory=COLMAPDatabase)

    # --- schema helpers ----------------------------------------------------

    def create_tables(self):
        """Create all tables and the image-name index if they do not exist yet."""
        return self.executescript(CREATE_ALL)

    def create_cameras_table(self):
        """Create the cameras table if it does not exist yet."""
        return self.executescript(CREATE_CAMERAS_TABLE)

    def create_descriptors_table(self):
        """Create the descriptors table if it does not exist yet."""
        return self.executescript(CREATE_DESCRIPTORS_TABLE)

    def create_images_table(self):
        """Create the images table if it does not exist yet."""
        return self.executescript(CREATE_IMAGES_TABLE)

    def create_two_view_geometries_table(self):
        """Create the two_view_geometries table if it does not exist yet."""
        return self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE)

    def create_keypoints_table(self):
        """Create the keypoints table if it does not exist yet."""
        return self.executescript(CREATE_KEYPOINTS_TABLE)

    def create_matches_table(self):
        """Create the matches table if it does not exist yet."""
        return self.executescript(CREATE_MATCHES_TABLE)

    def create_name_index(self):
        """Create the unique index on image names if it does not exist yet."""
        return self.executescript(CREATE_NAME_INDEX)

    # --- row insertion -----------------------------------------------------

    def add_camera(self, model, width, height, params,
                   prior_focal_length=False, camera_id=None):
        """Insert a camera row (params stored as a float64 blob) and return its row id."""
        params_blob = array_to_blob(np.asarray(params, np.float64))
        row = (camera_id, model, width, height, params_blob, prior_focal_length)
        cursor = self.execute("INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)", row)
        return cursor.lastrowid

    def add_image(self, name, camera_id,
                  prior_q=np.zeros(4), prior_t=np.zeros(3), image_id=None):
        """Insert an image row with its pose priors and return its row id."""
        row = (
            image_id, name, camera_id,
            prior_q[0], prior_q[1], prior_q[2], prior_q[3],
            prior_t[0], prior_t[1], prior_t[2],
        )
        cursor = self.execute(
            "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", row)
        return cursor.lastrowid

    def add_keypoints(self, image_id, keypoints):
        """Insert the keypoints of one image; expects a 2-D array with 2, 4 or 6 columns."""
        assert(len(keypoints.shape) == 2)
        assert(keypoints.shape[1] in [2, 4, 6])

        kpts32 = np.asarray(keypoints, np.float32)
        n_rows, n_cols = kpts32.shape
        self.execute(
            "INSERT INTO keypoints VALUES (?, ?, ?, ?)",
            (image_id, n_rows, n_cols, array_to_blob(kpts32)))

    @staticmethod
    def _pair_rows(image_id1, image_id2, matches):
        """Validate an (N, 2) match array and return ``(pair_id, uint32 matches)``."""
        assert(len(matches.shape) == 2)
        assert(matches.shape[1] == 2)

        # The pair id is built with the smaller image id first, so the match
        # columns are swapped when the ids arrive in the other order.
        if image_id1 > image_id2:
            matches = matches[:,::-1]

        pair_id = image_ids_to_pair_id(image_id1, image_id2)
        return pair_id, np.asarray(matches, np.uint32)

    def add_matches(self, image_id1, image_id2, matches):
        """Insert the raw matches between two images."""
        pair_id, rows = self._pair_rows(image_id1, image_id2, matches)
        n_rows, n_cols = rows.shape
        self.execute(
            "INSERT INTO matches VALUES (?, ?, ?, ?)",
            (pair_id, n_rows, n_cols, array_to_blob(rows)))

    def add_two_view_geometry(self, image_id1, image_id2, matches,
                              F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2):
        """Insert matches together with the F, E and H matrices (stored as float64 blobs)."""
        pair_id, rows = self._pair_rows(image_id1, image_id2, matches)
        n_rows, n_cols = rows.shape
        f_blob, e_blob, h_blob = (
            array_to_blob(np.asarray(mat, dtype=np.float64)) for mat in (F, E, H)
        )
        self.execute(
            "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (pair_id, n_rows, n_cols, array_to_blob(rows), config,
             f_blob, e_blob, h_blob))


def get_focal(image_path, err_on_default=False):
    """
    Estimate the focal length of an image in pixels.

    The EXIF tag ``FocalLengthIn35mmFilm`` is looked up first in the base IFD
    and then in the Exif sub-IFD. Recent Pillow versions only expose the base
    IFD through ``getexif().items()``, so scanning that alone misses the tag.
    When a positive value is found the focal length is
    ``value / 35 * max(width, height)``; otherwise a prior of
    ``1.2 * max(width, height)`` is used.

    Args:
        image_path (str): Path to the image file.
        err_on_default (bool): Raise instead of falling back to the prior.

    Returns:
        float: Focal length in pixels.

    Raises:
        RuntimeError: If no usable EXIF value exists and ``err_on_default`` is True.
    """
    EXIF_IFD_POINTER = 0x8769  # tag that points to the Exif sub-IFD
    FOCAL_PRIOR = 1.2

    def _focal_35mm_from(tags):
        # https://github.com/colmap/colmap/blob/d3a29e203ab69e91eda938d6e56e1c7339d62a99/src/util/bitmap.cc#L299
        for tag, value in tags.items():
            if ExifTags.TAGS.get(tag, None) == 'FocalLengthIn35mmFilm':
                return float(value)
        return None

    # The context manager closes the file handle once the metadata is read.
    with Image.open(image_path) as image:
        max_size = max(image.size)
        exif = image.getexif()

        focal_35mm = None
        if exif is not None:
            focal_35mm = _focal_35mm_from(exif)
            # get_ifd is not available on very old Pillow versions.
            if focal_35mm is None and hasattr(exif, 'get_ifd'):
                focal_35mm = _focal_35mm_from(exif.get_ifd(EXIF_IFD_POINTER))

    # A zero, negative or NaN EXIF value is unusable: fall back to the prior.
    if focal_35mm is not None and focal_35mm > 0:
        return focal_35mm / 35. * max_size

    if err_on_default:
        raise RuntimeError("Failed to find focal length")

    # failed to find it in exif, use prior
    return FOCAL_PRIOR * max_size


def create_camera(db, image_path, camera_model):
    """
    Insert a camera for ``image_path`` into ``db`` and return its id.

    The principal point is placed at the image centre and the focal length
    comes from ``get_focal``.

    Args:
        db (COLMAPDatabase): Database to insert into.
        image_path (str): Image whose size and focal length define the camera.
        camera_model (str): One of ``'simple-pinhole'``, ``'pinhole'``,
            ``'simple-radial'`` or ``'opencv'``.

    Returns:
        int: Row id of the inserted camera.

    Raises:
        ValueError: If ``camera_model`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # Only the size is needed; close the file handle right away.
    with Image.open(image_path) as image:
        width, height = image.size

    focal = get_focal(image_path)
    cx, cy = width / 2, height / 2

    if camera_model == 'simple-pinhole':
        model = 0 # simple pinhole
        param_arr = np.array([focal, cx, cy])
    elif camera_model == 'pinhole':
        model = 1 # pinhole
        param_arr = np.array([focal, focal, cx, cy])
    elif camera_model == 'simple-radial':
        model = 2 # simple radial
        param_arr = np.array([focal, cx, cy, 0.1])
    elif camera_model == 'opencv':
        model = 4 # opencv
        param_arr = np.array([focal, focal, cx, cy, 0., 0., 0., 0.])
    else:
        raise ValueError(f'Unsupported camera model {camera_model!r}')

    return db.add_camera(model, width, height, param_arr)


def add_keypoints(db, feature_dir, img_dir, camera_model, single_camera=True):
    """
    Register every image found in ``keypoints.h5`` together with its keypoints.

    Args:
        db (COLMAPDatabase): Database to insert into.
        feature_dir (str): Directory containing ``keypoints.h5``.
        img_dir (str): Directory containing the image files named by the h5 keys.
        camera_model (str): Camera model name passed to ``create_camera``.
        single_camera (bool): If True, one camera (created from the first image)
            is shared by all images; otherwise each image gets its own camera.

    Returns:
        dict: Mapping from image filename to the image id assigned by the database.

    Raises:
        IOError: If an image named in the h5 file does not exist in ``img_dir``.
    """
    camera_id = None
    fname_to_id = {}
    # The context manager closes the h5 file even if an insert fails.
    with h5py.File(os.path.join(feature_dir, 'keypoints.h5'), 'r') as keypoint_f:
        for filename in tqdm(list(keypoint_f.keys())):
            keypoints = keypoint_f[filename][()]

            path = os.path.join(img_dir, filename)
            if not os.path.isfile(path):
                raise IOError(f'Invalid image path {path}')

            if camera_id is None or not single_camera:
                camera_id = create_camera(db, path, camera_model)
            image_id = db.add_image(filename, camera_id)
            fname_to_id[filename] = image_id

            db.add_keypoints(image_id, keypoints)

    return fname_to_id


def add_matches(db, feature_dir, fname_to_id, FH=None):
    """
    Insert the matches stored in ``matches.h5`` into the database.

    Args:
        db (COLMAPDatabase): Database to insert into.
        feature_dir (str): Directory containing ``matches.h5``.
        fname_to_id (dict): Mapping from image filename to database image id.
        FH (tuple, optional): ``(F, H)`` where each element maps
            ``(key_1, key_2)`` to a 3x3 matrix. When given, a two-view geometry
            row is written for every matched pair, and every remaining pair of
            registered images receives empty match and geometry rows.

    Notes:
        Pairs are tracked with the smaller image id first, so the same pair
        stored in both key orders is inserted once and the second occurrence
        only triggers a warning instead of a primary-key violation.
    """
    added = set()
    # The context manager closes the h5 file even if an insert fails.
    with h5py.File(os.path.join(feature_dir, 'matches.h5'), 'r') as match_file:
        for key_1 in match_file.keys():
            group = match_file[key_1]
            for key_2 in group.keys():
                id_1 = fname_to_id[key_1]
                id_2 = fname_to_id[key_2]

                # Normalise the order so the lookup agrees with the
                # order-independent pair id used by the database.
                pair_id = tuple(sorted((id_1, id_2)))
                if pair_id in added:
                    warnings.warn(f'Pair {pair_id} ({id_1}, {id_2}) already added!')
                    continue
                added.add(pair_id)

                matches = group[key_2][()]
                db.add_matches(id_1, id_2, matches)
                if FH:
                    db.add_two_view_geometry(id_1, id_2, matches, F=FH[0][(key_1, key_2)], E=np.eye(3), H=FH[1][(key_1, key_2)], config=3)

    if FH:
        # Enumerate the ids that were actually assigned rather than assuming
        # they are exactly 1..N.
        for id_1, id_2 in itertools.combinations(sorted(fname_to_id.values()), 2):
            if (id_1, id_2) not in added:
                db.add_matches(id_1, id_2, np.empty((0,2)))
                db.add_two_view_geometry(id_1, id_2, np.empty((0,2)), config=0)


def import_into_colmap(img_dir, feature_dir='.featureout', FH=None):
    """
    Build ``{feature_dir}/colmap.db`` from the keypoint and match h5 files.

    Creates the tables, registers every image with its own 'simple-radial'
    camera, inserts the matches, commits, and closes the connection.

    Args:
        img_dir (str): Directory containing the image files.
        feature_dir (str): Directory holding ``keypoints.h5`` / ``matches.h5``;
            the database file is written here as well.
        FH (tuple, optional): Forwarded to ``add_matches``.
    """
    db = COLMAPDatabase.connect(f"{feature_dir}/colmap.db")
    try:
        db.create_tables()
        fname_to_id = add_keypoints(db, feature_dir, img_dir, 'simple-radial', single_camera=False)
        add_matches(db, feature_dir, fname_to_id, FH=FH)
        db.commit()
    finally:
        # Release the sqlite handle before the file is used by other code.
        db.close()

In [7]:
def get_homography_matrix(source, destination):
    """ Calculates the entries of the Homography matrix between two sets of matching points.
    Args
    ----
        - `source`: Source points where each point is in (x, y) format.
        - `destination`: Destination points where each point is in (x, y) format.
    Returns
    ----
        - A numpy array of shape (3, 3) representing the Homography matrix,
          with the bottom-right entry fixed to 1.
    Raises
    ----
        - AssertionError if `source` or `destination` has fewer than four points.
        - AssertionError if `source` and `destination` are of different size.
    """
    assert len(source) >= 4, "must provide at least 4 source points"
    assert len(destination) >= 4, "must provide at least 4 destination points"
    assert len(source) == len(destination), "source and destination must be of equal length"
    A = []
    b = []
    # Each correspondence contributes two linear equations in the eight
    # unknown entries of the homography.
    for (s_x, s_y), (d_x, d_y) in zip(source, destination):
        A.append([s_x, s_y, 1, 0, 0, 0, (-d_x)*(s_x), (-d_x)*(s_y)])
        A.append([0, 0, 0, s_x, s_y, 1, (-d_y)*(s_x), (-d_y)*(s_y)])
        b += [d_x, d_y]
    h = np.linalg.lstsq(np.array(A), np.array(b), rcond=None)[0]
    # Append the fixed ninth entry before reshaping to 3x3.
    h = np.concatenate((h, [1]), axis=-1)
    return np.reshape(h, (3, 3))


def resize(image, image_size):
    """
    Scale an image so that its longer side equals ``image_size``, keeping the aspect ratio.

    Args:
        image (np.ndarray): Input image; its first two axes are height and width.
        image_size (int): Target length of the longer side.

    Returns:
        np.ndarray: Resized image.
        tuple: New size as ``(height, width)``.
    """
    src_h, src_w = image.shape[:2]
    aspect_ratio = src_h/src_w
    # The shorter side is truncated to an integer number of pixels.
    short_side = int(image_size/max(aspect_ratio, 1/aspect_ratio))
    # Portrait images get the long side as height, all others as width.
    new_size = (image_size, short_side) if aspect_ratio > 1 else (short_side, image_size)
    # cv2.resize takes the size as (width, height), hence the swap.
    resized = cv2.resize(image, (new_size[1], new_size[0]))
    return resized, new_size


def superglue_inference(model, img1, img2):
    """
    Run the matching model on an image pair and return the matched keypoints.

    Args:
        model (torch.nn.Module): Matching model; called with a dict holding
            ``'image0'`` and ``'image1'``.
        img1 (torch.Tensor): Image 1.
        img2 (torch.Tensor): Image 2.

    Returns:
        np.ndarray: Matched keypoints from image 1.
        np.ndarray: Matched keypoints from image 2, row-aligned with the first array.
    """
    with torch.no_grad():
        pred = model({'image0': img1, 'image1': img2})

    def _first_as_numpy(key):
        # Take the first batch element and move it to host memory.
        return pred[key][0].cpu().numpy()

    kpts1 = _first_as_numpy('keypoints0')
    kpts2 = _first_as_numpy('keypoints1')
    matches = _first_as_numpy('matches0')

    # Keypoints of image 1 without a partner are marked with -1.
    has_partner = matches > -1
    return kpts1[has_partner], kpts2[matches[has_partner]]


def matching_inference(model, fname1, fname2, cache=None):
    """
    Match two images and return the matched keypoints in original-image pixel coordinates.

    Each image is read as grayscale, resized so that its longer side equals the
    global ``image_size`` (unless it already does), scaled to [0, 1] and moved
    to the GPU. Preprocessed images are stored in ``cache`` keyed by filename
    so they are prepared only once across calls.

    Args:
        model (torch.nn.Module): Matching model.
        fname1 (str): Path to the first image file.
        fname2 (str): Path to the second image file.
        cache (dict): Cache dictionary for storing preprocessed images. If None
            (the default), a throwaway cache is used for this call only.

    Returns:
        np.ndarray: Matched keypoints from image 1.
        np.ndarray: Matched keypoints from image 2.

    Raises:
        IOError: If an image cannot be read.
    """
    # The default None used to fail on the membership test below.
    if cache is None:
        cache = {}

    for fname in (fname1, fname2):
        if fname in cache:
            continue
        img = cv2.imread(fname, 0)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise IOError(f'Failed to read image {fname}')
        h, w = h_r, w_r = img.shape[:2]
        if max(h, w) != image_size:
            img, (h_r, w_r) = resize(img, image_size)

        img = torch.from_numpy(img.astype(np.float32)/255.0).cuda()
        # Add batch and channel axes.
        img = img[None, None]
        cache[fname] = {'img': img, 'h': h, 'w': w, 'h_r': h_r, 'w_r': w_r}

    entry1, entry2 = cache[fname1], cache[fname2]
    mkpts1, mkpts2 = superglue_inference(model, entry1['img'], entry2['img'])

    # Map keypoints from the resized image back to the original resolution.
    for mkpts, entry in ((mkpts1, entry1), (mkpts2, entry2)):
        if max(entry['h'], entry['w']) != image_size:
            mkpts[:,0] *= entry['w']/entry['w_r']
            mkpts[:,1] *= entry['h']/entry['h_r']

    return mkpts1, mkpts2


def matching_pipeline(matching_model, fnames, index_pairs, feature_dir, manual_ransac=False, min_matches=150):
    """
    Match the requested image pairs and write per-image keypoints and per-pair matches.

    Three files are written into ``feature_dir``:
      * ``matches_{matching_name}.h5`` - raw matched coordinates per pair,
      * ``keypoints.h5`` - unique (rounded) keypoints per image,
      * ``matches.h5`` - per-pair index pairs into those unique keypoints.

    Args:
        matching_model (torch.nn.Module): Matching model.
        fnames (list): Image paths; ``index_pairs`` index into this list.
        index_pairs (list): ``(idx1, idx2)`` pairs to match.
        feature_dir (str): Output directory (must exist).
        manual_ransac (bool): Accepted for interface compatibility; not used
            by this function.
        min_matches (int): Pairs with fewer matches are discarded. Default is 150.

    Returns:
        None: always. The caller passes this on as ``FH``.
    """
    raw_matches_path = f"{feature_dir}/matches_{matching_name}.h5"

    # Stage 1: run the matcher on every pair and store the raw coordinates.
    cache = {}
    with h5py.File(raw_matches_path, mode='w') as f_match:

        for idx1, idx2 in tqdm(index_pairs, desc='Get matched keypoints using matching model'):
            fname1, fname2 = fnames[idx1], fnames[idx2]
            key1, key2 = fname1.split('/')[-1], fname2.split('/')[-1]

            mkpts1, mkpts2 = matching_inference(matching_model, fname1, fname2, cache)

            group = f_match.require_group(key1)
            if len(mkpts2) >= min_matches:
                group.create_dataset(key2, data=np.concatenate([mkpts1, mkpts2], axis=1))

    # Stage 2: collect all coordinates per image and remember, for every pair,
    # which rows of the per-image lists its matches occupy.
    kpts = defaultdict(list)
    total_kpts = defaultdict(int)
    match_indexes = defaultdict(dict)

    with h5py.File(raw_matches_path, mode='r') as f_match:
        for k1 in f_match.keys():
            group = f_match[k1]
            for k2 in group.keys():
                matches = group[k2][...]
                kpts[k1].append(matches[:, :2])
                kpts[k2].append(matches[:, 2:])
                current_match = torch.arange(len(matches)).reshape(-1, 1).repeat(1, 2)
                current_match[:, 0] += total_kpts[k1]
                current_match[:, 1] += total_kpts[k2]
                total_kpts[k1] += len(matches)
                total_kpts[k2] += len(matches)
                match_indexes[k1][k2] = current_match

    # Coordinates are rounded so that near-identical detections collapse to one keypoint.
    for k in kpts.keys():
        kpts[k] = np.round(np.concatenate(kpts[k], axis=0))

    # Stage 3: deduplicate keypoints per image, keeping the mapping from the
    # concatenated rows to the unique rows.
    unique_kpts = {}
    unique_match_idxs = {}
    for k in kpts.keys():
        uniq_kps, uniq_reverse_idxs = torch.unique(torch.from_numpy(kpts[k].astype(np.float32)), dim=0, return_inverse=True)
        unique_match_idxs[k] = uniq_reverse_idxs
        unique_kpts[k] = uniq_kps.numpy()

    with h5py.File(f"{feature_dir}/keypoints.h5", mode='w') as f_kp:
        for k, kpts1 in unique_kpts.items():
            f_kp[k] = kpts1

    # Stage 4: re-express every pair's matches as indices into the unique
    # keypoints, then drop duplicate matches and enforce that each keypoint is
    # used at most once per pair on either side.
    out_match = defaultdict(dict)

    for k1, group in match_indexes.items():
        for k2, m in group.items():
            m2 = deepcopy(m)
            m2[:,0] = unique_match_idxs[k1][m2[:,0]]
            m2[:,1] = unique_match_idxs[k2][m2[:,1]]
            mkpts = np.concatenate([unique_kpts[k1][m2[:,0]], unique_kpts[k2][m2[:,1]]], axis=1)
            unique_idxs_current = get_unique_idxs(torch.from_numpy(mkpts), dim=0)
            m2_semiclean = m2[unique_idxs_current]
            unique_idxs_current1 = get_unique_idxs(m2_semiclean[:, 0], dim=0)
            m2_semiclean = m2_semiclean[unique_idxs_current1]
            unique_idxs_current2 = get_unique_idxs(m2_semiclean[:, 1], dim=0)
            m2_semiclean2 = m2_semiclean[unique_idxs_current2]
            out_match[k1][k2] = m2_semiclean2.numpy()

    with h5py.File(f"{feature_dir}/matches.h5", mode='w') as f_match:
        for k1, gr in out_match.items():
            group = f_match.require_group(k1)
            for k2, match in gr.items():
                group[k2] = match

    return None


def colmap_pipeline(img_dir, feature_dir, FH=None):
    """
    Import the features into a COLMAP database and run incremental mapping.

    Args:
        img_dir (str): Directory containing the image files.
        feature_dir (str): Directory holding the h5 feature files; the database
            and the reconstruction output folder are created inside it.
        FH (tuple, optional): Forwarded to ``import_into_colmap``. When None,
            ``pycolmap.match_exhaustive`` is run on the database after import.

    Returns:
        The value returned by ``pycolmap.incremental_mapping``.
    """
    import_into_colmap(img_dir, feature_dir=feature_dir, FH=FH)

    database_path = f"{feature_dir}/colmap.db"
    if FH is None:
        pycolmap.match_exhaustive(database_path)

    output_path = f"{feature_dir}/colmap_rec_{matching_name}"
    # exist_ok avoids a FileExistsError when the folder is already present.
    os.makedirs(output_path, exist_ok=True)

    mapper_options = pycolmap.IncrementalMapperOptions()
    mapper_options.min_model_size = 3
    maps = pycolmap.incremental_mapping(database_path=database_path, image_path=img_dir, output_path=output_path, options=mapper_options)

    return maps


def postprocessing(maps, dataset, scene):
    """
    Pick the reconstruction with the most registered images and extract its poses.

    Args:
        maps: Dict of reconstructions keyed by index; any other type yields an
            empty result.
        dataset (str): Dataset name used in the result keys.
        scene (str): Scene name used in the result keys.

    Returns:
        dict: Maps ``'{dataset}/{scene}/images/{name}'`` to a dict with the
        rotation matrix under ``"R"`` and the translation vector under ``"t"``.
    """
    print ("Looking for the best reconstruction")

    # Ties keep the first reconstruction seen; empty reconstructions never win.
    best_idx, best_count = None, 0
    candidates = maps.items() if isinstance(maps, dict) else ()
    for rec_idx, reconstruction in candidates:
        print(rec_idx, reconstruction.summary())
        registered = len(reconstruction.images)
        if registered > best_count:
            best_idx, best_count = rec_idx, registered

    results = {}
    if best_idx is not None:
        best = maps[best_idx]
        print(best.summary())
        results = {
            f'{dataset}/{scene}/images/{im.name}': {"R": im.rotmat(), "t": im.tvec}
            for im in best.images.values()
        }

    print(f'Registered: {dataset} / {scene} -> {len(results)} images')

    return results


def arr_to_str(a):
    """Flatten an array and join its elements into one ';'-separated string."""
    flat = a.reshape(-1)
    return ';'.join(map(str, flat))

In [8]:
# Run the full pipeline for every dataset/scene and collect one submission row
# per image. Rows are gathered in a list and turned into a DataFrame once at
# the end, instead of concatenating a growing DataFrame inside the loop.
submission_columns = ['image_path', 'dataset', 'scene', 'rotation_matrix', 'translation_vector']
submission_rows = []
for dataset_scene in tqdm(datasets_scenes, desc='Running pipeline'):

    dataset, scene = dataset_scene.split('/')
    print(f"{dataset=}, {scene=}")

    img_dir = f"{INPUT_ROOT}/test/{dataset}/{scene}/images"
    # Scenes without an image folder are skipped and contribute no rows.
    if not os.path.exists(img_dir):
        continue

    feature_dir = f"{DATA_ROOT}/featureout/{dataset}/{scene}"
    # Start from an empty scratch directory so that re-running this cell does
    # not fail on an existing folder or on rows left in a stale colmap.db.
    if os.path.isdir(feature_dir):
        shutil.rmtree(feature_dir)
    os.makedirs(feature_dir)

    fnames = sorted(glob(f"{img_dir}/*"))

    # Similarity pipeline
    if similarity_filter:
        index_pairs, distance_matrix = get_image_pairs_filtered(similarity_model, fnames=fnames, sim_th=2.2, min_pairs=20, all_if_less=20)
        if distance_matrix is not None:
            # For every image, the other images ordered by descriptor distance
            # (the first argsort entry is dropped).
            distances = {fname: np.argsort(distance_matrix[idx])[1:] for idx, fname in enumerate(fnames)}
    else:
        index_pairs = get_img_pairs_all(fnames=fnames)

    # Matching pipeline
    FH = matching_pipeline(matching_model=matching_model,
                           fnames=fnames,
                           index_pairs=index_pairs,
                           feature_dir=feature_dir,
                           manual_ransac=manual_ransac)

    # Colmap pipeline
    maps = colmap_pipeline(img_dir, feature_dir, FH=FH)

    # Postprocessing
    results = postprocessing(maps, dataset, scene)

    # Create submission
    for fname in fnames:
        # "dataset/scene/images/filename" - the key format used by postprocessing.
        image_id = '/'.join(fname.split('/')[-4:])
        if image_id in results:
            R = results[image_id]['R'].reshape(-1)
            T = results[image_id]['t'].reshape(-1)
        else:
            # Images that were not registered get an identity pose.
            R = np.eye(3).reshape(-1)
            T = np.zeros((3))

        submission_rows.append({'image_path': image_id,
                                'dataset': dataset,
                                'scene': scene,
                                'rotation_matrix': arr_to_str(R),
                                'translation_vector': arr_to_str(T)})

results_df = pd.DataFrame(submission_rows, columns=submission_columns)

Running pipeline:   0%|          | 0/1 [00:00<?, ?it/s]

dataset='2cfa01ab573141e4', scene='2fa124afd1f74f38'


Running pipeline: 100%|██████████| 1/1 [00:00<00:00, 305.17it/s]


In [9]:
# Write the collected per-image poses to the submission file, without the index column.
results_df.to_csv(f"{OUTPUT_ROOT}/submission.csv", index=False)