### Reference
- https://www.kaggle.com/code/namgalielei/loftr-validation-score
- https://www.kaggle.com/code/remekkinas/detector-free-local-feature-matching-w-transformer
- https://www.kaggle.com/code/eduardtrulls/imc2022-training-set-eval-one-function

### This notebook computes the validation score on a part of the training set

In [None]:
!pip install kornia --no-index --find-links=file:///kaggle/input/imc2022-dependencies/pip/kornia/ --upgrade 
!pip install kornia_moons --no-index --find-links=file:///kaggle/input/imc2022-dependencies/pip/kornia_moons/ --no-deps  --upgrade 

In [None]:
import sys
sys.path.append('../input/loftrutils/LoFTR-master/LoFTR-master/')

In [None]:
!cp -r ../input/imutils/imutils-0.5.3/ /
!pip install /imutils-0.5.3/

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import glob
import gc
import random

from collections import namedtuple
from tqdm.notebook import tqdm

from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.cm as cm

import torch
import kornia as K
import kornia.feature as KF
from kornia.feature.loftr import LoFTR
from kornia_moons.feature import *

# from src.loftr import LoFTR, default_cfg
from src.utils.plotting import make_matching_figure

In [None]:
# Root folder of the competition data; the cells below read the per-scene CSV
# files and images from its train/ subfolder.
dataset_path = '/kaggle/input/image-matching-challenge-2022/'

In [None]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without a GPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Checkpoint file whose 'state_dict' entry is loaded into the LoFTR matcher.
WEIGHT_PATH = '../input/loftrutils/outdoor_ds.ckpt'
# Default longest side (in pixels) used by resize_keep_ratio; larger images are shrunk.
LONGEST_EDGE = 1280

In [None]:
# Build the LoFTR model without bundled pretrained weights; the weights come
# from the local checkpoint loaded just below.
matcher = LoFTR(pretrained=None)
# map_location='cpu' deserializes the checkpoint on the CPU whatever device it
# was saved from, so loading also works on a machine without CUDA. The model is
# moved to DEVICE afterwards.
matcher.load_state_dict(torch.load(WEIGHT_PATH, map_location='cpu')['state_dict'])
matcher = matcher.to(DEVICE)
# Inference only: switch the module to evaluation mode.
matcher.eval()
print()

In [None]:
import imutils
def resize_keep_ratio(img, longest_size=LONGEST_EDGE):
    '''Shrink an image so that its longest side equals ``longest_size``.

    Images whose longest side is already at most ``longest_size`` are returned
    unchanged. Otherwise the longer side (the height on ties) is passed to
    ``imutils.resize`` as the target size.
    '''
    rows, cols = img.shape[:2]

    # Nothing to do when the image already fits.
    if max(rows, cols) <= longest_size:
        return img

    # Constrain whichever side is the longest one.
    target = {'height': longest_size} if rows >= cols else {'width': longest_size}
    return imutils.resize(img, **target)

def load_torch_image(fname):
    '''Read an image file and turn it into a float RGB tensor for the matcher.

    The image is read with OpenCV, shrunk with ``resize_keep_ratio``, resized so
    that both sides are multiples of 8, converted with ``K.image_to_tensor`` and
    divided by 255, and finally reordered from BGR to RGB.

    Args:
        fname: Path of the image file.

    Returns:
        The image tensor (presumably of shape (1, 3, H, W), since
        ``image_to_tensor`` is called with ``keepdim=False`` - confirm against
        the kornia version in use).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``fname``.
    '''
    img = cv2.imread(fname)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # fail here with a clear message instead of a later AttributeError.
        raise FileNotFoundError(f'Could not read image: {fname}')

    img = resize_keep_ratio(img)
    # Input size should be divisible by 8: round each side down to a multiple of 8.
    img = cv2.resize(img, (img.shape[1]//8*8, img.shape[0]//8*8))
    img = K.image_to_tensor(img, False).float() /255.
    img = K.color.bgr_to_rgb(img)
    return img

In [None]:
def match(img_path0, img_path1, matcher, device=DEVICE):
    '''Estimate the fundamental matrix between two images from the matcher's correspondences.

    Both images are loaded with ``load_torch_image``, converted to grayscale and
    passed to ``matcher``. The matched keypoints are then handed to
    ``cv2.findFundamentalMat`` (USAC_MAGSAC).

    Args:
        img_path0: Path of the first image.
        img_path1: Path of the second image.
        matcher: Callable taking a dict with ``image0``/``image1`` tensors and
            returning a dict with ``keypoints0``/``keypoints1`` tensors.
        device: Device the input tensors are moved to.

    Returns:
        A 3x3 array. It is all zeros when there are 8 or fewer matches, or when
        OpenCV does not return a single 3x3 matrix.
    '''
    img0 = load_torch_image(img_path0)
    img1 = load_torch_image(img_path1)

    input_dict = {"image0": K.color.rgb_to_grayscale(img0).to(device),
                  "image1": K.color.rgb_to_grayscale(img1).to(device)}

    with torch.no_grad():
        correspondences = matcher(input_dict)

    mkpts0 = correspondences['keypoints0'].cpu().numpy()
    mkpts1 = correspondences['keypoints1'].cpu().numpy()

    F = None
    if len(mkpts0) > 8:
        F, _ = cv2.findFundamentalMat(mkpts0, mkpts1, cv2.USAC_MAGSAC, 0.2, 0.999, 100000)

    # Explicit validation instead of `assert` inside a bare `except`: asserts are
    # stripped under `python -O`, and a bare except would hide unrelated errors.
    # F is None when the estimation fails.
    if F is None or F.shape != (3, 3):
        F = np.zeros((3, 3))

    # Release GPU memory between pairs.
    del correspondences, input_dict
    del mkpts0, mkpts1
    torch.cuda.empty_cache()
    gc.collect()

    return F

In [None]:
def match_and_draw(img_path0, img_path1, matcher, device=DEVICE):
    '''Match two images and plot the inlier correspondences.

    The images are matched exactly as in ``match``; the inlier mask returned by
    ``cv2.findFundamentalMat`` is then passed to ``draw_LAF_matches`` together
    with the keypoints. Nothing is drawn (a message is printed instead) when
    there are 8 or fewer matches or when the fundamental matrix estimation fails.

    Args:
        img_path0: Path of the first image.
        img_path1: Path of the second image.
        matcher: Callable taking a dict with ``image0``/``image1`` tensors and
            returning a dict with ``keypoints0``/``keypoints1`` tensors.
        device: Device the input tensors are moved to.

    Returns:
        None.
    '''
    img0 = load_torch_image(img_path0)
    img1 = load_torch_image(img_path1)

    input_dict = {"image0": K.color.rgb_to_grayscale(img0).to(device),
                  "image1": K.color.rgb_to_grayscale(img1).to(device)}

    with torch.no_grad():
        correspondences = matcher(input_dict)

    mkpts0 = correspondences['keypoints0'].cpu().numpy()
    mkpts1 = correspondences['keypoints1'].cpu().numpy()

    # Both stay None when there are too few matches, so the drawing step below
    # can detect it instead of hitting an undefined `inliers`.
    F, inliers = None, None
    if len(mkpts0) > 8:
        F, inliers = cv2.findFundamentalMat(mkpts0, mkpts1, cv2.USAC_MAGSAC, 0.2, 0.999, 100000)

    if F is None or F.shape != (3, 3) or inliers is None:
        print(f'match_and_draw: no valid fundamental matrix from {len(mkpts0)} matches, nothing to draw')
    else:
        # Each keypoint becomes a local affine frame with unit scale; match i in
        # image 0 is paired with match i in image 1.
        draw_LAF_matches(
            KF.laf_from_center_scale_ori(torch.from_numpy(mkpts0).view(1,-1, 2),
                                        torch.ones(mkpts0.shape[0]).view(1,-1, 1, 1),
                                        torch.ones(mkpts0.shape[0]).view(1,-1, 1)),

            KF.laf_from_center_scale_ori(torch.from_numpy(mkpts1).view(1,-1, 2),
                                        torch.ones(mkpts1.shape[0]).view(1,-1, 1, 1),
                                        torch.ones(mkpts1.shape[0]).view(1,-1, 1)),
            torch.arange(mkpts0.shape[0]).view(-1,1).repeat(1,2),
            K.tensor_to_image(img0),
            K.tensor_to_image(img1),
            inliers,
            draw_dict={'inlier_color': (0.2, 1, 0.2),
                       'tentative_color': None,
                       'feature_color': (0.2, 0.5, 1), 'vertical': False})

    # Release GPU memory between pairs.
    del correspondences, input_dict
    del mkpts0, mkpts1
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# Tiny constant used by the metric code below: added to vector norms before
# dividing by them, and used as the lower bound of the rotation loss.
eps = 1e-15

def QuaternionFromMatrix(matrix):
    '''Transform a rotation matrix into a quaternion.

    Args:
        matrix: Array-like whose top-left 3x3 block is the rotation matrix.

    Returns:
        A length-4 array. The first component is the scalar part (the one
        associated with the trace row of the symmetric matrix built below) and
        is made non-negative.
    '''

    # np.asarray only copies when it has to. np.array(..., copy=False) raises a
    # ValueError on NumPy >= 2 whenever a copy is unavoidable (e.g. list input).
    M = np.asarray(matrix, dtype=np.float64)[:4, :4]
    m00 = M[0, 0]
    m01 = M[0, 1]
    m02 = M[0, 2]
    m10 = M[1, 0]
    m11 = M[1, 1]
    m12 = M[1, 2]
    m20 = M[2, 0]
    m21 = M[2, 1]
    m22 = M[2, 2]

    # Only the lower triangle is filled in: np.linalg.eigh reads the lower
    # triangle by default and treats the matrix as symmetric. The local is not
    # called `K` so it does not shadow the module-level kornia alias.
    sym = np.array([[m00 - m11 - m22, 0.0, 0.0, 0.0],
                    [m01 + m10, m11 - m00 - m22, 0.0, 0.0],
                    [m02 + m20, m12 + m21, m22 - m00 - m11, 0.0],
                    [m21 - m12, m02 - m20, m10 - m01, m00 + m11 + m22]])
    sym /= 3.0

    # The quaternion is the eigenvector that corresponds to the largest eigenvalue,
    # reordered so that its last component comes first.
    w, V = np.linalg.eigh(sym)
    q = V[[3, 0, 1, 2], np.argmax(w)]

    # q and -q describe the same rotation; keep the one with non-negative first component.
    if q[0] < 0:
        np.negative(q, q)

    return q


def ComputeErrorForOneExample(q_gt, T_gt, q, T, scale):
    '''Compute the rotation and translation errors for a single example.

    Returns a pair ``(rotation error in degrees, translation error)``. These are
    combined at different thresholds by ComputeMaa in order to compute the mean
    Average Accuracy. Note: a function with the same name is defined again
    further down in this cell and replaces this one.
    '''

    # Normalize both quaternions (eps guards against a zero norm).
    unit_gt = q_gt / (np.linalg.norm(q_gt) + eps)
    unit_pred = q / (np.linalg.norm(q) + eps)

    # Rotation error from the quaternion dot product, clamped from below by eps.
    dot = np.sum(unit_pred * unit_gt)
    loss_q = np.maximum(eps, (1.0 - dot**2))
    err_q = np.arccos(1 - 2 * loss_q)

    # Apply the scaling factor for this scene; the prediction is rescaled to the
    # length of the ground-truth translation.
    gt_t = T_gt * scale
    pred_t = T * np.linalg.norm(T_gt) * scale / (np.linalg.norm(T) + eps)

    # The sign of the predicted translation is ignored: keep the smaller error.
    dist_same_sign = np.linalg.norm(gt_t - pred_t)
    dist_flipped = np.linalg.norm(gt_t + pred_t)
    err_t = min(dist_same_sign, dist_flipped)

    return err_q * 180 / np.pi, err_t


def ComputeMaa(err_q, err_t, thresholds_q, thresholds_t):
    '''Compute the mean Average Accuracy at different thresholds, for one scene.

    For every pair of thresholds ``(th_q, th_t)`` the accuracy is the fraction of
    examples whose rotation error is below ``th_q`` and whose translation error
    is below ``th_t``.

    Returns a tuple ``(mean accuracy, accuracies, rotation-only accuracies,
    translation-only accuracies)``; the last three are arrays with one entry per
    threshold pair.
    '''

    assert len(err_q) == len(err_t)

    # Convert once instead of once per threshold.
    rot_errors = np.array(err_q)
    trans_errors = np.array(err_t)
    num_q, num_t = len(err_q), len(err_t)

    acc, acc_q, acc_t = [], [], []
    for th_q, th_t in zip(thresholds_q, thresholds_t):
        rot_ok = rot_errors < th_q
        trans_ok = trans_errors < th_t
        acc.append(np.bitwise_and(rot_ok, trans_ok).sum() / num_q)
        acc_q.append(rot_ok.sum() / num_q)
        acc_t.append(trans_ok.sum() / num_t)

    return np.mean(acc), np.array(acc), np.array(acc_q), np.array(acc_t)


def ComputeFundamentalMatrix(K1, K2, R1, R2, T1, T2):
    '''Compute the fundamental matrix, given intrinsics and extrinsics for two cameras.'''
    # Relative pose between the two cameras: dR = R2 * R1^T, dT = T2 - dR * T1.
    rel_rot = np.dot(R2, R1.T)
    rel_trans = (T2 - np.dot(rel_rot, T1)).flatten()

    # Skew-symmetric (cross-product) matrix of A = K1 * dR^T * dT.
    A = np.dot(K1, np.dot(rel_rot.T, rel_trans))
    ax, ay, az = A[0], A[1], A[2]
    skew = np.array([[0, -az, ay],
                     [az, 0, -ax],
                     [-ay, ax, 0]])

    K2_inv_t = np.linalg.inv(K2).T
    return np.matmul(K2_inv_t, np.matmul(rel_rot, np.matmul(K1.T, skew)))


def DecomposeFundamentalMatrixWithIntrinsics(F, K1, K2):
    '''Decompose the fundamental matrix into R and T, given the intrinsics.

    Returns ``(R_a, R_b, T)``: the two candidate rotation matrices and the
    translation direction (the last column of U from the SVD of the essential
    matrix).
    '''

    # This fundamentally reimplements this function: https://github.com/opencv/opencv/blob/be38d4ea932bc3a0d06845ed1a2de84acc2a09de/modules/calib3d/src/five-point.cpp#L742
    # This is a pre-requisite of OpenCV's recoverPose: https://github.com/opencv/opencv/blob/be38d4ea932bc3a0d06845ed1a2de84acc2a09de/modules/calib3d/src/five-point.cpp#L559
    # Instead of the cheirality check with correspondences, we keep and evaluate the different hypotheses downstream, and pick the best one.
    # Note that our metric does not care about the sign of the translation vector, so we only need to evaluate the two rotation matrices.
    essential = np.matmul(K2.T, np.matmul(F, K1))

    left, _, right_h = np.linalg.svd(essential)
    # Flip signs in place so that both orthogonal factors have determinant +1.
    if np.linalg.det(left) < 0:
        left *= -1
    if np.linalg.det(right_h) < 0:
        right_h *= -1

    W = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
    R_a = np.matmul(left, np.matmul(W, right_h))
    R_b = np.matmul(left, np.matmul(W.T, right_h))

    return R_a, R_b, left[:, -1]


def ComputeErrorForOneExample(q_gt, T_gt, q, T, scale):
    '''Compute the error metric for a single example.

    The function returns two errors: the rotation error in degrees and the
    translation error. These are combined at different thresholds by ComputeMaa,
    downstream, in order to compute the mean Average Accuracy.'''

    def _unit(quat):
        # eps keeps the division finite for a zero-norm quaternion.
        return quat / (np.linalg.norm(quat) + eps)

    q_gt_unit, q_unit = _unit(q_gt), _unit(q)

    # Rotation: angle derived from the quaternion dot product, loss clamped at eps.
    loss_q = np.maximum(eps, (1.0 - np.sum(q_unit * q_gt_unit)**2))
    err_q = np.arccos(1 - 2 * loss_q)

    # Translation: apply the scaling factor for this scene, after giving the
    # prediction the same length as the ground truth.
    T_ref = T_gt * scale
    T_est = T * np.linalg.norm(T_gt) * scale / (np.linalg.norm(T) + eps)

    # Either sign of the estimated translation is accepted.
    err_t = min(np.linalg.norm(T_ref - T_est), np.linalg.norm(T_ref + T_est))

    return err_q * 180 / np.pi, err_t


def FlattenMatrix(M, num_digits=8):
    '''Convenience function to write CSV files.

    Flattens ``M`` and joins its values with single spaces, each written in
    scientific notation with ``num_digits`` decimals.
    '''

    def _as_text(v):
        return f'{v:.{num_digits}e}'

    return ' '.join(map(_as_text, M.flatten()))


def EvaluateSubmission(df):
    '''Evaluate a prediction file against the ground truth.

    Every row of ``df`` provides a predicted fundamental matrix plus the
    intrinsics, ground-truth poses and scene scaling factor of an image pair.
    Note that only the subset of entries in the prediction file will be evaluated.

    Returns ``(mean mAA over scenes, mAA per scene, rotation errors, translation
    errors)``; the two error dicts are keyed by scene and then by pair id.
    '''

    thresholds_q = np.linspace(1, 10, 10)
    thresholds_t = np.geomspace(0.2, 5, 10)

    scenes = df['scene'].unique()
    errors_dict_q = {name: {} for name in scenes}
    errors_dict_t = {name: {} for name in scenes}

    for _, row in df.iterrows():
        scene = row.scene
        pair = row.image_id_1 + '-' + row.image_id_2

        # Two rotation hypotheses and one translation direction from the prediction.
        R_pred_a, R_pred_b, T_pred = DecomposeFundamentalMatrixWithIntrinsics(
            row.fundamental_matrix, row.K1, row.K2)
        q_pred_a = QuaternionFromMatrix(R_pred_a)
        q_pred_b = QuaternionFromMatrix(R_pred_b)

        # Ground-truth relative pose between the two cameras.
        dR_gt = np.dot(row.R2, row.R1.T)
        dT_gt = (row.T2 - np.dot(dR_gt, row.T1)).flatten()
        q_gt = QuaternionFromMatrix(dR_gt)
        q_gt = q_gt / (np.linalg.norm(q_gt) + eps)

        # No cheirality check: score both rotation hypotheses and keep the better one.
        (err_q_a, err_t_a), (err_q_b, err_t_b) = [
            ComputeErrorForOneExample(q_gt, dT_gt, q_pred, T_pred, row.scaling_factor)
            for q_pred in (q_pred_a, q_pred_b)
        ]
        # Both hypotheses share the same translation, so its error must be identical.
        assert err_t_a == err_t_b
        errors_dict_q[scene][pair] = min(err_q_a, err_q_b)
        errors_dict_t[scene][pair] = err_t_a

    # Aggregate the results by computing the final metric for each scene, and then averaging across all scenes.
    maa_per_scene = {}
    for name in scenes:
        scene_err_q = list(errors_dict_q[name].values())
        scene_err_t = list(errors_dict_t[name].values())
        maa_per_scene[name] = ComputeMaa(scene_err_q, scene_err_t, thresholds_q, thresholds_t)[0]

    overall_maa = np.mean(list(maa_per_scene.values()))
    return overall_maa, maa_per_scene, errors_dict_q, errors_dict_t

In [None]:
# Per-scene scaling factors of the training set, with the factor column forced to float.
scaling_df = pd.read_csv(f'{dataset_path}/train/scaling_factors.csv').astype({'scaling_factor': float})
scaling_df

In [None]:
# Upper bound on the number of image pairs kept per scene.
max_num_pairs = 50


def _parse_array(text, shape):
    '''Parse a space-separated string of numbers into a float array of the given shape.'''
    return np.array([float(v) for v in text.split(' ')]).reshape(shape)


def _load_scene_pairs(scene):
    '''Load the image pairs of one training scene together with their calibration.

    Pairs with covisibility of at most 0.1 are dropped and at most
    ``max_num_pairs`` of the remaining pairs are kept (sampled with a fixed
    seed). Each pair is joined with the intrinsics, rotation and translation of
    both of its images (columns K1/R1/T1 and K2/R2/T2).
    '''
    pair_df = pd.read_csv(f'{dataset_path}/train/{scene}/pair_covisibility.csv')
    pair_df['scene'] = scene
    pair_df = pair_df[pair_df['covisibility']>0.1].copy()
    pair_df.reset_index(drop=True, inplace=True)

    if len(pair_df) > max_num_pairs:
        # Fixed random_state keeps the validation subset reproducible.
        pair_df = pair_df.sample(n=max_num_pairs, random_state=42)
        pair_df.reset_index(drop=True, inplace=True)

    pair_df['fundamental_matrix'] = pair_df['fundamental_matrix'].apply( lambda x: _parse_array(x, [3, 3]) )
    # A pair id has the form "<image_id_1>-<image_id_2>".
    pair_df['image_id_1'] = pair_df['pair'].apply( lambda x: x.split('-')[0] )
    pair_df['image_id_2'] = pair_df['pair'].apply( lambda x: x.split('-')[1] )
    pair_df.drop( 'pair', axis=1, inplace=True )

    # Calibration
    calib_df = pd.read_csv(f'{dataset_path}/train/{scene}/calibration.csv')
    calib_df['camera_intrinsics'] = calib_df['camera_intrinsics'].apply( lambda x: _parse_array(x, [3, 3]) )
    calib_df['rotation_matrix'] = calib_df['rotation_matrix'].apply( lambda x: _parse_array(x, [3, 3]) )
    calib_df['translation_vector'] = calib_df['translation_vector'].apply( lambda x: _parse_array(x, [3, 1]) )

    # Attach the calibration of the first and of the second image of every pair.
    pair_df = pd.merge( pair_df, calib_df.rename( columns={'image_id':'image_id_1', 'camera_intrinsics':'K1', 'rotation_matrix':'R1', 'translation_vector':'T1'} ), on=['image_id_1'], how='left')
    pair_df = pd.merge( pair_df, calib_df.rename( columns={'image_id':'image_id_2', 'camera_intrinsics':'K2', 'rotation_matrix':'R2', 'translation_vector':'T2'} ), on=['image_id_2'], how='left')

    return pair_df


# Concatenate once at the end: growing a DataFrame with pd.concat inside the
# loop copies all previous rows on every iteration and starts from an empty frame.
data_df = pd.concat(
    [_load_scene_pairs(scene) for scene in scaling_df['scene'].values],
    axis=0, ignore_index=True)

data_df = pd.merge( data_df, scaling_df, on=['scene'], how='left')
data_df.head()

In [None]:
# Disabled sanity check, kept as a string literal so it does not run: it would
# evaluate data_df while its 'fundamental_matrix' column still holds the values
# read from pair_covisibility.csv, i.e. before the matcher output replaces them.
'''
print('--- Processing a ground truth submission ---')
maa, maa_per_scene, errors_dict_q, errors_dict_t = EvaluateSubmission(data_df)

for scene, cur_maa in maa_per_scene.items():
    print(f'Scene:{scene:25s} ({len(errors_dict_q[scene])} pairs), mAA={cur_maa:.05f}')
    
print()
print(f'Full dataset: mAA={maa:.05f}')
'''

## Evaluate on training set (sample)

In [None]:
# Predict a fundamental matrix for every sampled pair and store the predictions
# in data_df, replacing the values that were read from the CSV files.
F_list = []

pair_columns = data_df[['scene', 'image_id_1', 'image_id_2']]
for i, scene, image_id_1, image_id_2 in tqdm( pair_columns.itertuples(name=None), total=len(data_df) ):
    img_path1 = f'{dataset_path}/train/{scene}/images/{image_id_1}.jpg'
    img_path2 = f'{dataset_path}/train/{scene}/images/{image_id_2}.jpg'

    F = match(img_path1, img_path2, matcher)
    F_list.append(F)

    # Visualize the matches of the first three pairs only.
    if i < 3:
        match_and_draw(img_path1, img_path2, matcher)

data_df['fundamental_matrix'] = F_list

In [None]:
# Score the predicted fundamental matrices stored in data_df.
maa, maa_per_scene, errors_dict_q, errors_dict_t = EvaluateSubmission(data_df)

# Per-scene report: scene name, number of evaluated pairs and the scene mAA.
for scene, cur_maa in maa_per_scene.items():
    print(f'Scene:{scene:25s} ({len(errors_dict_q[scene])} pairs), mAA={cur_maa:.05f}')
    
print()
# Final score: the mean of the per-scene mAA values.
print(f'Full dataset: mAA={maa:.05f}')