### This notebook is one of my final submissions which had the best private score.
### I also tried ensembling DKM and SE2-LoFTR under multiple conditions and multi-stage approaches, but unfortunately those did not work well for me.

In [None]:
%%capture
# %%capture silences this cell's output (here: the pip install logs).
# NOTE(review): dry_run is not referenced anywhere else in the visible notebook.
dry_run = False
# Install kornia and kornia_moons from local wheel files under ../input/kornialoftr.
!pip install ../input/kornialoftr/kornia-0.6.4-py2.py3-none-any.whl
!pip install ../input/kornialoftr/kornia_moons-0.1.9-py3-none-any.whl

In [None]:
%matplotlib inline

import os
import csv
import random
from glob import glob
from tqdm import tqdm
from collections import namedtuple

import cv2
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import torch
import torchvision.transforms as transforms

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import kornia
from kornia_moons.feature import *
import kornia as K
import kornia.feature as KF
import gc
import pydegensac

import sys
import time

sys.path.append("../input/")
sys.path.append("../input/super-glue-pretrained-network")

from models.matching import Matching as Matching_SuperGlue
from models.utils import (compute_pose_error, compute_epipolar_error,
                          estimate_pose, make_matching_plot,
                          error_colormap, AverageTimer, pose_auc, read_image,
                          rotate_intrinsics, rotate_pose_inplane,
                          scale_intrinsics)

In [None]:
# Check which GPUs I am assigned to
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

## General Helper Functions

In [None]:
src = '/kaggle/input/image-matching-challenge-2022/'

# Read the test manifest into a list of rows (each row is a list of strings),
# discarding the header line.
with open(f'{src}/test.csv') as f:
    reader = csv.reader(f, delimiter=',')
    next(reader, None)  # Skip header.
    test_samples = [row for row in reader]


def FlattenMatrix(M, num_digits=8):
    """Serialize a matrix as one space-separated line for the submission CSV.

    Every element of ``M.flatten()`` is written in scientific notation with
    ``num_digits`` digits after the decimal point.
    """
    formatted_values = []
    for value in M.flatten():
        formatted_values.append(f'{value:.{num_digits}e}')
    return ' '.join(formatted_values)


def load_torch_image(device, fname=None, local_image=None, size=840.0):
    """Load an image, optionally resize it, and return it as an RGB float tensor.

    Args:
        device: torch device the resulting tensor is moved to.
        fname: path read with ``cv2.imread``; only used when ``local_image`` is None.
        local_image: image array already in memory (e.g. the result of
            ``cv2.imread``). It is copied, so the caller's array is not modified.
        size: target length in pixels of the longer image side. ``-1`` keeps
            the original resolution.

    Returns:
        ``(image, scale)`` where ``image`` is the output of
        ``K.image_to_tensor(..., False)`` divided by 255 and converted from BGR
        to RGB, and ``scale`` is new_size / old_size (note: the inverse of the
        convention the SuperGlue helpers use in this notebook).

    Raises:
        ValueError: if neither ``fname`` nor ``local_image`` is given.
        FileNotFoundError: if ``cv2.imread`` could not read ``fname``.
    """
    if local_image is None:
        if fname is None:
            raise ValueError("load_torch_image needs either fname or local_image")
        img = cv2.imread(fname)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {fname}")
    else:
        # The image is already in memory; work on a copy.
        img = np.copy(local_image)

    if size == -1:
        scale = 1
    else:
        scale = float(size) / float(max(img.shape[0], img.shape[1]))

    # Never let rounding produce a zero-sized side (very elongated images).
    w = max(1, int(img.shape[1] * scale))
    h = max(1, int(img.shape[0] * scale))
    if (h, w) != (img.shape[0], img.shape[1]):
        img = cv2.resize(img, (w, h))
    img = K.image_to_tensor(img, False).float() / 255.0
    img = K.color.bgr_to_rgb(img)

    # the scale value here is the new_size / old_size, different from the original SuperGlue
    return img.to(device), scale

# Tabular preview of the test manifest rows; the last line renders the frame.
sample_columns = ["sample_id", "batch_id", "image_0_id", "image_1_id"]
test_samples_df = pd.DataFrame(data=test_samples, columns=sample_columns)
test_samples_df

## Load SuperGlue

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Arguments forwarded to the SuperGlue image reader.
resize = [-1]  # [-1] means "no resize"
resize_float = True

# Settings for the SuperPoint detector and the SuperGlue matcher.
config = {}
config["superpoint"] = {
    "nms_radius": 4,
    "keypoint_threshold": 0.005,
    "max_keypoints": 2048,
}
config["superglue"] = {
    "weights": "outdoor",
    "sinkhorn_iterations": 160,
    "match_threshold": 0.2,
}

# Build the combined model, switch it to inference mode and move it to the device.
matcher_SG = Matching_SuperGlue(config)
matcher_SG = matcher_SG.eval().to(device)

## Load LoFTR

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build LoFTR without downloading weights; they are loaded from the local checkpoint.
matcher_LoFTR = KF.LoFTR(pretrained=None)
# map_location="cpu" lets the checkpoint deserialize even when no GPU is
# available (the device fallback above); the model is moved to `device` below.
loftr_checkpoint = torch.load("../input/kornialoftr/loftr_outdoor.ckpt", map_location="cpu")
matcher_LoFTR.load_state_dict(loftr_checkpoint['state_dict'])
matcher_LoFTR = matcher_LoFTR.to(device).eval()

## Helper Functions to extract keypoints and matches from the two models

In [None]:
def get_keypoints_with_conf_LoFTR(image_1, image_2, matcher, img_resize=840.0, conf_th=[0.75, 0.5, 0.25, 0], num_keypoints=1000, take_all=False):
    """Match two in-memory images with LoFTR and keep a confidence-filtered subset.

    Args:
        image_1, image_2: images as arrays (passed to ``load_torch_image`` as
            ``local_image``).
        matcher: callable taking ``{"image0", "image1"}`` grayscale tensors and
            returning a dict with ``'keypoints0'``, ``'keypoints1'`` and
            ``'confidence'`` tensors.
        img_resize: longer-side size the images are resized to before matching.
        conf_th: four confidence thresholds, expected in descending order; they
            split the matches into four bins.
        num_keypoints: maximum number of matches returned (the most confident
            ones are kept when there are more).
        take_all: when true, return every match without any filtering.

    Returns:
        ``(mkpts0, mkpts1, conf_mean, conf_th_final, scale_1, scale_2)``.
        Keypoints are in resized-image coordinates; ``scale_i`` is
        old_size / new_size, so ``mkpts * scale`` maps back to the original
        image. ``conf_mean`` is 0.0 when there are no matches to average
        (instead of NaN). ``conf_th_final`` is the selected threshold
        (0 when ``take_all`` is set).
    """
    # the scale value here is the new_size / old_size, different from SuperGlue, to make it the same, take the inverse
    image_1_tensor, scale_1 = load_torch_image(device, fname=None, local_image=image_1, size=img_resize)
    image_2_tensor, scale_2 = load_torch_image(device, fname=None, local_image=image_2, size=img_resize)
    scale_1 = float(1.0 / float(scale_1))
    scale_2 = float(1.0 / float(scale_2))

    input_dict = {"image0": K.color.rgb_to_grayscale(image_1_tensor),
                  "image1": K.color.rgb_to_grayscale(image_2_tensor)}

    with torch.no_grad():
        correspondences = matcher(input_dict)

    mkpts0_LoFTR = correspondences['keypoints0'].cpu().numpy()
    mkpts1_LoFTR = correspondences['keypoints1'].cpu().numpy()
    conf = correspondences['confidence'].cpu().numpy()

    if take_all:
        conf_mean_all = np.mean(conf) if conf.size else 0.0
        return mkpts0_LoFTR, mkpts1_LoFTR, conf_mean_all, 0, scale_1, scale_2

    # Count the matches falling in each confidence bin:
    # (th0, inf), (th1, th0], (th2, th1], [th3, th2].
    counts_above = [int(np.count_nonzero(conf > th)) for th in conf_th[:3]]
    count_all = int(np.count_nonzero(conf >= conf_th[3]))
    bin_sizes = [counts_above[0],
                 counts_above[1] - counts_above[0],
                 counts_above[2] - counts_above[1],
                 count_all - counts_above[2]]

    # Progressive selection: use the lower bound of the most populated bin as
    # the threshold, so many confident matches -> strict threshold, otherwise loose.
    conf_th_final = conf_th[int(np.argmax(bin_sizes))]

    keep = conf > conf_th_final
    conf_kept = conf[keep]
    conf_mean = np.mean(conf_kept) if conf_kept.size else 0.0

    # Too few matches for F estimation (needs more than 7): fall back to everything.
    if np.count_nonzero(keep) <= 7:
        keep = conf >= conf_th[3]
        conf_kept = conf[keep]
        conf_mean = np.mean(conf) if conf.size else 0.0

    mkpts0_LoFTR_final = mkpts0_LoFTR[keep]
    mkpts1_LoFTR_final = mkpts1_LoFTR[keep]

    # Since experiments show that sometimes LoFTR can create an excessive amount of matching points
    if len(mkpts0_LoFTR_final) > num_keypoints:
        # conf_kept is aligned with the selected keypoints (also after the
        # fallback above), so these indices are valid for both arrays.
        order = np.argsort(conf_kept)
        selected_indices = order[len(order) - num_keypoints:]
        mkpts0_LoFTR_final = mkpts0_LoFTR_final[selected_indices]
        mkpts1_LoFTR_final = mkpts1_LoFTR_final[selected_indices]

    print("final number of LoFTR points: " + str(len(mkpts1_LoFTR_final)))
    return mkpts0_LoFTR_final, mkpts1_LoFTR_final, conf_mean, conf_th_final, scale_1, scale_2


def get_keypoints_with_conf_SG(image_fpath_0, image_fpath_1, matcher, resize, resize_float, conf_th=[0.75, 0.5, 0.25, 0], take_all=False):
    """Match two image files with SuperPoint+SuperGlue and keep a confidence-filtered subset.

    Args:
        image_fpath_0, image_fpath_1: image paths handed to ``read_image``.
        matcher: callable taking ``{"image0", "image1"}`` and returning a dict
            with ``"keypoints0"``, ``"keypoints1"``, ``"matches0"`` and
            ``"matching_scores0"`` (batched; only element 0 is used).
        resize, resize_float: forwarded unchanged to ``read_image``.
        conf_th: four confidence thresholds, expected in descending order; they
            split the matches into four bins.
        take_all: when true, return every valid match without filtering.

    Returns:
        ``(mkpts0, mkpts1, conf_mean, conf_th_final, scales_0, scales_1)``.
        ``scales_*`` are whatever ``read_image`` returns; callers in this
        notebook multiply the keypoints by them to get back to original image
        coordinates (i.e. presumably original_size / new_size — confirm against
        ``read_image``). ``conf_mean`` is 0.0 when there are no matches to
        average (instead of NaN). ``conf_th_final`` is the selected threshold
        (0 when ``take_all`` is set).
    """
    # scale = original_size / new_size, different from the original SuperGlue.
    _image_0, inp_0, scales_0 = read_image(image_fpath_0, device, resize, 0, resize_float)
    _image_1, inp_1, scales_1 = read_image(image_fpath_1, device, resize, 0, resize_float)

    with torch.no_grad():
        pred_SG = matcher({"image0": inp_0, "image1": inp_1})

    pred_SG = {k: v[0].detach().cpu().numpy() for k, v in pred_SG.items()}
    kpts0_SG, kpts1_SG = pred_SG["keypoints0"], pred_SG["keypoints1"]

    # "matches0" holds, for every keypoint of image 0, the index of its match
    # in image 1, or -1 when it is unmatched.
    matches_mask_0_SG = pred_SG["matches0"]
    valid_0 = matches_mask_0_SG > -1
    mkpts0_SG = kpts0_SG[valid_0]
    mkpts1_SG = kpts1_SG[matches_mask_0_SG[valid_0]]
    conf = pred_SG["matching_scores0"][valid_0]

    if take_all:
        # Return the scales computed above (the previous code referenced
        # undefined names here and raised NameError).
        conf_mean_all = np.mean(conf) if conf.size else 0.0
        return mkpts0_SG, mkpts1_SG, conf_mean_all, 0, scales_0, scales_1

    # Count the matches falling in each confidence bin:
    # (th0, inf), (th1, th0], (th2, th1], [th3, th2].
    counts_above = [int(np.count_nonzero(conf > th)) for th in conf_th[:3]]
    count_all = int(np.count_nonzero(conf >= conf_th[3]))
    bin_sizes = [counts_above[0],
                 counts_above[1] - counts_above[0],
                 counts_above[2] - counts_above[1],
                 count_all - counts_above[2]]

    # Progressive selection: use the lower bound of the most populated bin as
    # the threshold, so many confident matches -> strict threshold, otherwise loose.
    conf_th_final = conf_th[int(np.argmax(bin_sizes))]

    keep = conf > conf_th_final
    conf_kept = conf[keep]
    conf_mean = np.mean(conf_kept) if conf_kept.size else 0.0

    # Too few matches for F estimation (needs more than 7): fall back to everything.
    if np.count_nonzero(keep) <= 7:
        keep = conf >= conf_th[3]
        conf_mean = np.mean(conf) if conf.size else 0.0

    mkpts0_SG_final = mkpts0_SG[keep]
    mkpts1_SG_final = mkpts1_SG[keep]

    print("final number of SG points: " + str(len(mkpts0_SG_final)))
    return mkpts0_SG_final, mkpts1_SG_final, conf_mean, conf_th_final, scales_0, scales_1

## Extract keypoints with LoFTR and SuperGlue and compute the F matrix

In [None]:
# Maps sample_id -> 3x3 fundamental matrix; reset here so re-running the cell is idempotent.
F_dict = {}

num_kpts_LoFTR = 1000
conf_th = [0.75, 0.5, 0.25, 0]

# Tunable constants of the pipeline.
MIN_IMAGE_DIM = 750        # lower clamp for the longer image side
MAX_IMAGE_DIM = 1250       # upper clamp for the longer image side
SG_UPSCALE_FACTOR = 1.6    # second SuperGlue pass runs at clamped max_dim * this factor
MIN_MATCHES_FOR_F = 7      # strictly more than this many matches are required to estimate F
NUM_PREVIEW_PAIRS = 3      # number of leading pairs that get timed and visualized


def _estimate_fundamental_matrix(pts0, pts1):
    """Estimate F with MAGSAC; return (F, boolean inlier mask).

    Falls back to an all-zero matrix and an all-False mask when there are too
    few matches or when OpenCV does not return a single 3x3 solution.
    """
    no_inliers = np.zeros(len(pts0), dtype=bool)
    if len(pts0) <= MIN_MATCHES_FOR_F:
        print("zero F matrix")
        return np.zeros((3, 3)), no_inliers

    F, mask = cv2.findFundamentalMat(pts0, pts1, cv2.USAC_MAGSAC, 0.2, 0.99999, 250000)
    # findFundamentalMat can return None (no model found) or a stacked result;
    # treat both as a failure instead of crashing the whole submission run.
    if F is None or mask is None or F.shape != (3, 3):
        print("zero F matrix")
        return np.zeros((3, 3)), no_inliers
    return F, mask.reshape(-1) > 0


def _draw_match_preview(pts0, pts1, inlier_mask, image_0, image_1):
    """Plot the matches between two images, highlighting the inliers."""
    # Full-resolution tensors are only needed for plotting, so build them here.
    image_0_tensor, _scale_0 = load_torch_image(device, fname=None, local_image=image_0, size=-1)
    image_1_tensor, _scale_1 = load_torch_image(device, fname=None, local_image=image_1, size=-1)
    num_matches = pts0.shape[0]
    draw_LAF_matches(
        KF.laf_from_center_scale_ori(torch.from_numpy(pts0).view(1, -1, 2),
                                     torch.ones(num_matches).view(1, -1, 1, 1),
                                     torch.ones(num_matches).view(1, -1, 1)),
        KF.laf_from_center_scale_ori(torch.from_numpy(pts1).view(1, -1, 2),
                                     torch.ones(num_matches).view(1, -1, 1, 1),
                                     torch.ones(num_matches).view(1, -1, 1)),
        torch.arange(num_matches).view(-1, 1).repeat(1, 2),
        K.tensor_to_image(image_0_tensor),
        K.tensor_to_image(image_1_tensor),
        inlier_mask,
        draw_dict={'inlier_color': (0.2, 1, 0.2),
                   'tentative_color': None,
                   'feature_color': (0.2, 0.5, 1), 'vertical': False})


for i, row in enumerate(test_samples):
    sample_id, batch_id, image_1_id, image_2_id = row

    # Load the images.
    st = time.time()
    image_fpath_1 = f'{src}/test_images/{batch_id}/{image_1_id}.png'
    image_fpath_2 = f'{src}/test_images/{batch_id}/{image_2_id}.png'
    image_1 = cv2.imread(image_fpath_1)
    image_2 = cv2.imread(image_fpath_2)

    # limit the image size, the input images shouldn't be too big or too small
    max_dim = max(image_1.shape[0], image_1.shape[1], image_2.shape[0], image_2.shape[1])
    max_dim = min(max(max_dim, MIN_IMAGE_DIM), MAX_IMAGE_DIM)

    # First use LoFTR to get a coarse match
    mkpts0_LoFTR_resize2, mkpts1_LoFTR_resize2, conf_mean_LoFTR_resize2, conf_th_LoFTR_resize2, \
        scale_1_LoFTR_resize2, scale_2_LoFTR_resize2 = get_keypoints_with_conf_LoFTR(
            image_1, image_2, matcher_LoFTR, img_resize=840, conf_th=conf_th,
            num_keypoints=num_kpts_LoFTR, take_all=False)

    # Second use SuperGlue: once at the original resolution ...
    mkpts0_SG, mkpts1_SG, conf_mean_SG, conf_th_SG, scale_1_SG, scale_2_SG = \
        get_keypoints_with_conf_SG(image_fpath_1, image_fpath_2, matcher_SG,
                                   resize=[-1, ], resize_float=resize_float,
                                   conf_th=conf_th, take_all=False)

    # ... and once more on an upscaled version of the images.
    mkpts0_SG_resize1, mkpts1_SG_resize1, conf_mean_SG_resize1, conf_th_SG_resize1, \
        scale_1_SG_resize1, scale_2_SG_resize1 = get_keypoints_with_conf_SG(
            image_fpath_1, image_fpath_2, matcher_SG,
            resize=[max_dim * SG_UPSCALE_FACTOR, ], resize_float=resize_float,
            conf_th=conf_th, take_all=False)

    # Map the keypoints back according to the image sizes
    mkpts0_LoFTR_s2 = mkpts0_LoFTR_resize2 * scale_1_LoFTR_resize2
    mkpts1_LoFTR_s2 = mkpts1_LoFTR_resize2 * scale_2_LoFTR_resize2

    mkpts0_SG_ns = mkpts0_SG * scale_1_SG
    mkpts0_SG_s1 = mkpts0_SG_resize1 * scale_1_SG_resize1
    mkpts1_SG_ns = mkpts1_SG * scale_2_SG
    mkpts1_SG_s1 = mkpts1_SG_resize1 * scale_2_SG_resize1

    # Pool the matches of all three passes into one set of correspondences.
    mkpts0_combined = np.concatenate((mkpts0_LoFTR_s2, mkpts0_SG_ns, mkpts0_SG_s1), axis=0)
    mkpts1_combined = np.concatenate((mkpts1_LoFTR_s2, mkpts1_SG_ns, mkpts1_SG_s1), axis=0)

    # Get the F-matrix (a zero matrix is stored when estimation is impossible).
    # `inliers` is always defined for the current pair, so the preview below
    # never sees a stale mask from a previous iteration.
    F, inliers = _estimate_fundamental_matrix(mkpts0_combined, mkpts1_combined)
    F_dict[sample_id] = F

    gc.collect()
    # torch.cuda.empty_cache()

    nd = time.time()

    if i < NUM_PREVIEW_PAIRS:
        print("Running time: ", nd - st, " s")
        # Nothing to draw when no match was found at all.
        if len(mkpts0_combined) > 0:
            _draw_match_preview(mkpts0_combined, mkpts1_combined, inliers, image_1, image_2)


with open('submission.csv', 'w') as f:
    f.write('sample_id,fundamental_matrix\n')
    for sample_id, F in F_dict.items():
        f.write(f'{sample_id},{FlattenMatrix(F)}\n')