In [None]:
import argparse
import glob
import os
import pickle
import sys
import warnings

import cv2

from delf import feature_io

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from scipy.spatial import cKDTree
from skimage.feature import plot_matches
from skimage.measure import ransac
from skimage.transform import AffineTransform
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow.python.platform import app

# Install the 'default' warnings filter action for all warning categories.
warnings.simplefilter('default')

In [None]:
# Directories holding the precomputed feature files (`<id>_loc.npy` /
# `<id>_desc.npy`). Each path ends with '/' because other cells build file
# names by plain string concatenation.
train_path = './delf-train/'
dev_path = './delf-dev/'
test_path = './delf-test/'
non_landmark_dev_path = './delf-nlm-dev/'

# Passed as `distance_upper_bound` to the KD-tree descriptor queries below.
_DISTANCE_THRESHOLD = 0.8

# Size images are resized to (via cv2.resize) before plotting correspondences.
input_shape = (384, 384)

# Expected number of distinct landmark ids in the training sample; checked
# against `train_info` in a later cell.
n_cat = 14942

In [None]:
# Training-sample metadata, indexed by the 'id' column.
train_info = pd.read_csv('train_info_red_sample_1.csv', index_col='id')
train_info.head()

In [None]:
# Metadata read from train.csv, indexed by the 'id' column; used below to look
# up the rows of the dev images.
train_info_full = pd.read_csv('train.csv', index_col='id')

In [None]:
# Sanity check: the number of distinct landmark ids in the loaded training
# sample should equal the configured `n_cat`; a mismatch only emits a warning.
n_cat_train = train_info['landmark_id'].nunique()
print(n_cat_train)
if n_cat_train != n_cat:
    warnings.warn('Warning: The training data is not compatible.')
    

In [None]:
# Collect the location files of the dev images and derive each image id from
# its file name ("<id>_loc.npy"). Using basename + suffix slicing instead of
# chained str.replace keeps this correct regardless of the path separator and
# even if the suffix text happens to occur inside an id.
dev_image_files = glob.glob(os.path.join(dev_path, '*_loc.npy'))
dev_image_ids = [
    os.path.basename(image_file)[:-len('_loc.npy')]
    for image_file in dev_image_files
]

# .copy() makes dev_info an independent frame, so adding a column below never
# triggers SettingWithCopyWarning or touches train_info_full.
dev_info = train_info_full.loc[dev_image_ids].copy()
dev_info['filename'] = pd.Series(dev_image_files, index=dev_image_ids)

# Optional filter: keep only landmarks that occur in the training sample.
#dev_info = dev_info[dev_info['landmark_id'].isin(train_info['landmark_id'])]
dev_info.head()

In [None]:
# Location files of the non-landmark dev images.
non_landmark_dev_image_files = glob.glob(non_landmark_dev_path + '*_loc.npy')

# One row per file; every row gets landmark id -1 and the positional row
# labels are converted to strings.
nlm_dev_df = pd.DataFrame(
    {'filename': non_landmark_dev_image_files}).assign(landmark_id=-1)
nlm_dev_df.index = list(map(str, nlm_dev_df.index))

print(nlm_dev_df.shape[0])
nlm_dev_df.head()

In [None]:
# Test metadata, indexed by the 'id' column.
test_info_full = pd.read_csv('test.csv', index_col='id')

# Collect the location files of the test images and derive each image id from
# its file name ("<id>_loc.npy"). basename + suffix slicing is independent of
# the path separator and of the suffix text occurring inside an id.
test_image_files = glob.glob(os.path.join(test_path, '*_loc.npy'))
test_image_ids = [
    os.path.basename(image_file)[:-len('_loc.npy')]
    for image_file in test_image_files
]

# .copy() makes test_info an independent frame, so adding a column below never
# triggers SettingWithCopyWarning or touches test_info_full.
test_info = test_info_full.loc[test_image_ids].copy()
test_info['filename'] = pd.Series(test_image_files, index=test_image_ids)

test_info.head()

In [None]:
# Candidate predictions to be scored; the first CSV column becomes the index.
# The verification cell below reads a 'pred_id' column from this frame.
candidates = pd.read_csv('delf-scored-candidates.csv', index_col=0)
candidates.head()

In [None]:
def load_delf_features(img_id, ddir):
    """Load the stored feature arrays of one image.

    Args:
        img_id: Image id; the files read are ``<img_id>_loc.npy`` and
            ``<img_id>_desc.npy``.
        ddir: Directory containing the files, with or without a trailing
            path separator.

    Returns:
        Tuple ``(locations, descriptions)`` with the contents of the two
        ``.npy`` files.

    Raises:
        OSError: If one of the files cannot be opened (raised by ``np.load``).
    """
    # os.path.join (rather than string concatenation) keeps the path valid
    # when `ddir` lacks a trailing separator.
    locations = np.load(os.path.join(ddir, '{}_loc.npy'.format(img_id)))
    descriptions = np.load(os.path.join(ddir, '{}_desc.npy'.format(img_id)))
    return locations, descriptions

In [None]:
# Smoke test of the loader on two dev images. The directory comes from the
# configured `dev_path` instead of a second hard-coded literal, so the cell
# follows any change made in the configuration cell.
a1, b1 = load_delf_features(dev_info.index[1], dev_path)
a2, b2 = load_delf_features(dev_info.index[2], dev_path)

In [None]:
def compare_imgs_1_preloaded(locations_1, 
                             descriptors_1, 
                             d1_tree, 
                             img_id_2, 
                             dir_2='delf-train/'):
    """Score image 2 against an image whose features are already in memory.

    Descriptors of image 2 are matched to their nearest neighbour in
    `d1_tree`; the matched locations are then verified with RANSAC using an
    affine model.

    Args:
        locations_1: Array of feature locations of image 1, one row each.
        descriptors_1: Descriptors of image 1. Not read here; kept so the
            signature stays unchanged.
        d1_tree: KD-tree used for the descriptor query. It is assumed to have
            been built from the descriptors of image 1, so that its "no
            neighbour" index equals ``locations_1.shape[0]``.
        img_id_2: Id of the second image, loaded via `load_delf_features`.
        dir_2: Directory holding the feature files of image 2.

    Returns:
        int: Number of RANSAC inliers, or 0 when either image has no
        features, fewer than three putative matches exist, or RANSAC finds
        no model.
    """
    # An affine transform is estimated from three point pairs.
    min_samples = 3

    num_features_1 = locations_1.shape[0]
    locations_2, descriptors_2 = load_delf_features(img_id_2, dir_2)
    num_features_2 = locations_2.shape[0]

    if num_features_1 == 0 or num_features_2 == 0:
        return 0

    _, indices = d1_tree.query(
        descriptors_2,
        distance_upper_bound=_DISTANCE_THRESHOLD)
    indices = np.asarray(indices)

    # The query marks "no neighbour within the bound" with the index n (the
    # number of points in the tree), so every other entry is a putative match.
    matched = indices != num_features_1

    # With fewer points than `min_samples` RANSAC cannot draw a sample and
    # raises instead of reporting "no model"; treat that as a score of zero.
    if np.count_nonzero(matched) < min_samples:
        return 0

    # Select feature locations for putative matches.
    locations_2_to_use = locations_2[matched]
    locations_1_to_use = locations_1[indices[matched]]

    # Perform geometric verification using RANSAC.
    _, inliers = ransac(
        (locations_1_to_use, locations_2_to_use),
        AffineTransform,
        min_samples=min_samples,
        residual_threshold=20,
        max_trials=1000)

    if inliers is None:
        return 0
    return int(np.sum(inliers))

In [None]:
def _read_rgb_image(path):
    """Read an image, resize it to `input_shape` and return RGB values / 255."""
    bgr = cv2.imread(path)
    # cv2.imread signals an unreadable file by returning None.
    if bgr is None:
        raise FileNotFoundError('Could not read image: ' + path)
    return cv2.cvtColor(cv2.resize(bgr, input_shape), cv2.COLOR_BGR2RGB) / 255.


def compare_imgs(img_id_1, 
                 img_id_2, 
                 dir_1, dir_2='delf-train/', 
                 plot=False, 
                 img_dir_1=None, 
                 img_dir_2=None):
    """Score two images by their geometrically verified feature matches.

    Features of both images are loaded with `load_delf_features`, descriptors
    of image 2 are matched to their nearest neighbour among the descriptors
    of image 1, and the matched locations are verified with RANSAC using an
    affine model.

    Args:
        img_id_1: Id of the first image.
        img_id_2: Id of the second image.
        dir_1: Directory holding the feature files of image 1.
        dir_2: Directory holding the feature files of image 2.
        plot: If True, draw the inlier correspondences on a new figure.
        img_dir_1: Directory with ``<img_id_1>.jpg``; required when plotting.
        img_dir_2: Directory with ``<img_id_2>.jpg``; required when plotting.

    Returns:
        int: Number of RANSAC inliers, or 0 when either image has no
        features, fewer than three putative matches exist, or RANSAC finds
        no model. Nothing is plotted when either image has no features.

    Raises:
        ValueError: If `plot` is True but an image directory is missing.
        FileNotFoundError: If an image needed for plotting cannot be read.
    """
    # An affine transform is estimated from three point pairs.
    min_samples = 3

    # Fail before doing any work instead of with a TypeError at plot time.
    if plot and (img_dir_1 is None or img_dir_2 is None):
        raise ValueError('img_dir_1 and img_dir_2 are required when plot=True')

    # Read features.
    locations_1, descriptors_1 = load_delf_features(img_id_1, dir_1)
    num_features_1 = locations_1.shape[0]

    locations_2, descriptors_2 = load_delf_features(img_id_2, dir_2)
    num_features_2 = locations_2.shape[0]

    if num_features_1 == 0 or num_features_2 == 0:
        return 0

    d1_tree = cKDTree(descriptors_1)
    _, indices = d1_tree.query(
        descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD)

    # The query marks "no neighbour within the bound" with the index n (the
    # number of points in the tree), so every other entry is a putative match.
    matched = indices != num_features_1

    # Select feature locations for putative matches.
    locations_2_to_use = locations_2[matched]
    locations_1_to_use = locations_1[indices[matched]]

    # Perform geometric verification using RANSAC. `inliers` stays None when
    # verification is skipped, so the plotting code below can always rely on
    # the name being defined.
    inliers = None
    if len(locations_1_to_use) >= min_samples:
        _, inliers = ransac(
            (locations_1_to_use, locations_2_to_use),
            AffineTransform,
            min_samples=min_samples,
            residual_threshold=20,
            max_trials=1000)

    score = 0 if inliers is None else int(np.sum(inliers))

    if plot:
        _, ax = plt.subplots()
        img_1 = _read_rgb_image(img_dir_1 + img_id_1 + '.jpg')
        img_2 = _read_rgb_image(img_dir_2 + img_id_2 + '.jpg')

        # No verified model means no correspondences to draw.
        if inliers is None:
            inlier_idxs = np.empty(0, dtype=int)
        else:
            inlier_idxs = np.nonzero(inliers)[0]
        plot_matches(
            ax,
            img_1,
            img_2,
            locations_1_to_use,
            locations_2_to_use,
            np.column_stack((inlier_idxs, inlier_idxs)),
            matches_color='b')
        ax.axis('off')
        ax.set_title('DELF correspondences')

    return score


In [None]:
def compare_features(locations_1, descriptors_1, locations_2, descriptors_2, d1_tree):
    """Score two in-memory feature sets by geometrically verified matches.

    Descriptors of set 2 are matched to their nearest neighbour in `d1_tree`;
    the matched locations are then verified with RANSAC using an affine model.

    Args:
        locations_1: Array of feature locations of image 1, one row each.
        descriptors_1: Descriptors of image 1. Not read here; kept so the
            signature stays unchanged.
        locations_2: Array of feature locations of image 2, one row each.
        descriptors_2: Descriptors of image 2, one row per location.
        d1_tree: KD-tree used for the descriptor query. It is assumed to have
            been built from the descriptors of image 1, so that its "no
            neighbour" index equals ``locations_1.shape[0]``.

    Returns:
        int: Number of RANSAC inliers, or 0 when either set is empty, fewer
        than three putative matches exist, or RANSAC finds no model.
    """
    # An affine transform is estimated from three point pairs.
    min_samples = 3

    num_features_1 = locations_1.shape[0]
    num_features_2 = locations_2.shape[0]

    if num_features_1 == 0 or num_features_2 == 0:
        return 0

    _, indices = d1_tree.query(
        descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD)
    indices = np.asarray(indices)

    # The query marks "no neighbour within the bound" with the index n (the
    # number of points in the tree), so every other entry is a putative match.
    matched = indices != num_features_1

    # With fewer points than `min_samples` RANSAC cannot draw a sample and
    # raises instead of reporting "no model"; treat that as a score of zero.
    if np.count_nonzero(matched) < min_samples:
        return 0

    # Select feature locations for putative matches.
    locations_2_to_use = locations_2[matched]
    locations_1_to_use = locations_1[indices[matched]]

    # Perform geometric verification using RANSAC.
    _, inliers = ransac(
        (locations_1_to_use, locations_2_to_use),
        AffineTransform,
        min_samples=min_samples,
        residual_threshold=20,
        max_trials=1000)

    if inliers is None:
        return 0
    return int(np.sum(inliers))

In [None]:
# Compare two randomly drawn training images of landmark 6933 and plot the
# verified correspondences. The two draws are independent, so the same image
# can be picked twice.
landmark_imgs = train_info[train_info['landmark_id'] == 6933]
first_img_id = landmark_imgs.sample(1).index[0]
second_img_id = landmark_imgs.sample(1).index[0]

score = compare_imgs(
    first_img_id,
    second_img_id,
    dir_1='delf-train/',
    dir_2='delf-train/',
    plot=True,
    img_dir_1='train-highres/',
    img_dir_2='train-highres/',
)
print(score)

In [None]:
def verify_hypothesis(pred_info, 
                      ref_info, 
                      pred_path, 
                      ref_path, 
                      n_imgs=8, 
                      i_start=0, 
                      io_n=100, 
                      checkpoint_n=100,
                      random_state=None):
    """Score each predicted landmark against reference images of that landmark.

    For every row of `pred_info` (from position `i_start` on), up to `n_imgs`
    reference images carrying the predicted landmark id are sampled from
    `ref_info` and compared with `compare_imgs_1_preloaded`. The results are
    written into `pred_info` in place as the columns 'delf_max_score',
    'delf_mean_score' and 'delf_m2_score'.

    Args:
        pred_info: DataFrame indexed by image id with a 'pred_id' column.
            Modified in place.
        ref_info: DataFrame indexed by image id with a 'landmark_id' column.
        pred_path: Directory with the feature files of the predicted images.
        ref_path: Directory with the feature files of the reference images.
        n_imgs: Maximum number of reference images sampled per prediction.
        i_start: Positional index of the first row to process (for resuming).
        io_n: Print a progress line every `io_n` rows.
        checkpoint_n: Write checkpoint files every `checkpoint_n` rows.
        random_state: Forwarded to ``DataFrame.sample``; set it to make the
            choice of reference images reproducible.

    Returns:
        The same `pred_info` object, with the score columns filled in.
    """
    for i, img_id in enumerate(pred_info.index):
        if i < i_start:
            continue

        # Single .loc lookup: no chained indexing, and the id is not upcast
        # together with the other columns of the row.
        hyp_id = pred_info.loc[img_id, 'pred_id']
        ref_candidates = ref_info[ref_info['landmark_id'] == hyp_id]
        ref_img_ids = ref_candidates.sample(
            min(n_imgs, len(ref_candidates)),
            random_state=random_state).index

        # One slot per reference image; a slot stays 0 if its comparison fails.
        scores = np.zeros(len(ref_img_ids))

        query_features = None
        if len(ref_img_ids) > 0:
            try:
                query_features = load_delf_features(img_id, pred_path)
            except Exception as err:
                # Best effort: keep going with zero scores, but say why.
                print('Error: could not read id:', img_id,
                      '-> Set scores to zero.', repr(err))

        if query_features is not None:
            locations_1, descriptors_1 = query_features
            d1_tree = cKDTree(descriptors_1)
            for j, ref_img_id in enumerate(ref_img_ids):
                try:
                    scores[j] = compare_imgs_1_preloaded(
                        locations_1, descriptors_1,
                        d1_tree, ref_img_id, ref_path)
                except Exception as err:
                    print('Comparison of', img_id, 'with', ref_img_id,
                          'failed -> Set score to zero.', repr(err))

        if scores.size > 0:
            max_score = np.max(scores)
            mean_score = np.mean(scores)
            # Mean over the best scores: the slice start is (-n_imgs) // 2,
            # i.e. the top half of `n_imgs`, rounded up for odd `n_imgs`.
            m2_score = np.mean(np.sort(scores)[-n_imgs // 2:])
        else:
            # No reference image exists for the hypothesised landmark;
            # np.max / np.mean on an empty array would fail or return NaN.
            max_score = mean_score = m2_score = 0.0

        pred_info.loc[img_id, 'delf_max_score'] = max_score
        pred_info.loc[img_id, 'delf_mean_score'] = mean_score
        pred_info.loc[img_id, 'delf_m2_score'] = m2_score

        if i % io_n == 0:
            print(i, '/', len(pred_info), ' -->',
                  hyp_id, max_score, mean_score, m2_score)
        if i % checkpoint_n == 0 and i > 0:
            print('Checkpoint ...')
            # The same frame is written to three files, presumably so that an
            # interruption during one write leaves an intact copy -- TODO
            # confirm the intent.
            pred_info.to_csv('verify-chp-1.csv')
            pred_info.to_csv('verify-chp-2.csv')
            pred_info.to_csv('verify-chp-3.csv')
            # Position of the last checkpointed row, usable as `i_start`.
            np.savetxt('delf-predict-last-i.txt', np.array([i]))

    return pred_info

In [None]:
# Score the candidate predictions for the test images against up to 32
# sampled training images of the predicted landmark. Rows before position
# 22000 are skipped (i_start), and `candidates` is updated in place.
verify_hypothesis(candidates, 
                  train_info, 
                  test_path, 
                  train_path, 
                  n_imgs=32, 
                  io_n=20, 
                  checkpoint_n=200, 
                  i_start=22000)

In [None]:
# Preview the candidates frame after the verification run.
candidates.head()

In [None]:
# Persist the scored candidates. This is the same file name the candidates
# were loaded from above, so the input file is overwritten.
candidates.to_csv('delf-scored-candidates.csv')