In [None]:
"""
Based on Baseline kernel for "Google Landmarks Recognition Challenge 2020".

First, ranks all training images by embedding similarity to each test image.
Then, performs geometric-verification and re-ranking on the `NUM_TO_RERANK`
most similar training images. For a given test image, each class' score is
the sum of the scores of re-ranked training images, and the predicted
class is the one with the highest aggregate score.

TODO:
* Resize image if needed
* kNN by cuML
* Faster RANSAC (better with GPU)
"""

import copy
import csv
import gc
import operator
import os
import pathlib
import shutil
import time

import numpy as np
import pandas as pd
import PIL
import pydegensac
from scipy import spatial
import random

# import multiprocessing as mp
# from multiprocessing import set_start_method
# set_start_method("spawn")
# from multiprocessing import get_context

# `gc` is used throughout the notebook to force collections after large
# objects (models, embedding matrices) are released.
import gc
gc.enable()

# Optional GPU pinning, kept for local (non-Kaggle) runs:
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
# # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import tensorflow as tf
# Switch the TF1 compatibility layer to resource variables and silence
# everything below ERROR level in the TF1-style logger.
tf.compat.v1.enable_resource_variables()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Record the TensorFlow version in the notebook output.
print(f"Tensorflow Version: {tf.__version__}")

In [None]:
def set_seed(seed: int = 42):
    """Seeds every random number source this notebook touches.

    Sets the `PYTHONHASHSEED` environment variable and seeds Python's
    `random` module, NumPy's global generator and TensorFlow's global
    generator with the same value.

    Args:
      seed: Integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)

# Seed all random number generators once, before anything else runs.
rand_seed = 1120
set_seed(rand_seed)

# Flip to True to run the pipeline on a tiny subset of images.
# debug_mode = True
debug_mode = False

# DEBUGGING PARAMS:
# `run()` compares the number of rows in train.csv against this value and
# copies the sample submission instead of predicting when they are equal.
NUM_PUBLIC_TRAIN_IMAGES = 1580470  # Used to detect if in session or re-run.
if debug_mode:
    # Only embed the first few images of each directory, and lower the
    # sentinel above so that the full prediction path is exercised.
    MAX_NUM_EMBEDDINGS = 10
    NUM_PUBLIC_TRAIN_IMAGES = 10
    # MAX_NUM_EMBEDDINGS = 100
    # NUM_PUBLIC_TRAIN_IMAGES = 100
else:
    # Any value <= 0 disables subsampling (the check is `MAX_NUM_EMBEDDINGS > 0`).
    MAX_NUM_EMBEDDINGS = -1  # Set to > 0 to subsample dataset while debugging.

In [None]:
# Dataset parameters (Kaggle layout: ../input/landmark-recognition-2020):
INPUT_DIR = os.path.join('..', 'input')
DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-recognition-2020')
TEST_IMAGE_DIR = os.path.join(DATASET_DIR, 'test')
TRAIN_IMAGE_DIR = os.path.join(DATASET_DIR, 'train')
TRAIN_LABELMAP_PATH = os.path.join(DATASET_DIR, 'train.csv')  # columns used: id, landmark_id

# Alternative paths for a local workstation run:
# INPUT_DIR = "/workspace/Kaggle/Landmark"
# DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-recognition-2020')
# TEST_IMAGE_DIR = os.path.join(DATASET_DIR, 'test')
# TRAIN_IMAGE_DIR = os.path.join(DATASET_DIR, 'train')
# TRAIN_LABELMAP_PATH = os.path.join(DATASET_DIR, 'train.csv')

# Retrieval & re-ranking parameters:
# NUM_TO_RERANK nearest training images (by cosine distance of the global
# embeddings) are kept per test image and passed to geometric verification.
# Previously tried settings:
# NUM_TO_RERANK = 4
# TOP_K = 4  # Number of retrieved images used to make prediction for a test image (<=NUM_TO_RERANK)
# NUM_TO_RERANK = 2
# TOP_K = 2  # Number of retrieved images used to make prediction for a test image (<=NUM_TO_RERANK)
NUM_TO_RERANK = 10
TOP_K = 10  # Number of retrieved images used to make prediction for a test image (<=NUM_TO_RERANK)


# RANSAC parameters:
# Inlier counts are clipped to MAX_INLIER_SCORE and divided by it to form the
# local score; the other three values are passed to pydegensac.findHomography().
MAX_INLIER_SCORE = 70
MAX_REPROJECTION_ERROR = 4.0
MAX_RANSAC_ITERATIONS = 10000
HOMOGRAPHY_CONFIDENCE = 0.995

# MAX_INLIER_SCORE = 20
# MAX_REPROJECTION_ERROR = 4.0
# MAX_RANSAC_ITERATIONS = 60000
# # MAX_RANSAC_ITERATIONS = 120000
# HOMOGRAPHY_CONFIDENCE = 0.99

## Pretrained Model Config

In [None]:
# R101-DELG model pretrained on GLDv2-clean
# SAVED_MODEL_DIR = "/workspace/Kaggle/Landmark/pretrained_delg/r101delg_gldv2clean_20200914"

# Names of the models whose global embeddings are combined; each name must be
# a key of `model_config`.
models = ["r50", "r101", "rn101af"]
# models = ["r50", "r101", "r50v1", "r101v1", "rn101af"]

# Per-model settings:
#   SAVED_MODEL_DIR: directory passed to `tf.saved_model.load`.
#   DELG_SCORE_THRESHOLD_TENSOR: fed to the local extractor as
#       `input_abs_thres`; only needed for models that extract local features.
#   global_weight: factor applied to the model's global embeddings before the
#       embeddings of all models are summed.
#   global_only: when True, no local feature extractor is built for the model.
#   is_tf2: when True, the global extractor is the model's `serving_default`
#       signature (dict output) instead of a pruned graph function.
model_config = {
    "r50": {
        "SAVED_MODEL_DIR":
        "../input/pretrained-delg/r50delg_gldv2clean_20200914",
        "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(454.6),  # score_threshold,
        "global_weight": 0.4,
        "global_only": False,
        "is_tf2": False,
    },
    "r101": {
        "SAVED_MODEL_DIR":
        "../input/pretrained-delg/r101delg_gldv2clean_20200914",
        "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(357.48),  # score_threshold
        "global_weight": 0.4,
        "global_only": False,
        "is_tf2": False,
    },
#     "r50v1": {
#         "SAVED_MODEL_DIR": "../input/pretrained-delg/r50delg_gld_20200814",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(175.0),  # score_threshold,
#         "global_weight": 0.2,
#         "global_only": True,
#         "is_tf2": False,
#     },
#     "r101v1": {
#         "SAVED_MODEL_DIR": "../input/pretrained-delg/r101delg_gld_20200814",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(166.1),  # score_threshold
#         "global_weight": 0.2,
#         "global_only": True,
#         "is_tf2": False,
#     },
    # Global-only model: no score threshold is configured because no local
    # extractor is created for it.
    "rn101af": {
        "SAVED_MODEL_DIR":
        "../input/pretrained-delg/rn101_af_gldv2clean_20200814",
        "global_weight": 0.2,
        "global_only": True,
        "is_tf2": True,
    }
}

# models = ["r50", "r101", "r50v1", "r101v1", "rn101af"]
# model_config = {
#     "r50": {
#         "SAVED_MODEL_DIR":
#         "/workspace/Kaggle/Landmark/pretrained_delg/r50delg_gldv2clean_20200914",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(454.6),  # score_threshold,
#         "global_weight": 0.2,
#         "global_only": False,
#         "is_tf2": False,
#     },
#     "r101": {
#         "SAVED_MODEL_DIR":
#         "/workspace/Kaggle/Landmark/pretrained_delg/r101delg_gldv2clean_20200914",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(357.48),  # score_threshold
#         "global_weight": 0.2,
#         "global_only": False,
#         "is_tf2": False,
#     },
#     "r50v1": {
#         "SAVED_MODEL_DIR":
#         "/workspace/Kaggle/Landmark/pretrained_delg/r50delg_gld_20200814",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(175.0),  # score_threshold,
#         "global_weight": 0.2,
#         "global_only": True,
#         "is_tf2": False,
#     },
#     "r101v1": {
#         "SAVED_MODEL_DIR":
#         "/workspace/Kaggle/Landmark/pretrained_delg/r101delg_gld_20200814",
#         "DELG_SCORE_THRESHOLD_TENSOR": tf.constant(166.1),  # score_threshold
#         "global_weight": 0.2,
#         "global_only": True,
#         "is_tf2": False,
#     },
#     "rn101af": {
#         "SAVED_MODEL_DIR":
#         "/workspace/Kaggle/Landmark/pretrained_delg/rn101_af_gldv2clean_20200814",
#         "global_weight": 0.2,
#         "global_only": True,
#         "is_tf2": True,
#     }
# }

# To stay below the time limit, the host only extracted local features for 3 scales.
# Fed to every extractor as `input_scales`.
DELG_IMAGE_SCALES_TENSOR = tf.convert_to_tensor([0.70710677, 1.0,
                                                 1.4142135])  # image_scales

# Local feature extraction:
# Fed to the local extractor as `input_max_feature_num`.
LOCAL_FEATURE_NUM_TENSOR = tf.constant(1000)  # max_feature_num

# Global feature extraction:
# Width of the embedding matrix allocated in `extract_global_features`.
NUM_EMBEDDING_DIMENSIONS = 2048
# Indices 0..len(scales)-1, i.e. every configured scale is listed;
# fed to the extractors as `input_global_scales_ind`.
GLOBAL_SCALES_IND_TENSOR = tf.range(
    len(DELG_IMAGE_SCALES_TENSOR))  # image_scales_ind

## Utility Functions

In [None]:
def to_hex(image_id) -> str:
    """Formats `image_id` as lowercase hexadecimal, zero-padded to 16 characters."""
    return format(image_id, '016x')


def get_image_path(subset, image_id):
    """Builds the on-disk path of an image inside `DATASET_DIR`.

    Images are sharded three directory levels deep by the first three
    characters of their 16-character hexadecimal id:
    `<DATASET_DIR>/<subset>/<c0>/<c1>/<c2>/<hex id>.jpg`.

    Args:
      subset: Sub-directory of the dataset (the callers pass 'train' or 'test').
      image_id: Integer image id.

    Returns:
      The image path as a string.
    """
    hex_id = to_hex(image_id)
    shard_dirs = hex_id[:3]
    return os.path.join(DATASET_DIR, subset, *shard_dirs, hex_id + '.jpg')


def load_image_tensor(image_path):
    """Loads the image at `image_path` as an RGB tensor.

    Args:
      image_path: Path of the image file (anything `PIL.Image.open` accepts).

    Returns:
      A tensor built from the NumPy array of the RGB-converted image.
    """
    # The notebook only does `import PIL`, which does not guarantee that the
    # `PIL.Image` submodule has been loaded; import it explicitly here.
    from PIL import Image

    # The context manager releases the file handle as soon as the pixels have
    # been copied into the array, which matters when iterating over the whole
    # training set.
    with Image.open(image_path) as image:
        rgb_pixels = np.array(image.convert('RGB'))

    return tf.convert_to_tensor(rgb_pixels)


def extract_global_features(image_root_dir, extractor, is_tf2=False):
    """Extracts embeddings for all the images in given `image_root_dir`.

    Args:
      image_root_dir: Directory that is searched recursively for `*.jpg` files.
      extractor: Callable global feature extractor. It is called with the
        keyword arguments `input_image`, `input_scales` and
        `input_global_scales_ind`.
      is_tf2: If True, `extractor` returns a dict and the descriptors are read
        from its "global_descriptors" entry; otherwise the last element of the
        returned sequence is used.

    Returns:
      A tuple `(ids, embeddings)`: `ids` is a list of integer image ids parsed
      from the hexadecimal file names, and `embeddings` is a float64 array of
      shape `(len(ids), NUM_EMBEDDING_DIMENSIONS)` whose i-th row belongs to
      `ids[i]`.
    """

    # `rglob` yields files in an unspecified order. This function is called
    # once per model and the caller combines the resulting matrices row by
    # row, so the order must be identical on every call: sort the paths.
    image_paths = sorted(pathlib.Path(image_root_dir).rglob('*.jpg'))

    # Optional subsampling while debugging.
    if MAX_NUM_EMBEDDINGS > 0:
        image_paths = image_paths[:MAX_NUM_EMBEDDINGS]
    num_embeddings = len(image_paths)

    print(
        f"Extracting global features from {num_embeddings} images (dim={NUM_EMBEDDING_DIMENSIONS}) ......"
    )

    ids = num_embeddings * [None]
    embeddings = np.empty((num_embeddings, NUM_EMBEDDING_DIMENSIONS))

    for i, image_path in enumerate(image_paths):
        # File names are the hexadecimal image ids.
        ids[i] = int(image_path.name.split('.')[0], 16)

        image_tensor = load_image_tensor(image_path)

        output = extractor(input_image=image_tensor,
                           input_scales=DELG_IMAGE_SCALES_TENSOR,
                           input_global_scales_ind=GLOBAL_SCALES_IND_TENSOR)
        if is_tf2:
            raw_global_descriptors = output["global_descriptors"]
        else:
            raw_global_descriptors = output[-1]

        # Normalize each descriptor row, sum the rows into a single vector
        # (presumably one row per image scale), then normalize the result.
        embedding_tensor = tf.nn.l2_normalize(raw_global_descriptors,
                                              axis=1,
                                              name='l2_normalization')
        embedding_tensor = tf.reduce_sum(embedding_tensor,
                                         axis=0,
                                         name='sum_pooling')
        embeddings[i, :] = tf.nn.l2_normalize(
            embedding_tensor, axis=0, name='final_l2_normalization').numpy()

    return ids, embeddings


def extract_local_features(model_name, image_path, extractor):
    """Computes keypoints and local descriptors for one image.

    Args:
      model_name: Key into `model_config`; selects the score threshold tensor
        passed to the extractor as `input_abs_thres`.
      image_path: Path of the image to process.
      extractor: Callable local feature extractor whose first two outputs are
        the boxes and the features.

    Returns:
      A tuple `(keypoints, local_descriptors)` of NumPy arrays: the keypoints
      are the centers of the detected boxes, the descriptors are the
      row-wise L2-normalized features.
    """
    score_threshold = model_config[model_name]["DELG_SCORE_THRESHOLD_TENSOR"]

    outputs = extractor(input_image=load_image_tensor(image_path),
                        input_scales=DELG_IMAGE_SCALES_TENSOR,
                        input_global_scales_ind=GLOBAL_SCALES_IND_TENSOR,
                        input_max_feature_num=LOCAL_FEATURE_NUM_TENSOR,
                        input_abs_thres=score_threshold)
    boxes = outputs[0]
    features = outputs[1]

    # Box center = mean of columns (0, 1) and columns (2, 3). Shape: (N, 2)
    first_corners = tf.gather(boxes, [0, 1], axis=1)
    second_corners = tf.gather(boxes, [2, 3], axis=1)
    centers = tf.divide(tf.add(first_corners, second_corners), 2.0)
    keypoints = centers.numpy()

    # Shape: (N, 128)
    normalized_features = tf.nn.l2_normalize(features,
                                             axis=1,
                                             name='l2_normalization')
    local_descriptors = normalized_features.numpy()

    return keypoints, local_descriptors

In [None]:
def compute_putative_matching_keypoints(test_keypoints,
                                        test_descriptors,
                                        train_keypoints,
                                        train_descriptors,
                                        max_distance=0.9):
    """Finds matches from `test_descriptors` to KD-tree of `train_descriptors`.

    Each test descriptor is matched to its nearest train descriptor, provided
    that neighbor lies within `max_distance`.

    Args:
      test_keypoints: Array with one keypoint row per test descriptor.
      test_descriptors: Array with one descriptor row per test keypoint.
      train_keypoints: Array with one keypoint row per train descriptor.
      train_descriptors: Array with one descriptor row per train keypoint.
      max_distance: Upper bound on the descriptor distance of a match.

    Returns:
      A tuple `(test_matching_keypoints, train_matching_keypoints)` of arrays
      with the same number of rows; row i of each array forms one putative
      match. With no matches both arrays have zero rows (and keep the column
      count of the inputs).
    """

    train_descriptor_tree = spatial.cKDTree(train_descriptors)
    _, matches = train_descriptor_tree.query(test_descriptors,
                                             distance_upper_bound=max_distance)

    # `query` marks "no neighbor within the bound" with an index equal to the
    # number of points in the tree, i.e. the number of train keypoints.
    train_kp_count = train_keypoints.shape[0]
    has_match = matches != train_kp_count

    # Boolean/fancy indexing selects the same rows, in the same order, as a
    # per-row Python loop would, without the interpreter overhead.
    test_matching_keypoints = test_keypoints[has_match]
    train_matching_keypoints = train_keypoints[matches[has_match]]

    return test_matching_keypoints, train_matching_keypoints


def compute_num_inliers(test_keypoints, test_descriptors, train_keypoints,
                        train_descriptors):
    """Counts the RANSAC inliers between a test and a train image.

    Putative keypoint matches are computed first; a homography is then fitted
    to them and the matches flagged as inliers are counted.

    Returns:
      The inlier count as an int, or 0 when there are too few putative
      matches or the homography estimation raises `LinAlgError`.
    """
    matched_test_kp, matched_train_kp = compute_putative_matching_keypoints(
        test_keypoints, test_descriptors, train_keypoints, train_descriptors)

    # Min keypoints supported by `pydegensac.findHomography()`
    num_putative_matches = matched_test_kp.shape[0]
    if num_putative_matches <= 4:
        return 0

    try:
        homography_and_mask = pydegensac.findHomography(
            matched_test_kp, matched_train_kp, MAX_REPROJECTION_ERROR,
            HOMOGRAPHY_CONFIDENCE, MAX_RANSAC_ITERATIONS)
    except np.linalg.LinAlgError:  # When det(H)=0, can't invert matrix.
        return 0

    inlier_mask = homography_and_mask[1]
    # `astype` already returns a fresh array, so the mask itself is untouched.
    inlier_total = inlier_mask.astype(np.float32).sum()
    return int(inlier_total)


def get_total_score(num_inliers, global_score):
    """Combines the geometric-verification and the global similarity scores.

    The inlier count is clipped to `MAX_INLIER_SCORE` and divided by it, then
    added to `global_score`.
    """
    if MAX_INLIER_SCORE < num_inliers:
        clipped_inliers = MAX_INLIER_SCORE
    else:
        clipped_inliers = num_inliers
    return clipped_inliers / MAX_INLIER_SCORE + global_score

def rescore_and_rerank_by_num_inliers(test_image_id,
                                      train_ids_labels_and_scores,
                                      local_models,
                                      model_name="r50"):
    """Returns rescored and sorted training images by local feature extraction.

    For every retrieved training image, local features are extracted and
    geometrically verified against the test image; the resulting inlier count
    is combined with the global score by `get_total_score`.

    Args:
      test_image_id: Integer id of the test image.
      train_ids_labels_and_scores: List of `(train_image_id, label,
        global_score)` tuples. The list is updated and sorted IN PLACE.
      local_models: Dict mapping model names to local feature extractors.
      model_name: Key of the extractor in `local_models` (and of its settings
        in `model_config`) used for the verification. Defaults to "r50".

    Returns:
      The same list object, now holding `(train_image_id, label, total_score)`
      tuples sorted by descending total score.
    """

    test_image_path = get_image_path('test', test_image_id)

    extractor = local_models[model_name]
    # The test image features are extracted once and reused for every
    # candidate training image.
    test_keypoints, test_descriptors = extract_local_features(
        model_name, test_image_path, extractor)

    for i, (train_image_id, label,
            global_score) in enumerate(train_ids_labels_and_scores):
        train_image_path = get_image_path('train', train_image_id)
        train_keypoints, train_descriptors = extract_local_features(
            model_name, train_image_path, extractor)

        num_inliers = compute_num_inliers(test_keypoints, test_descriptors,
                                          train_keypoints, train_descriptors)
        total_score = get_total_score(num_inliers, global_score)
        # Replacing the element at the current index does not change the
        # list length, so it is safe while enumerating.
        train_ids_labels_and_scores[i] = (train_image_id, label, total_score)

    train_ids_labels_and_scores.sort(key=lambda x: x[2], reverse=True)

    return train_ids_labels_and_scores

# def rescore_and_rerank_by_num_inliers(test_image_id,
#                                       train_ids_labels_and_scores,
#                                       local_models):
#     """Returns rescored and sorted training images by local feature extraction."""

#     test_image_path = get_image_path('test', test_image_id)
    
#     model_test_embeds = []
#     for i, model_name in enumerate(models):
#         local_extractor = local_models[model_name]
#         keypoints, descriptors = extract_local_features(
#             model_name, test_image_path, local_extractor)
        
#         model_test_embeds.append((keypoints, descriptors))
    
#     test_keypoints = []
#     test_descriptors = []
#     for i in range(len(models)):
#         test_keypoints.append(model_test_embeds[i][0])
#         test_descriptors.append(model_test_embeds[i][1])
#     test_keypoints = np.concatenate(test_keypoints, axis=0)
#     test_descriptors = np.concatenate(test_descriptors, axis=0)
#     # print(test_keypoints.shape, test_descriptors.shape)

#     for i in range(len(train_ids_labels_and_scores)):
#         train_image_id, label, global_score = train_ids_labels_and_scores[i]

#         train_image_path = get_image_path('train', train_image_id)
        
#         model_train_embeds = []
#         for i, model_name in enumerate(models):
#             local_extractor = local_models[model_name]
#             keypoints, descriptors = extract_local_features(
#                 model_name, train_image_path, local_extractor)

#             model_train_embeds.append((keypoints, descriptors))

#         train_keypoints = []
#         train_descriptors = []
#         for i in range(len(models)):
#             train_keypoints.append(model_train_embeds[i][0])
#             train_descriptors.append(model_train_embeds[i][1])
#         train_keypoints = np.concatenate(train_keypoints, axis=0)
#         train_descriptors = np.concatenate(train_descriptors, axis=0)
        
#         # print(train_keypoints.shape, train_descriptors.shape)

#         num_inliers = compute_num_inliers(test_keypoints, test_descriptors,
#                                           train_keypoints, train_descriptors)
#         total_score = get_total_score(num_inliers, global_score)
#         train_ids_labels_and_scores[i] = (train_image_id, label, total_score)

#     train_ids_labels_and_scores.sort(key=lambda x: x[2], reverse=True)

#     return train_ids_labels_and_scores

## Generate Predictions

In [None]:
def get_global_extractor(model, is_tf2=False):
    """Returns the callable that computes global descriptors with `model`.

    Args:
      model: Loaded saved model.
      is_tf2: If True, the model's 'serving_default' signature is returned.
        Otherwise the model graph is pruned to a function that maps the image,
        scales and global-scale-index inputs to 'global_descriptors:0'.
    """
    if is_tf2:
        return model.signatures['serving_default']

    # Global feature extractor graph
    input_tensor_names = [
        'input_image:0',
        'input_scales:0',
        'input_global_scales_ind:0',
    ]
    output_tensor_names = ['global_descriptors:0']
    return model.prune(input_tensor_names, output_tensor_names)


def get_local_extractor(model):
    """Returns the callable that computes local features with `model`.

    The model graph is pruned to a function that takes the image, scales,
    global-scale-index, max-feature-count and score-threshold inputs and
    returns 'boxes:0' and 'features:0'.
    """
    # Local feature extractor graph
    input_tensor_names = [
        'input_image:0',
        'input_scales:0',
        'input_global_scales_ind:0',
        'input_max_feature_num:0',
        'input_abs_thres:0',
    ]
    output_tensor_names = ['boxes:0', 'features:0']
    return model.prune(input_tensor_names, output_tensor_names)

In [None]:
def load_labelmap():
    """Reads `TRAIN_LABELMAP_PATH` into a dict.

    Returns:
      Dict mapping the 'id' column to the 'landmark_id' column; both are kept
      as the strings found in the csv file.
    """
    labelmap = {}
    with open(TRAIN_LABELMAP_PATH, mode='r') as csv_file:
        for row in csv.DictReader(csv_file):
            labelmap[row['id']] = row['landmark_id']

    return labelmap


def get_prediction_map(test_ids, train_ids_labels_and_scores):
    """Picks the best-scoring label for every test image.

    Args:
      test_ids: Integer test image ids.
      train_ids_labels_and_scores: For each test image (same order as
        `test_ids`), a ranked list of `(train_id, label, score)` tuples.

    Returns:
      Dict mapping the hexadecimal test id to `{'score': ..., 'class': ...}`,
      where the class is the label with the highest summed score among the
      first `TOP_K` ranked training images.
    """
    prediction_map = {}

    for test_index, test_id in enumerate(test_ids):
        top_ranked = train_ids_labels_and_scores[test_index][:TOP_K]

        # Sum the scores of the Top-K training images per label.
        score_by_label = {}
        for _, label, score in top_ranked:
            score_by_label[label] = score_by_label.get(label, 0) + score

        best_label = max(score_by_label, key=score_by_label.get)

        prediction_map[to_hex(test_id)] = {
            'score': score_by_label[best_label],
            'class': best_label,
        }

    return prediction_map


def get_predictions(labelmap, global_models, local_models):
    """Gets predictions using embedding similarity and local feature reranking.

    Steps:
      1. Extract global embeddings of all test and train images with every
         model in `models` and combine them as a weighted sum, using each
         model's own `global_weight`.
      2. For every test image, retrieve the `NUM_TO_RERANK` nearest training
         images by cosine distance.
      3. Re-score and re-rank those candidates with geometric verification.

    Args:
      labelmap: Dict mapping hexadecimal train image ids to labels.
      global_models: Dict mapping model names to global feature extractors.
      local_models: Dict mapping model names to local feature extractors.

    Returns:
      A tuple `(pre_verification_predictions, post_verification_predictions)`
      of prediction maps as produced by `get_prediction_map`, computed before
      and after the geometric verification step.
    """

    test_ids, train_ids = None, None
    model_test_embeds, model_train_embeds = [], []
    for model_name in models:
        global_extractor = global_models[model_name]
        is_tf2 = model_config[model_name]["is_tf2"]
        test_ids, model_test_embeddings = extract_global_features(
            TEST_IMAGE_DIR, global_extractor, is_tf2)
        train_ids, model_train_embeddings = extract_global_features(
            TRAIN_IMAGE_DIR, global_extractor, is_tf2)

        model_test_embeds.append(model_test_embeddings)
        model_train_embeds.append(model_train_embeddings)

        del global_extractor
        gc.collect()
        tf.keras.backend.clear_session()

    # Weighted sum of the per-model embeddings. The weight has to be looked
    # up with the name of the model that produced the embeddings; pairing the
    # names with the matrices via `zip` guarantees that.
    test_embeddings = np.zeros_like(model_test_embeds[0])
    train_embeddings = np.zeros_like(model_train_embeds[0])
    for model_name, model_test_embeddings, model_train_embeddings in zip(
            models, model_test_embeds, model_train_embeds):
        global_weight = model_config[model_name]["global_weight"]
        test_embeddings += global_weight * model_test_embeddings
        train_embeddings += global_weight * model_train_embeddings
    # test_embeddings /= len(models)
    # train_embeddings /= len(models)

    # The per-model matrices are no longer needed once they are combined.
    del model_test_embeds, model_train_embeds
    del model_test_embeddings, model_train_embeddings
    gc.collect()

    num_test = test_embeddings.shape[0]
    num_train = train_embeddings.shape[0]
    # `np.argpartition` requires kth < number of elements, so never ask for
    # more candidates than there are training images (e.g. in debug mode).
    num_to_rerank = min(NUM_TO_RERANK, num_train)

    train_ids_labels_and_scores = [None] * num_test

    # Using (slow) for-loop, as distance matrix doesn't fit in memory.
    for test_index in range(num_test):
        # Extract nearest training images
        distances = spatial.distance.cdist(
            test_embeddings[np.newaxis, test_index, :], train_embeddings,
            'cosine')[0]
        if num_to_rerank < num_train:
            partition = np.argpartition(distances,
                                        num_to_rerank)[:num_to_rerank]
        else:
            # Every training image is a candidate.
            partition = np.arange(num_train)

        nearest = sorted([(train_ids[p], distances[p]) for p in partition],
                         key=lambda x: x[1])

        # Cosine similarity (1 - distance) is used as the global score.
        train_ids_labels_and_scores[test_index] = [
            (train_id, labelmap[to_hex(train_id)], 1. - cosine_distance)
            for train_id, cosine_distance in nearest
        ]

    del test_embeddings
    del train_embeddings
    del labelmap
    gc.collect()

    pre_verification_predictions = get_prediction_map(
        test_ids, train_ids_labels_and_scores)

    for test_index, test_id in enumerate(test_ids):
        train_ids_labels_and_scores[
            test_index] = rescore_and_rerank_by_num_inliers(
                test_id, train_ids_labels_and_scores[test_index], local_models)

    post_verification_predictions = get_prediction_map(
        test_ids, train_ids_labels_and_scores)

    return pre_verification_predictions, post_verification_predictions

In [None]:
def save_submission_csv(predictions=None):
    """Saves optional `predictions` as submission.csv.

    The csv has columns {id, landmarks}. The landmarks column is a string
    containing the label and score for the id, separated by a single space.

    If `predictions` is None (default), submission.csv is copied from
    sample_submission.csv in `DATASET_DIR`.

    Args:
      predictions: Optional dict of image ids to dicts with keys {class, score}.
    """

    if predictions is None:
        # Dummy submission!
        shutil.copyfile(os.path.join(DATASET_DIR, 'sample_submission.csv'),
                        'submission.csv')
        return

    # `newline=''` lets the csv module control line endings itself, which
    # avoids doubled line breaks on platforms that translate newlines.
    with open('submission.csv', 'w', newline='') as submission_csv:
        csv_writer = csv.DictWriter(submission_csv,
                                    fieldnames=['id', 'landmarks'])
        csv_writer.writeheader()
        for image_id, prediction in predictions.items():
            label = prediction['class']
            score = prediction['score']
            csv_writer.writerow({
                'id': image_id,
                'landmarks': f'{label} {score}'
            })

In [None]:
def run():
    """Loads the models, computes the predictions and writes submission.csv.

    When train.csv holds exactly `NUM_PUBLIC_TRAIN_IMAGES` rows, the sample
    submission is copied instead of running the prediction pipeline.
    """
    # Load model topology graph and pretrained weights
    global_models, local_models = {}, {}
    for model_name in models:
        config = model_config[model_name]
        delg_model = tf.saved_model.load(config["SAVED_MODEL_DIR"])

        global_models[model_name] = get_global_extractor(
            delg_model, config["is_tf2"])
        # Models flagged as global-only never take part in re-ranking.
        if not config["global_only"]:
            local_models[model_name] = get_local_extractor(delg_model)

        del delg_model
        gc.collect()
        tf.keras.backend.clear_session()

    labelmap = load_labelmap()
    num_training_images = len(labelmap)
    print(f'Found {num_training_images} training images.')

    if num_training_images != NUM_PUBLIC_TRAIN_IMAGES:
        predictions = get_predictions(labelmap, global_models, local_models)
        post_verification_predictions = predictions[1]
        save_submission_csv(post_verification_predictions)
    else:
        print(
            f'Found {NUM_PUBLIC_TRAIN_IMAGES} training images. Copying sample submission.'
        )
        save_submission_csv()

    # Release everything that is still referenced from this function.
    del labelmap, global_models, local_models
    gc.collect()
    tf.keras.backend.clear_session()

In [None]:
%%time

# Execute the whole pipeline; this writes submission.csv to the working directory.
run()

### Verify Submission File

In [None]:
# Read the file that `run()` just wrote and show its shape and first rows
# as a quick sanity check.
submit_df = pd.read_csv("submission.csv", engine="c")
print(submit_df.shape)
submit_df.head()

## EOF
