In [1]:
# Swap the preinstalled torch for the torch 1.6.0 (CUDA 10.1) wheel and install the
# remaining dependencies from local paths under /kaggle/input.
print('Installing packages')
# allennlp is removed together with torch — presumably because it depends on the
# old torch build; TODO confirm.
!pip uninstall -y allennlp torch
!pip install /kaggle/input/torch16/torch-1.6.0cu101-cp37-cp37m-linux_x86_64.whl
# Local source trees / packages (timm, yacs, the landmark library, segmentation, SuperPoint).
!pip install /kaggle/input/pytorch-image-models/
!pip install /kaggle/input/yacs-yet-another-configuration-system/yacs
!pip install /kaggle/input/landmark-lib/
!pip install /kaggle/input/semantic-segmentation-pytorch/
!pip install /kaggle/input/superpoint-pytorch/
print('Installed packages')

Installing packages
Found existing installation: allennlp 1.0.0
Uninstalling allennlp-1.0.0:
  Successfully uninstalled allennlp-1.0.0
Found existing installation: torch 1.5.1
Uninstalling torch-1.5.1:
  Successfully uninstalled torch-1.5.1
Processing /kaggle/input/torch16/torch-1.6.0cu101-cp37-cp37m-linux_x86_64.whl
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 1.5.1
    Uninstalling torch-1.5.1:
      Successfully uninstalled torch-1.5.1
Successfully installed torch-1.6.0+cu101
Processing /kaggle/input/pytorch-image-models
Building wheels for collected packages: timm
  Building wheel for timm (setup.py) ... [?25l- \ | done
[?25h  Created wheel for timm: filename=timm-0.2.1-py3-none-any.whl size=228223 sha256=65a4cb99fd2c7e474a0ed6865c7653c914c81cd02d439fdccbdf99f7b8d32498
  Stored in directory: /root/.cache/pip/wheels/6e/a1/9f/ba52506e62a11fa95ed7b1efbb42f9e84c2d5e7401469da686
Successfully built tim

In [2]:
!mkdir sample
!mkdir sample/0
!mkdir sample/0/0
!mkdir sample/0/0/0
!cp /kaggle/input/landmark-recognition-2020/train/0/0/0/* sample/0/0/0/

In [3]:
import copy
import csv
import os
os.environ["LRU_CACHE_CAPACITY"] = "3"
import gc
import operator
import pathlib
import shutil
import math
import random

import numpy as np
import cv2
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
import PIL
from PIL import Image
import pydegensac
from scipy import spatial
import torch; print(torch.__version__)
torch.set_grad_enabled(False)
from torch.cuda.amp import autocast
import torch.nn.functional as F
import torchvision

from albumentations import Compose, Normalize
from albumentations.pytorch import ToTensorV2  
from albumentations.augmentations import functional

1.6.0+cu101


In [4]:
# Seed PyTorch, NumPy and Python's `random` module with the same value.
seed = 42
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(seed)
del _seed_fn

In [5]:
import sys
sys.path.append("/kaggle/input/landmark-lib/")
sys.path.append('/kaggle/input/superpoint-pytorch/')

import cvcore
from cvcore.config import get_cfg
from cvcore.data.landmark_dataset import thumbnail
from cvcore.modeling.meta_arch.landmark_cnn import build_landmark_cls_model 
from cvcore.modeling.meta_arch.landmark_cnn import GEMPoolCNN, DELG

In [6]:
from superpoint_pytorch.models.superpoint import SuperPoint as SP
from superpoint_pytorch.models.superglue import SuperGlue as SG

In [7]:
def load_weight(model, weight):
    """Load the checkpoint file `weight` into `model` after cleaning up its keys.

    The checkpoint is read onto the CPU and must contain a 'state_dict' entry.
    Before the state dict is handed to `model.load_state_dict`:
      * entries whose key ends with 'seesaw_loss.N' are dropped,
      * a leading 'module.' is stripped from each key,
      * every "se_module" substring in a key is replaced by "se".
    """
    print(f"=> loading checkpoint {weight}")
    checkpoint = torch.load(weight, "cpu")
    raw_state = checkpoint.pop('state_dict')

    wrapper_prefix = 'module.'

    def clean_key(key):
        # Strip the wrapper prefix once, then apply the "se_module" -> "se" rename.
        if key.startswith(wrapper_prefix):
            key = key[len(wrapper_prefix):]
        return key.replace("se_module", "se")

    cleaned_state = {
        clean_key(key): tensor
        for key, tensor in raw_state.items()
        if not key.endswith('seesaw_loss.N')
    }

    model.load_state_dict(cleaned_state)
    print('=> loaded state dict')

In [8]:
# Dataset locations, expressed relative to the working directory.
INPUT_DIR = os.path.join('..', 'input')
DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-recognition-2020')

# Test images, training images and the training label CSV all live under DATASET_DIR.
TEST_IMAGE_DIR, TRAIN_IMAGE_DIR, TRAIN_LABELMAP_PATH = (
    os.path.join(DATASET_DIR, leaf) for leaf in ('test', 'train', 'train.csv')
)

In [9]:
# DEBUGGING PARAMS:
# Compared against the number of rows in the training label map to decide whether
# this is an interactive session or a re-run.
NUM_PUBLIC_TRAIN_IMAGES = 1580470 # Used to detect if in session or re-run.
# The extraction code applies the cap whenever this value is > 0; -1 disables it.
MAX_NUM_EMBEDDINGS = -1  # Set to > 0 to subsample dataset while debugging. # TODO:

# Retrieval & re-ranking parameters:
NUM_TO_RERANK = 5 # Number of nearest training images kept per test image for re-ranking.
TOP_K = 3 # Number of retrieved images used to make prediction for a test image.

# RANSAC parameters:
# NOTE(review): of the constants below, only MAX_INLIER_SCORE is read by the cells
# visible in this notebook; the other three look like leftovers — confirm before relying on them.
# MAX_INLIER_SCORE = 50 # public LB: 0.5548
# MAX_INLIER_SCORE = 60 # public LB: 0.5591
MAX_INLIER_SCORE = 200 # Cap (and divisor) applied to the inlier count when scoring.
MAX_REPROJECTION_ERROR = 4.0
MAX_RANSAC_ITERATIONS = 1000
HOMOGRAPHY_CONFIDENCE = 0.99

In [10]:
# DELF Config
# NOTE(review): none of the DELF values in this group are read by the cells visible in
# this notebook; they appear to be leftovers from a DELF-based pipeline — confirm.
stride_factor = 1 # 2
rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]
feature_depth = 128 # nhannt: 128, delf: 1024
scales = (1,) # 1 / math.sqrt(2), 1, math.sqrt(2)
ABS_THRESHOLD = 175 # How to choose this number
LOCAL_FEATURE_NUM_TENSOR = 1000
NMS_IOU = 1.

# Global feature extraction:
# Size of the last axis of the embeddings array allocated for every model.
NUM_EMBEDDING_DIMENSIONS = 512

In [11]:
# augmentation
# Preprocessing pipeline: Normalize() with its default arguments (presumably ImageNet
# mean/std — TODO confirm against the albumentations version in use), followed by
# conversion of the image to a torch tensor.
aug = Compose([
    Normalize(),
    ToTensorV2(),
])

## Global features

In [12]:
def get_model(config_path, weight_path):
    """Build a global-feature extractor from a config file and a checkpoint.

    Args:
        config_path: Config file merged on top of the defaults from `get_cfg()`.
        weight_path: Checkpoint file passed to `load_weight`.

    Returns:
        (model, interp): the extractor, switched to eval mode and moved to the GPU,
        and the `DATA.INTERP` value of the merged config.
    """
    cfg = get_cfg()
    cfg.merge_from_file(config_path)
    # avoid download imagenet weight
    cfg.MODEL.BACKBONE.PRETRAINED = False
    # match training interpolation
    interp = cfg.DATA.INTERP

    class GEMPoolExtractor(GEMPoolCNN):
        """GEMPoolCNN variant whose forward pass returns the pooled features only."""

        def forward(self, images):
            with autocast():
                feature_maps = self.backbone(images)
                # Exactly two feature maps are expected; only the second one is pooled.
                _, last_map = [feature_maps[name] for name in self.in_features]
                return self.pool(last_map)

    extractor = GEMPoolExtractor(cfg)
    load_weight(extractor, weight_path)

    # The classification head is dropped after the weights are loaded.
    del extractor.cls_head
    gc.collect()

    extractor.eval()
    return extractor.cuda(), interp


# (config file, checkpoint file) for each ensemble member, in ensemble order.
_MODEL_SOURCES = (
    ('/kaggle/input/landmark-lib/configs/google-landmark/resnext101_32x4d.yaml',
     '/kaggle/input/best-resnext101-32x4d-cluster/best_resnext101_32x4d_clusterv2.pth'),
    ('/kaggle/input/best-seresnet101-cluster/seresnet101.yaml',
     '/kaggle/input/best-seresnet101-cluster/best_seresnet101_clusterv2.pth'),
    ('/kaggle/input/effnetb6/b6.yaml',
     '/kaggle/input/effnetb6/best_b6_clusterv2.pth'),
    ('/kaggle/input/res101/resnet101.yaml',
     '/kaggle/input/res101/best_resnet101_clusterv2.pth'),
)

MODELS = []
for _config_path, _weight_path in _MODEL_SOURCES:
    # INTERP is overwritten on every iteration and ends up holding the last model's value.
    _model, INTERP = get_model(_config_path, _weight_path)
    MODELS.append(_model)
model1, model2, model3, model4 = MODELS

# One name and one ensemble weight per entry of MODELS, in the same order.
MODEL_NAMES = ['resnext101_clusterv2', 'seresnet101_clusterv2', 'b6_clusterv2', 'resnet101_clusterv2']
# MODEL_WEIGHTS = [2.0, 1.5, 0.75, 0.75]
MODEL_WEIGHTS = [2.0, 1.0, 0.5, 0.5]

=> loading checkpoint /kaggle/input/best-resnext101-32x4d-cluster/best_resnext101_32x4d_clusterv2.pth
=> loaded state dict
=> loading checkpoint /kaggle/input/best-seresnet101-cluster/best_seresnet101_clusterv2.pth
=> loaded state dict
=> loading checkpoint /kaggle/input/effnetb6/best_b6_clusterv2.pth
=> loaded state dict
=> loading checkpoint /kaggle/input/res101/best_resnet101_clusterv2.pth
=> loaded state dict


In [13]:
from tqdm import tqdm

def extract_global_features(image_root_dir, pre_compute=False):
    """Return image ids and per-model global embeddings for the *.jpg files under a directory.

    Args:
        image_root_dir: Directory searched recursively for `*.jpg` files.
        pre_compute: When True, embeddings are first looked up by file stem in
            `/kaggle/input/train-embeddings/<name>.pth` for every name in
            `MODEL_NAMES`; only the (model, image) pairs not found there are
            computed with the corresponding entry of `MODELS`.

    Returns:
        (ids, embeddings): `ids` is a list with one integer per image, parsed from
        the hexadecimal file name; `embeddings` is a NumPy array of shape
        `(len(MODELS), num_images, NUM_EMBEDDING_DIMENSIONS)`.
        When `MAX_NUM_EMBEDDINGS > 0`, at most that many images are processed.
    """
    image_paths = list(pathlib.Path(image_root_dir).rglob('*.jpg'))

    num_embeddings = len(image_paths)
    print("Num images: ", num_embeddings)

    if MAX_NUM_EMBEDDINGS > 0:
        num_embeddings = min(MAX_NUM_EMBEDDINGS, num_embeddings)
    # Only the first `num_embeddings` images have a slot in `ids` / `embeddings`;
    # truncating here keeps every loop below inside those bounds.
    image_paths = image_paths[:num_embeddings]

    ids = num_embeddings * [None]
    embeddings = np.empty((len(MODELS), num_embeddings, NUM_EMBEDDING_DIMENSIONS))
    # filled[m, i] becomes True once embeddings[m, i] holds a real value, so that an
    # image cached for only some models still gets the remaining ones computed.
    filled = np.zeros((len(MODELS), num_embeddings), dtype=bool)

    if pre_compute:
        # Loop through all model names
        for m, name in enumerate(MODEL_NAMES):
            emb_dict = torch.load(f"/kaggle/input/train-embeddings/{name}.pth", "cpu")
            for i, image_path in tqdm(enumerate(image_paths), total=num_embeddings):
                image_id = image_path.stem
                if image_id in emb_dict:
                    ids[i] = int(image_path.name.split('.')[0], 16)
                    embeddings[m, i, :] = emb_dict[image_id].squeeze(0).numpy()
                    filled[m, i] = True
            # Release each cache before loading the next one.
            del emb_dict
            gc.collect()

    for i, image_path in tqdm(enumerate(image_paths), total=num_embeddings):
        missing_models = [m for m in range(len(MODELS)) if not filled[m, i]]
        if not missing_models:
            continue

        ids[i] = int(image_path.name.split('.')[0], 16)

        with Image.open(image_path) as pil_image:
            image = np.array(pil_image.convert('RGB'))
        # Resize longer edge to max 1024
        image = thumbnail(image)
        image = aug(image=image)['image']
        # Add bs dimension
        image.unsqueeze_(0)
        image = image.cuda(non_blocking=True)

        for m in missing_models:
            global_descriptors = MODELS[m](image)
            embeddings[m, i, :] = F.normalize(global_descriptors.sum(dim=0), dim=0).cpu().numpy()
            filled[m, i] = True

    return ids, embeddings

In [14]:
def to_hex(image_id):
    """Format an integer as lowercase hexadecimal, zero-padded to a width of 16."""
    return format(image_id, '016x')


def get_image_path(data_dir, image_id):
    """Return `<data_dir>/<c0>/<c1>/<c2>/<hex id>.jpg`, where c0..c2 are the first
    three characters of the hexadecimal form of `image_id`."""
    hex_name = to_hex(image_id)
    shard_dirs = hex_name[:3]
    return os.path.join(data_dir, *shard_dirs, hex_name + '.jpg')

In [15]:
def get_total_score(num_inliers, global_score):
    """Scale `global_score` by a local score in (0, 1].

    The local score is `num_inliers + 1`, capped at MAX_INLIER_SCORE and then
    divided by MAX_INLIER_SCORE.
    """
    capped_inliers = min(num_inliers + 1, MAX_INLIER_SCORE)
    return (capped_inliers / MAX_INLIER_SCORE) * global_score

## SuperPoint + SuperGlue for local re-ranking

In [16]:
# Local feature extractor (SP) and matcher (SG) used by the re-ranking step.
# NOTE(review): the meaning of the individual config keys is defined by the
# superpoint_pytorch package, not by this notebook — check there before tuning.
local_extractor = SP({
  'keypoint_threshold': 0.001, 
  'max_keypoints': 2048,
  "seg_backbone": "hrnetv2",
  'seg_mask': True,
  'homographic': False,
})
local_matcher = SG({'match_threshold': 0.2, 'weights': 'outdoor', 'sinkhorn_iterations': 150})

# Inference only: switch both networks to eval mode and move them to the GPU.
local_extractor.eval()
local_matcher.eval()
local_extractor = local_extractor.cuda()
local_matcher = local_matcher.cuda()
print('Finished loading SuperPoint + SuperGlue')

Loaded SuperPoint model
Loading weights for net_encoder
Loading weights for net_decoder
Loaded ADE20k segmentation model
Loaded SuperGlue model ("outdoor" weights)
Finished loading SuperPoint + SuperGlue


In [17]:
# import random
# random.seed(42)
# PERCENT_FILTER = 0.3

# def extract_superpoint_features(image_path,
#                                 local_extractor=local_extractor,
#                                 filter_keypoints=False):
#     image = np.array(Image.open(image_path).convert("RGB"))
#     image = image.astype(np.float32)
#     image = thumbnail(image, max_size=640)
#     image = image / 255.
#     image = torch.from_numpy(image).permute(2, 0, 1)[None].float()
#     image = image.cuda(non_blocking=True)
    
#     # Whether to remove keypoints from various semantic masks
#     if filter_keypoints and random.uniform(0, 1) < PERCENT_FILTER:
#         local_extractor.seg_mask = filter_keypoints
#     pred = local_extractor({'image': image})
#     pred = {k: torch.stack(v) for k, v in pred.items()}
#     pred['image_shape'] = image.shape[-2:]
#     return pred

In [18]:
def get_num_inliers_superglue(data, local_matcher=local_matcher):
    """Run the matcher on `data` and return the sum of its 'matching_scores0'
    output after clamping each score to [0, 1], as a Python number."""
    matching_scores = local_matcher(data)['matching_scores0']
    clamped_scores = torch.clamp(matching_scores, 0.0, 1.0)
    return clamped_scores.sum().item()

In [19]:
# Every image is resized to this fixed (height, width) before local feature extraction.
HEIGHT, WIDTH = 896, 672

def rescore_and_rerank_by_num_inliers(test_image_id,
                                      train_ids_labels_and_scores):
    """Re-score the retrieved training images of one test image and sort them.

    The test image and all candidate training images go through the local
    extractor as a single batch. Each candidate is then matched against the test
    image, and its score is replaced by `get_total_score(num_inliers, global_score)`.

    Args:
        test_image_id: Integer id of the test image.
        train_ids_labels_and_scores: List of (train_image_id, label, global_score)
            tuples. The list is updated and sorted in place.

    Returns:
        The same list object, sorted by the new score in descending order.
    """

    def load_image(image_path):
        # RGB image -> fixed size -> float32 in [0, 1] -> channels-first tensor.
        rgb = np.array(Image.open(image_path).convert("RGB"))
        rgb = functional.resize(rgb, height=HEIGHT, width=WIDTH, interpolation=cv2.INTER_LINEAR)
        scaled = rgb.astype(np.float32) / 255.
        return torch.from_numpy(scaled).permute(2, 0, 1)

    # Batch index 0 is the test image; candidate j sits at batch index j + 1.
    image_paths = [get_image_path(TEST_IMAGE_DIR, test_image_id)]
    image_paths += [
        get_image_path(TRAIN_IMAGE_DIR, candidate_id)
        for candidate_id, _, _ in train_ids_labels_and_scores
    ]
    batch = torch.stack([load_image(path) for path in image_paths], 0)
    batch = batch.cuda(non_blocking=True)

    outputs = local_extractor({"image": batch})

    # Keys suffixed with '0' describe the test image, keys suffixed with '1' the candidate.
    data = {key + '0': value[0][None] for key, value in outputs.items()}
    data['image_shape0'] = data['image_shape1'] = [HEIGHT, WIDTH]

    for position, (train_image_id, label, global_score) in enumerate(train_ids_labels_and_scores):
        for key, value in outputs.items():
            data[key + '1'] = value[position + 1][None]

        num_inliers = get_num_inliers_superglue(data)
        rescored = get_total_score(num_inliers, global_score)
        train_ids_labels_and_scores[position] = (train_image_id, label, rescored)

    train_ids_labels_and_scores.sort(key=operator.itemgetter(2), reverse=True)
    return train_ids_labels_and_scores

In [20]:
def load_labelmap():
    """Read TRAIN_LABELMAP_PATH and return a dict mapping each row's 'id' column
    to its 'landmark_id' column (both kept as strings)."""
    labelmap = {}
    with open(TRAIN_LABELMAP_PATH, mode='r') as csv_file:
        for row in csv.DictReader(csv_file):
            labelmap[row['id']] = row['landmark_id']
    return labelmap

In [21]:
def get_prediction_map(test_ids, train_ids_labels_and_scores):
    """Pick one label per test image from its ranked training candidates.

    For each test image the scores of its first TOP_K candidates are summed per
    label; the label with the largest sum wins (the first one seen on a tie).

    Returns:
        Dict mapping the hexadecimal test id to {'score': ..., 'class': ...}.
    """
    prediction_map = {}

    for position, test_id in enumerate(test_ids):
        hex_test_id = to_hex(test_id)

        label_totals = {}
        for _, label, score in train_ids_labels_and_scores[position][:TOP_K]:
            label_totals.setdefault(label, 0)
            label_totals[label] = label_totals[label] + score

        best_label = max(label_totals, key=label_totals.get)
        prediction_map[hex_test_id] = {
            'score': label_totals[best_label],
            'class': best_label,
        }

    return prediction_map

In [22]:
def get_predictions(labelmap):
    """Gets predictions using embedding similarity and local feature reranking.

    Args:
        labelmap: Dict mapping hexadecimal training image ids to labels.

    Returns:
        (pre_verification_predictions, post_verification_predictions): prediction
        maps built before and after the local-feature re-ranking step.
    """

    test_ids, test_embeddings = extract_global_features(TEST_IMAGE_DIR)
    train_ids, train_embeddings = extract_global_features(TRAIN_IMAGE_DIR, pre_compute=True)

    num_models, num_test = test_embeddings.shape[:2]
    num_train = train_embeddings.shape[1]

    # Loop-invariant: each model's share of the total ensemble weight, used as the
    # exponent of that model's similarity in the weighted product below.
    weight_total = sum(MODEL_WEIGHTS)
    # np.argpartition requires kth < len(array), so never ask for more candidates
    # than there are training images.
    num_to_rerank = min(NUM_TO_RERANK, num_train)

    train_ids_labels_and_scores = [None] * num_test

    # Using (slow) for-loop, as distance matrix doesn't fit in memory.
    for test_index in range(num_test):
        similarities = np.ones(num_train, dtype=np.float32)
        for m in range(num_models):
            cosine_distances = spatial.distance.cdist(
                test_embeddings[np.newaxis, m, test_index, :],
                train_embeddings[m], 'cosine')[0]
            # Map distance to a similarity clipped to [0.1, 0.8] and rescaled by 0.8.
            model_similarity = np.clip(0.8 - cosine_distances, 0.1, None) / 0.8
            similarities *= model_similarity ** (MODEL_WEIGHTS[m] / weight_total)
        distances = 1 - similarities

        if num_to_rerank < num_train:
            partition = np.argpartition(distances, num_to_rerank)[:num_to_rerank]
        else:
            # Fewer training images than requested candidates: keep them all.
            partition = np.arange(num_train)

        nearest = sorted([(train_ids[p], distances[p]) for p in partition],
                         key=lambda x: x[1])

        train_ids_labels_and_scores[test_index] = [
            (train_id, labelmap[to_hex(train_id)], 1. - combined_distance)
            for train_id, combined_distance in nearest
        ]

    # The embeddings are no longer needed; release them before the GPU-heavy re-ranking.
    del test_embeddings
    del train_embeddings
    gc.collect()

    pre_verification_predictions = get_prediction_map(
        test_ids, train_ids_labels_and_scores)

    for test_index, test_id in enumerate(test_ids):
        train_ids_labels_and_scores[test_index] = rescore_and_rerank_by_num_inliers(
            test_id, train_ids_labels_and_scores[test_index])

    post_verification_predictions = get_prediction_map(
        test_ids, train_ids_labels_and_scores)

    return pre_verification_predictions, post_verification_predictions


In [23]:
def save_submission_csv(predictions=None):
    """Saves optional `predictions` as submission.csv.

    The csv has columns {id, landmarks}. The landmarks column is a string
    containing the label and score for the id, separated by a whitespace
    delimiter.

    If `predictions` is `None` (default), submission.csv is copied from
    sample_submission.csv in `DATASET_DIR`.

    Args:
        predictions: Optional dict of image ids to dicts with keys {class, score}.
    """

    if predictions is None:
        # Dummy submission!
        shutil.copyfile(
            os.path.join(DATASET_DIR, 'sample_submission.csv'), 'submission.csv')
        return

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires for files handed to csv.writer / csv.DictWriter.
    with open('submission.csv', 'w', newline='') as submission_csv:
        csv_writer = csv.DictWriter(submission_csv, fieldnames=['id', 'landmarks'])
        csv_writer.writeheader()
        for image_id, prediction in predictions.items():
            label = prediction['class']
            score = prediction['score']
            csv_writer.writerow({'id': image_id, 'landmarks': f'{label} {score}'})


In [24]:
def subsample(x, n=32):
    """Return a new dict holding the first `n` entries of `x`, in iteration order."""
    return {key: x[key] for key in list(x)[:n]}


labelmap = load_labelmap()
num_training_images = len(labelmap)
print(f'Found {num_training_images} training images.')

# A label map with exactly NUM_PUBLIC_TRAIN_IMAGES rows means the public dataset is
# attached: run the pipeline on the small local `sample` tree only, and submit the
# sample submission instead of real predictions.
_is_public_session = num_training_images == NUM_PUBLIC_TRAIN_IMAGES
if _is_public_session:
    TEST_IMAGE_DIR = TRAIN_IMAGE_DIR = "sample"

_, post_verification_predictions = get_predictions(labelmap)

save_submission_csv(None if _is_public_session else post_verification_predictions)

  0%|          | 0/407 [00:00<?, ?it/s]

Found 1580470 training images.
Num images:  407


100%|██████████| 407/407 [01:27<00:00,  4.66it/s]


Num images:  407


100%|██████████| 407/407 [00:00<00:00, 16647.47it/s]
100%|██████████| 407/407 [00:00<00:00, 42656.78it/s]
100%|██████████| 407/407 [00:00<00:00, 31192.68it/s]
100%|██████████| 407/407 [00:00<00:00, 37020.34it/s]
100%|██████████| 407/407 [00:00<00:00, 375182.80it/s]
	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  keypoints = [torch.nonzero(s > self.config["keypoint_threshold"]) for s in scores]


In [25]:
# Display only the first four predictions instead of the whole dict.
subsample(post_verification_predictions, n=4)

{'000a350223f0d81e': {'score': 0.9682963490486145, 'class': '55450'},
 '0006e8d7218033ff': {'score': 0.9779824614524841, 'class': '76284'},
 '000234836dba6876': {'score': 0.9617262482643127, 'class': '194039'},
 '000b355c94ff99e8': {'score': 0.9303506016731262, 'class': '42935'}}