# Introduction

+ This code estimates landmark IDs and confidence levels based on feature similarity.

+ Features are extracted from training, validation, and testing images using an inference model with ResNet-34 as the backbone CNN.

+ Training images include only landmark images, but validation images contain many non-landmark images.

+ The confidence of test images is calculated based on similarity to landmark images, while penalties originate from similarity to non-landmark images.

In [1]:
# import libraries
import pathlib

import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

import PIL.Image
import albumentations.pytorch
import cv2
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from typing import List, Tuple

# define global parameters and file paths
# IMAGE_SIZE is the side length (in pixels) used by the resize and the
# center crop in Dataset; BATCH_SIZE is the DataLoader batch size used
# during feature extraction.
IMAGE_SIZE = 384  
BATCH_SIZE = 512

# use pre-trained resnet-34 model
# (the file is loaded with torch.jit.load, i.e. it is a TorchScript archive)
MODEL_FILE = pathlib.Path('../input/google-landmark-2021-validation/model.pth')
# Label CSV written by this notebook (training subset) and the matching images.
TRAIN_LABEL_FILE = pathlib.Path('train.csv')
TRAIN_IMAGE_DIR = pathlib.Path('../input/landmark-recognition-2021/train')
# Label CSV written by this notebook (non-landmark rows only) and its images.
VALID_LABEL_FILE = pathlib.Path('valid.csv')
VALID_IMAGE_DIR = pathlib.Path('../input/google-landmark-2021-validation/valid')
# The sample submission is read only for its 'id' column (the test image IDs).
TEST_LABEL_FILE = pathlib.Path('../input/landmark-recognition-2021/sample_submission.csv')
TEST_IMAGE_DIR = pathlib.Path('../input/landmark-recognition-2021/test')



## Extract subset

+ To reduce processing time, only a subset of the public training images (at most 5 images per landmark) is used for feature extraction when the notebook is run on the public training set.

In [2]:

train_df = pd.read_csv('../input/landmark-recognition-2021/train.csv')

# Only subsample when the CSV has exactly 1580470 rows (presumably the size of
# the public training set); any other CSV is written out unchanged.
if len(train_df) == 1580470:
    # Group image IDs by landmark, preserving first-appearance order.
    records = {}
    for image_id, landmark_id in train_df.values:
        records.setdefault(landmark_id, []).append(image_id)

    image_ids = []
    landmark_ids = []

    # Keep at most the first 5 images of every landmark.
    for landmark_id, img_ids in records.items():
        kept = img_ids[:5]
        image_ids += kept
        landmark_ids += [landmark_id] * len(kept)

    train_df = pd.DataFrame({'id': image_ids, 'landmark_id': landmark_ids})

# Persist the label file that the feature extraction reads later on.
train_df.to_csv(TRAIN_LABEL_FILE, index=False)
train_df

Unnamed: 0,id,landmark_id
0,17660ef415d37059,1
1,92b6290d571448f6,1
2,cd41bf948edc0340,1
3,fb09f1e98c6d2f70,1
4,25c9dfc7ea69838d,7
...,...,...
375522,074967ced3198635,203092
375523,19f4a1bc4bcae084,203092
375524,1a5af12bccf60325,203092
375525,72c3b1c367e3d559,203092


## Extract non-landmark samples

+ The ID of these non-landmark pictures is -1.

In [3]:
valid_df = pd.read_csv('../input/google-landmark-2021-validation/valid.csv')

# Keep only the rows labelled -1 (non-landmark images) and persist them as
# the validation label file.
is_non_landmark = valid_df['landmark_id'] == -1
valid_df = valid_df.loc[is_non_landmark]
valid_df.to_csv(VALID_LABEL_FILE, index=False)
valid_df

Unnamed: 0,id,landmark_id
0,e324e0f3e6d9e504,-1
1,d9e17c5f3e0c47b3,-1
2,1a748a755ed67512,-1
3,537bf9bdfccdafea,-1
4,13f4c974274ee08b,-1
...,...,...
117221,a339ef58d82dcb86,-1
117223,5426472625271a4d,-1
117224,7b6a585405978398,-1
117225,d885235ba249cf5d,-1


## Read and preprocess

In [4]:
class Dataset(torch.utils.data.Dataset):
    """Image dataset that yields ``(image_id, preprocessed_tensor)`` pairs.

    Image IDs are read from the ``'id'`` column of ``label_file``. Each image
    is expected at ``image_dir/<id[0]>/<id[1]>/<id[2]>/<id>.jpg``.
    """

    def __init__(self, label_file: pathlib.Path, image_dir: pathlib.Path) -> None:
        """Build the list of image paths and the preprocessing pipeline.

        Args:
            label_file: CSV file with an ``'id'`` column of image IDs.
            image_dir: Root directory of the sharded image tree.
        """
        super().__init__()
        # Read the picture according to the picture storage rule: the first
        # three characters of the ID name the three nested sub-directories.
        self.files = [
            image_dir / n[0] / n[1] / n[2] / f'{n}.jpg'
            for n in pd.read_csv(label_file)['id'].values]

        # Resize so the shorter side equals IMAGE_SIZE, take a square center
        # crop, normalize with the library defaults and convert to a tensor.
        self.transformer = albumentations.Compose([
            albumentations.SmallestMaxSize(IMAGE_SIZE, interpolation=cv2.INTER_CUBIC),
            albumentations.CenterCrop(IMAGE_SIZE, IMAGE_SIZE),
            albumentations.Normalize(),
            albumentations.pytorch.ToTensorV2(),
        ])

    def __len__(self) -> int:
        """Return the number of images listed in the label file."""
        return len(self.files)

    def __getitem__(self, index: int) -> Tuple[str, torch.Tensor]:
        """Load and preprocess one image.

        Args:
            index: Position of the image in the label file.

        Returns:
            The image ID (file name without the ``.jpg`` suffix) and the
            preprocessed image tensor.
        """
        path = self.files[index]
        # Force 3-channel RGB: grayscale, palette, RGBA or CMYK files would
        # otherwise yield arrays whose channel count does not match the
        # 3-channel normalization. The context manager closes the file handle
        # as soon as the pixels have been copied into the array.
        with PIL.Image.open(path) as image:
            pixels = np.array(image.convert('RGB'))
        tensor = self.transformer(image=pixels)['image']

        return path.stem, tensor

## Get features function

In [5]:
@torch.no_grad()
def get_features(
    model: nn.Module,
    label_file: pathlib.Path,
    image_dir: pathlib.Path,
) -> Tuple[List[str], torch.Tensor]:
    """Extract normalized features for every image listed in ``label_file``.

    The model is moved to the GPU when one is available and to the CPU
    otherwise, then switched to eval mode. Gradients are disabled by the
    ``torch.no_grad`` decorator.

    Args:
        model: Feature extractor called on batches of preprocessed images.
        label_file: CSV file with an ``'id'`` column of image IDs.
        image_dir: Root directory holding the images.

    Returns:
        A tuple ``(names, features)``: the list of all image IDs and a tensor
        with all image features concatenated along dim 0 and normalized with
        ``F.normalize`` (default arguments). Both follow the order of the
        label file because the loader does not shuffle. The feature tensor
        stays on the device the model ran on.
    """
    loader = torch.utils.data.DataLoader(
        Dataset(label_file, image_dir),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    # Fall back to the CPU instead of crashing when no GPU is present.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    all_names: List[str] = []
    all_features: List[torch.Tensor] = []

    for names, images in tqdm(loader, desc=image_dir.name):
        all_features.append(model(images.to(device)))
        all_names.extend(names)

    return all_names, F.normalize(torch.cat(all_features, dim=0))

## Get similarity

+ Compute the similarity between each test image and every training image.

+ Each test image is then assigned a landmark ID by k-nearest-neighbour (KNN) voting over its most similar training images.

+ Because the features are L2-normalized at the end of the feature-extraction function, the dot product used here is the cosine similarity.

In [6]:
def _mean_topk_similarity(
    queries: torch.Tensor,
    references: torch.Tensor,
    k: int,
    chunk_size: int = 128,
) -> torch.Tensor:
    """Return, per query row, the mean of its ``k`` largest dot products with ``references``.

    Queries are processed ``chunk_size`` rows at a time so the full
    query-by-reference similarity matrix is never materialized. ``k`` is
    clamped to the number of reference rows so a small reference set does not
    make ``torch.topk`` raise.
    """
    k = min(k, references.shape[0])
    means = [
        torch.topk(
            torch.mm(queries[start:start + chunk_size], references.T), k=k,
        )[0].mean(dim=1)
        for start in range(0, queries.shape[0], chunk_size)
    ]
    return torch.cat(means, dim=0)


def get_similarity(model: nn.Module) -> Tuple[List[str], List[str]]:
    """Predict a landmark ID and a confidence for every test image.

    Steps:
      1. Extract features for the training, validation (non-landmark) and
         test images with ``get_features``.
      2. Compute a penalty per training image (mean of its 5 highest
         similarities to validation images) and per test image (mean of its
         10 highest similarities to validation images).
      3. For each test image, take the 5 training images with the highest
         penalized similarity, sum the similarities per landmark ID and pick
         the landmark with the highest sum.
      4. Subtract the test penalty from that sum and min-max normalize the
         results over all test images.

    Args:
        model: Feature extractor passed through to ``get_features``.

    Returns:
        A tuple ``(ids, landmarks)``: the test image IDs and, aligned with
        them, strings of the form ``'<landmark_id> <confidence>'`` with the
        confidence printed with 8 decimals. If every raw confidence is
        identical, all normalized confidences are ``0.0``.
    """
    # Get image features of training set, validation set and test set.
    train_names, train_features = get_features(
        model, TRAIN_LABEL_FILE, TRAIN_IMAGE_DIR)
    _, valid_features = get_features(
        model, VALID_LABEL_FILE, VALID_IMAGE_DIR)
    test_names, test_features = get_features(
        model, TEST_LABEL_FILE, TEST_IMAGE_DIR)

    # Define penalties: how strongly each image resembles non-landmark images.
    train_penalties = _mean_topk_similarity(train_features, valid_features, k=5)
    # The test penalties are only consumed one scalar at a time, so move them
    # to the host once instead of reading single GPU elements in the loop.
    test_penalties = _mean_topk_similarity(
        test_features, valid_features, k=10).tolist()

    # Landmark ID of each training feature row, aligned with train_features.
    train_df = pd.read_csv(TRAIN_LABEL_FILE)
    idmap = dict(zip(train_df['id'].tolist(), train_df['landmark_id'].tolist()))
    train_labels = [idmap[name] for name in train_names]

    # Assign neighbors for each test image.
    submit_ids = list(test_names)
    submit_landmark_ids = []
    raw_confidences = []

    chunk_size = 128
    # Clamp so a training set with fewer than 5 images does not raise.
    num_neighbors = min(5, train_features.shape[0])

    for start in range(0, test_features.shape[0], chunk_size):
        stop = start + chunk_size
        sims = torch.mm(test_features[start:stop], train_features.T)
        sims = sims - train_penalties[None, :]
        # The top 5 training images with the highest similarity are selected
        # as the "neighbors" of each test image.
        values, indexes = torch.topk(sims, k=num_neighbors)

        # One device-to-host copy per chunk; indexing Python lists with GPU
        # scalars would force a synchronization for every single element.
        rows = zip(indexes.tolist(), values.tolist(), test_penalties[start:stop])
        for idxs, vals, penalty in rows:
            # Accumulate the neighbor similarities per landmark ID.
            scores = {}
            for idx, val in zip(idxs, vals):
                label = train_labels[idx]
                scores[label] = scores.get(label, 0.0) + val

            # Select the landmark ID with the highest score (the first one
            # encountered wins ties).
            landmark_id, confidence = max(scores.items(), key=lambda kv: kv[1])
            submit_landmark_ids.append(landmark_id)
            raw_confidences.append(confidence - penalty)

    # Use Min-Max normalization to get confidence values. When all raw
    # confidences are equal the range is zero; divide by 1 instead so the
    # result is 0.0 rather than NaN / ZeroDivisionError.
    min_conf = min(raw_confidences)
    span = (max(raw_confidences) - min_conf) or 1.0
    submit_confidences = [(v - min_conf) / span for v in raw_confidences]

    submit_landmarks = [
        f'{i} {c:.8f}' for i, c in zip(submit_landmark_ids, submit_confidences)]

    return submit_ids, submit_landmarks

## Load model and calculate

In [7]:
# Load the TorchScript model and print its module tree as a sanity check.
model = torch.jit.load(str(MODEL_FILE))
print(model)

# Predict one '<landmark_id> <confidence>' string per test image and write
# the result in the submission format (columns: id, landmarks).
submit_ids, submit_landmarks = get_similarity(model)
submit_df = pd.DataFrame(
    list(zip(submit_ids, submit_landmarks)), columns=['id', 'landmarks'])
submit_df.to_csv('submission.csv', index=False)

RecursiveScriptModule(
  original_name=Model
  (backbone): RecursiveScriptModule(
    original_name=TimmBackbone
    (net): RecursiveScriptModule(
      original_name=ResNet
      (conv1): RecursiveScriptModule(original_name=Conv2d)
      (bn1): RecursiveScriptModule(original_name=BatchNorm2d)
      (act1): RecursiveScriptModule(original_name=ReLU)
      (maxpool): RecursiveScriptModule(original_name=MaxPool2d)
      (layer1): RecursiveScriptModule(
        original_name=Sequential
        (0): RecursiveScriptModule(
          original_name=BasicBlock
          (conv1): RecursiveScriptModule(original_name=Conv2d)
          (bn1): RecursiveScriptModule(original_name=BatchNorm2d)
          (act1): RecursiveScriptModule(original_name=ReLU)
          (conv2): RecursiveScriptModule(original_name=Conv2d)
          (bn2): RecursiveScriptModule(original_name=BatchNorm2d)
          (act2): RecursiveScriptModule(original_name=ReLU)
        )
        (1): RecursiveScriptModule(
          original

train:   0%|          | 0/734 [00:00<?, ?it/s]

valid:   0%|          | 0/226 [00:00<?, ?it/s]

test:   0%|          | 0/21 [00:00<?, ?it/s]