# Similarity-based Inference and submission code

[Ver.6] FIX BUG: Label file `valid.csv` which is used as a list of non-landmark images has contained landmark image files. The landmark entries are removed from `valid.csv` (2021/09/20 1:00AM GMT).  
[Ver.9] UPDATE: All private training images are used for the feature extraction (2021/09/24 5:00AM GMT).  
[Ver.9] UPDATE: Input image size is changed to 320x320 (2021/09/24 5:00AM GMT).  
[Ver.10] UPDATE: Label file `valid.csv` is updated (2021/09/24 6:40AM GMT).

### What is this code?

This code estimates landmark IDs and confidences based on feature similarity. The features are extracted from train, validation and test images by using an inference model that has a ResNet-34 as the backbone CNN. The train images include only landmark images, but the validation images include many non-landmark images. The confidence of a test image is calculated from its similarity with landmark images, and a penalty is derived from its similarity with non-landmark images.

The estimation algorithm is the same as in the following paper:

Supporting large-scale image recognition with out-of-domain samples  
Christof Henkel, Philipp Singer  
https://arxiv.org/abs/2010.01650

### ~~Notice~~

~~Because of making this as a published code, this implementation compromises on the performance as follows:~~

~~1. Only 311,511 training images are used in order to reduce processing time. If all training images are used, the performance will be better. Image features can be saved as feature files by feature extraction of training and validation images before inference. If the feature files are created before inference, features of all training images can be used in a short processing time.~~  
~~2. Because of processing time reduction, input image size is small (224x224). It is well known that bigger input images improve the performance.~~

In [None]:
import pathlib

import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

import PIL.Image
# import albumentations.pytorch
import albumentations as A
import cv2
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from typing import List, Tuple

# Input resolution recorded in the changelog.
# NOTE(review): the active preprocessing pipelines visible in this notebook do
# not reference IMAGE_SIZE (only a commented-out transformer does) - confirm
# whether it is still needed.
IMAGE_SIZE = 320  # (2021/09/24 5:00AM) Updated.
# Number of images per DataLoader batch during feature extraction.
BATCH_SIZE = 120

# MODEL_FILE = pathlib.Path('../input/google-landmark-2021-validation/model.pth')
# Single-model checkpoint; in the visible code it is only referenced by the
# commented-out single-model inference cell.
MODEL_FILE = pathlib.Path('../input/config1/config1.pth')
# Label CSVs (relative to the working directory) and the image roots they index.
TRAIN_LABEL_FILE = pathlib.Path('train.csv')
TRAIN_IMAGE_DIR = pathlib.Path('../input/landmark-recognition-2021/train')
VALID_LABEL_FILE = pathlib.Path('valid.csv')
VALID_IMAGE_DIR = pathlib.Path('../input/google-landmark-2021-validation/valid')
# The sample submission doubles as the list of test image ids.
TEST_LABEL_FILE = pathlib.Path('../input/landmark-recognition-2021/sample_submission.csv')
TEST_IMAGE_DIR = pathlib.Path('../input/landmark-recognition-2021/test')
# Release any GPU memory still cached by PyTorch before the heavy cells run.
torch.cuda.empty_cache()

### Elimination of public training images
(This code is updated at 2021/09/24 5:00AM GMT)

In order to reduce the processing time, only a subset of the public training images is used for feature extraction when the notebook is saved.
At submission, all private training images are used.

In [None]:
# Load the competition's training label table (image id, landmark id).
train_df = pd.read_csv('../input/landmark-recognition-2021/train.csv')

# A table of exactly 1,580,470 rows is treated as the public training set and is
# thinned to keep the notebook fast; any other size (presumably the private set
# used at submission - verify) is kept in full.
if len(train_df) == 1580470:
    # Group image ids by landmark, preserving first-seen landmark order.
    records = {}
    for image_id, landmark_id in train_df.values:
        records.setdefault(landmark_id, []).append(image_id)

    # Keep at most two images per landmark.
    image_ids = []
    landmark_ids = []
    for landmark_id, img_ids in records.items():
        kept = img_ids[:2]
        image_ids.extend(kept)
        landmark_ids.extend([landmark_id] * len(kept))

    train_df = pd.DataFrame({'id': image_ids, 'landmark_id': landmark_ids})
# train_df = train_df.iloc[:512,]
# Persist the (possibly thinned) table; the feature extraction reads it back.
train_df.to_csv(TRAIN_LABEL_FILE, index=False)
train_df

### List of non-landmark images
(This code is added at 2021/09/20 1:00AM GMT)

In [None]:
# Keep only the rows labelled -1 (non-landmark images), capped at the first
# 72,000 of them, and save that list for the feature extraction.
valid_df = (
    pd.read_csv('../input/google-landmark-2021-validation/valid.csv')
    .loc[lambda frame: frame['landmark_id'] == -1]
    .reset_index(drop=True)
    .iloc[:72000]
)
# valid_df = valid_df.iloc[:512,]
valid_df.to_csv(VALID_LABEL_FILE, index=False)
valid_df

In [None]:
# train_df = train_df.iloc[:1024,]
# valid_df = valid_df.iloc[:1024,]

In [None]:
# One preprocessing pipeline per ensemble member; the position in `auglist`
# must match the position of the corresponding model in the model list.

# aug1 / aug2 (identical): resize the shorter side to 512, centre-crop 448x448.
aug1 = A.Compose([
    A.SmallestMaxSize(512, interpolation=cv2.INTER_CUBIC),
    A.CenterCrop(height=448, width=448, p=1.),
])
aug2 = A.Compose([
    A.SmallestMaxSize(512, interpolation=cv2.INTER_CUBIC),
    A.CenterCrop(height=448, width=448, p=1.),
])
# aug4: resize the longer side to 512, pad to 512x512 with a constant border,
# then centre-crop 448x448 (keeps the whole image instead of cropping it).
aug4 = A.Compose([
    A.LongestMaxSize(512, p=1),
    A.PadIfNeeded(512, 512, border_mode=cv2.BORDER_CONSTANT, p=1),
    A.CenterCrop(p=1.0, height=448, width=448),
])
# aug4 = A.Compose([ A.SmallestMaxSize(512, interpolation=cv2.INTER_CUBIC),A.CenterCrop(height=448,width=448,p=1.)])
# aug7: resize to 544x672 ignoring aspect ratio, then centre-crop 448x512.
aug7 = A.Compose([
    A.Resize(height=544, width=672, p=1.),
    A.CenterCrop(height=448, width=512, p=1.),
])
auglist = [aug1, aug2, aug4, aug7]

### Class and Functions for feature extraction

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Image dataset that yields ``(image_id, normalised CHW float tensor)``.

    Image paths are derived from the ``id`` column of ``label_file`` using the
    ``<image_dir>/<id[0]>/<id[1]>/<id[2]>/<id>.jpg`` layout.

    Args:
        label_file: CSV file with an ``id`` column.
        image_dir: Root directory of the images.
        aug: Callable invoked as ``aug(image=ndarray)`` that returns a mapping
            whose ``'image'`` entry is the transformed HWC array
            (an albumentations ``Compose`` in this notebook).
    """

    # Per-channel mean / std on the 0-255 pixel scale; these equal the usual
    # ImageNet statistics multiplied by 255.
    _MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
    _INV_STD = np.reciprocal(
        np.array([58.395, 57.120, 57.375], dtype=np.float32), dtype=np.float32)

    def __init__(self, label_file: pathlib.Path, image_dir: pathlib.Path, aug) -> None:
        super().__init__()
        self.files = [
            image_dir / n[0] / n[1] / n[2] / f'{n}.jpg'
            for n in pd.read_csv(label_file)['id'].values]
        self.transformer = aug

    def __len__(self) -> int:
        return len(self.files)

    def __getitem__(self, index: int) -> Tuple[str, torch.Tensor]:
        path = self.files[index]
        # Force three channels: grayscale / RGBA / CMYK files would otherwise
        # break the per-channel normalisation and the HWC -> CHW transpose.
        # The context manager closes the file handle once the pixels are read.
        with PIL.Image.open(path) as handle:
            pixels = np.array(handle.convert('RGB'), dtype=np.uint8)

        # astype() returns a fresh float32 copy, so the in-place ops are safe.
        image = self.transformer(image=pixels)['image'].astype(np.float32)
        image -= self._MEAN
        image *= self._INV_STD

        # HWC -> CHW, the layout expected by the CNN.
        tensor = torch.from_numpy(image.transpose((2, 0, 1)))

        # The file stem (name without '.jpg') is the image id.
        return path.stem, tensor

In [None]:
@torch.no_grad()
def get_features(
    model: nn.Module,
    label_file: pathlib.Path,
    image_dir: pathlib.Path, aug
) -> Tuple[List[str], torch.Tensor]:
    """Extract L2-normalised features for every image listed in ``label_file``.

    Args:
        model: Network mapping an image batch to one feature vector per image.
            It is moved to the GPU and switched to eval mode.
        label_file: CSV file with the image ids to process.
        image_dir: Root directory of the images.
        aug: Preprocessing pipeline handed to ``Dataset``.

    Returns:
        The image ids in loading order and the matching row-normalised
        feature matrix (left on the GPU).
    """
    dataset = Dataset(label_file, image_dir, aug)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    model = model.cuda()
    model.eval()

    collected_names = []
    feature_batches = []
    for batch_names, batch_images in tqdm(loader, desc=image_dir.name):
        feature_batches.append(model(batch_images.cuda()))
        collected_names.extend(batch_names)

    stacked = torch.cat(feature_batches, dim=0)
    return collected_names, F.normalize(stacked)

In [None]:
def get_similarity(model: nn.Module,aug)-> Tuple[List[str], List[str]]:
    """Predict a landmark id and confidence for every test image (single model).

    Features of the train (landmark), valid (non-landmark) and test images are
    extracted with ``get_features``. Each test image takes the label with the
    largest summed, penalised cosine similarity among its nearest train
    images; the confidence is that sum minus the test image's own similarity
    to non-landmark images, min-max scaled to [0, 1] over the test set.

    Args:
        model: Feature extraction network.
        aug: Preprocessing pipeline passed through to ``get_features``.

    Returns:
        ``(ids, landmarks)`` where ``landmarks[i]`` is the string
        ``'<landmark_id> <confidence>'`` for test image ``ids[i]``.
    """
    # features
    train_names, train_features = get_features(
        model, TRAIN_LABEL_FILE, TRAIN_IMAGE_DIR, aug)
    _, valid_features = get_features(
        model, VALID_LABEL_FILE, VALID_IMAGE_DIR, aug)
    test_names, test_features = get_features(
        model, TEST_LABEL_FILE, TEST_IMAGE_DIR, aug)

    # Number of query rows per matrix product; bounds the size of each
    # similarity matrix held in memory at once.
    chunk_size = 512

    def mean_topk_valid_similarity(queries: torch.Tensor, k: int) -> torch.Tensor:
        """Mean of the k largest similarities of each query to the valid set."""
        k = min(k, valid_features.shape[0])  # tolerate tiny debug subsets
        parts = []
        for start in range(0, queries.shape[0], chunk_size):
            sims = torch.mm(queries[start:start + chunk_size], valid_features.T)
            parts.append(torch.topk(sims, k=k)[0].mean(dim=1))
        return torch.cat(parts, dim=0)

    # penalties: how strongly each image resembles non-landmark images
    train_penalties = mean_topk_valid_similarity(train_features, 5)
    test_penalties = mean_topk_valid_similarity(test_features, 10)

    # neighbors
    submit_ids = []
    submit_landmark_ids = []
    submit_confidences = []

    train_df = pd.read_csv(TRAIN_LABEL_FILE)
    idmap = {n: v for n, v in train_df.values}
    n_neighbors = min(3, train_features.shape[0])

    for start in range(0, test_features.shape[0], chunk_size):
        stop = start + chunk_size
        sims = torch.mm(test_features[start:stop], train_features.T)
        sims -= train_penalties[None, :]
        values, indexes = torch.topk(sims, k=n_neighbors)

        submit_ids.extend(test_names[start:stop])

        # Move each chunk to the host once as plain Python numbers; keeping
        # 0-dim CUDA tensors would force a device sync per element later on.
        chunk_rows = zip(
            indexes.tolist(), values.tolist(), test_penalties[start:stop].tolist())
        for idxs, vals, penalty in chunk_rows:
            # Sum the penalised similarities of neighbours sharing a label.
            scores = {}
            for idx, val in zip(idxs, vals):
                landmark_id = idmap[train_names[idx]]
                scores[landmark_id] = scores.get(landmark_id, 0.0) + val

            landmark_id, confidence = max(scores.items(), key=lambda item: item[1])
            submit_landmark_ids.append(landmark_id)
            submit_confidences.append(confidence - penalty)

    # standardize confidence values
    max_conf = max(submit_confidences)
    min_conf = min(submit_confidences)
    conf_range = max_conf - min_conf
    if conf_range > 0:
        submit_confidences = [
            (v - min_conf) / conf_range for v in submit_confidences]
    else:
        # All confidences equal (e.g. a single test image): 0/0 would yield
        # NaN / ZeroDivisionError, so emit a constant instead.
        submit_confidences = [0.0] * len(submit_confidences)

    # make values for 'landmark' column
    submit_landmarks = [
        f'{i} {c:.8f}' for i, c in zip(submit_landmark_ids, submit_confidences)]

    return submit_ids, submit_landmarks

In [None]:
@torch.no_grad()
def get_features_ensemble(
    modellist,
    label_file: pathlib.Path,
    image_dir: pathlib.Path, auglist
) -> Tuple[List[str], torch.Tensor]:

    n = []
    f = []
    for idx,model in enumerate(modellist):
        
        loader = torch.utils.data.DataLoader(
        Dataset(label_file, image_dir,auglist[idx]),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
        model = model.cuda()
        model.eval()
        all_names = []
        all_features = []

        for names, images in tqdm(loader, desc=image_dir.name):
            images = images.cuda()
            features = model(images).detach().cpu().numpy().astype(np.float32)
            all_features.append(features)
            all_names.extend(names)
        n.append(all_names)
#         f.append(torch.cat(all_features, dim=0))
        f.append(np.concatenate(all_features, axis=0))
        torch.cuda.empty_cache()
#     return all_names, F.normalize(torch.cat(all_features, dim=0))
    return n[0],F.normalize(torch.from_numpy(np.concatenate(f, axis=1)).to('cuda:0'))

In [None]:
def get_similarity_ensemble(modellist,auglist)-> Tuple[List[str], List[str]]:
    """Predict a landmark id and confidence for every test image (ensemble).

    Features of the train (landmark), valid (non-landmark) and test images are
    extracted with ``get_features_ensemble``. Each test image takes the label
    with the largest summed, penalised cosine similarity among its nearest
    train images; the confidence is that sum minus the test image's own
    similarity to non-landmark images, min-max scaled to [0, 1] over the test
    set.

    Args:
        modellist: Sequence of feature extraction networks.
        auglist: Preprocessing pipelines, index-aligned with ``modellist``.

    Returns:
        ``(ids, landmarks)`` where ``landmarks[i]`` is the string
        ``'<landmark_id> <confidence>'`` for test image ``ids[i]``.
    """
    # features
    train_names, train_features = get_features_ensemble(
        modellist, TRAIN_LABEL_FILE, TRAIN_IMAGE_DIR, auglist)
    _, valid_features = get_features_ensemble(
        modellist, VALID_LABEL_FILE, VALID_IMAGE_DIR, auglist)
    test_names, test_features = get_features_ensemble(
        modellist, TEST_LABEL_FILE, TEST_IMAGE_DIR, auglist)

    # Number of query rows per matrix product; bounds the size of each
    # similarity matrix held in memory at once.
    chunk_size = 512 * 4

    def mean_topk_valid_similarity(queries: torch.Tensor, k: int) -> torch.Tensor:
        """Mean of the k largest similarities of each query to the valid set."""
        k = min(k, valid_features.shape[0])  # tolerate tiny debug subsets
        parts = []
        for start in range(0, queries.shape[0], chunk_size):
            sims = torch.mm(queries[start:start + chunk_size], valid_features.T)
            parts.append(torch.topk(sims, k=k)[0].mean(dim=1))
        return torch.cat(parts, dim=0)

    # penalties: how strongly each image resembles non-landmark images
    train_penalties = mean_topk_valid_similarity(train_features, 5)
    test_penalties = mean_topk_valid_similarity(test_features, 10)

    # neighbors
    submit_ids = []
    submit_landmark_ids = []
    submit_confidences = []

    train_df = pd.read_csv(TRAIN_LABEL_FILE)
    idmap = {n: v for n, v in train_df.values}
    n_neighbors = min(3, train_features.shape[0])

    for start in range(0, test_features.shape[0], chunk_size):
        stop = start + chunk_size
        sims = torch.mm(test_features[start:stop], train_features.T)
        sims -= train_penalties[None, :]
        values, indexes = torch.topk(sims, k=n_neighbors)

        submit_ids.extend(test_names[start:stop])

        # Move each chunk to the host once as plain Python numbers; keeping
        # 0-dim CUDA tensors would force a device sync per element later on.
        chunk_rows = zip(
            indexes.tolist(), values.tolist(), test_penalties[start:stop].tolist())
        for idxs, vals, penalty in chunk_rows:
            # Sum the penalised similarities of neighbours sharing a label.
            scores = {}
            for idx, val in zip(idxs, vals):
                landmark_id = idmap[train_names[idx]]
                scores[landmark_id] = scores.get(landmark_id, 0.0) + val

            landmark_id, confidence = max(scores.items(), key=lambda item: item[1])
            submit_landmark_ids.append(landmark_id)
            submit_confidences.append(confidence - penalty)

    # standardize confidence values
    max_conf = max(submit_confidences)
    min_conf = min(submit_confidences)
    conf_range = max_conf - min_conf
    if conf_range > 0:
        submit_confidences = [
            (v - min_conf) / conf_range for v in submit_confidences]
    else:
        # All confidences equal (e.g. a single test image): 0/0 would yield
        # NaN / ZeroDivisionError, so emit a constant instead.
        submit_confidences = [0.0] * len(submit_confidences)

    # make values for 'landmark' column
    submit_landmarks = [
        f'{i} {c:.8f}' for i, c in zip(submit_landmark_ids, submit_confidences)]

    return submit_ids, submit_landmarks

In [None]:
import importlib
from types import SimpleNamespace
import argparse

In [None]:
pip install ../input/config1/timm-0.4.12-py3-none-any.whl

In [None]:
#from pytorchcv.model_provider import get_model as ptcv_get_model
import timm
from torch import nn

import math
import torch
from torch.nn import functional as F
from torch.nn.parameter import Parameter

class ArcMarginProduct(nn.Module):
    """Cosine-similarity head: one learnable weight vector per class.

    Args:
        in_features: Dimensionality of the incoming feature vectors.
        out_features: Number of weight vectors (classes).
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Allocated uninitialised; reset_parameters() fills it right away.
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise the class weights with Xavier-uniform values."""
        nn.init.xavier_uniform_(self.weight)

    def forward(self, features):
        """Return the cosine between each feature row and each class weight."""
        unit_features = F.normalize(features)
        unit_weights = F.normalize(self.weight)
        return F.linear(unit_features, unit_weights)

def gem(x, p=3, eps=1e-6):
    """Generalised-mean pooling over the last two (spatial) dimensions.

    Values are clamped to at least ``eps``, raised to the power ``p``,
    averaged over the full spatial extent and taken to the power ``1/p``.
    """
    spatial_extent = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, spatial_extent)
    return pooled.pow(1./p)

class GeM(nn.Module):
    """Generalised-mean pooling layer (module wrapper around ``gem``).

    Args:
        p: Pooling exponent.
        eps: Lower clamp applied to the input before exponentiation.
        p_trainable: When true, ``p`` is stored as a one-element learnable
            ``Parameter``; otherwise the given number is stored as-is.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=True):
        super(GeM,self).__init__()
        if p_trainable:
            self.p = Parameter(torch.ones(1)*p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # `p` is either a one-element tensor (trainable) or a plain number.
        # Testing for the tensor case - rather than for `int` - keeps repr()
        # working when a non-trainable float exponent such as 2.5 is used.
        if isinstance(self.p, torch.Tensor):
            p_value = self.p.data.tolist()[0]
        else:
            p_value = self.p
        return f'{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})'

    
class Backbone(nn.Module):
    """timm model used as a convolutional feature extractor.

    ``out_features`` is set to the input width of the model's classification
    layer, looked up by substring of the architecture name.

    Args:
        name: timm architecture name.
        pretrained: Forwarded to ``timm.create_model``.
        checkpoint_path: Forwarded to ``timm.create_model``.
    """

    # (substring of the architecture name, attribute path to its classifier).
    # Checked in order and the first match wins, so e.g. 'regnet' and 'csp'
    # must stay ahead of the more general 'res' (which also covers resnest).
    _CLASSIFIER_PATHS = (
        ('regnet', ('head', 'fc')),
        ('csp', ('head', 'fc')),
        ('res', ('fc',)),
        ('efficientnet', ('classifier',)),
        ('densenet', ('classifier',)),
        ('senet', ('fc',)),
        ('inception', ('last_linear',)),
    )
    # Used when no substring matches.
    _DEFAULT_CLASSIFIER_PATH = ('classifier',)

    def __init__(self, name='resnet18', pretrained=True,checkpoint_path=''):
        super(Backbone, self).__init__()

        self.net = timm.create_model(name, pretrained=pretrained,checkpoint_path=checkpoint_path)

        attribute_path = self._DEFAULT_CLASSIFIER_PATH
        for fragment, candidate_path in self._CLASSIFIER_PATHS:
            if fragment in name:
                attribute_path = candidate_path
                break

        classifier = self.net
        for attribute in attribute_path:
            classifier = getattr(classifier, attribute)
        self.out_features = classifier.in_features

    def forward(self, x):
        """Return the wrapped model's ``forward_features`` output for ``x``."""
        return self.net.forward_features(x)

    
class Net(nn.Module):
    """Embedding network: backbone -> global pooling -> neck.

    The cosine ``head`` is constructed as well, but ``forward`` returns only
    the neck output (the embedding).

    Args:
        args: Configuration object. Attributes read here: ``backbone``,
            ``checkpoint_path``, ``pool``, ``p_trainable`` (only when
            ``pool == "gem"``), ``embedding_size``, ``neck``, ``n_classes``
            and ``pretrained_weights``.
        pretrained: Forwarded to ``Backbone``.
    """

    def __init__(self, args, pretrained=False):
        super(Net, self).__init__()

        self.args = args
        self.backbone = Backbone(args.backbone, pretrained=pretrained,checkpoint_path=args.checkpoint_path)

        # Spatial pooling that follows the backbone feature map.
        pool_kind = args.pool
        if pool_kind == "gem":
            self.global_pool = GeM(p_trainable=args.p_trainable)
        elif pool_kind == "identity":
            self.global_pool = torch.nn.Identity()
        else:
            self.global_pool = nn.AdaptiveAvgPool2d(1)

        self.embedding_size = args.embedding_size
        width_in = self.backbone.out_features
        width_out = self.embedding_size

        # https://www.groundai.com/project/arcface-additive-angular-margin-loss-for-deep-face-recognition
        # Layer order inside the Sequential determines the state-dict keys,
        # so it has to stay exactly as listed here.
        if args.neck in ("option-D", "option-F"):
            # option-F is option-D preceded by dropout.
            neck_layers = [nn.Dropout(0.3)] if args.neck == "option-F" else []
            neck_layers += [
                nn.Linear(width_in, width_out, bias=True),
                nn.BatchNorm1d(width_out),
                torch.nn.PReLU(),
            ]
        else:
            neck_layers = [
                nn.Linear(width_in, width_out, bias=False),
                nn.BatchNorm1d(width_out),
            ]
        self.neck = nn.Sequential(*neck_layers)

        self.head = ArcMarginProduct(self.embedding_size, args.n_classes)

        weights_file = args.pretrained_weights
        if weights_file is not None:
            self.load_state_dict(torch.load(weights_file, map_location='cpu'), strict=False)
            print('weights loaded from',args.pretrained_weights)

    # NOTE: an earlier, commented-out forward took a dict ({'input': ...}) and
    # also returned the head's logits; the active forward below returns the
    # embedding only.

    def forward(self, image):
        """Return the embedding of ``image``.

        Only position [0, 0] of the pooled map's last two dimensions is kept
        before the neck is applied.
        """
        pooled = self.global_pool(self.backbone(image))
        return self.neck(pooled[:, :, 0, 0])

In [None]:
# Ensemble members; the order must line up with the preprocessing pipelines
# that are paired with the models by position.
modelnames = ['config1', 'config2', 'config4', 'config7']
modellist = []
for name in modelnames:
    MODEL_PATH = pathlib.Path("../input/config1/") / f"{name}.pth"
    # Each file holds a complete pickled model object (it is used directly as
    # a module afterwards), so the model classes must already be defined.
    # NOTE: torch.load unpickles arbitrary objects - only load trusted files.
    m = torch.load(str(MODEL_PATH))
    modellist.append(m)

In [None]:
# Run the ensemble inference and write the predictions as 'submission.csv'.
submit_ids, submit_landmarks = get_similarity_ensemble(modellist, auglist)
submit_df = pd.DataFrame(dict(id=submit_ids, landmarks=submit_landmarks))
submit_df.to_csv('submission.csv', index=False)

### Inference and Submission

In [None]:
# model = torch.load(str(MODEL_FILE))

# submit_ids, submit_landmarks = get_similarity(model,aug1)
# submit_df = pd.DataFrame({'id': submit_ids, 'landmarks': submit_landmarks})
# submit_df.to_csv('submission.csv', index=False)

### Check the submission

Following code shows the inference results. Each figure shows a test image (LEFT), the estimated landmark image (RIGHT), landmark ID and confidence (TITLE).

In [None]:
# Reload the submission and split each '<landmark_id> <confidence>' string
# into two typed columns for the visual check below.
submit_df = pd.read_csv('submission.csv')
parsed_entries = [entry.split() for entry in submit_df['landmarks']]
submit_df['landmark_id'] = [int(fields[0]) for fields in parsed_entries]
submit_df['confidence'] = [float(fields[1]) for fields in parsed_entries]
train_df = pd.read_csv(TRAIN_LABEL_FILE)

def get_image(path, name):
    """Load an image and letterbox it onto a black 256x256 RGB canvas.

    Args:
        path: Root image directory (a ``pathlib.Path``).
        name: Image id; the file is read from
            ``path / name[0] / name[1] / name[2] / f'{name}.jpg'``.

    Returns:
        The 256x256 canvas as a NumPy array, with the aspect-preserving
        resized image centred on it.
    """
    canvas_size = 256
    # Convert to RGB so every result has three channels (the black fill colour
    # is an RGB triple and the caller concatenates two results side by side).
    # convert() returns an independent copy, so the file can be closed here.
    with PIL.Image.open(path / name[0] / name[1] / name[2] / f'{name}.jpg') as source:
        img = source.convert('RGB')

    if img.width > img.height:
        # Landscape: fill the width, centre vertically. max(1, ...) keeps an
        # extremely elongated image from being resized to a zero-sized side.
        new_height = max(1, round(img.height / img.width * canvas_size))
        img = img.resize((canvas_size, new_height))
        offset = (0, (canvas_size - img.height) // 2)
    else:
        # Portrait / square: fill the height, centre horizontally. The
        # vertical offset must be 0 because the image spans the full height.
        new_width = max(1, round(img.width / img.height * canvas_size))
        img = img.resize((new_width, canvas_size))
        offset = ((canvas_size - img.width) // 2, 0)

    new_img = PIL.Image.new(img.mode, (canvas_size, canvas_size), (0, 0, 0))
    new_img.paste(img, offset)
    return np.array(new_img)

# Show up to rows x 3 predictions. Each cell holds a test image (left) next to
# the first training image of the predicted landmark (right).
rows = 10
fig = plt.figure(figsize=(15, 4 * rows))
for r in range(rows):
    for c in range(3):
        i = r * 3 + c
        if i >= len(submit_df):
            # Fewer predictions than grid cells: leave the remaining cells
            # empty instead of indexing past the end of the table.
            break
        # Column order: id, landmarks, landmark_id, confidence.
        test_name, _, label, conf = submit_df.iloc[i].values
        test_image = get_image(TEST_IMAGE_DIR, test_name)
        train_name = train_df.query(f'landmark_id == {label}').iloc[0]['id']
        train_image = get_image(TRAIN_IMAGE_DIR, train_name)
        image = np.concatenate([test_image, train_image], axis=1)

        ax = fig.add_subplot(rows, 3, i + 1)
        ax.set_title(f'Label={label}, Confidence={conf:.2f}')
        ax.axis('off')
        ax.imshow(image)
fig.tight_layout()