This code is based on [Google Landmark 2nd code](https://github.com/WesleyZhang1991/Google_Landmark_Retrieval_2021_2nd_Place_Solution).<br>
The cropped dataset comes from [Awsaf](https://www.kaggle.com/awsaf49/happywhale-boundingbox-yolov5).<br>
You can change the backbone and head arbitrarily through the config files. If this is helpful, please upvote :D

In [None]:
!cp -r ../input/traincode/Google_Landmark_Retrieval_2021_2nd_Place_Solution/* ./ 

In [None]:
!mv ./logs/GLDv2clean/ResNeSt101_512_all_input512_bs12_lossarcfacescale30margin0.3_optSGD_lr0.0008_wd1e-05_warm0.0_ep100_schecosine_drop0.0_re1.0_smoothoff_samplergld_pad20_necknobias_cache_all_list_color0.0_affine0.0/resnest101_35_best.pth ./logs/GLDv2clean/ResNeSt101_512_all_input512_bs12_lossarcfacescale30margin0.3_optSGD_lr0.0008_wd1e-05_warm0.0_ep100_schecosine_drop0.0_re1.0_smoothoff_samplergld_pad20_necknobias_cache_all_list_color0.0_affine0.0/resnest101_30_best.pth

In [None]:
# To run training, uncomment the line below.
# !python train.py

In [None]:
from utils.logger import setup_logger
from model import make_model
from solver import make_optimizer, WarmupMultiStepLR
from solver.scheduler_factory import create_scheduler
from loss import make_loss
from processor import do_train
import random
import datetime
import torch
import numpy as np
import os
import argparse
from config import cfg
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import cv2
from tqdm import tqdm
import joblib
from sklearn.model_selection import StratifiedKFold

In [None]:
def set_seed(seed):
    """Seed every random number generator in use and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and all CUDA devices).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning can select different kernels from run to run, which defeats
    # the deterministic flag above, so it is switched off.
    torch.backends.cudnn.benchmark = False

In [None]:
# Single-process notebook run: the rank is fixed instead of being read from
# the LOCAL_RANK environment variable.
local_rank = 0  # int(os.environ["LOCAL_RANK"])

config_file = "./configs/GLDv2/ResNeSt101_512_all.yml"

# The experiment file is merged first so that cfg.BASE is populated, then the
# base file is merged, then the experiment file is merged once more
# (presumably so that its values take precedence over the base file).
cfg.merge_from_file(config_file)
if len(cfg.BASE) > 0:
    cfg.merge_from_file(cfg.BASE)
cfg.merge_from_file(config_file)

# Build the pieces of the run-specific output directory name.
loss_postfix = f'{cfg.MODEL.ID_LOSS_TYPE}'
if cfg.MODEL.ID_LOSS_TYPE == 'arcface':
    loss_postfix += f'scale{cfg.SOLVER.COSINE_SCALE}margin{cfg.SOLVER.COSINE_MARGIN}'

neck_bias_postfix = 'neckbias' if cfg.MODEL.NECK_BIAS else 'necknobias'

if len(cfg.DATALOADER.CACHE_LIST) > 0:
    cache_info = cfg.DATALOADER.CACHE_LIST.split('.pkl')[0]
else:
    cache_info = 'nocache'

aug_info = f'color{cfg.INPUT.COLOR_PROB}_affine{cfg.INPUT.RANDOM_AFFINE_PROB}'

output_dir = cfg.OUTPUT_DIR + (
    f'_input{cfg.INPUT.SIZE_TRAIN[0]}'
    f'_bs{cfg.SOLVER.IMS_PER_BATCH}'
    f'_loss{loss_postfix}'
    f'_opt{cfg.SOLVER.OPTIMIZER_NAME}'
    f'_lr{cfg.SOLVER.BASE_LR}'
    f'_wd{cfg.SOLVER.WEIGHT_DECAY}'
    f'_warm{cfg.SOLVER.WARMUP_EPOCHS}'
    f'_ep{cfg.SOLVER.MAX_EPOCHS}'
    f'_sche{cfg.SOLVER.WARMUP_METHOD}'
    f'_drop{cfg.MODEL.CNN_DROPOUT}'
    f'_re{cfg.INPUT.RE_PROB}'
    f'_smooth{cfg.MODEL.IF_LABELSMOOTH}'
    f'_sampler{cfg.DATALOADER.SAMPLER}'
    f'_pad{cfg.INPUT.PADDING}'
    f'_{neck_bias_postfix}_{cache_info}_{aug_info}'
)

# Only rank 0 creates the directory. The original condition referenced the
# undefined name `args`, which raised NameError whenever the directory did not
# already exist.
if output_dir and local_rank == 0:
    os.makedirs(output_dir, exist_ok=True)

cfg.OUTPUT_DIR = output_dir
cfg.freeze()

set_seed(cfg.SOLVER.SEED)

In [None]:
# Bind this process to GPU 0 when distributed training is enabled.
if cfg.MODEL.DIST_TRAIN:
    torch.cuda.set_device(0)

logger = setup_logger("reid_baseline", output_dir, if_train=True)
logger.info("Saving model in the path :{}".format(cfg.OUTPUT_DIR))

logger.info("Loaded configuration file {}".format("./configs/GLDv2/ResNeSt101_512_all.yml"))
# The raw config text is kept in `config_str`; logging it is currently
# disabled to keep the notebook output short.
with open("./configs/GLDv2/ResNeSt101_512_all.yml", 'r') as cf:
    config_str = "\n" + cf.read()
    # logger.info(config_str)
# logger.info("Running with config:\n{}".format(cfg))

if cfg.MODEL.DIST_TRAIN:
    # The first positional argument of timedelta is *days*; the timeout is
    # given explicitly in seconds (30 minutes) instead of 1800 days.
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         timeout=datetime.timedelta(seconds=1800))

# NOTE(review): this runs after torch.cuda.set_device() above; the variable
# normally has to be set before CUDA is initialised to take effect - confirm
# the intended ordering for distributed runs.
os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID

In [None]:
def _make_pipeline(augmentations=()):
    """Build an albumentations pipeline.

    The pipeline resizes to a square whose side is the first entry of
    cfg INPUT.SIZE_TRAIN, applies the given augmentations, normalises each
    channel and converts the image to a tensor.
    """
    side = cfg['INPUT']['SIZE_TRAIN'][0]
    steps = [A.Resize(side, side)]
    steps.extend(augmentations)
    steps.append(
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        )
    )
    steps.append(ToTensorV2())
    return A.Compose(steps, p=1.)


# "train" adds random flips and a rotation of up to 30 degrees; "valid" only
# resizes and normalises.
data_transforms = {
    "train": _make_pipeline([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=30, p=0.5),
    ]),
    "valid": _make_pipeline(),
}

In [None]:
class HappyWhaleDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a dataframe.

    Args:
        df: Dataframe with a ``file_path`` column (image location on disk)
            and an ``individual_id`` column (label, returned as a ``long``
            tensor).
        transforms: Optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``"image"``.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['individual_id'].values
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.df)

    def __getitem__(self, index):
        """Load, colour-convert and transform the image at ``index``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB before transforming.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return img, torch.tensor(label, dtype=torch.long)

In [None]:
# Number of target classes passed to both the model head and the loss.
num_classes = 15587

checkpoint_path = (
    "./logs/GLDv2clean/"
    "ResNeSt101_512_all_input512_bs12_lossarcfacescale30margin0.3_optSGD"
    "_lr0.0008_wd1e-05_warm0.0_ep100_schecosine_drop0.0_re1.0_smoothoff"
    "_samplergld_pad20_necknobias_cache_all_list_color0.0_affine0.0/"
    "resnest101_30_best.pth"
)

model = make_model(cfg, num_class=num_classes)
# Load the weights onto the CPU regardless of the device they were saved
# from; the model is moved to its target device separately.
state_dict = torch.load(checkpoint_path, map_location="cpu")
model.load_state_dict(state_dict)
loss_func = make_loss(cfg, num_classes=num_classes)

In [None]:
optimizer = make_optimizer(cfg, model)
# Choose the scheduler by the configured method. A plain truthiness test
# would route every non-empty method to the cosine branch and would only
# reach the fallback with an empty method, which the fallback then passes on
# as its warm-up method.
if cfg.SOLVER.WARMUP_METHOD == 'cosine':
    print('===========using cosine learning rate=======')
    scheduler = create_scheduler(cfg, optimizer)
else:
    print('===========using normal learning rate=======')
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR,
                                  cfg.SOLVER.WARMUP_EPOCHS, cfg.SOLVER.WARMUP_METHOD)

def get_test_file_path(id):
    """Return the location of test image ``id`` under ``TEST_DIR``."""
    return "{}/{}".format(TEST_DIR, id)

# Input locations on the Kaggle filesystem.
ROOT_DIR = '../input/happywhalecropped'
TEST_DIR = '../input/happywhalecropped/test_images/test_images'
weights_path = "../input/dummymodel4/Loss13.7021_epoch49.bin"

# The LabelEncoder classes were saved during training so that predicted class
# indices can be mapped back to individual ids after inference.
# NOTE(review): allow_pickle=True unpickles the file's contents; only load a
# file from a trusted source.
encoder = LabelEncoder()
encoder.classes_ = np.load(r"./le.pkl", allow_pickle=True)

# One row per test image, taken from the sample submission.
df = pd.read_csv("../input/happy-whale-and-dolphin/sample_submission.csv")
df['file_path'] = df['image'].map(get_test_file_path)
# Hard-coded dummy label: the model's forward call takes a label argument
# (see the ArcMargin forward function).
df['individual_id'] = 0

# Test-time pipeline: resize to a square (side taken from the first entry of
# cfg INPUT.SIZE_TRAIN), normalise each channel, convert to a tensor.
# This rebinds `data_transforms`, so only the "test" key is available afterwards.
_test_steps = [
    A.Resize(cfg['INPUT']['SIZE_TRAIN'][0], cfg['INPUT']['SIZE_TRAIN'][0]),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    ),
    ToTensorV2(),
]
data_transforms = {"test": A.Compose(_test_steps, p=1.)}

In [None]:
# Wrap the test dataframe in a dataset and iterate it in file order
# (shuffle=False) so predictions line up with the submission rows.
test_dataset = HappyWhaleDataset(df, transforms=data_transforms["test"])
test_loader = DataLoader(
    test_dataset,
    batch_size=100,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Run the model over the test set and keep, per image, the indices of the
# five highest-scoring classes.
device = "cuda"
model.to(device)
model.eval()

all_pred_list = []
for images, labels in tqdm(test_loader):
    with torch.no_grad():
        images = images.to(device)
        labels = labels.to(device)
        score, feat = model(images, labels)
        # Top five along the class dimension, largest first.
        top5 = score.topk(5, dim=1, largest=True, sorted=True).indices
        all_pred_list.extend(top5.detach().cpu().numpy())

# Convert each row of predicted class indices back to individual ids and join
# them with single spaces, one string per test image.
name_pred = [
    " ".join(encoder.inverse_transform(pred))
    for pred in tqdm(all_pred_list)
]

# Write the predictions into the sample-submission layout, without the index
# column.
submission = pd.read_csv("../input/happy-whale-and-dolphin/sample_submission.csv")
submission['predictions'] = name_pred
submission.to_csv("submission.csv", index=False)