## [Embedding Ensemble]  
**1) Deit Cropped Dataset**  
**2) YOLO V5 Cropped Dataset**  

=> Weight Tuning by Optuna or Pytorch.lightening

In [1]:
!pip install faiss

Collecting faiss
  Downloading faiss-1.5.3-cp37-cp37m-manylinux1_x86_64.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m74.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Installing collected packages: faiss
Successfully installed faiss-1.5.3
[0m

In [2]:
import os, sys, gc, time, random, warnings, math, cv2
import wandb, optuna, faiss, timm, torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import albumentations as albu

from albumentations.pytorch import ToTensorV2
from PIL import Image
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from timm.data.transforms_factory import create_transform
from timm.optim import create_optimizer_v2
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import normalize
from sklearn.preprocessing import LabelEncoder
from kaggle_secrets import UserSecretsClient
from glob import glob
from pathlib import Path
from typing import Callable, Dict, Optional, Tuple
from torch.autograd import Variable
warnings.filterwarnings("ignore")

In [3]:
# WandB Login => Copy API Key
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb")

!wandb login $secret_value_0

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
class CFG:
    checkpoint_dir = './saved/model'
    name = 'HappyWhale'
    model = 'convnext_base_384_in22ft1k'
    resume_dir = '/kaggle/input/pytorch-arcface-train-with-focal-loss/conv-nextday322pl/convnext_base_384_in22ft1k_384.ckpt'

    """ Common Options """
    wandb = True
    optuna = True  # if you want to tune hyperparameter, set True
    competition = 'HappyWhale'
    seed = 42
    cfg_name = 'CFG'
    n_gpu = 1
    
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    gpu_id = 0
    num_workers = 0

    """ Data Options """
    n_folds = 5
    epochs = 180
    img_size = 384
    batch_size = 64

    """ Gradient Options """
    amp_scaler = True
    gradient_checkpoint = True  # save parameter
    clipping_grad = True  # clip_grad_norm
    n_gradient_accumulation_steps = 1
    max_grad_norm = 1000

    """ Loss & Metrics Options """
    loss_fn = ''
    reduction = 'mean'
    metrics = ['MCRMSE', 'f_beta', 'recall']

    """ Optimizer with LLRD Options """
    optimizer = 'AdamW'  # options: SWA, AdamW
    llrd = True
    layerwise_lr = 5e-5
    layerwise_lr_decay = 0.9
    layerwise_weight_decay = 1e-2
    layerwise_adam_epsilon = 1e-6
    layerwise_use_bertadam = False
    betas = (0.9, 0.999)

    """ Scheduler Options """
    scheduler = 'cosine_annealing'  # options: cosine, linear, cosine_annealing, linear_annealing
    batch_scheduler = True
    num_cycles = 0.5  # num_warmup_steps = 0
    warmup_ratio = 0.1  # options: 0.05, 0.1

    """ SWA Options """
    swa = True
    swa_start = int(epochs*0.75)
    swa_lr = 1e-4
    anneal_epochs = 4
    anneal_strategy = 'cos'  # default = cos, available option: linear

    """ Model_Utils Options """
    freeze = False
    reinit = True
    num_reinit = 5
    awp = False
    nth_awp_start_epoch = 10
    awp_eps = 1e-2
    awp_lr = 1e-4

In [5]:
""" Helper Function """

def check_device() -> bool:
    return torch.mps.is_available()

def check_library(checker: bool) -> tuple:
    """
    1) checker == True
        - current device is mps
    2) checker == False
        - current device is cuda with cudnn
    """
    if not checker:
        _is_built = torch.backends.cudnn.is_available()
        _is_enable = torch.backends.cudnn.enabledtorch.backends.cudnn.enabled
        version = torch.backends.cudnn.version()
        device = (_is_built, _is_enable, version)
        return device

def class2dict(cfg) -> dict:
    return dict((name, getattr(cfg, name)) for name in dir(cfg) if not name.startswith('__'))


def all_type_seed(cfg, checker: bool) -> None:
    # python & torch seed
    os.environ['PYTHONHASHSEED'] = str(cfg.seed)  # python Seed
    random.seed(cfg.seed)  # random module Seed
    np.random.seed(cfg.seed)  # numpy module Seed
    torch.manual_seed(cfg.seed)  # Pytorch CPU Random Seed Maker

    # device == cuda
    if not checker:
        torch.cuda.manual_seed(cfg.seed)  # Pytorch GPU Random Seed Maker
        torch.cuda.manual_seed_all(cfg.seed)  # Pytorch Multi Core GPU Random Seed Maker
        # torch.cudnn seed
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.enabled = False

def seed_worker(worker_id) -> None:
    worker_seed = torch.initial_seed() % 2 ** 32
    np.random.seed(worker_seed)
    random.seed(worker_seed)
    

check_library(True)
all_type_seed(CFG, True)

g = torch.Generator()
g.manual_seed(CFG.seed)

<torch._C.Generator at 0x7213d2175c30>

In [6]:
""" Deit Cropped Dataset """
""" Path & Settings """

INPUT_DIR = Path("..") / "input"
OUTPUT_DIR = Path("/") / "kaggle" / "working"

DATA_ROOT_DIR = INPUT_DIR / "convert-backfintfrecords" / "happy-whale-and-dolphin-backfin"
TRAIN_DIR = DATA_ROOT_DIR / "train_images"
TEST_DIR = DATA_ROOT_DIR / "test_images"
TRAIN_CSV_PATH = DATA_ROOT_DIR / "train.csv"
SAMPLE_SUBMISSION_CSV_PATH = DATA_ROOT_DIR /"sample_submission.csv"
PUBLIC_SUBMISSION_CSV_PATH = INPUT_DIR / "whale-blender-v2-0" / "submission.csv"
IDS_WITHOUT_BACKFIN_PATH = INPUT_DIR / "ids-without-backfin" / "ids_without_backfin.npy"

N_SPLITS = 5

#ENCODER_CLASSES_PATH = OUTPUT_DIR /" encoder_classes.npy"
TEST_CSV_PATH = OUTPUT_DIR / " test.csv"
# TRAIN_CSV_ENCODED_FOLDED_PATH = OUTPUT_DIR / "train_encoded_folded.csv"
CHECKPOINTS_DIR = OUTPUT_DIR / "conv-nextday322pl"
SUBMISSION_CSV_PATH = OUTPUT_DIR / "submission.csv"

DEBUG = False

In [7]:
""" YOLO V5 Cropped Dataset """
""" Path & Settings """

# INPUT_DIR = Path("..") / "input"
# OUTPUT_DIR = Path("/") / "kaggle" / "working"

# O_DATA_ROOT_DIR = INPUT_DIR / "convert-backfintfrecords" / "happy-whale-and-dolphin-backfin"
second_DATA_ROOT_DIR = INPUT_DIR / "/kaggle/input/happywhale-yolo5-cropped-dataset"
second_TRAIN_DIR = second_DATA_ROOT_DIR / "train" / "train_images"
second_TEST_DIR = second_DATA_ROOT_DIR / "test" / "test_images"
second_TRAIN_CSV_PATH = second_DATA_ROOT_DIR / "train_df.csv"
# SAMPLE_SUBMISSION_CSV_PATH = O_DATA_ROOT_DIR /"sample_submission.csv"
# PUBLIC_SUBMISSION_CSV_PATH = INPUT_DIR / "whale-blender-v2-0" / "submission.csv"
# IDS_WITHOUT_BACKFIN_PATH = INPUT_DIR / "ids-without-backfin" / "ids_without_backfin.npy"

# N_SPLITS = 5

ENCODER_CLASSES_PATH = '/kaggle/input/pytorch-arcface-train-with-focal-loss/ encoder_classes.npy'
second_TEST_CSV_PATH = '/kaggle/input/pytorch-arcface-train-with-focal-loss/ test.csv'
TRAIN_CSV_ENCODED_FOLDED_PATH = '/kaggle/input/pytorch-arcface-train-with-focal-loss/train_encoded_folded.csv'
# CHECKPOINTS_DIR = OUTPUT_DIR / "conv-nextday322pl"
# SUBMISSION_CSV_PATH = OUTPUT_DIR / "submission.csv"

# DEBUG = False

In [8]:
def get_image_path(id: str, dir: Path) -> str:
    return f"{dir / id}"

In [9]:
""" Make DataFrame & Cross Validation Function """

def get_image_path(id: str, dir: Path) -> str:
    return f"{dir / id}"

def stratifiedkfold(df: pd.DataFrame, cfg) -> pd.DataFrame:
    """ Stratified KFold """
    fold = StratifiedKFold(
        n_splits=cfg.n_folds,
        shuffle=True,
        random_state=cfg.seed
    )
    df['kfold'] = -1
    for num, (tx, vx) in enumerate(fold.split(df, df.individual_id)):
        df.loc[vx, "kfold"] = int(num)
    return df

def load_data(data_path: str) -> pd.DataFrame:
    """ Load data_folder from csv file like as train.csv, test.csv, val.csv """
    df = pd.read_csv(data_path)
    df["image_path"] = df["image"].apply(get_image_path, dir=TRAIN_DIR)
    return df

def img_preprocess(df: pd.DataFrame, cfg) -> pd.DataFrame:
    """
    For Remove Background Image, Normalize Each Train & Test Data
    
    [Reference]
    https://www.kaggle.com/code/remekkinas/remove-background-salient-object-detection/notebook
    """
    encoder = LabelEncoder()
    df["individual_id"] = encoder.fit_transform(df["individual_id"])
    np.save(ENCODER_CLASSES_PATH, encoder.classes_)
    df = stratifiedkfold(df, cfg)
    df.to_csv(TRAIN_CSV_ENCODED_FOLDED_PATH, index=False)
    return df

In [10]:
""" load & Preprocess Train Data """
# train_df = img_preprocess(load_data(TRAIN_CSV_PATH), CFG)
# train_df

' load & Preprocess Train Data '

In [11]:
""" For Deit Cropped Dataset """
# Use sample submission csv as template
test_df = pd.read_csv(SAMPLE_SUBMISSION_CSV_PATH)
test_df["image_path"] = test_df["image"].apply(get_image_path, dir=TEST_DIR)

test_df.drop(columns=["predictions"], inplace=True)

# Dummy id
test_df["individual_id"] = 0

test_df.to_csv(TEST_CSV_PATH, index=False)

test_df.head()

Unnamed: 0,image,image_path,individual_id
0,000110707af0ba.jpg,../input/convert-backfintfrecords/happy-whale-...,0
1,0006287ec424cb.jpg,../input/convert-backfintfrecords/happy-whale-...,0
2,000809ecb2ccad.jpg,../input/convert-backfintfrecords/happy-whale-...,0
3,00098d1376dab2.jpg,../input/convert-backfintfrecords/happy-whale-...,0
4,000b8d89c738bd.jpg,../input/convert-backfintfrecords/happy-whale-...,0


In [12]:
""" For YOLO V5 Cropped Dataset """
yolo_test_df = pd.read_csv('/kaggle/input/happywhale-yolo5-cropped-dataset/test_df.csv')
yolo_test_df

Unnamed: 0,image,image_path,individual_id
0,000110707af0ba.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
1,0006287ec424cb.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
2,000809ecb2ccad.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
3,00098d1376dab2.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
4,000b8d89c738bd.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
...,...,...,...
27951,fff6ff1989b5cd.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
27952,fff8fd932b42cb.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
27953,fff96371332c16.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0
27954,fffc1c4d3eabc7.jpg,../input/happywhale-yolo5-cropped-dataset/test...,0


In [13]:
class HappyWhaleDataset(Dataset):
    def __init__(self, df: pd.DataFrame, transform: Optional[Callable] = None):
        self.df = df
        self.transform = transform

        self.image_names = self.df["image"].values
        self.image_paths = self.df["image_path"].values
        self.targets = self.df["individual_id"].values

    def __getitem__(self, index: int) -> Dict[str, torch.Tensor]:
        image_name = self.image_names[index]

        image_path = self.image_paths[index]

        image = Image.open(image_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)

        target = self.targets[index]
        target = torch.tensor(target, dtype=torch.long)

        return {"image_name": image_name, "image": image, "target": target}

    def __len__(self) -> int:
        return len(self.df)

In [14]:
class LitDataModule(pl.LightningDataModule):
    def __init__(
        self,
        train_csv_encoded_folded: str,
        test_csv: str,
        val_fold: float,
        image_size: int,
        batch_size: int,
        num_workers: int,
    ):
        super().__init__()

        self.save_hyperparameters()

        self.train_df = pd.read_csv(train_csv_encoded_folded)
        self.test_df = pd.read_csv(test_csv)
        """ Transform: 
            1) Resize
            2) Normalize
        """
        self.transform = create_transform(
            input_size=(self.hparams.image_size, self.hparams.image_size),
            crop_pct=1.0,
        )
        
    def setup(self, stage: Optional[str] = None):
        if stage == "fit" or stage is None:
            # Split train df using fold
            train_df = self.train_df[self.train_df.kfold != self.hparams.val_fold].reset_index(drop=True)
            val_df = self.train_df[self.train_df.kfold == self.hparams.val_fold].reset_index(drop=True)

            self.train_dataset = HappyWhaleDataset(train_df, transform=self.transform)
            self.val_dataset = HappyWhaleDataset(val_df, transform=self.transform)

        if stage == "test" or stage is None:
            self.test_dataset = HappyWhaleDataset(self.test_df, transform=self.transform)

    def train_dataloader(self) -> DataLoader:
        return self._dataloader(self.train_dataset, train=True)

    def val_dataloader(self) -> DataLoader:
        return self._dataloader(self.val_dataset)

    def test_dataloader(self) -> DataLoader:
        return self._dataloader(self.test_dataset)

    def _dataloader(self, dataset: HappyWhaleDataset, train: bool = False) -> DataLoader:
        return DataLoader(
            dataset,
            batch_size=self.hparams.batch_size,
            shuffle=train,
            num_workers=self.hparams.num_workers,
            pin_memory=True,
            drop_last=train,
        )

In [15]:
class WeightedLitDataModule(pl.LightningDataModule):
    def __init__(
        self,
        train_csv_encoded_folded: str,
        test_csv: str,
        second_test_csv: str,
        val_fold: float,
        image_size: int,
        batch_size: int,
        num_workers: int,
    ):
        super().__init__()

        self.save_hyperparameters()

        self.train_df = pd.read_csv(train_csv_encoded_folded)
        self.test_df = pd.read_csv(test_csv)
        self.second_test_df = pd.read_csv(second_test_csv)
        self.transform = create_transform(
            input_size=(self.hparams.image_size, self.hparams.image_size),
            crop_pct=1.0,
        )
        
    def setup(self, stage: Optional[str] = None):
        if stage == "fit" or stage is None:
            # Split train df using fold
            train_df = self.train_df[self.train_df.kfold != self.hparams.val_fold].reset_index(drop=True)
            val_df = self.train_df[self.train_df.kfold == self.hparams.val_fold].reset_index(drop=True)

            self.train_dataset = HappyWhaleDataset(train_df, transform=self.transform)
            self.val_dataset = HappyWhaleDataset(val_df, transform=self.transform)

        if stage == "test" or stage is None:
            self.test_dataset = HappyWhaleDataset(self.test_df, transform=self.transform)
            self.second_test_dataset = HappyWhaleDataset(self.second_test_df, transform=self.transform)

    def train_dataloader(self) -> DataLoader:
        return self._dataloader(self.train_dataset, train=True)

    def val_dataloader(self) -> DataLoader:
        return self._dataloader(self.val_dataset)

    def test_dataloader(self) -> DataLoader:
        return self._dataloader(self.test_dataset)
    
    def second_test_dataloader(self) -> DataLoader:
        return self._dataloader(self.second_test_dataset)

    def _dataloader(self, dataset: HappyWhaleDataset, train: bool = False) -> DataLoader:
        return DataLoader(
            dataset,
            batch_size=self.hparams.batch_size,
            shuffle=train,
            num_workers=self.hparams.num_workers,
            pin_memory=True,
            drop_last=train,
        )

In [16]:
# From https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/blob/master/src/modeling/metric_learning.py
# Added type annotations, device, and 16bit support
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: norm of input feature
        m: margin
        cos(theta + m)
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        s: float,
        m: float,
        easy_margin: bool,
        ls_eps: float,
    ):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features  # size of concat embedding size
        self.out_features = out_features # num classes: 
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input: torch.Tensor, label: torch.Tensor, device: str = "cuda") -> torch.Tensor:
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Enable 16 bit precision
        cosine = cosine.to(torch.float32)

        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

In [17]:

from torch.autograd import Variable


class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha, (float, int)): self.alpha = torch.Tensor([alpha, 1 - alpha])
        if isinstance(alpha, list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim() > 2:
            input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1, 2)  # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1, input.size(2))  # N,H*W,C => N*H*W,C
        target = target.view(-1, 1)

        logpt = F.log_softmax(input)
        logpt = logpt.gather(1, target)
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type() != input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            select = (target != 0).type(torch.LongTensor).cuda()
            at = self.alpha.gather(0, select.data.view(-1))
            logpt = logpt * Variable(at)

        loss = -1 * (1 - pt) ** self.gamma * logpt
        if self.size_average:
            return loss.mean()
        else:
            return loss.sum()


class SwitchNorm1d(nn.Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.997, using_moving_average=True):
        super(SwitchNorm1d, self).__init__()
        self.eps = eps
        self.momentum = momentum
        self.using_moving_average = using_moving_average
        self.weight = nn.Parameter(torch.ones(1, num_features))
        self.bias = nn.Parameter(torch.zeros(1, num_features))
        self.mean_weight = nn.Parameter(torch.ones(2))
        self.var_weight = nn.Parameter(torch.ones(2))
        self.register_buffer('running_mean', torch.zeros(1, num_features))
        self.register_buffer('running_var', torch.zeros(1, num_features))
        self.reset_parameters()

    def reset_parameters(self):
        self.running_mean.zero_()
        self.running_var.zero_()
        self.weight.data.fill_(1)
        self.bias.data.zero_()

    def _check_input_dim(self, input):
        if input.dim() != 2:
            raise ValueError('expected 2D input (got {}D input)'
                             .format(input.dim()))

    def forward(self, x):
        self._check_input_dim(x)
        mean_ln = x.mean(1, keepdim=True)
        var_ln = x.var(1, keepdim=True)

        if self.training:
            mean_bn = x.mean(0, keepdim=True)
            var_bn = x.var(0, keepdim=True)
            if self.using_moving_average:
                self.running_mean.mul_(self.momentum)
                self.running_mean.add_((1 - self.momentum) * mean_bn.data)
                self.running_var.mul_(self.momentum)
                self.running_var.add_((1 - self.momentum) * var_bn.data)
            else:
                self.running_mean.add_(mean_bn.data)
                self.running_var.add_(mean_bn.data ** 2 + var_bn.data)
        else:
            mean_bn = torch.autograd.Variable(self.running_mean)
            var_bn = torch.autograd.Variable(self.running_var)

        softmax = nn.Softmax(0)
        mean_weight = softmax(self.mean_weight)
        var_weight = softmax(self.var_weight)

        mean = mean_weight[0] * mean_ln + mean_weight[1] * mean_bn
        var = var_weight[0] * var_ln + var_weight[1] * var_bn

        x = (x - mean) / (var + self.eps).sqrt()
        return x * self.weight + self.bias


class LitModule(pl.LightningModule):
    def __init__(
        self,
        model_name: str,
        pretrained: bool,
        drop_rate: float,
        embedding_size: int,
        num_classes: int,
        arc_s: float,
        arc_m: float,
        arc_easy_margin: bool,
        arc_ls_eps: float,
        optimizer: str,
        learning_rate: float,
        weight_decay: float,
        len_train_dl: int,
        epochs:int
    ):
        super().__init__()

        self.save_hyperparameters()
          # self.fea_extra_layer = [2, 3]
        self.fea_extra_layer = [-2,-1]
        self.model = timm.create_model(model_name, pretrained=False, drop_rate=drop_rate,
                                       features_only=True,
                                       out_indices=self.fea_extra_layer
                                       )
        in_features = 1536
        self.embedding = nn.Sequential(

            nn.Linear(in_features, embedding_size),
            nn.BatchNorm1d(embedding_size))

        # self.model.reset_classifier(num_classes=0, global_pool="avg")
        self.bn = nn.Sequential(
            nn.BatchNorm2d(1024),
            nn.Dropout(0.2),
            nn.AdaptiveAvgPool2d(1),
        )
        self.bn2 = nn.Sequential(
            nn.BatchNorm2d(512),
            nn.Dropout(0.2),
            nn.AdaptiveAvgPool2d(1),
        )

        self.arc = ArcMarginProduct(
            in_features=embedding_size,
            out_features=num_classes,
            s=arc_s,
            m=arc_m,
            easy_margin=arc_easy_margin,
            ls_eps=arc_ls_eps,
        )
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, images: torch.Tensor) -> torch.Tensor:
        features = self.model(images)
        features[0] = self.bn2(features[0])
        features[1] = self.bn(features[1])
        features = torch.cat(features, dim=1)
        embeddings = self.embedding(features.flatten(1))

        return embeddings

    def configure_optimizers(self):
        optimizer = create_optimizer_v2(
            self.parameters(),
            opt=self.hparams.optimizer,
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay,
        )
        
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            self.hparams.learning_rate,
            steps_per_epoch=self.hparams.len_train_dl,
            epochs=self.hparams.epochs,
        )
        scheduler = {"scheduler": scheduler, "interval": "step"}

        return [optimizer], [scheduler]

    def training_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> torch.Tensor:
        return self._step(batch, "train")

    def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> torch.Tensor:
        return self._step(batch, "val")

    def _step(self, batch: Dict[str, torch.Tensor], step: str) -> torch.Tensor:
        images, targets = batch["image"], batch["target"]

        embeddings = self(images)
        outputs = self.arc(embeddings, targets, self.device)

        loss = self.loss_fn(outputs, targets)
        
        self.log(f"{step}_loss", loss)

        return loss

In [18]:
def load_eval_module(checkpoint_path: str, device: torch.device) -> LitModule:
    module = LitModule.load_from_checkpoint(checkpoint_path)
    module.to(device)
    module.eval()

    return module

def load_dataloaders(
    train_csv_encoded_folded: str,
    test_csv: str,
    val_fold: float,
    image_size: int,
    batch_size: int,
    num_workers: int,
) -> Tuple[DataLoader, DataLoader, DataLoader]:

    datamodule = LitDataModule(
        train_csv_encoded_folded=train_csv_encoded_folded,
        test_csv=test_csv,
        val_fold=val_fold,
        image_size=image_size,
        batch_size=batch_size,
        num_workers=num_workers,
        
    )

    datamodule.setup()

    train_dl = datamodule.train_dataloader()
    val_dl = datamodule.val_dataloader()
    test_dl = datamodule.test_dataloader()

    return train_dl, val_dl, test_dl

def weighted_load_dataloaders(
    train_csv_encoded_folded: str,
    test_csv: str,
    second_test_csv: str,
    val_fold: float,
    image_size: int,
    batch_size: int,
    num_workers: int,
) -> Tuple[DataLoader, DataLoader, DataLoader, DataLoader]:

    datamodule = WeightedLitDataModule(
        train_csv_encoded_folded=train_csv_encoded_folded,
        test_csv=test_csv,
        second_test_csv=second_test_csv,
        val_fold=val_fold,
        image_size=image_size,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    datamodule.setup()

    train_dl = datamodule.train_dataloader()
    val_dl = datamodule.val_dataloader()
    test_dl = datamodule.test_dataloader()
    second_test_dl = datamodule.second_test_dataloader()

    return train_dl, val_dl, test_dl, second_test_dl


def load_encoder() -> LabelEncoder:
    encoder = LabelEncoder()
    encoder.classes_ = np.load(ENCODER_CLASSES_PATH, allow_pickle=True)

    return encoder


@torch.inference_mode()
def get_embeddings(
    module: pl.LightningModule, dataloader: DataLoader, encoder: LabelEncoder, stage: str
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:

    all_image_names = []
    all_embeddings = []
    all_targets = []

    for batch in tqdm(dataloader, desc=f"Creating {stage} embeddings"):
        image_names = batch["image_name"]
        images = batch["image"].to(module.device)
        targets = batch["target"].to(module.device)

        embeddings = module(images)
        #emb2 = module2(images)
        #embeddings = embeddings + emb2   
        all_image_names.append(image_names)
        all_embeddings.append(embeddings.cpu().numpy())
        all_targets.append(targets.cpu().numpy())
        
        if DEBUG:
            break

    all_image_names = np.concatenate(all_image_names)
    all_embeddings = np.vstack(all_embeddings)
    all_targets = np.concatenate(all_targets)

    all_embeddings = normalize(all_embeddings, axis=1, norm="l2")
    all_targets = encoder.inverse_transform(all_targets)

    return all_image_names, all_embeddings, all_targets


def create_and_search_index(embedding_size: int, train_embeddings: np.ndarray, val_embeddings: np.ndarray, k: int):
    index = faiss.IndexFlatIP(embedding_size)
    index.add(train_embeddings)
    D, I = index.search(val_embeddings, k=k)  # noqa: E741

    return D, I


def create_val_targets_df(
    train_targets: np.ndarray, val_image_names: np.ndarray, val_targets: np.ndarray
) -> pd.DataFrame:

    allowed_targets = np.unique(train_targets)
    val_targets_df = pd.DataFrame(np.stack([val_image_names, val_targets], axis=1), columns=["image", "target"])
    val_targets_df.loc[~val_targets_df.target.isin(allowed_targets), "target"] = "new_individual"

    return val_targets_df


def create_distances_df(
    image_names: np.ndarray, targets: np.ndarray, D: np.ndarray, I: np.ndarray, stage: str  # noqa: E741
) -> pd.DataFrame:

    distances_df = []
    for i, image_name in tqdm(enumerate(image_names), desc=f"Creating {stage}_df"):
        target = targets[I[i]]
        distances = D[i]
        subset_preds = pd.DataFrame(np.stack([target, distances], axis=1), columns=["target", "distances"])
        subset_preds["image"] = image_name
        distances_df.append(subset_preds)

    distances_df = pd.concat(distances_df).reset_index(drop=True)
    distances_df = distances_df.groupby(["image", "target"]).distances.max().reset_index()
    distances_df = distances_df.sort_values("distances", ascending=False).reset_index(drop=True)

    return distances_df


def get_best_threshold(val_targets_df: pd.DataFrame, valid_df: pd.DataFrame) -> Tuple[float, float]:
    best_th = 0
    best_cv = 0
    for th in [0.1 * x for x in range(11)]:
        all_preds = get_predictions(valid_df, threshold=th)

        cv = 0
        for i, row in val_targets_df.iterrows():
            target = row.target
            preds = all_preds[row.image]
            val_targets_df.loc[i, th] = map_per_image(target, preds)

        cv = val_targets_df[th].mean()

        print(f"th={th} cv={cv}")

        if cv > best_cv:
            best_th = th
            best_cv = cv

    print(f"best_th={best_th}")
    print(f"best_cv={best_cv}")

    # Adjustment: Since Public lb has nearly 10% 'new_individual' (Be Careful for private LB)
    val_targets_df["is_new_individual"] = val_targets_df.target == "new_individual"
    val_scores = val_targets_df.groupby("is_new_individual").mean().T
    val_scores["adjusted_cv"] = val_scores[True] * 0.1 + val_scores[False] * 0.9
    best_th = val_scores["adjusted_cv"].idxmax()
    print(f"best_th_adjusted={best_th}")

    return best_th, best_cv


def get_predictions(df: pd.DataFrame, threshold: float = 0.2):
    sample_list = ["938b7e931166", "5bf17305f073", "7593d2aee842", "7362d7a01d00", "956562ff2888"]

    predictions = {}
    for i, row in tqdm(df.iterrows(), total=len(df), desc=f"Creating predictions for threshold={threshold}"):
        if row.image in predictions:
            if len(predictions[row.image]) == 5:
                continue
            predictions[row.image].append(row.target)
        elif row.distances > threshold:
            predictions[row.image] = [row.target, "new_individual"]
        else:
            predictions[row.image] = ["new_individual", row.target]

    for x in tqdm(predictions):
        if len(predictions[x]) < 5:
            remaining = [y for y in sample_list if y not in predictions]
            predictions[x] = predictions[x] + remaining
            predictions[x] = predictions[x][:5]

    return predictions


# TODO: add types
# mAP
def map_per_image(label, predictions):
    """Computes the precision score of one image.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            A list of predicted elements (order does matter, 5 predictions allowed per image)

    Returns
    -------
    score : double
    """
    try:
        return 1 / (predictions[:5].index(label) + 1)
    except ValueError:
        return 0.0


def create_predictions_df(test_df: pd.DataFrame, best_th: float) -> pd.DataFrame:
    predictions = get_predictions(test_df, best_th)

    predictions = pd.Series(predictions).reset_index()
    predictions.columns = ["image", "predictions"]
    predictions["predictions"] = predictions["predictions"].apply(lambda x: " ".join(x))

    return predictions

In [24]:
""" 여기 함수를 루프로 감싸버리자, 그리고 임베딩 추출 뒷 부분부터는 그냥 함수 밖으로 빼서 한번에 퉁치기 """
""" 
Args:
    checkpoint_path: Fine-Tuned Model by Deit Cropped & YOLO V5 Cropped
    train_csv_encoded_folded: Whole Dataset (About 90000+) which is Deit Cropped & YOLO V5 Cropped
    test_csv: Each Dataset's Test DataFrame
"""
def weighted_infer(
    checkpoint_path: str,
    train_csv_encoded_folded: str = str(TRAIN_CSV_ENCODED_FOLDED_PATH),
    test_csv: str = str(TEST_CSV_PATH),
    second_test_csv: str = str(second_TEST_CSV_PATH),
    val_fold: float = 0.0,
    image_size: int = 256,
    batch_size: int = 32,
    num_workers: int = 2,
    k: int = 50,
):
    new_embeddings = []
    ids_without_backfin = np.load(IDS_WITHOUT_BACKFIN_PATH, allow_pickle=True) # 얘가 지금 임베딩 비는 애들
    module = load_eval_module(checkpoint_path, torch.device("cuda"))
    #module2 = load_eval_module(checkpoint_path2, torch.device("cuda"))
    train_dl, val_dl, test_dl, second_test_dl = weighted_load_dataloaders(
        train_csv_encoded_folded=train_csv_encoded_folded,
        test_csv=test_csv,
        second_test_csv=second_test_csv,
        val_fold=val_fold,
        image_size=image_size,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    encoder = load_encoder()
    test_image_names, test_embeddings, test_targets = get_embeddings(module, test_dl, encoder, stage="test")
    print(test_embeddings, test_embeddings.shape)

    second_test_image_names, second_test_embeddings, second_test_targets = get_embeddings(module, second_test_dl, encoder, stage="test")
    print(second_test_embeddings, test_embeddings.shape)

    for idx in range(len(test_image_names)):
        if test_image_names[idx] in ids_without_backfin:
            new_embeddings.append(second_test_embeddings[idx])
        else:
            new_embeddings.append(0.5 * np.add(test_embeddings[idx], second_test_embeddings[idx]))
    
    test_embeddings = np.array(new_embeddings)
    
    del second_test_image_names, second_test_targets, second_test_embeddings
    gc.collect()
    torch.cuda.empty_cache()
        
    D, I = create_and_search_index(module.hparams.embedding_size, train_embeddings, val_embeddings, k)  # noqa: E741
    print("Created index with train_embeddings")

    val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
    print(f"val_targets_df=\n{val_targets_df.head()}")

    val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
    print(f"val_df=\n{val_df.head()}")

    best_th, best_cv = get_best_threshold(val_targets_df, val_df)
    print(f"val_targets_df=\n{val_targets_df.describe()}")

    train_embeddings = np.concatenate([train_embeddings, val_embeddings]) # train dataset's embedding
    train_targets = np.concatenate([train_targets, val_targets]) # train dataset's embedding
    print("Updated train_embeddings and train_targets with val data")

    D, I = create_and_search_index(module.hparams.embedding_size, train_embeddings, test_embeddings, k)  # 여기 test_embeddings를 1+2로 하면 되겠네
    print("Created index with train_embeddings")

    test_df = create_distances_df(test_image_names, train_targets, D, I, "test")
    print(f"test_df=\n{test_df.head()}")
    predictions = create_predictions_df(test_df, best_th)
    print(f"predictions.head()={predictions.head()}")
    
    # Fix missing predictions
    public_predictions = pd.read_csv(PUBLIC_SUBMISSION_CSV_PATH)
    # ids_without_backfin = np.load(IDS_WITHOUT_BACKFIN_PATH, allow_pickle=True) # 얘가 지금 임베딩 비는 애들

    ids2 = public_predictions["image"][~public_predictions["image"].isin(predictions["image"])]
    predictions = pd.concat(
        [
            predictions[~(predictions["image"].isin(ids_without_backfin))],
            public_predictions[public_predictions["image"].isin(ids_without_backfin)],
            public_predictions[public_predictions["image"].isin(ids2)],
        ]
    )
    predictions = predictions.drop_duplicates()

    predictions.to_csv(SUBMISSION_CSV_PATH, index=False)

In [25]:
image_size = 384
batch_size = 32
weighted_infer(
    checkpoint_path=f"{CFG.resume_dir}",
    train_csv_encoded_folded = str(TRAIN_CSV_ENCODED_FOLDED_PATH),
    test_csv = str(TEST_CSV_PATH),
    second_test_csv = str(second_TEST_CSV_PATH),
    image_size=image_size, batch_size=batch_size,
    k=100
)

Creating test embeddings:   0%|          | 0/874 [00:00<?, ?it/s]

[[ 0.07979687  0.06576561 -0.02406937 ...  0.03794983 -0.02177413
  -0.01135578]
 [-0.02665154  0.053927    0.03176337 ... -0.0439869  -0.01988871
  -0.04037159]
 [-0.05702118  0.03273489  0.0507337  ...  0.03958606 -0.04100629
   0.10138851]
 ...
 [-0.06211325  0.08108444  0.02756541 ...  0.02824374  0.04517635
  -0.11374559]
 [-0.00908647 -0.01007448 -0.02222575 ...  0.032947    0.02258123
   0.0343171 ]
 [-0.01046135 -0.00119797 -0.00544814 ... -0.04452109 -0.04238594
   0.01275638]] (27942, 512)


Creating test embeddings:   0%|          | 0/874 [00:00<?, ?it/s]

[[ 0.07979687  0.06576561 -0.02406937 ...  0.03794983 -0.02177413
  -0.01135578]
 [-0.02665154  0.053927    0.03176337 ... -0.0439869  -0.01988871
  -0.04037159]
 [-0.05702118  0.03273489  0.0507337  ...  0.03958606 -0.04100629
   0.10138851]
 ...
 [-0.06211325  0.08108444  0.02756541 ...  0.02824374  0.04517635
  -0.11374559]
 [-0.00908647 -0.01007448 -0.02222575 ...  0.032947    0.02258123
   0.0343171 ]
 [-0.01046135 -0.00119797 -0.00544814 ... -0.04452109 -0.04238594
   0.01275638]] (27942, 512)
(27942, 512)
['000110707af0ba.jpg' '0006287ec424cb.jpg' '000809ecb2ccad.jpg' ...
 'fff96371332c16.jpg' 'fffc1c4d3eabc7.jpg' 'fffc50be10c175.jpg'] (27942,)
[[ 0.07979687  0.06576561 -0.02406937 ...  0.03794983 -0.02177413
  -0.01135578]
 [-0.02665154  0.053927    0.03176337 ... -0.0439869  -0.01988871
  -0.04037159]
 [-0.05702118  0.03273489  0.0507337  ...  0.03958606 -0.04100629
   0.10138851]
 ...
 [-0.06211325  0.08108444  0.02756541 ...  0.02824374  0.04517635
  -0.11374559]
 [-0.009086

UnboundLocalError: local variable 'train_embeddings' referenced before assignment