Public LB: 
- 0.114 (12621.2s - GPU)(epoch=1(/folds),ntree=10)(February 2, 2022)  
- 0.121 (35601.9s - GPU)(epoch=5(/folds),ntree=100)(February 3, 2022)

I'm using **EfficientNet_b4** & **ContrastiveLoss** & **CrossBatchMemory(2000)** & **αQE(α=1,K=1)**.  

In this model, no distinction is made between different types of animals. 

**Increasing the value of "epoch" or increasing the value of "ntree" in Annoy will improve the performance.**  

I'm a beginner, so I'm sharing my notebook for study.  
The code is messy, but if you run it from top to bottom, you will get submission.csv.  
If there are any mistakes, I would appreciate it if you could point them out.  

I referred to [this notebook](https://www.kaggle.com/moeinshariatnia/contrastive-loss-pretraining-in-depth-explanation).    
I recommend this notebook for its detailed description of metric learning.

# Settings

In [None]:
# -p: do not fail when the folder already exists, so the cell can be re-run
# (the best checkpoint of every fold is saved into this directory).
mkdir -p /kaggle/working/weights

In [None]:
class CFG:
    """Notebook-wide hyper-parameters, read everywhere as plain class attributes."""

    # --- reproducibility ---
    SEED = 0

    # --- input pipeline ---
    size = 224            # images are resized to size x size
    batch_size = 16
    num_workers = 2

    # --- backbone ---
    model_name = "efficientnet_b4"
    pretrained = True
    efficientnet_feature = 1792   # embedding width used for the memory bank and Annoy
    dropout = None                # None -> no dropout layer
    linear = None                 # None -> no projection head

    # --- loss ---
    margin = 1                    # passed as neg_margin of the contrastive loss

    # --- optimisation ---
    epochs = 5
    learning_rate = 1e-3
    scheduler = "ReduceLROnPlateau"
    step = "epoch"
    factor = 0.5
    patience = 2


In [None]:
import numpy as np
import pandas as pd
import os
import random

import torch
import torch.nn as nn
import torch.optim as optim


import albumentations
!pip install timm
import timm

from tqdm.autonotebook import tqdm

!pip install pytorch-metric-learning
from pytorch_metric_learning import losses

import gc
from albumentations.pytorch.transforms import ToTensorV2
from torchvision import transforms
from torch.utils.data import Dataset
from sklearn.model_selection import KFold
import cv2

from annoy import AnnoyIndex

In [None]:
def fix_seed(seed):
    """Seed Python, NumPy and PyTorch (CPU and all GPUs) with the same value.

    Args:
        seed: integer seed shared by every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and keep its auto-tuner off: with
    # benchmarking enabled the selected algorithm may differ between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
   
# Seed every random number generator once, using the seed from the config.
fix_seed(CFG.SEED)

In [None]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# data

In [None]:
# Training metadata; the cells below read its "image" and "individual_id" columns.
df = pd.read_csv("/kaggle/input/happy-whale-and-dolphin/train.csv")

In [None]:
# Preview the first rows of the raw table.
df.head()

In [None]:
# Append two columns: the full path of every training image and the row number.
df = df.assign(
    path="/kaggle/input/happy-whale-and-dolphin/train_images/" + df["image"].astype(str),
    index=df.index,
)

In [None]:
# Preview again to check the added "path" and "index" columns.
df.head()

In [None]:
# Lookup table: one row per distinct individual_id, numbered 0..n-1 in order of
# first appearance; "idx" is the integer class label fed to the loss.
df_id_idx = pd.DataFrame(
    {"individual_id": df["individual_id"].unique()}
).assign(idx=lambda table: range(len(table)))

# Augmentation

In [None]:
# Training-time augmentation: resize, random flip / small rotation / brightening,
# then normalisation and conversion to a tensor.
train_transforms = albumentations.Compose([
    albumentations.Resize(CFG.size, CFG.size),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.Rotate(limit=10, p=0.8),
    # RandomBrightness is deprecated; RandomBrightnessContrast with
    # contrast_limit=0 is the documented replacement for brightness-only jitter.
    albumentations.RandomBrightnessContrast(brightness_limit=(0.09, 0.6),
                                            contrast_limit=0,
                                            p=0.5),
    albumentations.Normalize(),
    ToTensorV2(p=1.0),
])


def train_albumentations_transform(image, transform=train_transforms):
    """Apply an albumentations pipeline to one image.

    Args:
        image: array-like image; it is copied into a NumPy array first.
        transform: albumentations pipeline; a falsy value disables augmentation.

    Returns:
        The dict produced by the pipeline (the image is under the "image" key).
        Without a pipeline the unmodified array is returned under the same key,
        so callers can always index the result with ["image"].
    """
    image_np = np.array(image)
    if not transform:
        # Previously this path raised UnboundLocalError ("augmented" was never set).
        return {"image": image_np}
    return transform(image=image_np)


# torchvision wrapper so the dataset can call the pipeline like any other transform.
train_data_transform = transforms.Compose([
    transforms.Lambda(train_albumentations_transform),
])

In [None]:
# Validation / inference preprocessing: deterministic resize, normalisation and
# tensor conversion only (no random augmentation). Resize is written the same way
# as in the training pipeline; the old always_apply flag is deprecated.
valid_transforms = albumentations.Compose([
    albumentations.Resize(CFG.size, CFG.size),
    albumentations.Normalize(),
    ToTensorV2(p=1.0),
])


def valid_albumentations_transform(image, transform=valid_transforms):
    """Apply an albumentations pipeline to one image.

    Args:
        image: array-like image; it is copied into a NumPy array first.
        transform: albumentations pipeline; a falsy value disables preprocessing.

    Returns:
        The dict produced by the pipeline (the image is under the "image" key).
        Without a pipeline the unmodified array is returned under the same key,
        so callers can always index the result with ["image"].
    """
    image_np = np.array(image)
    if not transform:
        # Previously this path raised UnboundLocalError ("augmented" was never set).
        return {"image": image_np}
    return transform(image=image_np)


# torchvision wrapper so the datasets can call the pipeline like any other transform.
valid_data_transform = transforms.Compose([
    transforms.Lambda(valid_albumentations_transform),
])


# Functions

In [None]:
class ContrasiveDataset(Dataset):
    """Pair-sampling dataset with one item per individual.

    Every item holds two images: with probability ~0.5 both show the indexed
    individual ("same"), otherwise the second image shows a different,
    randomly drawn individual.

    Args:
        df: frame with "individual_id" and "path" columns.
        df_id_idx: frame mapping "individual_id" to an integer label "idx".
        transforms: callable returning a dict with an "image" entry, or None.
    """

    def __init__(self, df, df_id_idx, transforms):
        self.transforms = transforms
        self.individual_id = list(df["individual_id"].unique())
        # Group once instead of filtering the whole frame for every individual.
        self.individual_id_to_imgs = {
            individual_id: paths.values
            for individual_id, paths in df.groupby("individual_id", sort=False)["path"]
        }
        self.df = df
        self.df_id_idx = df_id_idx

        # Dict lookup replaces a DataFrame scan per item. Keep the first
        # row per id, which is what the former `.values[0]` lookup returned.
        first_rows = df_id_idx.drop_duplicates("individual_id")
        self._label_of = dict(zip(first_rows["individual_id"], first_rows["idx"]))
        # Array form of the id list so sampling does not rebuild it on every call.
        self._id_pool = np.asarray(self.individual_id)

    def __getitem__(self, idx):
        anchor_id = self.individual_id[idx]

        if random.random() > 0.5:
            same = True
            other_id = anchor_id
            same_id_images = self.individual_id_to_imgs[anchor_id]
            if len(same_id_images) == 1:
                # Only one photo of this individual: pair it with itself.
                img1 = img2 = same_id_images[0]
            else:
                img1, img2 = np.random.choice(same_id_images, size=2, replace=False)
        else:
            same = False
            if len(self.individual_id) < 2:
                # The rejection loop below could never terminate.
                raise ValueError(
                    "a negative pair needs at least two distinct individuals")
            img1 = np.random.choice(self.individual_id_to_imgs[anchor_id], size=1)[0]

            # Rejection-sample an individual other than the anchor.
            other_id = anchor_id
            while other_id == anchor_id:
                other_id = np.random.choice(self._id_pool, size=1)[0]

            img2 = np.random.choice(self.individual_id_to_imgs[other_id], size=1)[0]

        img1_tensor, img2_tensor = self.process_imgs(img1, img2)

        return {"images1": img1_tensor,
                "images2": img2_tensor,
                "same": torch.tensor(same).float(),
                "label1": self._label_of[anchor_id],
                "label2": self._label_of[other_id],
                "image1_name": img1,
                "image2_name": img2}

    def read_transform_one(self, img):
        """Read the image at path `img` as RGB and apply the transforms, if any."""
        pixels = cv2.imread(img)
        if pixels is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(f"cv2.imread could not read image: {img}")
        pixels = pixels[..., ::-1]  # reverse the channel axis (BGR -> RGB)

        if self.transforms is not None:
            pixels = self.transforms(pixels)["image"]
        return pixels

    def process_imgs(self, img1, img2):
        """Load and transform both images of a pair."""
        return self.read_transform_one(img1), self.read_transform_one(img2)

    def __len__(self):
        return len(self.individual_id)


In [None]:
class Model(nn.Module):
    """timm backbone with an optional linear + ReLU head, embedding image pairs.

    Args:
        model_name: timm architecture name.
        pretrained: forwarded to timm.create_model.
        dropout: dropout probability applied last; None disables dropout.
        linear: width of the optional projection layer; None or <= 0 disables
            it (the ReLU is only added together with the projection).
    """

    def __init__(self,
                 model_name=CFG.model_name,
                 pretrained=True,
                 dropout=0.2,
                 linear=128):
        super().__init__()
        # num_classes=0: request the backbone without a classification layer.
        model = timm.create_model(model_name,
                                  pretrained=pretrained,
                                  num_classes=0)
        self.num_features = model.num_features
        self.linear = None
        if linear is not None and linear > 0:
            self.linear = nn.Linear(self.num_features, linear)
        has_head = self.linear is not None
        # Four fixed slots (Identity when unused) so the parameter names in the
        # state dict do not depend on which options are enabled.
        self.backbone = nn.Sequential(
            model,
            self.linear if has_head else nn.Identity(),
            nn.ReLU() if has_head else nn.Identity(),
            # Use the requested rate; it used to be hard-coded to 0.2.
            nn.Dropout(dropout) if dropout is not None else nn.Identity(),
        )

    def forward(self, batch):
        """Embed batch["images1"] and batch["images2"]; returns both embeddings."""
        images_1 = self.backbone(batch["images1"].to(device))
        images_2 = self.backbone(batch["images2"].to(device))

        return images_1, images_2

In [None]:
class AvgMeter:
    """Running weighted average of a scalar such as a loss."""

    def __init__(self, name="Metric"):
        self.name = name
        self.reset()

    def reset(self):
        """Forget everything accumulated so far."""
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, count=1):
        """Add `val`, weighted by `count` samples, and refresh the average."""
        self.count += count
        self.sum += val * count
        self.avg = self.sum / self.count

    def __repr__(self):
        return f"{self.name}:{self.avg:.4f}"
    
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group["lr"]

In [None]:
def one_epoch(model,
              criterion,
              loader,
              optimizer=None,
              lr_scheduler=None,
              mode="train",
              step="batch"):
    """Run one pass over `loader` and return the running loss.

    Args:
        model: called as model(batch); must return two embedding tensors.
        criterion: called as criterion(embeddings, labels).
        loader: iterable of batches containing "label1" and "label2".
        optimizer: required when mode == "train".
        lr_scheduler: stepped after every batch when step == "batch".
        mode: "train" updates the weights; any other value only evaluates.
        step: "batch" to step the scheduler per batch; otherwise it is left
            to the caller.

    Returns:
        AvgMeter holding the sample-weighted mean loss of the pass.

    Raises:
        ValueError: if mode == "train" and no optimizer is given.
    """
    is_train = mode == "train"
    if is_train and optimizer is None:
        raise ValueError('one_epoch(mode="train") needs an optimizer')

    loss_meter = AvgMeter()
    tqdm_object = tqdm(loader, total=len(loader))
    for batch in tqdm_object:
        images1_f, images2_f = model(batch)

        # Stack both halves of every pair so the loss sees 2 * batch_size samples.
        embeddings = torch.cat((images1_f, images2_f))
        labels = torch.cat((batch["label1"], batch["label2"])).to(embeddings.device)
        loss = criterion(embeddings, labels)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step == "batch" and lr_scheduler is not None:
                lr_scheduler.step()

        loss_meter.update(loss.item(), len(labels))

        if is_train:
            tqdm_object.set_postfix(train_loss=loss_meter.avg, lr=get_lr(optimizer))
        else:
            tqdm_object.set_postfix(validloss=loss_meter.avg)

    return loss_meter

In [None]:
def train_eval(epochs, model, train_loader, valid_loader,
               criterion, optimizer, lr_scheduler=None, fold=None, li=None):
    """Train for `epochs` epochs and checkpoint the best validation loss.

    Args:
        epochs: number of train + validation rounds.
        model, criterion, optimizer: forwarded to one_epoch.
        train_loader, valid_loader: loaders for the two phases.
        lr_scheduler: optional; a ReduceLROnPlateau instance is stepped once
            per epoch here.
        fold: fold number used in the checkpoint file name (None -> "best.pt").
        li: optional list of lists; li[fold] receives each epoch's validation loss.

    Returns:
        The best (lowest) mean validation loss seen.
    """
    best_loss = float("inf")

    for epoch in range(epochs):
        print("*"*30)
        print("Epoch{}".format(epoch+1))

        model.train()
        train_loss = one_epoch(model,
                               criterion,
                               train_loader,
                               optimizer=optimizer,
                               lr_scheduler=lr_scheduler,
                               mode="train",
                               step=CFG.step)

        model.eval()
        with torch.no_grad():
            valid_loss = one_epoch(model,
                                   criterion,
                                   valid_loader,
                                   optimizer=None,
                                   mode="valid")

        # Both arguments default to None; only record history when it was asked for.
        if li is not None and fold is not None:
            li[fold].append(float(valid_loss.avg))

        if valid_loss.avg < best_loss:
            best_loss = valid_loss.avg

            if fold is not None:
                weight_name = "/kaggle/working/weights/best" + str(fold) + ".pt"
            else:
                weight_name = "/kaggle/working/weights/best.pt"

            torch.save(model.state_dict(), weight_name)
            print("Saved best model!")

        if isinstance(lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            # NOTE(review): the plateau scheduler watches the *training* loss while
            # checkpoints are chosen on the validation loss - confirm this is intended.
            lr_scheduler.step(train_loss.avg)

    return best_loss

# Train

In [None]:
n_splits = 5

# losses_list[fold] collects the validation loss of every epoch of that fold.
losses_list = [[] for _ in range(n_splits)]

kf = KFold(n_splits=n_splits, shuffle=True, random_state=CFG.SEED)
ids = df["individual_id"].unique()
for fold, (train_pos, valid_pos) in enumerate(kf.split(ids)):
    # kf.split returns positions into `ids`, not row numbers of `df`. Matching
    # them against df["index"] only ever selected the first len(ids) rows and
    # did not split by individual; translate the positions back to ids first.
    train_ids = ids[train_pos]
    valid_ids = ids[valid_pos]
    train_df = df[df["individual_id"].isin(train_ids)].reset_index(drop=True)
    valid_df = df[df["individual_id"].isin(valid_ids)].reset_index(drop=True)
    train_dataset = ContrasiveDataset(train_df, df_id_idx, train_data_transform)
    valid_dataset = ContrasiveDataset(valid_df, df_id_idx, valid_data_transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=CFG.batch_size,
                                               num_workers=CFG.num_workers,
                                               pin_memory=True,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=CFG.batch_size,
                                               num_workers=CFG.num_workers,
                                               pin_memory=True,
                                               shuffle=False)

    # A fresh model and optimizer for every fold.
    model = Model(CFG.model_name, CFG.pretrained, CFG.dropout, CFG.linear)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG.learning_rate)

    # Default to "no scheduler" so another CFG.scheduler value cannot leave the
    # name undefined (or pointing at the previous fold's scheduler).
    lr_scheduler = None
    if CFG.scheduler == "ReduceLROnPlateau":
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                                  mode="min",
                                                                  factor=CFG.factor,
                                                                  patience=CFG.patience)
    CFG.step = "epoch"

    # Contrastive loss wrapped in a cross-batch memory of 2000 past embeddings.
    criterion = losses.ContrastiveLoss(pos_margin=0, neg_margin=CFG.margin)
    criterion = losses.CrossBatchMemory(criterion, CFG.efficientnet_feature, memory_size=2000)

    train_eval(CFG.epochs,
               model,
               train_loader,
               valid_loader,
               criterion,
               optimizer,
               lr_scheduler,
               fold,
               losses_list)

    print(losses_list)

    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Drop the training-only objects before the embedding stage.
# A single `del a, b, c` stops at the first missing name, so everything listed
# after it silently stayed alive; remove the names one by one instead.
_stale_names = (
    "model", "Model", "train_loader", "valid_loader", "train_dataset",
    "valid_dataset", "optimizer", "criterion", "train_eval", "lr_scheduler",
    "labels", "one_epoch", "train_df", "valid_df", "train_labels",
    "valid_labels", "AvgMeter", "get_lr", "ContrasiveDataset",
    "train_transforms", "train_albumentations_transform", "train_data_transform",
)
_missing_names = [name for name in _stale_names if name not in globals()]
for _stale_name in _stale_names:
    globals().pop(_stale_name, None)
if _missing_names:
    print("some variable do not exist")
del _stale_names, _missing_names, _stale_name

In [None]:
# Run the garbage collector, then let PyTorch release its cached GPU memory.
gc.collect()
torch.cuda.empty_cache()

# Making Embeddings

In [None]:
# Test metadata: take the image names from the sample submission, discard its
# placeholder predictions and add the full path of every test image.
df_2 = pd.read_csv(
    "/kaggle/input/happy-whale-and-dolphin/sample_submission.csv"
).drop(columns="predictions")
df_2["path"] = (
    "/kaggle/input/happy-whale-and-dolphin/test_images/"
    + df_2["image"].astype(str)
)

In [None]:
# Row-wise plain-list copies of both tables; later cells address the fields of
# a row by position.
df_list = df.to_numpy().tolist()
df_2_list = df_2.to_numpy().tolist()

In [None]:
class TrainDataset(Dataset):
    """Serves the training images one by one for embedding extraction.

    Args:
        List: sequence of rows; each row holds the image path at `path_index`.
        transforms: callable returning a dict with an "image" entry, or None.
        path_index: position of the image path inside a row (default 3).
    """

    def __init__(self, List, transforms=None, path_index=3):
        self.transforms = transforms
        self.List = List
        self.path_index = path_index

    def __len__(self):
        return len(self.List)

    def __getitem__(self, idx):
        path = self.List[idx][self.path_index]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        img = img[..., ::-1]  # reverse the channel axis (BGR -> RGB)
        if self.transforms is not None:
            img = self.transforms(img)["image"]
        return {"images1": img}

In [None]:
# Loader over all training images, in table order (no shuffling) and with the
# deterministic validation preprocessing.
train_dataset = TrainDataset(df_list, valid_data_transform)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=os.cpu_count(),
    pin_memory=True,
)

In [None]:
class Model(nn.Module):
    """Inference variant of the embedding model: embeds batch["images1"] only.

    The layers are built exactly as in the training model so that the saved
    state dicts load into it.

    Args:
        model_name: timm architecture name.
        pretrained: forwarded to timm.create_model.
        dropout: dropout probability applied last; None disables dropout.
        linear: width of the optional projection layer; None or <= 0 disables
            it (the ReLU is only added together with the projection).
    """

    def __init__(self,
                 model_name=CFG.model_name,
                 pretrained=True,
                 dropout=0.2,
                 linear=128):
        super().__init__()
        # num_classes=0: request the backbone without a classification layer.
        model = timm.create_model(model_name,
                                  pretrained=pretrained,
                                  num_classes=0)
        self.num_features = model.num_features
        self.linear = None
        if linear is not None and linear > 0:
            self.linear = nn.Linear(self.num_features, linear)
        has_head = self.linear is not None
        # Four fixed slots (Identity when unused) so the parameter names in the
        # state dict do not depend on which options are enabled.
        self.backbone = nn.Sequential(
            model,
            self.linear if has_head else nn.Identity(),
            nn.ReLU() if has_head else nn.Identity(),
            # Use the requested rate; it used to be hard-coded to 0.2.
            nn.Dropout(dropout) if dropout is not None else nn.Identity(),
        )

    def forward(self, batch):
        """Return the embeddings of batch["images1"]."""
        images = self.backbone(batch["images1"].to(device))
        return images

In [None]:
# Rebuild one model per training fold and load its best checkpoint.
models = []
for i in range(5):  # must equal the number of folds that were trained
    model_path = "/kaggle/working/weights/best"+str(i)+".pt"
    # pretrained=False: every weight is overwritten by the checkpoint, so
    # downloading the ImageNet weights first would be wasted work (and fails
    # when the notebook runs without internet access).
    model = Model(CFG.model_name, False, CFG.dropout, CFG.linear)
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    models.append(model)

In [None]:
def calc_norm(array):
    """Scale `array` to unit Euclidean length and return it as a plain list.

    An all-zero input has no direction; it is returned unchanged instead of
    being divided by zero (which would fill the result with NaN).
    """
    norm = np.linalg.norm(array)
    if norm == 0:
        return np.asarray(array).tolist()
    return (array / norm).tolist()

In [None]:
# Annoy index over the training images: item k is the fold-averaged,
# unit-normalised embedding of row k of the training table.
n_trees = 100
index = AnnoyIndex(CFG.efficientnet_feature, metric="euclidean")
for fold_model in models:
    fold_model.eval()

label_index = 0
n_models = float(len(models))

for batch in train_loader:
    with torch.inference_mode():
        # Sum the outputs of all fold models, then divide to get their mean.
        outputs = None
        for fold_model in models:
            fold_output = fold_model(batch).cpu()
            if outputs is None:
                outputs = fold_output
            else:
                outputs += fold_output
        outputs = outputs / n_models

    for feature in outputs:
        feature = calc_norm(feature.numpy().copy())
        index.add_item(label_index, feature)
        label_index += 1

index.build(n_trees, n_jobs=-1)
index.save("/kaggle/working/feature.ann")

In [None]:
class TestDataset(Dataset):
    """Serves the test images one by one for embedding extraction.

    Args:
        List: sequence of rows; each row holds the image path at `path_index`.
        transforms: callable returning a dict with an "image" entry, or None.
        path_index: position of the image path inside a row (default 1).
    """

    def __init__(self, List, transforms=None, path_index=1):
        self.transforms = transforms
        self.List = List
        self.path_index = path_index

    def __len__(self):
        return len(self.List)

    def __getitem__(self, idx):
        path = self.List[idx][self.path_index]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        img = img[..., ::-1]  # reverse the channel axis (BGR -> RGB)
        if self.transforms is not None:
            img = self.transforms(img)["image"]
        return {"images1": img}

In [None]:
# Loader over all test images, in table order (no shuffling) and with the
# deterministic validation preprocessing.
test_dataset = TestDataset(df_2_list, valid_data_transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=os.cpu_count(),
    pin_memory=True,
)

In [None]:
# Annoy index over the test images: item k is the fold-averaged,
# unit-normalised embedding of row k of the test table.
n_trees = 3
index_test = AnnoyIndex(CFG.efficientnet_feature, metric="euclidean")
for fold_model in models:
    fold_model.eval()

label_index = 0
n_models = float(len(models))

for batch in test_loader:
    with torch.inference_mode():
        # Sum the outputs of all fold models, then divide to get their mean.
        outputs = None
        for fold_model in models:
            fold_output = fold_model(batch).cpu()
            if outputs is None:
                outputs = fold_output
            else:
                outputs += fold_output
        outputs = outputs / n_models

    for feature in outputs:
        feature = calc_norm(feature.numpy().copy())
        index_test.add_item(label_index, feature)
        label_index += 1

index_test.build(n_trees, n_jobs=-1)

In [None]:
def query_expansion(query_vec, similar_vec, alpha=1):
    """Blend a query with its nearest neighbour (alpha query expansion, K=1).

    The neighbour is weighted by its dot-product similarity to the query,
    raised to the power `alpha`; the blend is returned with unit length.

    Args:
        query_vec: query embedding (sequence of floats).
        similar_vec: embedding of the retrieved neighbour.
        alpha: exponent applied to the similarity; 1 reproduces the plain
            similarity-weighted average.

    Returns:
        The expanded query as a list of floats.
    """
    query_vec = np.array(query_vec)
    similar_vec = np.array(similar_vec)
    similarity = float(np.dot(query_vec, similar_vec))

    # Sign-preserving power: identical to `similarity` for alpha == 1 and never
    # complex-valued for a negative similarity with a fractional alpha.
    weight = float(np.sign(similarity) * abs(similarity) ** alpha)

    vec = 1*query_vec + weight*similar_vec
    denominator = 1 + weight
    if denominator != 0:
        # The rescale is cancelled by the normalisation below; it is skipped only
        # when it would divide by zero (weight == -1).
        vec = vec / denominator
    vec = calc_norm(vec)
    return vec

In [None]:
# ">" truncates the file, so re-running the cell always starts from a single
# header line instead of appending another one.
!echo "image,predictions" > submission.csv

In [None]:
# For every test image: look up its nearest training image, expand the query
# with it, search again and write the closest known individuals followed by
# "new_individual".
n_known = 4        # known individuals per row; the fifth slot is "new_individual"
n_candidates = 20  # neighbours fetched so that repeated ids can be skipped

# Mode "w" plus an explicit header makes the cell safe to re-run: the file is
# rebuilt from scratch instead of growing by another full set of rows.
with open("submission.csv", "w") as f:
    f.write("image,predictions\n")
    for i, image_name in enumerate(df_2["image"]):
        test_query_vec = index_test.get_item_vector(i)
        nearest = index.get_nns_by_vector(test_query_vec, 1)
        tmp_index_vec = index.get_item_vector(nearest[0])

        test_query_vec = query_expansion(test_query_vec, tmp_index_vec)

        result = index.get_nns_by_vector(test_query_vec, n_candidates)
        predictions = []
        for neighbour in result:
            # Field 2 of a training row is written out as the predicted id
            # (presumably the individual_id column - verify against train.csv).
            individual = str(df_list[neighbour][2])
            # Several neighbours can be the same individual; a repeated id
            # would only waste one of the prediction slots.
            if individual not in predictions:
                predictions.append(individual)
            if len(predictions) == n_known:
                break
        predictions.append("new_individual")
        f.write("{},{}\n".format(image_name, " ".join(predictions)))