In [None]:
!pip install timm faiss-gpu

In [None]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import gc
from tqdm import tqdm
from fastai.vision.all import *
from fastai.vision.learner import _update_first_layer
import faiss
from timm import create_model
from timm.data.transforms_factory import create_transform
from sklearn.preprocessing import normalize

In [None]:
# Root folders: where the inputs are read from and where results are written.
INPUT_DIR = Path("..").joinpath("input")
OUTPUT_DIR = Path("/").joinpath("kaggle", "working")

# Dataset folder holding the images and the CSV files used for training/inference.
DATA_ROOT_DIR = INPUT_DIR.joinpath("convert-backfintfrecords", "happy-whale-and-dolphin-backfin")
TRAIN_DIR = DATA_ROOT_DIR.joinpath("train_images")
TEST_DIR = DATA_ROOT_DIR.joinpath("test_images")
TRAIN_CSV_PATH = DATA_ROOT_DIR.joinpath("train.csv")
SAMPLE_SUBMISSION_CSV_PATH = DATA_ROOT_DIR.joinpath("sample_submission.csv")

# Auxiliary inputs read by `infer` when it patches missing predictions.
PUBLIC_SUBMISSION_CSV_PATH = INPUT_DIR.joinpath("0-720-eff-b5-640-rotate", "submission.csv")
IDS_WITHOUT_BACKFIN_PATH = INPUT_DIR.joinpath("ids-without-backfin", "ids_without_backfin.npy")

# Destination of the final submission file.
SUBMISSION_CSV_PATH = OUTPUT_DIR.joinpath("submission.csv")

# Loading dataset

In [None]:
# The label vocabulary is taken from the `individual_id` column of this CSV.
train_csv = pd.read_csv('../input/happy-whale-and-dolphin/train.csv')
classes = [*train_csv["individual_id"].unique()]

# Training rows and the submission template (without its `predictions` column).
train_df = pd.read_csv(TRAIN_CSV_PATH)
test_df = pd.read_csv(SAMPLE_SUBMISSION_CSV_PATH).drop(columns=["predictions"])

# Every test row gets the same placeholder id: the first unique training id.
test_df["individual_id"] = train_df["individual_id"].unique()[0]

# Full image paths, built from the `image` column.
train_df["image_path"] = train_df["image"].map(lambda name: TRAIN_DIR / str(name))
test_df["image_path"] = test_df["image"].map(lambda name: TEST_DIR / str(name))

# Splitting data

In [None]:
N_SPLITS = 5

# A temporary integer `label` column is created here and removed again below;
# the stratification itself is driven by `individual_id`.
encoder = LabelEncoder()
train_df["label"] = encoder.fit_transform(train_df["individual_id"])
skf = StratifiedKFold(n_splits=N_SPLITS)

# Each row is tagged with the number of the fold in which it is a validation row.
# NOTE(review): `val_` holds positional indices but is used with `.loc`; this
# assumes `train_df` still has its default RangeIndex — confirm.
fold_splits = skf.split(X=train_df, y=train_df["individual_id"])
for fold, (_, val_) in enumerate(fold_splits):
    train_df.loc[val_, "kfold"] = fold

del train_df["label"]
train_df.head()

In [None]:
# Number of rows assigned to fold 0.
in_fold_0 = train_df.kfold == 0
len(train_df[in_fold_0])

In [None]:
# Seed every random number generator used in this notebook.
seed = 999
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

# `torch.use_deterministic_algorithms` is a function and must be *called*.
# Assigning `True` to it (as this cell did before) only overwrites the attribute
# on the `torch` module and switches nothing on.
# `warn_only=True` makes operations that lack a deterministic implementation
# emit a warning instead of raising, so training can still run.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except (TypeError, AttributeError):
    # Raised by PyTorch releases that predate `warn_only` (or the function itself).
    # Strict mode is deliberately not enabled in that case because it would turn
    # every non-deterministic op into a hard error.
    print("torch.use_deterministic_algorithms(warn_only=True) is unavailable; "
          "deterministic algorithms were not enabled.")

# Creating dataloaders

In [None]:
# Fold used by the module-level `splitter` below.
Val_Fold = 0


def get_x(r):
    "Return the image path stored in row `r`."
    return r['image_path']


def get_y(r):
    "Return the individual id of row `r` wrapped in a one-element list."
    return [r['individual_id']]


def split_by_fold(df, val_fold):
    """Split `df` into training and validation indices by its `kfold` column.

    Returns `[train, valid]`, two lists of index labels: rows whose `kfold`
    differs from `val_fold`, and rows whose `kfold` equals it.
    NOTE(review): index labels are returned, which equal row positions only
    for a default RangeIndex — confirm that is what the caller passes.
    """
    train = df.index[df.kfold != val_fold].tolist()
    valid = df.index[df.kfold == val_fold].tolist()
    return [train, valid]


def splitter(df):
    "Split `df` using the module-level `Val_Fold` (kept for existing callers)."
    return split_by_fold(df, Val_Fold)


def create_dls(df=None, bs=64, Val_Fold=0, Image_size=224):
    """Build the DataLoaders for one validation fold.

    Parameters
    ----------
    df : DataFrame, optional
        Rows to load; needs the `image_path`, `individual_id` and `kfold`
        columns. Defaults to the notebook-level `train_df`, looked up at call
        time.
    bs : int
        Batch size.
    Val_Fold : int
        Fold whose rows form the validation set.
    Image_size : int
        Size passed to the batch augmentations.

    Two defects of the previous version are fixed here: `df` was ignored (the
    global `train_df` was always loaded) and `Val_Fold` was ignored (the
    splitter read the module-level `Val_Fold` instead of this argument).
    """
    # Local import so the cell does not depend on where `partial` came from.
    from functools import partial

    if df is None:
        df = train_df

    dblock = DataBlock(
        blocks=(ImageBlock, MultiCategoryBlock(vocab=classes)),
        get_x=get_x,
        get_y=get_y,
        # `partial` of a module-level function keeps the splitter picklable.
        splitter=partial(split_by_fold, val_fold=Val_Fold),
        item_tfms=[Resize(600)],
        batch_tfms=[*aug_transforms(size=Image_size, max_warp=0),
                    Normalize.from_stats(*imagenet_stats)],
    )
    return dblock.dataloaders(df, bs=bs)

In [None]:
# Build DataLoaders with the defaults of `create_dls` (bs=64, Val_Fold=0, Image_size=224).
dls = create_dls()

In [None]:
# Display a sample batch to check the inputs and labels visually.
dls.show_batch()

In [None]:
# Position in the vocabulary -> class name; `get_embeddings` uses it to decode argmax indices.
target_map = dict(enumerate(dls.vocab))

# ArcFace loss

In [None]:
# From https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/blob/master/src/modeling/metric_learning.py
# Added type annotations, device, and 16bit support
class ArcMarginLoss(Module):
    """Additive angular margin (ArcFace-style) loss on embeddings.

    The module owns a `(out_features, in_features)` weight matrix. `forward`
    takes the cosine between the L2-normalised embeddings and the L2-normalised
    weight rows, applies the margin `m` to the entries selected by the one-hot
    target, scales by `s` and returns the cross-entropy against the target's
    argmax.

    Parameters
    ----------
    in_features : int
        Size of the incoming embeddings.
    out_features : int
        Number of classes (rows of the weight matrix).
    s : float
        Scale applied to the logits before the cross-entropy.
    m : float
        Angular margin; it is passed to `math.cos` / `math.sin`, i.e. radians.
    easy_margin : bool
        Selects which of the two `torch.where` fall-backs in `forward` is used.
    ls_eps : float
        Label-smoothing strength; values `<= 0` disable smoothing.
    """

    # Lower bound for `1 - cos^2` before the square root. Without it a cosine
    # of exactly +-1 gives sqrt(0), whose gradient is infinite, and a cosine
    # that rounds slightly above 1 gives sqrt(negative) = NaN.
    _SINE_SQ_MIN = 1e-7

    def __init__(
        self,
        in_features: int,
        out_features: int,
        s: float,
        m: float,
        easy_margin: bool,
        ls_eps: float,
    ):
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        # Constants for cos(theta + m) and for the non-easy-margin fall-back.
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, logits, targ):
        """Return the scalar loss.

        `logits` are the embeddings produced by the model and `targ` is a
        one-hot target with one column per class (it is multiplied element-wise
        with the `(batch, out_features)` cosine matrix and reduced with
        `argmax(dim=1)`).
        """
        # NOTE(review): nothing visible in this notebook moves the loss module
        # to the model's device, so the weight is aligned with the embeddings
        # here; this is a no-op when the devices already match.
        weight = self.weight.to(logits.device)
        cosine = F.linear(F.normalize(logits), F.normalize(weight))
        # Enable 16 bit precision: do the margin arithmetic in float32.
        cosine = cosine.to(torch.float32)
        sine = torch.sqrt(torch.clamp(1.0 - torch.pow(cosine, 2), min=self._SINE_SQ_MIN))
        # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        if self.ls_eps > 0:
            targ = (1 - self.ls_eps) * targ + self.ls_eps / self.out_features
        # Margin-adjusted cosine where the target is set, plain cosine elsewhere.
        output = (targ * phi) + ((1.0 - targ) * cosine)
        output *= self.s
        loss = F.cross_entropy(output, torch.argmax(targ, dim=1))
        return loss

# Creating the model

In [None]:
# Disabled code: the class below is wrapped in a string literal, so it is never executed.
'''
class building_model(Module):
    def __init__(self,embedding_size:int,arch:str ='efficientnet_b0', pretrained:bool=True):
        self.model = create_model(arch, pretrained=pretrained)
        self.embedding = nn.Linear(self.model.get_classifier().in_features, embedding_size)
        self.model.reset_classifier(num_classes=0, global_pool="avg")
    def forward(self,x):
        features = self.model(x)
        embeddings = self.embedding(features)
        return embeddings
'''

In [None]:
#emb_size = 512
#model = building_model(emb_size,'efficientnet_b0',True)
#model = nn.Sequential(*list(model.children()))

In [None]:
def create_timm_body(arch:str, pretrained=True, drop_rate=0.0, cut=None, n_in=3):
    """Create a body (the layers before the cut point) from a `timm` model.

    Parameters
    ----------
    arch : str
        Architecture name passed to `create_model`.
    pretrained : bool
        Passed to `create_model` and `_update_first_layer`.
    drop_rate : float
        Passed to `create_model`.
    cut : int, callable or None
        `int`: keep `model.children()[:cut]`. Callable: return `cut(model)`.
        `None`: cut at the last child for which `has_pool_type` is true.
    n_in : int
        Number of input channels, passed to `_update_first_layer`.

    Raises
    ------
    NameError
        If `cut` is neither an integer nor callable. The previous code raised
        the undefined name `NamedError`, which surfaced as a `NameError` with
        an unrelated message; the exception type is kept and the intended
        message is now shown.
    """
    model = create_model(arch, pretrained=pretrained, drop_rate=drop_rate,
                         num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int):
        return nn.Sequential(*list(model.children())[:cut])
    if callable(cut):
        return cut(model)
    raise NameError("cut must be either integer or function")

In [None]:
emb_size = 512  # output width of the linear layer at the end of the head

# Backbone, and the number of features it outputs.
body = create_timm_body("convnext_small", pretrained=True, drop_rate=0.0)
nf = num_features_model(nn.Sequential(*body.children()))

# Head: pool to 1x1, flatten, then project to `emb_size` without a bias term.
head_layers = [
    nn.AdaptiveAvgPool2d(output_size=1),
    nn.Flatten(),
    nn.Linear(nf, emb_size, bias=False),
]
head = nn.Sequential(*head_layers)

model = nn.Sequential(body, head)

In [None]:
# Loss settings: one output per class in `dls`, scale 30, margin 0.5, no label smoothing.
arcface_config = dict(
    in_features=emb_size,
    out_features=dls.c,
    s=30.0,
    m=0.5,
    easy_margin=False,
    ls_eps=0.0,
)
loss_func = ArcMarginLoss(**arcface_config)

In [None]:
dls = create_dls(bs=32,Image_size=320)

# The loss owns a trainable weight matrix; keep it on the same device as the data.
loss_func.to(dls.device)


def arcface_split(m):
    """Parameter groups of `default_split`, with the loss parameters added to the last group.

    The Learner builds its optimizer from `splitter(model)` only, so the weight
    held by `loss_func` would otherwise never be updated. Putting it in the last
    group keeps it trainable after `learn.freeze()`.
    """
    body_params, head_params = default_split(m)
    return L(body_params, head_params + list(loss_func.parameters()))


learn = Learner(dls, model, loss_func=loss_func,
                splitter=arcface_split).to_fp16()
learn.freeze()
learn.summary()

In [None]:
# Disabled code: this metric is wrapped in a string literal, so it is never defined.
'''
def accuracy_multi(inp, targ, thresh=0.5, sigmoid=True):
     "Compute accuracy when `inp` and `targ` are the same size."
     if sigmoid: inp = inp.sigmoid()
     return ((inp>thresh)==targ.bool()).float().mean()
'''

In [None]:
#learn.lr_find()

In [None]:
# One one-cycle epoch; at this point `learn.freeze()` has been called on the Learner.
learn.fit_one_cycle(1,lr_max=3e-4,wd=1e-6)

In [None]:
# Unfreeze all parameter groups and train for nine more one-cycle epochs.
learn.unfreeze()
learn.fit_one_cycle(9,lr_max=3e-4,wd=1e-6)

In [None]:
# Export the trained Learner; `infer` is later called with the same file name.
learn.export('model.pkl')

In [None]:
# Release the Learner. Garbage is collected *before* the CUDA cache is emptied:
# `empty_cache()` can only return blocks that are already free, so tensors kept
# alive by reference cycles must be collected first.
# NOTE(review): the globals `model` and `loss_func` still reference their tensors.
del learn
gc.collect()
torch.cuda.empty_cache()

In [None]:
def load_eval_learner(learner_path,output_group,dls,device):
    """Load an exported Learner and prepare it for inference.

    Parameters
    ----------
    learner_path : path-like
        File passed to `load_learner`.
    output_group : int
        Index into `learn.model` of the sub-module to hook.
    dls :
        DataLoaders assigned to the loaded Learner.
    device :
        Device the model is moved to.

    Returns
    -------
    (learn, hook)
        The Learner and a `Hook` on `learn.model[output_group]` whose hook
        function returns the module output. The caller should call
        `hook.remove()` when it is no longer needed.
    """
    # SECURITY: `load_learner` unpickles the file, which can execute arbitrary
    # code. Only load exports you created yourself.
    learn = load_learner(learner_path)
    learn.model.to(device)
    # `get_embeddings` calls `learn.model` directly, so evaluation mode has to
    # be set explicitly instead of relying on the state stored in the export.
    learn.model.eval()
    learn.dls = dls
    hook = Hook(learn.model[output_group], lambda m,i,o: o)
    return learn, hook

def load_dataloaders(train_df,test_df,val_fold,dls):
    train_dataloader = dls.test_dl(train_df[train_df.kfold != val_fold],with_labels=True)
    valid_dataloader = dls.test_dl(train_df[train_df.kfold == val_fold],with_labels=True)
    test_dataloader  = dls.test_dl(test_df,with_labels=True)
    return train_dataloader, valid_dataloader, test_dataloader

@torch.inference_mode()
def get_embeddings(module, dataloader, device):
    """Run `module.model` over `dataloader` and collect names, embeddings and targets.

    Each batch is moved to `device`, passed through `module.model`, and the
    result is copied back to the CPU as a NumPy array.

    Returns
    -------
    (image_names, embeddings, targets)
        `image_names` is the `image` column of `dataloader.items`,
        `embeddings` are the stacked model outputs after row-wise L2
        normalisation, and `targets` are the class names obtained by taking
        the argmax of each target row and looking it up in the notebook-level
        `target_map`.
    """
    embedding_batches = []
    target_batches = []

    for batch_x, batch_y in tqdm(dataloader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        batch_out = module.model(batch_x)
        embedding_batches.append(batch_out.cpu().numpy())
        target_batches.append(batch_y.cpu().numpy())

    image_names = dataloader.items['image'].values

    # Stack the batches, then give every embedding unit L2 norm.
    embeddings = normalize(np.vstack(embedding_batches), axis=1, norm="l2")

    # One-hot rows -> class index -> class name.
    class_indices = L(list(np.argmax(np.concatenate(target_batches), axis=1)))
    targets = np.array(class_indices.map(target_map.__getitem__))

    return image_names, embeddings, targets

def create_and_search_index(embedding_size: int, train_embeddings: np.ndarray, val_embeddings: np.ndarray, k: int):
    """Index `train_embeddings` by inner product and query it with `val_embeddings`.

    Builds a `faiss.IndexFlatIP` of dimension `embedding_size`, adds the
    training embeddings and searches the `k` nearest entries for every query
    row.

    Returns the `(D, I)` pair produced by `index.search`.
    """
    ip_index = faiss.IndexFlatIP(embedding_size)
    ip_index.add(train_embeddings)
    scores, neighbours = ip_index.search(val_embeddings, k=k)
    return scores, neighbours

def create_val_targets_df(
    train_targets: np.ndarray, val_image_names: np.ndarray, val_targets: np.ndarray
) -> pd.DataFrame:
    """Pair validation image names with their targets.

    Targets that do not occur in `train_targets` are replaced by
    `"new_individual"`.

    Returns a DataFrame with the columns `image` and `target`.
    """
    known_targets = np.unique(train_targets)

    name_target_pairs = np.stack([val_image_names, val_targets], axis=1)
    frame = pd.DataFrame(name_target_pairs, columns=["image", "target"])

    unseen = ~frame["target"].isin(known_targets)
    frame.loc[unseen, "target"] = "new_individual"
    return frame

def create_distances_df(
    image_names: np.ndarray, targets: np.ndarray, D: np.ndarray, I: np.ndarray, stage: str  # noqa: E741
) -> pd.DataFrame:
    """Turn nearest-neighbour search results into one row per (image, target).

    Parameters
    ----------
    image_names : array of length n
        Name of each query image.
    targets : array
        Target of every indexed item; `I` indexes into it.
    D, I : arrays of shape (n, k)
        Scores and indices of the `k` neighbours of each query.
    stage : str
        Label of the data split. It was only used for a progress-bar caption;
        it is kept so existing callers keep working.

    Returns
    -------
    DataFrame with the columns `image`, `target` and `distances`, holding the
    maximum score per (image, target) pair, sorted by `distances` descending.

    Raises
    ------
    ValueError
        If `D` is not 2-D, or `D`, `I` and `image_names` disagree in shape.

    Notes
    -----
    The previous version stacked the string targets and float scores into one
    NumPy array, which converted the scores to strings. The group maximum and
    the sort were therefore lexicographic (for instance `'5e-05'` sorted above
    `'0.9'`). The scores now stay numeric. Building the frame in one step also
    removes the per-image DataFrame construction.
    """
    image_names = np.asarray(image_names)
    targets = np.asarray(targets)
    D = np.asarray(D)
    I = np.asarray(I)  # noqa: E741
    if D.ndim != 2 or D.shape != I.shape or D.shape[0] != len(image_names):
        raise ValueError(
            "D and I must be 2-D arrays of identical shape with one row per image name"
        )

    n_neighbours = D.shape[1]
    distances_df = pd.DataFrame(
        {
            # Each image name is repeated once per neighbour (row-major order).
            "image": np.repeat(image_names, n_neighbours),
            "target": targets[I].ravel(),
            "distances": D.ravel(),
        }
    )
    distances_df = distances_df.groupby(["image", "target"]).distances.max().reset_index()
    distances_df = distances_df.sort_values("distances", ascending=False).reset_index(drop=True)
    return distances_df

def get_best_threshold(val_targets_df: pd.DataFrame, valid_df: pd.DataFrame):
    """Scan thresholds 0.0, 0.1, ..., 1.0 and pick the best one on validation data.

    For each threshold the predictions of `get_predictions(valid_df, th)` are
    scored per image with `map_per_image` and stored in `val_targets_df` under
    a column named after the threshold. `val_targets_df` is modified in place
    and also gains an `is_new_individual` column.

    Returns
    -------
    (best_th, best_cv)
        `best_th` maximises the adjusted score, which weights the mean score of
        "new_individual" rows by 0.1 and of all other rows by 0.9. `best_cv` is
        the best *unadjusted* mean score.

    Changes from the previous version:
    * the group means use `numeric_only=True`, because averaging the string
      columns `image` / `target` raises on recent pandas releases;
    * a validation set without (or consisting only of) "new_individual" rows no
      longer raises `KeyError`; the missing group contributes 0;
    * each threshold is scored in one pass instead of a row-wise `.loc` loop.
    """
    best_th = 0
    best_cv = 0
    for th in [0.1 * x for x in range(11)]:
        all_preds = get_predictions(valid_df, threshold=th)

        val_targets_df[th] = [
            map_per_image(target, all_preds[image])
            for image, target in zip(val_targets_df["image"], val_targets_df["target"])
        ]
        cv = val_targets_df[th].mean()

        print(f"th={th} cv={cv}")

        if cv > best_cv:
            best_th = th
            best_cv = cv

    print(f"best_th={best_th}")
    print(f"best_cv={best_cv}")

    # Adjustment: Since Public lb has nearly 10% 'new_individual' (Be Careful for private LB)
    val_targets_df["is_new_individual"] = val_targets_df.target == "new_individual"
    # Rows: thresholds. Columns: False / True (the `is_new_individual` groups).
    val_scores = val_targets_df.groupby("is_new_individual").mean(numeric_only=True).T
    new_scores = val_scores[True] if True in val_scores.columns else 0.0
    known_scores = val_scores[False] if False in val_scores.columns else 0.0
    val_scores["adjusted_cv"] = new_scores * 0.1 + known_scores * 0.9
    best_th = val_scores["adjusted_cv"].idxmax()
    print(f"best_th_adjusted={best_th}")

    return best_th, best_cv

def get_predictions(df: pd.DataFrame, threshold: float = 0.2):
    """Build up to five ranked predictions per image.

    Parameters
    ----------
    df : DataFrame
        Needs the columns `image`, `target` and `distances`. Rows are consumed
        in the given order, so the caller controls the ranking through the row
        order.
    threshold : float
        If the first row seen for an image has `distances` above this value,
        its target is ranked first and "new_individual" second; otherwise the
        order is reversed.

    Returns
    -------
    dict mapping image name -> list of predictions. Images with fewer than five
    predictions are padded from a fixed list of fallback ids.

    The padding used to test `y not in predictions`, i.e. membership in the
    dict of image names, so a fallback id already predicted for the image could
    be appended a second time. It now tests the image's own prediction list.
    """
    sample_list = ["938b7e931166", "5bf17305f073", "7593d2aee842", "7362d7a01d00", "956562ff2888"]
    predictions = {}
    # `itertuples` avoids building one Series per row as `iterrows` does.
    rows = df.itertuples(index=False)
    for row in tqdm(rows, total=len(df), desc=f"Creating predictions for threshold={threshold}"):
        if row.image in predictions:
            if len(predictions[row.image]) == 5:
                continue
            predictions[row.image].append(row.target)
        elif float(row.distances) > threshold:
            predictions[row.image] = [row.target, "new_individual"]
        else:
            predictions[row.image] = ["new_individual", row.target]

    for x in tqdm(predictions):
        ranked = predictions[x]
        if len(ranked) < 5:
            remaining = [y for y in sample_list if y not in ranked]
            predictions[x] = (ranked + remaining)[:5]

    return predictions

def map_per_image(label, predictions):
    """Score the predictions of a single image.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            Ranked predictions; only the first five are considered

    Returns
    -------
    score : double
            `1 / rank` of the first prediction equal to `label` (rank starts
            at 1), or 0.0 when none of the first five matches
    """
    for rank, candidate in enumerate(predictions[:5], start=1):
        if candidate == label:
            return 1 / rank
    return 0.0


def create_predictions_df(test_df: pd.DataFrame, best_th: float) -> pd.DataFrame:
    """Convert per-image predictions into a two-column frame.

    Calls `get_predictions(test_df, best_th)` and returns a DataFrame with the
    columns `image` and `predictions`, where `predictions` is the space-joined
    list of predictions for that image.
    """
    per_image = get_predictions(test_df, best_th)

    frame = pd.Series(per_image).rename_axis("image").reset_index(name="predictions")
    frame["predictions"] = frame["predictions"].map(" ".join)
    return frame

In [None]:
def infer(
    model_path: str,
    dls: DataLoader,
    train_df: pd.DataFrame = train_df,
    test_df: pd.DataFrame = test_df,
    val_fold: float = 0.0,
    k: int = 50,
    emb_size:int = emb_size
):
    """Run the full retrieval pipeline and write the submission CSV.

    Steps:
    1. load the exported Learner and embed the train, validation and test rows;
    2. search the validation embeddings against the train embeddings and tune
       the "new_individual" threshold with `get_best_threshold`;
    3. add the validation embeddings to the index data and search the test
       embeddings against it;
    4. build predictions, replace or add rows from the public submission for
       the ids listed in `IDS_WITHOUT_BACKFIN_PATH` and for images without a
       prediction, and write the result to `SUBMISSION_CSV_PATH`.

    Parameters
    ----------
    model_path : str
        Exported Learner file.
    dls :
        DataLoaders used to create the test-time loaders.
    train_df, test_df : DataFrame
        Frames with the rows to embed; `train_df` needs a `kfold` column.
    val_fold : float
        Fold used as validation data.
    k : int
        Number of neighbours retrieved per query.
    emb_size : int
        Dimension of the embeddings / index.

    Changes from the previous version: the device is chosen once and falls
    back to the CPU when CUDA is unavailable, the forward hook returned by
    `load_eval_learner` is removed once the embeddings are computed, and the
    test distances no longer overwrite the `test_df` argument.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    (learn, hook) = load_eval_learner(model_path,1,dls,device)
    try:
        train_dl, val_dl, test_dl = load_dataloaders(train_df,test_df,val_fold,dls)

        train_image_names, train_embeddings, train_targets = get_embeddings(learn, train_dl, device)
        val_image_names, val_embeddings, val_targets = get_embeddings(learn, val_dl, device)
        test_image_names, test_embeddings, test_targets = get_embeddings(learn, test_dl, device)
    finally:
        # The hook output is never read here; detach it from the model.
        hook.remove()

    D, I = create_and_search_index(emb_size, train_embeddings, val_embeddings, k)  # noqa: E741
    print("Created index with train_embeddings")

    val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
    print(f"val_targets_df=\n{val_targets_df.head()}")
    print(f"val_targets_df shape=\n{val_targets_df.shape}")
    val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
    print(f"val_df=\n{val_df.head()}")
    print(f"val_df shape=\n{val_df.shape}")
    best_th, best_cv = get_best_threshold(val_targets_df, val_df)
    print(f"val_targets_df=\n{val_targets_df.describe()}")
    print(f"val_targets_df shape=\n{val_targets_df.shape}")

    # From here on the validation rows are part of the searchable data.
    train_embeddings = np.concatenate([train_embeddings, val_embeddings])
    train_targets = np.concatenate([train_targets, val_targets])
    print("Updated train_embeddings and train_targets with val data")

    D, I = create_and_search_index(emb_size, train_embeddings, test_embeddings, k)  # noqa: E741
    print("Created index with train_embeddings")

    test_distances_df = create_distances_df(test_image_names, train_targets, D, I, "test")
    print(f"test_df=\n{test_distances_df.head()}")
    print(f"test_df shape=\n{test_distances_df.shape}")

    predictions = create_predictions_df(test_distances_df, best_th)
    print(f"predictions.head()={predictions.head()}")
    print(f"predictions shape={predictions.shape}")

    # Fix missing predictions
    # From https://www.kaggle.com/code/jpbremer/backfins-arcface-tpu-effnet/notebook
    public_predictions = pd.read_csv(PUBLIC_SUBMISSION_CSV_PATH)
    ids_without_backfin = np.load(IDS_WITHOUT_BACKFIN_PATH, allow_pickle=True)

    # Images present in the public submission but absent from our predictions.
    ids2 = public_predictions["image"][~public_predictions["image"].isin(predictions["image"])]

    predictions = pd.concat(
        [
            predictions[~(predictions["image"].isin(ids_without_backfin))],
            public_predictions[public_predictions["image"].isin(ids_without_backfin)],
            public_predictions[public_predictions["image"].isin(ids2)],
        ]
    )
    predictions = predictions.drop_duplicates()

    predictions.to_csv(SUBMISSION_CSV_PATH, index=False)

In [None]:
# Run inference with the exported model; `infer` writes the result to SUBMISSION_CSV_PATH.
infer(model_path="model.pkl",dls=dls)

In [None]:
# Disabled code: the per-fold training loop below is wrapped in a string literal and never runs.
'''
bs = 32
for Fold in range(N_SPLITS):
    print(f'Epoch-{Fold}')
    dls = create_dls(Val_Fold=Fold,bs=bs)
    learn = cnn_learner(dls, resnet18, metrics=partial(accuracy_multi,thresh=0.2))
    learn = learn.to_fp16()
    learn.fit_one_cycle(1,lr_max=slice(2.2e-6, 2e-4))
    learn.export(f'learn{Fold}.pkl')
    del learn
    torch.cuda.empty_cache()
    gc.collect()
'''