# Imports

In [None]:
!pip install timm
import timm
from timm.optim import Lookahead, RAdam

In [None]:
!pip install pytorch_metric_learning 
from pytorch_metric_learning import losses

In [None]:
# Shared Imports
import random
import math
import os
import pathlib
from tqdm import tqdm
from typing import Iterator, List, Optional, Tuple
import json

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

from PIL import Image as pil_image
import cv2 

# Pre-processing imports
from joblib import Parallel, delayed

# Train imports
## Pytorch/modell stuff
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy
from torchmetrics import MAP#.detection.mean_ap import MeanAveragePrecision
import torchvision.models as models
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
## Sim method
from sklearn.metrics.pairwise import cosine_similarity

## Loading and processing data
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import image as sk_img

import albumentations as A
import albumentations.pytorch as APT

# Globals

In [None]:
# Seed handed to pl.seed_everything below and reused for the train/val split.
SEED = 42
# Whether to pad the images
PAD = True
# The (width, height) size of the images; the augmentations index it as PATCH[0]/PATCH[1]
PATCH = (512, 512)
# The number of matches to consider per query image
N_MATCHES = 5

# Set the random seed through PyTorch Lightning's helper
pl.seed_everything(SEED)

In [None]:
# Training configuration
# Fraction of the labelled data held out for validation
VAL_SIZE = 0.1
BATCH_SIZE = 16
EPOCHS = 10
# Passed to the model as its learning rate (also used as OneCycleLR's max_lr)
LEARNING_RATE = 0.001
# Number of best checkpoints to keep (ModelCheckpoint save_top_k)
N_CHECKPOINTS = 2
# Dimension of the embedding produced by the model
EMBEDDING_SIZE = 4096

# Base model and number of workers for the dataloaders
NUM_WORKERS = 2
#BASE_MODEL = "efficientnet_b1"
BASE_MODEL = "eca_nfnet_l0"
#BASE_MODEL = "efficientnet_b2_pruned"
#BASE_MODEL="resnet50"
#BASE_MODEL="regnety_120"
#BASE_MODEL="swin_small_patch4_window7_224"
#BASE_MODEL="resnest101e"

# Whether to freeze layers or not
FREEZE = False
# Start index of the backbone blocks that stay trainable when FREEZE is set
FREEZE_UNTIL = -1

In [None]:
# Directory of the pre-processed data (train.csv and the images folder are read from here)
DATA_DIR = pathlib.Path("../input/mlip-pad-resize-512x512")
# Working directory, where generated files are stored (empty path = current directory)
WORKING_DIR = pathlib.Path("")
# Location of the original train set; its directory entries are listed as `chain_names`
CHAIN_DIR = pathlib.Path("../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/train_images")
# Location of the train images inside the data directory
TRAIN_DIR = DATA_DIR / pathlib.Path("images")

# Loading Data

In [None]:
# Entries of the original train directory; this notebook only uses how many there are.
chain_names = os.listdir(CHAIN_DIR)
train_file = DATA_DIR / "train.csv"

# Map every hotel_id to a dense integer code the model can train on, and
# persist the id <-> code mapping so predictions can be decoded later.
train_df = pd.read_csv(train_file)
hotel_categories = train_df["hotel_id"].astype("category")
train_df["hotel_code"] = hotel_categories.cat.codes.to_numpy().astype(np.int64)

hotel_id_code_df = (
    train_df.drop(columns=["image_id"])
    .drop_duplicates()
    .reset_index(drop=True)
)
hotel_id_code_df.to_csv(WORKING_DIR / "hotel_id_code_mapping.csv", index=False)

In [None]:
# Quick sanity check of what was loaded.
print(f"Number of images: {len(train_df)}")
print(f"Number of different classes: {len(chain_names)}")
train_df.head()

## Train val split

In [None]:
# Random (non-stratified) hold-out split, reproducible through SEED.
train_set, val_set = train_test_split(
    train_df,
    test_size=VAL_SIZE,
    random_state=SEED,
)
print(f"Number of train images: {len(train_set)}")
print(f"Number of val images: {len(val_set)}")
train_set.head()

## Dataset Class

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset yielding ``(image, hotel_code)`` pairs.

    Args:
        data: frame with at least the columns ``image_id`` (file name relative
            to ``data_path``) and ``hotel_code`` (integer class label).
        data_path: directory that contains the image files.
        transform: optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``"image"`` entry (the
            albumentations calling convention).
    """

    def __init__(self,
                 data: pd.DataFrame,
                 data_path: pathlib.Path,
                 transform: Optional["A.Compose"] = None,
                ):
        self.data = data
        self.data_path = data_path
        self.transform = transform

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int):
        record = self.data.iloc[idx]

        image_path = self.data_path / record["image_id"]
        # Open inside a context manager so the file handle is released right
        # away instead of lingering in long-lived DataLoader workers, and
        # force 3-channel RGB so grayscale/RGBA files cannot break batching.
        with pil_image.open(image_path) as img:
            image = np.array(img.convert("RGB")).astype(np.uint8)

        if self.transform is not None:
            transformed = self.transform(image=image)
            image = transformed["image"]

        label = record['hotel_code']
        return image, label

## Data augmentation

In [None]:
def _occlusion(p):
    """Single red rectangle covering 1/4 to 1/2 of each image side, applied with probability ``p``."""
    return A.CoarseDropout(
        p=p,
        max_holes=1,
        min_height=PATCH[0]//4,
        max_height=PATCH[0]//2,
        min_width=PATCH[1]//4,
        max_width=PATCH[1]//2,
        fill_value=(255,0,0),
    )


# Train augmentation: geometric and photometric jitter, default dropout, then
# a guaranteed red occlusion before converting to a float tensor.
# (A.RandomCrop(224,224, p=1) was tried as an extra step and is disabled.)
train_aug = A.Compose([
    A.HorizontalFlip(p=0.25),
    A.VerticalFlip(p=0.01),
    A.RandomBrightnessContrast(
        brightness_limit=[-0.2,0.2],
        contrast_limit=[-0.2,0.2],
        p=0.5,
    ),
    A.ShiftScaleRotate(p=0.5, border_mode=cv2.BORDER_CONSTANT),
    A.OpticalDistortion(p=0.25),
    A.IAAPerspective(p=0.25),
    A.Blur(blur_limit=4, p=0.2),
    A.RandomSizedCrop(
        min_max_height=[PATCH[1]//4, PATCH[1]],
        height=PATCH[1],
        width=PATCH[0],
        p=0.5,
    ),
    A.CoarseDropout(p=0.5),
    _occlusion(1.),
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# Validation augmentation: only the simulated occlusion, applied to 75% of the images.
val_aug = A.Compose([
    _occlusion(0.75),
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# Test augmentation: no occlusion, just the float/tensor conversion.
test_aug = A.Compose([
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# # Training augmentation
# train_aug = A.Compose([A.HorizontalFlip(p=0.25),
#                        A.VerticalFlip(p=0.01),
#                       A.CoarseDropout(p=1., 
#                                       max_holes=1, 
#                                       min_height=PATCH[0]//4, 
#                                       max_height=PATCH[0]//2,
#                                       min_width=PATCH[1]//4,  
#                                       max_width=PATCH[1]//2, 
#                                       fill_value=(255,0,0)
#                                      ),
#                       A.RandomBrightnessContrast(brightness_limit=[-0.2,0.2], contrast_limit=[-0.2,0.2],p=0.5),
# #                       A.Blur(blur_limit=4, p=0.2),
#                       #A.RandomCrop(PATCH[0]//2, PATCH[1]//2, p=0.5),
#                       #A.Resize(PATCH[0], PATCH[1], p=1.),
#                       A.RandomSizedCrop(min_max_height=[PATCH[1]//4, PATCH[1]], height=PATCH[1], width=PATCH[0], p=0.5),
#                       A.ToFloat(),
#                       APT.transforms.ToTensorV2(),
#                       ])

# # Validation augmentation, thus only occlusion
# val_aug = A.Compose([A.CoarseDropout(p=1.,
#                                      max_holes=1, 
#                                      min_height=PATCH[0]//4, 
#                                      max_height=PATCH[0]//2,
#                                      min_width=PATCH[1]//4,  
#                                      max_width=PATCH[1]//2, 
#                                      fill_value=(255,0,0)
#                                     ),
#                     A.ToFloat(),
#                     APT.transforms.ToTensorV2(),
#                     ])

# # Test augmentation, no occlusion
# test_aug = A.Compose([A.ToFloat(),
#                      APT.transforms.ToTensorV2(),
#                      ])

# Model

In [None]:
# source: https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py
class ArcMarginProduct(nn.Module):
    r"""Additive angular margin (ArcFace) classification head.

    Computes the cosine between the L2-normalised input rows and the
    L2-normalised class weights, replaces the cosine of each sample's target
    class by ``cos(theta + m)`` and scales all logits by ``s``.

    Args:
        in_features: size of each input sample
        out_features: number of classes (size of each output sample)
        s: scale applied to the output logits
        m: angular margin in radians
        easy_margin: if True the margin is only applied where
            ``cos(theta) > 0``; otherwise, where ``theta + m`` would exceed
            pi, the fallback ``cos(theta) - sin(pi - m) * m`` is used
    """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold on cos(theta) below which theta + m would pass pi.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return margin-adjusted logits of shape ``(batch, out_features)``.

        Args:
            input: ``(batch, in_features)`` embeddings.
            label: ``(batch,)`` integer class indices in ``[0, out_features)``.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate on the device of the logits rather than a hard-coded 'cuda'
        # so the head also works on CPU and on non-default GPUs.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # Target class gets phi, every other class keeps its plain cosine.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output


In [None]:
class HotelModel(pl.LightningModule):
    """Embedding network trained with an ArcFace classification head.

    A timm backbone feeds a small fully connected "top model" that produces
    the embedding. When targets are supplied the embedding is additionally
    passed through ``ArcMarginProduct`` to obtain class logits; without
    targets ``forward`` returns the embedding itself.

    Args:
        n_hotels: number of classes for the ArcFace head.
        steps_per_epoch: optimizer steps per epoch, used to size OneCycleLR.
        n_embeddings: dimension of the produced embedding.
        base_model: timm model name for the backbone.
        pretrained: whether timm should load pretrained backbone weights.
        learning_rate: AdamW learning rate and OneCycleLR ``max_lr``.
    """

    def __init__(self,
                 n_hotels: int,
                 steps_per_epoch: int,
                 n_embeddings: int = 256,
                 base_model = None,
                 pretrained: bool = False,
                 learning_rate: float = 0.003,
                 ):
        super().__init__()

        # Hyperparams
        self.n_embeddings = n_embeddings
        self.n_hotels = n_hotels
        self.learning_rate = learning_rate
        self.steps_per_epoch = steps_per_epoch

        # Metrics
        self.loss_fn = nn.CrossEntropyLoss()
        self.train_acc = Accuracy()
        self.val_acc = Accuracy()

        # Model Definition
        ## Base model
        # Honour the `pretrained` argument (it used to be ignored in favour of
        # a hard-coded True).
        self.base_model = timm.create_model(base_model, pretrained=pretrained)
        classifier = self.base_model.get_classifier()
        in_features = classifier.in_features

        # Strip the classifier when it is the last registered module under one
        # of the known names. Track the resulting backbone output width so the
        # top model always matches it.
        fc_name, _ = list(self.base_model.named_modules())[-1]
        if fc_name == 'classifier':
            self.base_model.classifier = nn.Identity()
            backbone_out = in_features
        elif fc_name == 'head.fc':
            self.base_model.head.fc = nn.Identity()
            backbone_out = in_features
        elif fc_name == 'fc':
            self.base_model.fc = nn.Identity()
            backbone_out = in_features
        else:
            # Unrecognised layout: the classifier stays in place, so the
            # backbone keeps emitting its class logits.
            backbone_out = classifier.out_features

        ## Arcface module
        self.arc_face = ArcMarginProduct(self.n_embeddings, n_hotels, s=30.0, m=0.20, easy_margin=False)

        ## Top model
        self.top_model = nn.Sequential(
            nn.utils.weight_norm(nn.Linear(backbone_out, self.n_embeddings*2), dim=None),
            nn.BatchNorm1d(self.n_embeddings*2),
            nn.Dropout(0.2),
            nn.utils.weight_norm(nn.Linear(self.n_embeddings*2, self.n_embeddings)),
            nn.BatchNorm1d(self.n_embeddings),
        )

        # Save hyper params
        self.save_hyperparameters()

    def configure_optimizers(self):
        """Return Lookahead(AdamW) with a one-cycle learning-rate schedule."""
        optimizer = Lookahead(torch.optim.AdamW(self.parameters(), lr=self.learning_rate), k=3)

        # NOTE: the cycle length relies on the notebook-level EPOCHS constant.
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=self.learning_rate,
            epochs=EPOCHS,
            steps_per_epoch=self.steps_per_epoch,
            div_factor=10,
            final_div_factor=1,
            pct_start=0.1,
            anneal_strategy="cos",
        )

        schedule = {
            # Required: the scheduler instance.
            "scheduler": scheduler,
            # The cycle is sized as EPOCHS * steps_per_epoch, so the scheduler
            # must advance once per optimizer step; Lightning's default
            # interval is "epoch", which would leave the cycle in warm-up.
            "interval": "step",
        }
        return [optimizer], [schedule]

    def forward(self, x, targets = None):
        """Return ArcFace logits when ``targets`` is given, else the embedding."""
        y_hat = self.base_model(x)
        y_hat = y_hat.view(y_hat.size(0), -1)
        y_hat = self.top_model(y_hat)

        if targets is not None:
            y_hat = self.arc_face(y_hat, targets)

        return y_hat

    def training_step(self, batch, batch_idx):
        x, y = batch

        # Forward pass
        y_hat = self.forward(x, y)
        loss = self.loss_fn(y_hat, y)
        self.train_acc(y_hat, y)

        # Store results
        self.log("train_loss", loss, prog_bar=False)

        return loss

    def training_epoch_end(self, train_step_outputs) -> None:
        # Log metrics
        self.log("train_acc", self.train_acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        x, y = batch

        # Forward pass (labels are passed too, so the logits include the margin)
        y_hat = self.forward(x, y)
        loss = self.loss_fn(y_hat, y)
        self.val_acc(y_hat, y)

        # Store results
        self.log("val_loss", loss, prog_bar=False)
        return y_hat

    def validation_epoch_end(self, validation_step_outputs) -> None:
        self.log("val_acc", self.val_acc, prog_bar=True)

    def predict_step(self, batch, batch_idx):
        """Return embeddings for a batch of images.

        Accepts either a bare image tensor or an ``(images, labels, ...)``
        sequence such as the one produced by ``ImageDataset`` loaders.
        """
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        return self.forward(x)

# Training

## Data loader

In [None]:
# Both splits read from the same image folder; only the augmentation pipeline differs.
train_data = ImageDataset(train_set, TRAIN_DIR, transform=train_aug)
val_data = ImageDataset(val_set, TRAIN_DIR, transform=val_aug)

In [None]:
# drop_last=True on the train loader: len(train_dataloader) then counts full batches only,
# and that length is what the model receives as steps_per_epoch.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

## Model

In [None]:
# NOTE(review): the class count comes from the directory listing in CHAIN_DIR;
# it must be at least max(hotel_code) + 1 for the ArcFace head — confirm it
# matches train_df["hotel_code"].nunique().
model = HotelModel(n_hotels=len(chain_names),
                   n_embeddings=EMBEDDING_SIZE,
                   base_model=BASE_MODEL,
                   learning_rate=LEARNING_RATE,
                   pretrained=True,
                   steps_per_epoch=len(train_dataloader)
                  )


# The "val-map" label in the file name is historical; the value written is val_acc.
pattern = "epoch_{epoch:04d}.step_{step:09d}.val-map_{val_acc:.4f}"
ModelCheckpoint.CHECKPOINT_NAME_LAST = pattern + ".last"
checkpointer = ModelCheckpoint(
        monitor="val_acc",
        # Accuracy is better when higher; ModelCheckpoint defaults to
        # mode="min", which would keep the worst checkpoints as ".best".
        mode="max",
        filename=pattern + ".best",
        save_last=True,
        auto_insert_metric_name=False,
        save_top_k=N_CHECKPOINTS,
    )

## Calculate Distance

In [None]:
def generate_embeddings(loader, model, bar_desc="Generating embeds", device=None):
    """Run ``model`` over ``loader`` and collect its outputs and the targets.

    Args:
        loader: iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the targets.
        model: module called as ``model(inputs)``; it is moved to ``device``
            and switched to eval mode.
        bar_desc: label of the progress bar.
        device: device to run on. Defaults to CUDA when available, else CPU,
            so the function no longer depends on a global ``args`` object.

    Returns:
        Tuple ``(outputs, targets)`` of float32 NumPy arrays, one row/entry
        per sample in loader order.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    targets_all = []
    outputs_all = []

    model = model.to(device)
    model.eval()
    with torch.no_grad():
        for sample in tqdm(loader, desc=bar_desc):
            inputs = sample[0].to(device)
            outputs = model(inputs)

            # Targets are only collected, so they never need to leave the CPU.
            targets_all.extend(sample[1].cpu().numpy())
            outputs_all.extend(outputs.detach().cpu().numpy())

    targets_all = np.array(targets_all).astype(np.float32)
    outputs_all = np.array(outputs_all).astype(np.float32)

    return outputs_all, targets_all

In [None]:
# find 5 most similar images from different hotels and return their hotel_id_code
def find_matches(query, base_embeds, base_targets, k=N_MATCHES):
    distance_df = pd.DataFrame(index=np.arange(len(base_targets)), data={"hotel_id_code": base_targets})
    # calculate cosine distance of query embeds to all base embeds
    distance_df["distance"] = cosine_similarity([query], base_embeds)[0]
    # sort by distance and hotel_id
    distance_df = distance_df.sort_values(by=["distance", "hotel_id_code"], ascending=False).reset_index(drop=True)
    # return first 5 different hotel_id_codes
    return distance_df["hotel_id_code"].unique()[:N_MATCHES]

In [None]:
def test_similarity(args, base_loader, test_loader, model):
    """Print top-1 and top-N retrieval accuracy of ``model``.

    Embeds both loaders, looks up the closest gallery hotels for every test
    embedding with ``find_matches`` and compares them to the true labels.
    ``args`` is accepted for call compatibility and is not read here.
    """
    base_embeds, base_targets = generate_embeddings(base_loader, model, "Generate base embeddings")
    test_embeds, test_targets = generate_embeddings(test_loader, model, "Generate test embeddings")

    preds = np.array([
        find_matches(query_embeds, base_embeds, base_targets)
        for query_embeds in tqdm(test_embeds, desc="Similarity - match finding")
    ])

    # One copy of the true label per candidate column, so it lines up with preds.
    test_targets_N = np.tile(test_targets, (N_MATCHES, 1)).T
    # a hit if any of the top predictions equals the true label
    hit_in_top_n = (preds == test_targets_N).any(axis=1)
    acc_top_5 = hit_in_top_n.mean()
    # plain accuracy of the best-ranked prediction
    acc_top_1 = np.mean(test_targets == preds[:, 0])
    print(f"Similarity accuracy: {acc_top_1:0.4f}, Top 5 acc: {acc_top_5:0.4f}") # NEED TO CORRECT THE MAP CALCULATION

## Freeze Layers

In [None]:
if FREEZE:
    # Freeze the backbone only: the top model and ArcFace head are freshly
    # initialised and must stay trainable, otherwise nothing has gradients.
    for parameter in model.base_model.parameters():
        parameter.requires_grad = False
    # Re-enable the trailing blocks from FREEZE_UNTIL onwards. The previous
    # slice [FREEZE_UNTIL:-1] dropped the last block and was empty for
    # FREEZE_UNTIL = -1.
    # NOTE(review): assumes the backbone exposes a `blocks` container — confirm for BASE_MODEL.
    for parameter in model.base_model.blocks[FREEZE_UNTIL:].parameters():
        parameter.requires_grad = True

## Train loop

In [None]:
# Train for EPOCHS epochs on every visible GPU (device_count() is 0 without CUDA),
# checkpointing through `checkpointer` and recording the learning rate.
trainer = pl.Trainer(
    max_epochs=EPOCHS,
    gpus=torch.cuda.device_count(),
    callbacks=[checkpointer, LearningRateMonitor()],
    default_root_dir="logs/",
)

trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

## Test Model

In [None]:
# Validation images served one at a time (still with val_aug) as retrieval queries.
val_test_dataloader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=2)

In [None]:
# Gallery over ALL labelled images (train and validation rows) without occlusion/augmentation.
base_dataset   = ImageDataset(train_df, TRAIN_DIR, transform=test_aug)
base_loader    = DataLoader(base_dataset, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class args:
    # Ad-hoc settings namespace; in this notebook only `device` is read
    # (by generate_embeddings). The other fields are kept for compatibility.
    epochs = 5
    lr = 1e-3
    batch_size = 64
    num_workers = 2
    val_samples = 1
    embedding_size = 128
    backbone_name = "efficientnet_b0"
    n_classes = train_df["hotel_code"].nunique()
    device = ('cuda' if torch.cuda.is_available() else 'cpu')

# Evaluate against a deterministic gallery: the training split only (no
# validation images), un-augmented and complete. The training loader shuffles,
# applies random augmentation/occlusion and drops the last partial batch, so
# it does not reflect the un-augmented gallery that is saved for inference.
eval_base_loader = DataLoader(
    ImageDataset(train_set, TRAIN_DIR, transform=test_aug),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
test_similarity(args, eval_base_loader, val_test_dataloader, model)

### Save embeddings

In [None]:
# generate embeddings for all train images and save them for inference
base_embeds, _ = generate_embeddings(base_loader, model, "Generate embeddings for all images")
train_df["embeddings"] = list(base_embeds)
train_df.to_pickle(f"base_image-embeddings.pkl")