# Packages and Imports

In [None]:
# Fetch the timm (pytorch-image-models) sources and install them in editable mode.
!git clone https://github.com/rwightman/pytorch-image-models
!cd pytorch-image-models && pip install -e .

import sys
# Also put a copy of the timm sources located under ../input on the import path.
# NOTE(review): this path is not the directory cloned above — presumably an offline
# dataset copy of the same library; confirm which of the two is meant to be used.
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [None]:
import numpy as np 
import pandas as pd
import random
import os
import math
import time

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from PIL import Image as pil_image
from tqdm import tqdm
import scipy

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import timm

# Global

In [None]:
IMG_SIZE = 512  # selects the pre-resized image folder and scales the dropout hole sizes
SEED = 42  # passed to seed_everything() before training
N_MATCHES = 5  # number of hotel candidates kept per query image during evaluation
USE_TPU = False  # switches device, batch size, optimizer step and checkpoint contents

# Input locations (relative paths into ../input) and output prefix.
PROJECT_FOLDER = "../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/"
DATA_FOLDER = f"../input/hotelid-2022-train-images-{IMG_SIZE}x{IMG_SIZE}/"
IMAGE_FOLDER = DATA_FOLDER+"images/"
OUTPUT_FOLDER = ""  # empty prefix: checkpoints/embeddings are written to the working directory

# for TPU: install torch_xla and import the XLA model helpers (`xm`) used further below
if USE_TPU:
    !pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.7-cp37-cp37m-linux_x86_64.whl
    import torch_xla
    import torch_xla.core.xla_model as xm
# NOTE(review): set_default_tensor_type is reported as deprecated in recent PyTorch
# releases (torch.set_default_dtype is the suggested replacement) — confirm for the
# PyTorch version in use.
torch.set_default_tensor_type('torch.FloatTensor')

In [None]:
# Sanity check of the inputs: list the competition folder and count the entries
# directly inside the resized-image data folder.
print(os.listdir(PROJECT_FOLDER))
print(len(os.listdir(DATA_FOLDER)))

In [None]:
# Image index; the rest of the notebook reads its "image_id" and "hotel_id" columns.
data_df = pd.read_csv(DATA_FOLDER+"train.csv")
# encode hotel ids as int64 category codes; these codes are the class labels used by
# the dataset, the ArcFace head and the similarity evaluation
data_df["hotel_id_code"] = data_df["hotel_id"].astype('category').cat.codes.values.astype(np.int64)

In [None]:
class args:
    """Namespace of training hyper-parameters; used as `args.<name>`, never instantiated."""
    epochs = 10  # number of training epochs; also sizes the OneCycleLR schedule
    lr = 1e-3  # AdamW learning rate and OneCycleLR max_lr
    batch_size = 128 if USE_TPU else 4  # batch size of the train/validation loaders
    # batch size of the embedding-generation loader (no gradients are computed there)
    embgen_batch_size = batch_size*2 if USE_TPU else batch_size*4
    num_workers = 2  # DataLoader worker processes
    val_samples = 1  # a hotel needs more than this many images to take part in the split
    embedding_size = 256 #128
    backbone_name = 'efficientnet_b1' #'efficientnet_b0' #'fbnetc_100', 'eca_nfnet_l0'
    n_classes = data_df["hotel_id_code"].nunique()  # number of distinct encoded hotels
    # XLA device on TPU, otherwise CUDA when available, otherwise CPU
    device = (xm.xla_device() if USE_TPU else ('cuda' if torch.cuda.is_available() else 'cpu'))
    sct_loss_weight = 0.1  # weight of the sct_loss term added to the cross-entropy loss
    lam = 1  # factor applied to S_an inside sct_loss

print(args.device)

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Covers Python's `random`, the hash seed environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every visible GPU, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # the cuDNN autotuner may select different kernels from run to run; disable it
    # so that `deterministic = True` actually yields repeatable results
    torch.backends.cudnn.benchmark = False

# Dataset and Transformations

In [None]:
import albumentations as A
import albumentations.pytorch as APT
import cv2 

# used for training dataset - augmentations and occlusions
# Geometric augmentations come first, then two dropout stages, then a photometric
# jitter; ToFloat + ToTensorV2 convert the result to a float tensor for the model.
train_transform = A.Compose([
    A.HorizontalFlip(p=0.75),
    A.VerticalFlip(p=0.05), #p=0.25 makes no sense, as there are few vertically-flipped images in the dataset
    A.ShiftScaleRotate(p=0.5, border_mode=cv2.BORDER_CONSTANT),
    A.OpticalDistortion(p=0.25),
    A.Perspective(p=0.25),
    A.CoarseDropout(p=0.5, min_holes=1, max_holes=6, 
                    min_height=IMG_SIZE//16, max_height=IMG_SIZE//4,
                    min_width=IMG_SIZE//16,  max_width=IMG_SIZE//4), # normal coarse dropout
    # always applied (p=1.): a single large hole filled with (255,0,0)
    A.CoarseDropout(p=1., max_holes=1, 
                    min_height=IMG_SIZE//4, max_height=IMG_SIZE//2,
                    min_width=IMG_SIZE//4,  max_width=IMG_SIZE//2, 
                    fill_value=(255,0,0)),# simulating occlusions in test data

    A.RandomBrightnessContrast(p=0.75),
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# used for validation dataset - only occlusions
# Same single large (255,0,0) hole as in training, without any other augmentation.
val_transform = A.Compose([
    A.CoarseDropout(p=1., max_holes=1, 
                    min_height=IMG_SIZE//4, max_height=IMG_SIZE//2,
                    min_width=IMG_SIZE//4,  max_width=IMG_SIZE//2, 
                    fill_value=(255,0,0)),# simulating occlusions
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# no augmentations - only the float/tensor conversion; used when generating the
# embeddings of the image base
base_transform = A.Compose([
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

In [None]:
class HotelTrainDataset:
    """Map-style dataset yielding one hotel image and its encoded hotel id.

    Args:
        data: DataFrame with at least the columns "image_id" (file name
            relative to `data_path`) and "hotel_id_code" (class label).
        transform: optional callable invoked as `transform(image=ndarray)`
            and returning a mapping with an "image" entry (albumentations
            convention).
        data_path: prefix prepended to "image_id" to build the file path.
    """

    def __init__(self, data, transform=None, data_path="train_images/"):
        self.data = data
        self.data_path = data_path
        self.transform = transform

    def __len__(self):
        """Number of rows (images) in the underlying frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load image number `idx` and return {"image": ..., "target": ...}."""
        record = self.data.iloc[idx]
        image_path = self.data_path + record["image_id"]
        # The context manager releases the file handle deterministically.
        # Converting to RGB guarantees an HxWx3 array, which the 3-value
        # fill_value of the occlusion transforms and the backbone both expect,
        # even if a file is stored as grayscale, palette or RGBA.
        with pil_image.open(image_path) as img:
            image = np.array(img.convert("RGB"), dtype=np.uint8)

        if self.transform:
            transformed = self.transform(image=image)
            image = transformed["image"]

        return {
            "image" : image,
            "target" : record['hotel_id_code'],
        }

# Model

In [None]:
# source: https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py
class ArcMarginProduct(nn.Module):
    """ArcFace head: cosine logits with an additive angular margin on the target class.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        s: scale applied to the final logits.
        m: angular margin (radians) added to the target-class angle.
        easy_margin: if True, the margin is only applied where cos(theta) > 0;
            otherwise it is applied where cos(theta) > cos(pi - m) and a
            linear penalty `cosine - mm` is used elsewhere.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        # torch.empty replaces the legacy torch.FloatTensor(size) constructor; the
        # values are overwritten by the Xavier initialisation right below
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # threshold and penalty used when theta + m would exceed pi
        self.th = math.cos(math.pi-m)
        self.mm = math.sin(math.pi-m)*m

    def forward(self, input, label):
        """Return scaled logits of shape (batch, out_features).

        Args:
            input: (batch, in_features) embeddings.
            label: (batch,) integer class indices; the margin is applied to
                these entries only.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0-torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine*self.cos_m-sine*self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine>0, phi, cosine)
        else:
            phi = torch.where(cosine>self.th, phi, cosine-self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate on the same device/dtype as the logits instead of reading a
        # notebook-global device, so the layer works wherever the module lives.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # margin logit for the target class, plain cosine for all other classes
        output = (one_hot*phi)+((1.0-one_hot)*cosine)
        output *= self.s

        return output

class EmbeddingModel(nn.Module):
    """timm backbone followed by a linear embedding layer and an ArcFace head.

    Args:
        out_features: number of classes for the classification heads.
        embed_size: dimensionality of the produced embedding.
        backbone_name: timm model name passed to `timm.create_model`.
    """

    def __init__(self, out_features=100, embed_size=256, backbone_name="efficientnet_b1"):
        super(EmbeddingModel, self).__init__()

        self.embed_size = embed_size
        self.backbone = timm.create_model(backbone_name, pretrained=True)
        in_features = self.backbone.get_classifier().in_features
        self.embedding = nn.Linear(in_features, embed_size)
        # NOTE(review): forward() never routes through this layer, so the training
        # loop in this notebook does not update it; embed_and_classify() therefore
        # scores with untrained weights unless it is trained elsewhere.
        self.classifier = nn.Linear(embed_size, out_features)

        # Remove the backbone's classification head through timm's own API.
        # Assigning `self.backbone.classifier` directly only works for backbones
        # whose head attribute is literally named `classifier`; for heads named
        # `fc` or `head.fc` it would silently leave the original head in place.
        self.backbone.reset_classifier(0)

        self.arc_face = ArcMarginProduct(self.embed_size, out_features, s=30.0, m=0.20, easy_margin=False)

        # Alternative projection head. forward() currently uses `self.embedding`
        # instead; the module is kept so existing checkpoints keep loading.
        self.post = nn.Sequential(
            nn.utils.weight_norm(nn.Linear(in_features, self.embed_size*2), dim=None),
            nn.BatchNorm1d(self.embed_size*2),
            nn.Dropout(0.2),
            nn.utils.weight_norm(nn.Linear(self.embed_size*2, self.embed_size)),
            nn.BatchNorm1d(self.embed_size),
        )

        print(f"Model {backbone_name} ArcMarginProduct - Features: {in_features}, Embeds: {self.embed_size}")

    def embed_and_classify(self, x):
        """Return `(embedding, logits)` where logits come from the plain linear classifier."""
        x = self.forward(x)
        return x, self.classifier(x)

    def forward(self, input, targets=None):
        """Embed a batch of images.

        Args:
            input: image batch fed to the backbone.
            targets: optional class indices. When given, ArcFace logits are
                computed as well.

        Returns:
            The embedding tensor, or `(embedding, arcface_logits)` when
            `targets` is provided.
        """
        x = self.backbone(input)
        x = x.view(x.size(0), -1)
        x = self.embedding(x) #self.post(x)

        if targets is not None:
            logits = self.arc_face(x, targets)
            return x, logits

        return x

# Model Helper Functions

In [None]:
def save_checkpoint(model, scheduler, optimizer, epoch, name, loss=None, score=None):
    """Write a training checkpoint to `{OUTPUT_FOLDER}checkpoint-{name}/`.

    On TPU only the model weights (copied to CPU) are stored; otherwise the
    scheduler and optimizer state are stored as well. `epoch`, `loss` and
    `score` are always recorded, and `epoch`/`score` are part of the file name.
    """
    if USE_TPU:
        # weights are moved off the accelerator before serialisation
        model_state = {key: tensor.cpu() for key, tensor in model.state_dict().items()}
        training_state = {}
    else:
        model_state = model.state_dict()
        training_state = {
            "scheduler": scheduler.state_dict(),
            "optimizer": optimizer.state_dict(),
        }

    checkpoint = {
        "epoch": epoch,
        "model": model_state,
        **training_state,
        "loss": loss,
        "score": score,
    }

    target_dir = f"{OUTPUT_FOLDER}checkpoint-{name}"
    os.makedirs(target_dir, exist_ok=True)
    torch.save(checkpoint, f"{target_dir}/epoch{epoch}_score{score}.pt")

def load_checkpoint(model, scheduler, optimizer, name):
    """Restore model, optimizer and scheduler state from a checkpoint file.

    Args:
        model: module whose weights are overwritten from the checkpoint.
        scheduler: LR scheduler to restore, or None to skip.
        optimizer: optimizer to restore, or None to skip.
        name: path of the checkpoint file written by `save_checkpoint`.

    Returns:
        `(model, scheduler, optimizer, epoch)` with `epoch` read from the file.
    """
    # Load onto the CPU first: load_state_dict copies the values onto whatever
    # device the live parameters are on, so a checkpoint saved on a GPU can be
    # opened on a machine without one.
    checkpoint = torch.load(name, map_location="cpu") #torch.load(f"{OUTPUT_FOLDER}checkpoint-{name}.pt")

    model.load_state_dict(checkpoint["model"])
    # The optimizer state is saved by save_checkpoint but was never restored, so
    # resumed runs restarted with empty AdamW moments.
    if optimizer is not None and "optimizer" in checkpoint:
        optimizer.load_state_dict(checkpoint["optimizer"])
    # Checkpoints written in TPU mode carry no scheduler/optimizer entries.
    if scheduler is not None and "scheduler" in checkpoint:
        scheduler.load_state_dict(checkpoint["scheduler"])
    return model, scheduler, optimizer, checkpoint["epoch"]

In [None]:
# Run the model over every batch of `loader` in eval mode without gradients.
# Returns (embeddings, targets), both as float32 numpy arrays in loader order.
def generate_embeddings(loader, model, bar_desc="Generating embeds"):
    """Compute embeddings for all samples served by `loader`.

    Args:
        loader: iterable of dicts with an 'image' and a 'target' tensor.
        model: module called as `model(images)` to obtain the embeddings.
        bar_desc: description shown on the progress bar.

    Returns:
        `(outputs_all, targets_all)` as float32 numpy arrays.
    """
    collected_targets = []
    collected_outputs = []

    model.eval()
    with torch.no_grad():
        for batch in tqdm(loader, desc=bar_desc):
            images = batch['image'].to(args.device)
            labels = batch['target'].to(args.device)
            embeddings = model(images)

            # extend() appends one row per sample, so batches of any size line up
            collected_targets.extend(labels.cpu().numpy())
            collected_outputs.extend(embeddings.detach().cpu().numpy())

    targets_all = np.array(collected_targets).astype(np.float32)
    outputs_all = np.array(collected_outputs).astype(np.float32)

    return outputs_all, targets_all

# Training and Validation Functions

In [None]:
def train_epoch(args, model, loader, criterion, optimizer, scheduler, epoch, pos_neg_getter):
    """Train `model` for one pass over `loader`.

    The loss is `criterion(arcface_logits, targets)` plus
    `args.sct_loss_weight * sct_loss(...)`, where the positive/negative
    embeddings for the SCT term are supplied by `pos_neg_getter`.

    Args:
        args: namespace providing `device`, `epochs` and `sct_loss_weight`.
        model: called as `model(images, targets)` -> `(embeds, logits)`.
        loader: iterable of dicts with 'image' and 'target' tensors.
        criterion: classification loss applied to the logits.
        optimizer: optimizer stepped once per batch.
        scheduler: optional LR scheduler stepped once per batch.
        epoch: current epoch number (progress display only).
        pos_neg_getter: callable `(embeds, targets) -> (pos_embeds, neg_embeds)`.

    Returns:
        `(mean_loss, accuracy)` over the epoch. Accuracy is 0.0 if the loader
        yields no batches.
    """
    sct_loss_weight = args.sct_loss_weight #(epoch-1)/args.epochs
    losses = []
    # Running counters replace re-scoring every stored prediction on each step,
    # which cost O(steps^2) time and kept batch x n_classes floats per step alive.
    n_correct = 0
    n_seen = 0
    score = 0.0

    model.train()
    t = tqdm(loader)

    for sample in t:
        optimizer.zero_grad()

        images = sample['image'].to(args.device)
        targets = sample['target'].to(args.device)

        embeds, outputs = model(images, targets)
        loss = criterion(outputs, targets)
        pos_embeds, neg_embeds = pos_neg_getter(embeds, targets)
        loss2 = sct_loss(embeds, pos_embeds, neg_embeds)
        loss = loss+sct_loss_weight*loss2 #(1-sct_loss_weight)*loss+sct_loss_weight*loss2
        loss.backward()

        if USE_TPU:
            xm.optimizer_step(optimizer, barrier=True) #for TPU
        else:
            optimizer.step()

        if scheduler:
            scheduler.step()

        loss_value = loss.item()
        losses.append(loss_value)

        # argmax on the raw logits: the logits are scaled (ArcFace s=30), so a
        # float32 sigmoid saturates to exactly 1.0 for several classes at once and
        # the argmax then just returns the lowest tied index.
        predictions = outputs.detach().argmax(dim=1)
        n_correct += (predictions == targets).sum().item()
        n_seen += targets.size(0)
        score = n_correct / n_seen

        desc = f"Training epoch {epoch}/{args.epochs} - loss: {loss_value:0.4f}, accuracy: {score:0.4f}"
        t.set_description(desc)

    return np.mean(losses), score

In [None]:
def test_classification(loader, model):
    """Evaluate the linear classifier head and print top-1 accuracy and MAP@5.

    MAP@5 is computed the same way as in `test_similarity`: each sample scores
    the reciprocal rank of the correct class within its top `N_MATCHES`
    predictions (0 if absent). Previously the value printed under the
    "MAP@5" label was the plain top-5 hit rate.

    Args:
        loader: iterable of dicts with 'image' and 'target' tensors.
        model: module exposing `embed_and_classify(images) -> (embeds, logits)`.
    """
    targets_all = []
    outputs_all = []

    model.eval()
    # evaluation only: no autograd graph is needed
    with torch.no_grad():
        for sample in tqdm(loader, desc="Classification"):
            images = sample['image'].to(args.device)

            _, outputs = model.embed_and_classify(images)

            targets_all.extend(sample['target'].cpu().numpy())
            # raw logits are enough for ranking; a sigmoid is monotonic and can
            # only introduce ties when it saturates
            outputs_all.extend(outputs.detach().cpu().numpy())

    targets = np.asarray(targets_all)
    logits = np.asarray(outputs_all)

    # top-N_MATCHES class indices per sample, best first
    preds = np.argsort(-logits, axis=1)[:, :N_MATCHES]
    hits = preds == targets[:, None]
    # reciprocal-rank weights 1, 1/2, 1/3, ... matching the number of kept columns
    rank_weights = 1.0 / np.arange(1, preds.shape[1] + 1)
    map_at_5 = (hits * rank_weights).max(axis=1).mean()
    # calculate prediction accuracy
    acc_top_1 = np.mean(targets == np.argmax(logits, axis=1))

    print(f"Classification accuracy: {acc_top_1:0.4f}, MAP@5: {map_at_5:0.4f}")

In [None]:
# find the k most similar base images that belong to different hotels and return
# their hotel_id_code, most similar first
def find_matches(query, base_embeds, base_targets, k=N_MATCHES):
    """Rank hotels by cosine similarity of their images to `query`.

    Args:
        query: 1-D embedding of the query image.
        base_embeds: 2-D array with one embedding per base image.
        base_targets: hotel_id_code of every row in `base_embeds`.
        k: maximum number of distinct hotel codes to return.

    Returns:
        Array of up to `k` distinct hotel_id_codes ordered by decreasing
        similarity of their best-matching image.
    """
    distance_df = pd.DataFrame(index=np.arange(len(base_targets)), data={"hotel_id_code": base_targets})
    # cosine *similarity* of the query to all base embeds (higher = closer); the
    # column keeps its historical name "distance"
    distance_df["distance"] = cosine_similarity([query], base_embeds)[0]
    # most similar first; ties are broken by the larger hotel_id_code
    distance_df = distance_df.sort_values(by=["distance", "hotel_id_code"], ascending=False).reset_index(drop=True)
    # unique() keeps first occurrences in order; honour the `k` argument instead of
    # always slicing with the global N_MATCHES
    return distance_df["hotel_id_code"].unique()[:k]
    
def test_similarity(args, data_df, model):#base_loader, test_loader, model):
    """Evaluate retrieval quality of the stored embeddings and print the result.

    Rows with `Set == "train"` form the search base and rows with
    `Set == "val"` are the queries. For each query the `N_MATCHES` most
    similar distinct hotels are retrieved; top-1 accuracy and MAP@N_MATCHES
    (reciprocal rank of the correct hotel, 0 if absent) are printed.

    Args:
        args: unused; kept for call compatibility.
        data_df: frame with columns "Set", "embeddings" and "hotel_id_code".
        model: unused; kept for call compatibility.
    """
    train_rows = data_df[data_df.Set=="train"]
    val_rows = data_df[data_df.Set=="val"]
    base_embeds = np.stack(train_rows.embeddings.values)
    base_targets = train_rows.hotel_id_code.values
    test_embeds = np.stack(val_rows.embeddings.values)
    test_targets = val_rows.hotel_id_code.values

    # Fixed-width prediction table; -1 marks "no candidate" so that a base with
    # fewer than N_MATCHES distinct hotels cannot yield a ragged array.
    # hotel_id_code holds integer category codes, which are >= 0 for real hotels.
    preds = np.full((len(test_embeds), N_MATCHES), -1, dtype=np.int64)
    for row, query_embeds in enumerate(tqdm(test_embeds, desc="Similarity - match finding")):
        matches = find_matches(query_embeds, base_embeds, base_targets)[:N_MATCHES]
        preds[row, :len(matches)] = matches

    # reciprocal-rank weights derived from N_MATCHES instead of a hard-coded
    # five-element list
    rank_weights = 1.0 / np.arange(1, N_MATCHES + 1)
    hits = preds == test_targets[:, None]

    # calculate map@5: each row has at most one hit because the candidates are distinct
    map_at_5 = (hits * rank_weights).max(axis=1).mean()

    # calculate prediction accuracy
    acc_top_1 = np.mean(test_targets == preds[:, 0])
    print(f"Similarity accuracy: {acc_top_1:0.4f}, MAP@5: {map_at_5:0.4f}")

# Prepare Data

In [None]:
# save hotel_id encoding for later decoding
# Every column except image_id is kept, then duplicate rows are removed.
# NOTE(review): this yields one row per hotel only if train.csv has no other
# per-image columns — confirm against the CSV schema.
hotel_id_code_df = data_df.drop(columns=["image_id"]).drop_duplicates().reset_index(drop=True)
hotel_id_code_df.to_csv(OUTPUT_FOLDER+'hotel_id_code_mapping.csv', index=False)

# Train and Evaluate

In [None]:
# Name used for the checkpoint and embedding output folders.
model_name = f"embedding-model-{args.backbone_name}-{IMG_SIZE}x{IMG_SIZE}"
print(model_name)

# Seed all RNGs before the data split and model creation below.
seed_everything(seed=SEED)

In [None]:
# split data into train and validation set
hotel_image_count = data_df.groupby("hotel_id")["image_id"].count()
# hotels that have more images than samples for validation
valid_hotels = hotel_image_count[hotel_image_count > args.val_samples]
# data that can be split into train and val set
valid_data = data_df[data_df["hotel_id"].isin(valid_hotels.index)]
# stratified 90/10 split of the splittable images; random_state pins the split to
# SEED so it no longer depends on the global NumPy RNG state (i.e. on which cells
# ran before this one)
X_train, X_val, _, _ = train_test_split(
    valid_data['image_id'], valid_data['hotel_id'],
    test_size=0.1, stratify=valid_data['hotel_id'], random_state=SEED,
)
valid_df = data_df[data_df['image_id'].isin(X_val)]
# if hotel had less than required val_samples it will be only in the train set:
# take everything that is not a validation image. Selecting only X_train dropped
# those hotels from training altogether although n_classes still counts them.
train_df = data_df[~data_df['image_id'].isin(X_val)]

train_dataset = HotelTrainDataset(train_df, train_transform, data_path=IMAGE_FOLDER)
train_loader  = DataLoader(train_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True, drop_last=True)
valid_dataset = HotelTrainDataset(valid_df, val_transform, data_path=IMAGE_FOLDER)
valid_loader  = DataLoader(valid_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)
# base dataset for image similarity search: all images, unaugmented, in data_df order
# (shuffle=False keeps the generated embeddings aligned with data_df rows)
base_dataset   = HotelTrainDataset(data_df, base_transform, data_path=IMAGE_FOLDER)
base_loader    = DataLoader(base_dataset, num_workers=args.num_workers, batch_size=args.embgen_batch_size, shuffle=False)

In [None]:
# Build the embedding network (one ArcFace class per encoded hotel) and move it to
# the selected device.
model = EmbeddingModel(args.n_classes, args.embedding_size ,args.backbone_name)
model = model.to(args.device)

In [None]:
class HardPosHardNegFinder:
    """Mine, per anchor, the hardest positive and hardest negative reference embedding.

    Hardest positive: the same-target reference embedding with the lowest
    cosine similarity to the anchor. Hardest negative: the different-target
    reference embedding with the highest cosine similarity.

    Args:
        compare_targets: 1-D tensor with the target of every reference embedding.
        args: namespace providing `device` (where the +inf sentinel is placed).
    """

    def __init__(self, compare_targets, args):
        # column vector so it broadcasts against a (1, batch) row of anchor targets
        self.compare_targets = compare_targets.unsqueeze(1)
        self.inf = torch.tensor(float('inf'), device=args.device)

    def update_compare_embs(self, compare_embs):
        """Replace the reference embeddings (row i corresponds to compare_targets[i])."""
        self.compare_embs = compare_embs

    def __call__(self, embeds, targets):
        """Return `(positives, negatives)`, one reference embedding each per anchor."""
        # Only the selected indices are used afterwards, so the (n_ref, batch)
        # similarity search must not be recorded by autograd; tracking it kept a
        # large broadcast graph alive for every training step.
        with torch.no_grad():
            sim_combis_mask = targets.unsqueeze(0) == self.compare_targets
            sim_combis = torch.cosine_similarity(embeds.unsqueeze(0), self.compare_embs.unsqueeze(1), dim=2)
            # +inf / -inf exclude the entries of the wrong group from argmin / argmax
            same_hotel_most_diff = torch.where(sim_combis_mask, sim_combis, self.inf).argmin(dim=0)
            diff_hotel_most_sim = torch.where(~sim_combis_mask, sim_combis, -self.inf).argmax(dim=0)

        return self.compare_embs[same_hotel_most_diff], self.compare_embs[diff_hotel_most_sim]

class EasyPosHardNegFinder:
    """Mine, per anchor, the easiest positive and hardest negative reference embedding.

    Easiest positive: the same-target reference embedding with the highest
    cosine similarity to the anchor. Hardest negative: the different-target
    reference embedding with the highest cosine similarity.

    Args:
        compare_targets: 1-D tensor with the target of every reference embedding.
        args: namespace providing `device` (where the -inf sentinel is placed).
    """

    def __init__(self, compare_targets, args):
        # column vector so it broadcasts against a (1, batch) row of anchor targets
        self.compare_targets = compare_targets.unsqueeze(1)
        self.ninf = torch.tensor(-float('inf'), device=args.device)

    def update_compare_embs(self, compare_embs):
        """Replace the reference embeddings (row i corresponds to compare_targets[i])."""
        self.compare_embs = compare_embs

    def __call__(self, embeds, targets):
        """Return `(positives, negatives)`, one reference embedding each per anchor."""
        # Only the selected indices are used afterwards, so the (n_ref, batch)
        # similarity search must not be recorded by autograd; tracking it kept a
        # large broadcast graph alive for every training step.
        with torch.no_grad():
            sim_combis_mask = targets.unsqueeze(0) == self.compare_targets
            sim_combis = torch.cosine_similarity(embeds.unsqueeze(0), self.compare_embs.unsqueeze(1), dim=2)
            # -inf excludes the entries of the wrong group from each argmax
            same_hotel_most_sim = torch.where(sim_combis_mask, sim_combis, self.ninf).argmax(dim=0)
            diff_hotel_most_sim = torch.where(~sim_combis_mask, sim_combis, self.ninf).argmax(dim=0)
        return self.compare_embs[same_hotel_most_sim], self.compare_embs[diff_hotel_most_sim]

In [None]:
def sct_loss(s_an, s_pos, s_neg):
    """Per-batch mean of a selectively applied contrastive / triplet loss.

    Args:
        s_an: anchor embeddings.
        s_pos: positive embedding for each anchor.
        s_neg: negative embedding for each anchor.

    Returns:
        Scalar tensor: for samples where the anchor-positive cosine similarity
        exceeds the anchor-negative one the term is `args.lam * S_an`,
        otherwise it is `triplet_loss(S_ap, S_an)`.
    """
    cos_sim = torch.nn.functional.cosine_similarity
    S_ap = cos_sim(s_an, s_pos)
    S_an = cos_sim(s_an, s_neg)
    # NOTE(review): the published Selectively Contrastive Triplet loss appears to
    # use the lam*S_an branch when S_an > S_ap (hard negatives) — i.e. the opposite
    # condition. Confirm whether the branch order here is intentional.
    positive_is_closer = S_ap > S_an
    per_sample = torch.where(positive_is_closer, args.lam*S_an, triplet_loss(S_ap, S_an))
    return per_sample.mean()

def triplet_loss(S_ap, S_an):
    """Softmax-style triplet loss on similarities.

    Computes `-log(exp(S_ap) / (exp(S_ap) + exp(S_an)))` element-wise.

    Args:
        S_ap: anchor-positive similarities (tensor).
        S_an: anchor-negative similarities (tensor of the same shape).

    Returns:
        Tensor of per-sample losses with the shape of the inputs.
    """
    exp_pos = S_ap.exp()
    exp_neg = S_an.exp()
    # share of the positive term in the two-way softmax
    positive_share = exp_pos / (exp_pos + exp_neg)
    return positive_share.log().neg()

In [None]:
# Label every image with the split it belongs to ('train' or 'val'); images in
# neither split get NaN. The result is a Series indexed by image_id.
sets_df = pd.concat([
    train_df.set_index('image_id').assign(Set='train').Set,
    valid_df.set_index('image_id').assign(Set='val').Set
])
# Drop a "Set" column left over from a previous execution first: joining onto a
# frame that already has the column raises an overlap error, which made this cell
# fail when re-run.
data_df = data_df.drop(columns=["Set"], errors="ignore").join(sets_df, on='image_id')

In [None]:
# Classification loss applied to the ArcFace logits in train_epoch.
criterion = nn.CrossEntropyLoss()
# All model parameters are optimised, including modules that forward() does not use.
optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)

# One-cycle schedule stepped once per batch (see train_epoch): the total number of
# steps is epochs * len(train_loader).
scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=args.lr,
                epochs=args.epochs, #20
                steps_per_epoch=len(train_loader),
                div_factor=10,
                final_div_factor=1,
                pct_start=0.1,
                anneal_strategy="cos",
            )

# first epoch number of the training loop below
start_epoch = 1

In [None]:
# Folder that receives one pickled copy of data_df (including embeddings) per epoch.
os.makedirs(f"{OUTPUT_FOLDER}{model_name}_image-embeddings", exist_ok=True)

# Embeddings of all images from the not-yet-trained model, stored as "epoch0".
# base_loader iterates data_df in order (shuffle=False), so row i of base_embeds
# belongs to row i of data_df.
base_embeds, _ = generate_embeddings(base_loader, model, "Generate embeddings for all images")
data_df["embeddings"] = list(base_embeds)
data_df.to_pickle(f"{OUTPUT_FOLDER}{model_name}_image-embeddings/epoch0.pkl")
#data_df.to_pickle(f"{OUTPUT_FOLDER}{model_name}_image-embeddings/epoch{start_epoch-1}.pkl")

In [None]:
# Baseline retrieval score using the epoch-0 embeddings stored in data_df.
test_similarity(args, data_df, model)

In [None]:
# Targets of all training images, on the training device, used as the reference
# labels for positive/negative mining.
train_hotel_id_code = torch.from_numpy(data_df[(data_df.Set=='train')].hotel_id_code.values).to(args.device)
train_hotel_id_code.requires_grad = False

# Matching reference embeddings (same row filter and order as the targets above).
train_embs = torch.from_numpy(np.stack(data_df[(data_df.Set=='train')].embeddings.values)).to(args.device)
train_embs.requires_grad = False

#hard_pos_hard_neg_getter = HardPosHardNegFinder(train_hotel_id_code, args)
# Miner handed to train_epoch; its reference embeddings are refreshed after every epoch.
pos_neg_getter = EasyPosHardNegFinder(train_hotel_id_code, args)
pos_neg_getter.update_compare_embs(train_embs)

In [None]:
# Main loop: train one epoch, checkpoint, re-embed every image, evaluate retrieval,
# then refresh the miner's reference embeddings for the next epoch.
for epoch in range(start_epoch, args.epochs+1):
    train_loss, train_score = train_epoch(args, model, train_loader, criterion, optimizer, scheduler, epoch, pos_neg_getter)
    save_checkpoint(model, scheduler, optimizer, epoch, model_name, train_loss, train_score)
    
    # generate embeddings for all train images and save them for inference
    base_embeds, _ = generate_embeddings(base_loader, model, "Generate embeddings for all images")
    data_df["embeddings"] = list(base_embeds)
    data_df.to_pickle(f"{OUTPUT_FOLDER}{model_name}_image-embeddings/epoch{epoch}.pkl")
    test_similarity(args, data_df, model)
    # the mining references used during the next epoch are this epoch's embeddings
    train_embs = torch.from_numpy(np.stack(data_df[(data_df.Set=='train')].embeddings.values)).to(args.device)
    train_embs.requires_grad = False
    pos_neg_getter.update_compare_embs(train_embs)

In [None]:
# Rebuild the base dataset/loader from the training split only.
# NOTE(review): test_similarity reads the embeddings already stored in data_df and
# does not take a loader, so the two objects created here are not used by the call
# below; it re-evaluates the most recently stored embeddings. Confirm whether a
# generate_embeddings() call was intended in between.
base_dataset  = HotelTrainDataset(train_df, base_transform, data_path=IMAGE_FOLDER)
base_loader   = DataLoader(base_dataset, num_workers=args.num_workers, batch_size=args.embgen_batch_size, shuffle=False)
test_similarity(args, data_df, model)