# Setup

In [None]:
#TODO: Test both cpu and gpu versions?
# Name of the offline DeepFill input dataset; stored in the environment so the
# shell commands in this and the following cells can expand $DEEPFILL.
%env DEEPFILL=jiahuiyu-deepfill-offline
# Copy the inpainting sources into the working directory as ./generative_inpainting
# (a later cell changes into that directory to import from it).
!cp -r ../input/$DEEPFILL/generative_inpainting_v2 generative_inpainting

In [None]:
!pip install ../input/$DEEPFILL/*.whl # neuralgym converted with tf_upgrade_v2

In [None]:
!pip install ../input/tf-slim110/*.whl

In [None]:
!mkdir checkpoints
!cp -rf /kaggle/input/deepfill-v2-pretrained/places2_256_deepfill_v2 checkpoints

In [None]:
import sys
sys.path.append('../input/timmmaster') # Newer timm version

# Imports

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import os
import math

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight
from PIL import Image as pil_image
from tqdm import tqdm
import scipy

import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import timm
from timm.optim import Lookahead, RAdam

# Global

In [None]:
# Show which input datasets are attached (quick check that the dependencies are mounted).
print(os.listdir("/kaggle/input/"))

In [None]:
# Seed handed to seed_everything() before the datasets/loaders are built.
SEED = 42
# Side length (pixels) test images are resized to; also fixes the inpainting placeholder size.
IMG_SIZE = 512
# Folder that contains train.csv for the reference (train) images.
PROJECT_FOLDER = "../input/hotelid-2022-train-images-512x512/"
# Folder whose files are listed to build the test dataframe.
TEST_DATA_FOLDER = "../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/test_images/"

# Helper functions - seed and metric calculator

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Covers Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy's legacy global generator and PyTorch (CPU and all CUDA devices),
    and asks cuDNN for deterministic behaviour.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels run-to-run; keep it off.
    torch.backends.cudnn.benchmark = False

# Dataset and transformations

In [None]:
import albumentations as A
import albumentations.pytorch as APT
import cv2 

# Default pipeline: float conversion followed by tensor conversion, image size untouched.
# Used for the reference (train) loaders and as the main test-time transform.
base_transform = A.Compose([
    A.ToFloat(),
    APT.transforms.ToTensor(),
])

# Same pipeline preceded by a resize to 256x256; the dataset uses it to build the
# extra 256-pixel input stored under the key 256 for test samples.
base_transform256 = A.Compose([
    A.Resize(256, 256),
    A.ToFloat(),
    APT.transforms.ToTensor(),
])

## Inpainting

In [None]:
# The inpainting sources and 'inpaint.yml' are resolved relative to this directory,
# so the working directory is switched for the duration of the cell and restored at the end.
%cd generative_inpainting
from inpaint_model import InpaintCAModel
import neuralgym as ng
import tensorflow as tf

def load_inpaint_model(): # Taken from batch_test.py
    """Build the inpainting graph once and load the pretrained weights into it.

    Must be called while the working directory is ``generative_inpainting``:
    both ``inpaint.yml`` and the ``../checkpoints`` path are relative to it.

    Returns:
        (sess, output, input_image_ph): the TF1-style session, the uint8 output
        tensor, and the float32 placeholder of shape
        ``(1, IMG_SIZE, IMG_SIZE*2, 3)`` to feed.
    """
    FLAGS = ng.Config('inpaint.yml')
    #ng.get_gpus(1)

    # Let TensorFlow grow its GPU memory on demand instead of reserving it up front.
    sess_config = tf.compat.v1.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=sess_config)

    model = InpaintCAModel()
    tf.compat.v1.disable_eager_execution()
    # The width is doubled because inpaint_image() feeds image and mask
    # concatenated side by side along axis 2.
    input_image_ph = tf.compat.v1.placeholder(
        tf.float32, shape=(1, IMG_SIZE, IMG_SIZE*2, 3))
    output = model.build_server_graph(FLAGS, input_image_ph)
    # Rescale to 0..255 (presumably the model emits values in [-1, 1] - TODO confirm),
    # reverse the last (channel) axis, and clamp/cast to uint8.
    output = (output + 1.) * 127.5
    output = tf.reverse(output, [-1])
    output = tf.saturate_cast(output, tf.uint8)
    # Copy each graph variable's value from the checkpoint, matching by variable name.
    vars_list = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    assign_ops = []
    for var in vars_list:
        vname = var.name
        from_name = vname
        var_value = tf.train.load_variable("../checkpoints/places2_256_deepfill_v2", from_name)
        assign_ops.append(tf.compat.v1.assign(var, var_value))
    sess.run(assign_ops)
    print('Model loaded.')
    return sess, output, input_image_ph

# Module-level handles reused by inpaint_image() for every test image.
inpaint_model_sess, inpaint_model_output, inpaint_model_ph = load_inpaint_model()
%cd ..

In [None]:
# Inclusive per-channel bounds handed to cv2.inRange. Images reach get_mask() straight
# from cv2.imread (BGR channel order), so the bounds select strongly red pixels.
lower_red = np.array([0,0,200])
upper_red = np.array([55,55,255])
# Structuring element used to grow the detected region slightly.
mask_kernel = np.ones((4,4), np.uint8)

def get_mask(img):
    """Return a 3-channel mask of the red region of ``img``.

    Pixels inside [lower_red, upper_red] are selected, the selection is dilated
    once with ``mask_kernel``, and the single-channel result is expanded to
    three channels so it can be concatenated with the image.
    """
    red_pixels = cv2.inRange(img, lower_red, upper_red)
    grown = cv2.dilate(red_pixels, mask_kernel, iterations=1)
    three_channel = cv2.cvtColor(grown, cv2.COLOR_GRAY2BGR)
    return three_channel

def inpaint_image(image, mask):
    """Fill the masked region of ``image`` using the preloaded inpainting session.

    Args:
        image: H x W x 3 array.
        mask: array with the same layout as ``image`` marking the region to fill.

    Returns:
        The first (only) result of the session run with its last axis reversed.
    """
    grid = 4
    height, width, _ = image.shape
    # Trim both inputs so height and width are multiples of `grid`.
    keep_h = height // grid * grid
    keep_w = width // grid * grid
    image_batch = np.expand_dims(image[:keep_h, :keep_w, :], 0)
    mask_batch = np.expand_dims(mask[:keep_h, :keep_w, :], 0)

    # The graph expects image and mask side by side along the width axis.
    stacked = np.concatenate([image_batch, mask_batch], axis=2)

    outputs = inpaint_model_sess.run(
        inpaint_model_output,
        feed_dict={inpaint_model_ph: stacked},
    )
    filled = outputs[0]
    return filled[:, :, ::-1]

In [None]:
def pad_image(img):
    """Pad the shorter side of ``img`` with black borders to make it (nearly) square.

    The padding is split evenly between both sides using integer division, so an
    odd size difference leaves the result one pixel short of square.
    """
    rows, cols, _ = np.shape(img)
    top = bottom = left = right = 0
    if rows > cols:
        # Taller than wide: add columns on the left and right.
        left = right = (rows - cols) // 2
    else:
        # Wider than tall (or already square): add rows on the top and bottom.
        top = bottom = (cols - rows) // 2

    return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)

def open_and_preprocess_image(image_path):
    """Load a test image, square-pad it, resize it and inpaint its red mask.

    Args:
        image_path: path of the image file to read.

    Returns:
        The inpainted IMG_SIZE x IMG_SIZE image converted from BGR to RGB.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None; without
        # this check the failure surfaces later as an opaque unpacking error.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = pad_image(img)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    mask = get_mask(img)
    img = inpaint_image(img, mask)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Test code
# from IPython.display import Image
# img = open_and_preprocess_image(TEST_DATA_FOLDER+'abc.jpg')
# cv2.imwrite("test.png", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
# Image("test.png")

In [None]:
# More test code
# Add new mask and test again
# img = pil_image.open('test.png')
# mask = pil_image.open('../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/train_masks/00095.png')
# mask = mask.resize((IMG_SIZE, IMG_SIZE))
# img.paste(mask, (0,0), mask)
# img.save('test_mask.png')
# # Image("test_mask.png")

# img = open_and_preprocess_image('test_mask.png')
# cv2.imwrite("test2.png", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
# Image("test2.png")

## Dataset

In [None]:
class HotelImageDataset:
    """Map-style dataset that yields hotel images described by a dataframe.

    Args:
        data: dataframe with an ``image_id`` column (file name relative to
            ``data_folder``); rows are accessed positionally.
        transform: optional callable invoked as ``transform(image=...)`` that
            returns a dict with an ``"image"`` entry. When omitted, the raw
            uint8 array is returned unchanged.
        data_folder: folder prefix prepended to ``image_id``. Folders whose path
            contains ``"test"`` are read through ``open_and_preprocess_image``
            (padding, resize, inpainting); all others are opened as-is.
        test: when True, each sample additionally carries a 256x256 version of
            the image under the key ``256``.
    """

    def __init__(self, data, transform=None, data_folder="train_images/", test=False):
        self.data = data
        self.data_folder = data_folder
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        record = self.data.iloc[idx]
        image_path = self.data_folder + record["image_id"]

        if "test" in self.data_folder:
            image = np.array(open_and_preprocess_image(image_path)).astype(np.uint8)
        else:
            image = np.array(pil_image.open(image_path)).astype(np.uint8)

        if self.transform:
            transformed = self.transform(image=image)
        else:
            # No transform configured: previously `transformed` was left
            # undefined here and the lookups below raised UnboundLocalError.
            transformed = {"image": image}

        if not self.test:
            return {
                "image" : transformed["image"]
            }

        return {
            "image" : transformed["image"],
            # Extra input for models that expect 256x256 images.
            256: base_transform256(image=image)["image"],
        }

# Model

In [None]:
# source: https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py
class ArcMarginProduct(nn.Module):
    r"""Large-margin arc distance head: ``cos(theta + m)`` on the target class.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes (rows of the weight matrix).
        s: scale multiplied into the returned logits.
        m: additive angular margin applied to the target-class angle.
        easy_margin: if True, apply the margin only where ``cos(theta) > 0``;
            otherwise fall back to ``cos(theta) - mm`` once ``theta + m``
            would exceed pi.
    """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Precomputed terms for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold cos(pi - m) and the penalty used past it.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return scaled logits with the angular margin applied to ``label`` columns.

        Args:
            input: (batch, in_features) embeddings.
            label: (batch,) integer class indices.

        Returns:
            (batch, out_features) tensor on the same device as ``input``.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate next to `cosine` instead of reading a notebook-level `args.device`,
        # so the layer works on any device and before `args` is defined.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long().to(cosine.device), 1)
        # Margin-adjusted value for the target class, plain cosine everywhere else.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

class HotelIdModel(nn.Module):
    """timm backbone + embedding projection + ArcMargin classification head.

    Args:
        out_features: number of classes for the ArcMargin head.
        embed_size: dimensionality of the returned embedding.
        backbone_name: timm model name; built without pretrained weights.
    """
    def __init__(self, out_features, embed_size=256, backbone_name="efficientnet_b3"):
        super().__init__()

        self.embed_size = embed_size
        self.backbone = timm.create_model(backbone_name, pretrained=False)
        feature_dim = self.backbone.get_classifier().in_features

        # Replace the backbone's classification layer with a pass-through so the
        # backbone emits its pooled features. Only these attribute paths are supported.
        head_path = self.backbone.default_cfg['classifier']
        if head_path not in ('classifier', 'head.fc', 'fc', 'head'):
            raise Exception("unknown classifier layer: " + head_path)
        *parent_names, leaf_name = head_path.split('.')
        owner = self.backbone
        for parent_name in parent_names:
            owner = getattr(owner, parent_name)
        setattr(owner, leaf_name, nn.Identity())

        self.arc_face = ArcMarginProduct(self.embed_size, out_features, s=30.0, m=0.50, easy_margin=False)

        hidden_dim = self.embed_size*2
        self.post = nn.Sequential(
            nn.utils.weight_norm(nn.Linear(feature_dim, hidden_dim), dim=None),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.2),
            nn.utils.weight_norm(nn.Linear(hidden_dim, self.embed_size)),
            nn.BatchNorm1d(self.embed_size),
        )

        print(f"Model {backbone_name} ArcMarginProduct - Features: {feature_dim}, Embeds: {self.embed_size}")

    def forward(self, input, targets = None):
        """Return embeddings, or ArcMargin logits when ``targets`` is given."""
        features = self.backbone(input)
        embedding = self.post(features.view(features.size(0), -1))

        if targets is None:
            return embedding
        return self.arc_face(embedding, targets)

In [None]:
class EmbeddingNet(nn.Module):
    """timm backbone + embedding projection, with a separate linear classifier.

    Args:
        n_classes: output size of ``self.classifier``.
        embed_size: dimensionality of the embedding returned by ``forward``.
        backbone_name: timm model name; built without pretrained weights.
    """
    def __init__(self, n_classes=100, embed_size=64, backbone_name="efficientnet_b0"):
        super().__init__()

        self.embed_size = embed_size
        self.backbone = timm.create_model(backbone_name, pretrained=False)
        feature_dim = self.backbone.get_classifier().in_features

        # Replace the backbone's classification layer with a pass-through so the
        # backbone emits its pooled features. Only these attribute paths are supported.
        head_path = self.backbone.default_cfg['classifier']
        if head_path not in ('classifier', 'head.fc', 'fc', 'head'):
            raise Exception("unknown classifier layer: " + head_path)
        *parent_names, leaf_name = head_path.split('.')
        owner = self.backbone
        for parent_name in parent_names:
            owner = getattr(owner, parent_name)
        setattr(owner, leaf_name, nn.Identity())

        hidden_dim = self.embed_size*2
        self.post = nn.Sequential(
            nn.utils.weight_norm(nn.Linear(feature_dim, hidden_dim), dim=None),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.2),
            nn.utils.weight_norm(nn.Linear(hidden_dim, self.embed_size)),
        )

        self.classifier = nn.Sequential(
            nn.BatchNorm1d(self.embed_size),
            nn.Dropout(0.2),
            nn.Linear(self.embed_size, n_classes),
        )

        print(f"Model {backbone_name} EmbeddingNet - Features: {feature_dim}, Embeds: {self.embed_size}")

    def embed_and_classify(self, x):
        """Return ``(embedding, class_logits)`` for a batch of images."""
        embedding = self.forward(x)
        return embedding, self.classifier(embedding)

    def forward(self, x):
        """Return the embedding for a batch of images."""
        features = self.backbone(x)
        flat = features.view(features.size(0), -1)
        return self.post(flat)

# Model helper functions

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def get_embeds(loader, model, bar_desc="Generating embeds"):
    """Run ``model`` over every batch of ``loader`` and collect its outputs.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the notebook-level ``args.device``.

    Returns:
        A flat list with one NumPy embedding per sample, in loader order.
    """
    embeddings = []

    model.eval()
    with torch.no_grad():
        for batch in tqdm(loader, desc=bar_desc):
            images = batch['image'].to(args.device)
            batch_embeds = model(images)
            embeddings.extend(batch_embeds.detach().cpu().numpy())

    return embeddings

In [None]:
def get_distances(sample, base_embeds, model_array):
    """Score one test batch against the reference embeddings of every model.

    Args:
        sample: batch dict from the test loader; ``sample['image']`` is the
            IMG_SIZE input and ``sample[size]`` holds inputs of other sizes.
        base_embeds: mapping from model position to that model's reference embeddings.
        model_array: sequence of ``(model, input_size)`` entries.

    Returns:
        Array of shape (batch, n_reference) holding the element-wise product of
        each model's cosine similarities, or None if ``model_array`` is empty.
    """
    combined = None
    for position, entry in enumerate(model_array):
        net, input_size = entry[0], entry[1]

        batch = sample['image']
        if input_size != IMG_SIZE:
            batch = sample[input_size]

        embeds = net(batch.to(args.device)).detach().cpu().numpy()
        similarities = cosine_similarity(embeds, base_embeds[position])

        # Ensemble by multiplying the per-model similarities.
        # NOTE(review): two negative similarities multiply to a positive score - confirm intended.
        combined = similarities if combined is None else combined * similarities

    return combined
    

def predict(loader, base_df, base_embeds, model_array, n_matches=5, bar_desc="Generating embeds"):
    """Rank reference hotels for every test image.

    For each test image the reference rows are ordered by descending score
    (ties broken by descending ``hotel_id``) and the first ``n_matches``
    distinct hotel ids are kept.

    Returns:
        A list with one array of hotel ids per test image, in loader order.
    """
    preds = []
    with torch.no_grad():
        for sample in tqdm(loader, desc=bar_desc):
            batch_scores = get_distances(sample, base_embeds, model_array)

            for scores in batch_scores:
                ranked = (
                    base_df
                    .assign(distance=scores)
                    .sort_values(by=["distance", "hotel_id"], ascending=False)
                    .reset_index(drop=True)
                )
                preds.append(ranked["hotel_id"].unique()[:n_matches])

    return preds

def find_closest_match(args, test_loader, base_loader, model_array, n_matches=5):
    """Embed the reference set with every model, then predict for the test set.

    Args:
        args: accepted for interface compatibility; not read by this function.
        test_loader: loader over the test images.
        base_loader: mapping from input size to the reference loader of that size.
        model_array: sequence of ``(model, input_size)`` entries.
        n_matches: number of hotel ids to return per test image.
    """
    # One set of reference embeddings per model, keyed by the model's position.
    base_embeds = {
        position: get_embeds(base_loader[entry[1]], entry[0], "Generating embeds for train")
        for position, entry in enumerate(model_array)
    }

    reference_df = base_loader[IMG_SIZE].dataset.data
    return predict(test_loader, reference_df, base_embeds, model_array, n_matches, "Generating predictions")

# Prepare data

### for submission

In [None]:
# Reference set: rows of train.csv (the code below relies on its image_id and hotel_id columns).
data_df = pd.read_csv(PROJECT_FOLDER + "train.csv")
# Test set: one row per file in the test folder, sorted by file name; hotel_id starts
# empty and is filled in by the submission cell.
test_df = pd.DataFrame(data={"image_id": os.listdir(TEST_DATA_FOLDER), "hotel_id": ""}).sort_values(by="image_id")
print(test_df)

# Train and evaluate

In [None]:
def get_model(model_type, backbone_name, embed_size, image_size, checkpoint_path, args):
    """Build a model, load its checkpoint and move it to ``args.device``.

    Args:
        model_type: ``'arcmargin'`` builds a HotelIdModel; any other value
            builds an EmbeddingNet.
        backbone_name: timm backbone name.
        embed_size: embedding dimensionality.
        image_size: input size the model expects; returned unchanged.
        checkpoint_path: file whose ``"model"`` entry holds the state dict.
        args: object exposing ``n_classes`` and ``device``.

    Returns:
        ``(model, image_size)`` with the model in eval mode.
    """
    if model_type == 'arcmargin':
        model = HotelIdModel(args.n_classes, embed_size, backbone_name)
    else:
        model = EmbeddingNet(args.n_classes, embed_size, backbone_name)

    # Remap tensors saved on GPU so the checkpoint also loads on a CPU-only run.
    checkpoint = torch.load(checkpoint_path, map_location=args.device)
    model.load_state_dict(checkpoint["model"])
    model = model.to(args.device)
    # Inference only: freeze dropout / batch-norm behaviour up front rather than
    # relying on a later helper to switch modes.
    model.eval()

    return (model, image_size)

In [None]:
# Run configuration, used as a plain namespace (never instantiated).
class args:
    batch_size = 32
    num_workers = 0
    # Number of distinct hotels in the reference set; sizes the classification heads.
    n_classes = data_df["hotel_id"].nunique()
    device = ('cuda' if torch.cuda.is_available() else 'cpu')
    
    
seed_everything(seed=SEED)

# Reference datasets/loaders keyed by image side length, so each model can be fed
# the resolution it expects.
base_dataset = {}
base_loader = {}

TRAIN_DATA_FOLDER = "../input/hotelid-2022-train-images-512x512/images/"
base_dataset[512] = HotelImageDataset(data_df, base_transform, data_folder=TRAIN_DATA_FOLDER)
base_loader[512] = DataLoader(base_dataset[512], num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

# Same rows, read from the 256x256 image folder.
TRAIN_DATA_FOLDER = "../input/hotelid-2022-train-images-256x256/images/"
base_dataset[256] = HotelImageDataset(data_df, base_transform, data_folder=TRAIN_DATA_FOLDER)
base_loader[256] = DataLoader(base_dataset[256], num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

# test=True makes every sample also carry a 256x256 copy under the key 256.
test_dataset = HotelImageDataset(test_df, base_transform, data_folder=TEST_DATA_FOLDER, test=True)
test_loader = DataLoader(test_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

### Inference

In [None]:
# Ensemble members as (model, input_size) pairs; get_distances() multiplies their
# similarity scores. Any model_type other than "arcmargin" (here "cosface") is built
# as an EmbeddingNet by get_model().
model_array = [
                get_model("arcmargin", 
                         "efficientnet_b1", 2048, IMG_SIZE, # (Higher validation score than 4096)
                         "../input/hotelarcmarginmodels/checkpoint-arcmargin-model-efficientnet_b1-512x512-2048embeds-3116hotels.pt", 
                         args),
    
               get_model("cosface", 
                         "ecaresnet50d_pruned", 4096, IMG_SIZE,
                         "../input/hotelidcosfaceecaresnet50dtrained/checkpoint-cosface-model-ecaresnet50d_pruned-512x512-4096embeds-3116hotels.pt", 
                         args),
               
               get_model("arcmargin",
                         "eca_nfnet_l1", 1024, IMG_SIZE,
                         "../input/hotelarcmarginmodels/checkpoint-arcmargin-model-eca_nfnet_l1-512x512-1024embeds-3116hotels.pt",
                         args),

               # Fed the 256x256 inputs (the reference loader and sample entry keyed 256).
               get_model("arcmargin", 
                         "swinv2_base_window16_256", 4096, 256,
                         "../input/hotelarcmarginmodels/checkpoint-arcmargin-model-swinv2_base_window16_256-256x256-4096embeds-3116hotels.pt",
                         args),
              ]

### submission

In [None]:
# %%time

# Only run the (expensive) matching when the test folder holds more than 5 files;
# otherwise the hotel_id column keeps its empty placeholder value.
# NOTE(review): presumably this skips the small public placeholder test set - confirm.
if len(os.listdir(TEST_DATA_FOLDER)) > 5:
    preds = find_closest_match(args, test_loader, base_loader, model_array, n_matches=5)
    # Space-separated ids per image. Built from str() of each id rather than the
    # repr of a list, because NumPy >= 2 renders scalars in lists as "np.int64(123)".
    test_df["hotel_id"] = [" ".join(str(hotel_id) for hotel_id in match) for match in preds]

test_df.to_csv("submission.csv", index=False)
test_df.head()

In [None]:
# predict(test_loader, base_loader.dataset.data, base_embeds, model_array, n_matches, f"Generating predictions")