# Setup
This notebook is intended to run on Google Colab; the Colab-specific steps are commented out so that it also runs on Kaggle.

In [None]:
# !nvidia-smi

In [None]:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd /gdrive

In [None]:
!pip install git+https://github.com/rwightman/pytorch-image-models
!pip install albumentations -U
# !pip install imgaug -U

# Imports

In [None]:
import numpy as np 
import pandas as pd
import random
import os
import math
import time

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight
from PIL import Image as pil_image
from tqdm import tqdm
import scipy

import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import timm
from timm.optim import Lookahead, RAdam

# Global

In [None]:
# Image size tag; it appears in the model/checkpoint name and in the
# commented-out resize / unzip steps of this notebook.
IMG_SIZE = 512
# Seed handed to seed_everything() and to the validation split sampling.
SEED = 42
# Folder that contains train.csv (Kaggle input layout).
PROJECT_FOLDER = "../input/hotel-id-2021-fgvc8/"
# Folder that contains the image files referenced by the "image" column.
DATA_FOLDER = "../input/hotelid-images-512x512-padded/"
# Checkpoints are written here; paths are built by plain string concatenation,
# so every folder constant must end with a slash.
OUTPUT_FOLDER = "./"

# Colab alternatives (enable these together with the drive mount cell above):
# PROJECT_FOLDER = "/gdrive/MyDrive/Projects/Hotel-ID/"
# DATA_FOLDER = "/home/data/"
# OUTPUT_FOLDER = PROJECT_FOLDER + "output/"

In [None]:
# !mkdir {DATA_FOLDER}
# !unzip -qq {PROJECT_FOLDER}data/train-{IMG_SIZE}x{IMG_SIZE}.zip -d /home/data/

In [None]:
# Sanity check of the configured paths: show the entries of the project folder
# and the number of entries in the image folder.
print(os.listdir(PROJECT_FOLDER))
print(len(os.listdir(DATA_FOLDER)))

# Helper functions - seed and metric calculator

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator and all CUDA devices), exports ``PYTHONHASHSEED`` and
    configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run,
    # so it is switched off explicitly alongside the deterministic flag.
    torch.backends.cudnn.benchmark = False

# Dataset and transformations

In [None]:
import albumentations as A
import albumentations.pytorch as APT
import cv2 

# Augmentation pipeline applied to training images: random flips, geometric
# distortions, coarse dropout and brightness jitter, followed by conversion
# to a float tensor.
# NOTE(review): IAAPerspective, RandomBrightness and albumentations.pytorch
# ToTensor are, as far as I know, deprecated or removed in newer albumentations
# releases (usual replacements: Perspective, RandomBrightnessContrast,
# ToTensorV2). The install cell uses `-U`, so confirm these names still exist
# in the installed version.
train_transform = A.Compose([
    # A.Resize(IMG_SIZE, IMG_SIZE),
    # A.CLAHE(p=1),

    A.HorizontalFlip(p=0.75),
    A.VerticalFlip(p=0.25),
    A.ShiftScaleRotate(p=0.5, border_mode=cv2.BORDER_CONSTANT),
    A.OpticalDistortion(p=0.25),
    A.IAAPerspective(p=0.25),
    A.CoarseDropout(p=0.5),

    A.RandomBrightness(p=0.75),
    A.ToFloat(),
    APT.transforms.ToTensor(),
])


# Deterministic pipeline without augmentation: only the float / tensor
# conversion. Used for the validation images and for the un-augmented training
# images from which the reference ("base") embeddings are computed.
valid_transform = A.Compose([
    # A.Resize(IMG_SIZE, IMG_SIZE),
    # A.CLAHE(p=1),
    A.ToFloat(),
    APT.transforms.ToTensor(),
])

In [None]:
class HotelTrainDataset:
    """Map-style dataset that yields ``{"image", "target"}`` samples.

    Args:
        data: DataFrame with an ``image`` column (file name relative to
            ``data_path``) and a ``hotel_id_code`` column (class index).
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``"image"`` entry (albumentations style).
            When omitted, the raw ``uint8`` array is returned unchanged.
        data_path: Prefix prepended to every file name; it is concatenated as
            a plain string, so it must end with a path separator.

    Attributes are plain fields; ``fake_load`` can be set to ``True`` to skip
    disk access and return a random 32x32x3 ``uint8`` image instead (used by
    ``iterate_loader`` to run through a loader quickly).
    """

    def __init__(self, data, transform=None, data_path="train_images/"):
        self.data = data
        self.data_path = data_path
        self.transform = transform
        self.fake_load = False

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load (or fake) the image at positional index ``idx`` and transform it."""
        record = self.data.iloc[idx]
        image_path = self.data_path + record["image"]

        if self.fake_load:
            image = np.random.randint(0, 255, (32, 32, 3)).astype(np.uint8)
        else:
            image = np.array(pil_image.open(image_path)).astype(np.uint8)

        # Without a transform the image is passed through untouched; previously
        # this path failed because the transformed result was never assigned.
        if self.transform:
            image = self.transform(image=image)["image"]

        return {
            "image" : image,
            "target" : record['hotel_id_code'],
        }

# Model

In [None]:
# source: https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py
class ArcMarginProduct(nn.Module):
    r"""Large-margin arc distance head: the target class gets ``cos(theta + m)``.

    Args:
        in_features: size of each input sample (embedding dimension)
        out_features: size of each output sample (number of classes)
        s: scale applied to the resulting cosine logits
        m: additive angular margin, in radians
        easy_margin: if True the margin is only applied where ``cos(theta) > 0``;
            otherwise it is applied where ``cos(theta) > cos(pi - m)`` and
            ``cos(theta) - m * sin(pi - m)`` is used elsewhere.
    """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold and penalty used when easy_margin is False
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``.

        Args:
            input: ``(batch, in_features)`` embeddings; they are L2-normalised here.
            label: ``(batch,)`` class indices; the margin is applied only at
                these positions.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # clamp guards against tiny negative values caused by rounding
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate on the same device as the logits so the head also works on
        # CPU (or any non-default GPU) instead of assuming 'cuda'.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # ------------- phi for the target class, plain cosine for all others -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

class HotelIdModel(nn.Module):
    """timm backbone followed by an embedding head and an ArcMargin classifier.

    Args:
        out_features: number of classes handed to the ArcMargin head.
        embed_size: dimension of the embedding produced by ``forward``.
        backbone_name: timm model name; created with pretrained weights.

    Raises:
        Exception: if the last registered backbone module is not one of the
            known classifier layers (``classifier``, ``head.fc``, ``fc``).
    """

    def __init__(self, out_features, embed_size=256, backbone_name="efficientnet_b3"):
        super().__init__()

        self.embed_size = embed_size
        self.backbone = timm.create_model(backbone_name, pretrained=True)
        in_features = self.backbone.get_classifier().in_features

        # The classifier is looked up as the last registered sub-module and is
        # replaced by a pass-through so the backbone emits pooled features.
        last_module_name = list(self.backbone.named_modules())[-1][0]
        if last_module_name == 'classifier':
            self.backbone.classifier = nn.Identity()
        elif last_module_name == 'head.fc':
            self.backbone.head.fc = nn.Identity()
        elif last_module_name == 'fc':
            self.backbone.fc = nn.Identity()
        else:
            raise Exception("unknown classifier layer: " + last_module_name)

        self.arc_face = ArcMarginProduct(self.embed_size, out_features, s=30.0, m=0.20, easy_margin=False)

        hidden_size = self.embed_size*2
        first_linear = nn.utils.weight_norm(nn.Linear(in_features, hidden_size), dim=None)
        first_norm = nn.BatchNorm1d(hidden_size)
        dropout = nn.Dropout(0.2)
        second_linear = nn.utils.weight_norm(nn.Linear(hidden_size, self.embed_size))
        second_norm = nn.BatchNorm1d(self.embed_size)
        self.post = nn.Sequential(first_linear, first_norm, dropout, second_linear, second_norm)

        print(f"Model {backbone_name} ArcMarginProduct - Features: {in_features}, Embeds: {self.embed_size}")

    def forward(self, input, targets = None):
        """Return embeddings, or ArcMargin logits when ``targets`` is given."""
        features = self.backbone(input)
        flat = features.view(features.size(0), -1)
        embeds = self.post(flat)

        if targets is None:
            return embeds

        return self.arc_face(embeds, targets)

# Model helper functions

In [None]:
def get_embeds(loader, model, bar_desc="Generating embeds"):
    """Run ``model`` in eval mode over ``loader`` and collect its outputs.

    The inputs are moved to the device the model's parameters live on, so the
    function no longer depends on a global ``args`` object.

    Args:
        loader: iterable of batches; each batch is a mapping with an ``image``
            tensor and a ``target`` tensor.
        model: module called as ``model(images)`` (no targets), which for
            ``HotelIdModel`` returns the embeddings.
        bar_desc: description shown on the progress bar.

    Returns:
        ``(targets_all, outputs_all)``: two lists with one entry per sample,
        holding the target values and the per-sample output arrays.
    """
    first_param = next(model.parameters(), None)
    # Parameter-free models are evaluated on the CPU.
    device = first_param.device if first_param is not None else torch.device("cpu")

    targets_all = []
    outputs_all = []

    model.eval()
    with torch.no_grad():
        for sample in tqdm(loader, desc=bar_desc):
            images = sample['image'].to(device)
            output = model(images)

            # Targets are only collected, so they never need to visit the device.
            targets_all.extend(sample['target'].cpu().numpy())
            outputs_all.extend(output.detach().cpu().numpy())

    return targets_all, outputs_all

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
    
def get_distance_matrix(embeds, base_embeds, batch_size=1024):
    """Cosine similarity between every query embedding and every base embedding.

    Despite the name, the values are similarities: a larger value means a
    closer match. The queries are processed in chunks to bound the size of the
    intermediate results.

    Args:
        embeds: sequence of query embedding vectors.
        base_embeds: sequence of reference embedding vectors.
        batch_size: number of query rows scored per chunk.

    Returns:
        Array of shape ``(len(embeds), len(base_embeds))``.
    """
    queries = np.asarray(embeds, dtype=np.float32)
    base = np.asarray(base_embeds)

    if len(queries) == 0:
        # cosine_similarity rejects empty input; keep a well-formed 2-D result
        return np.empty((0, len(base)), dtype=np.float32)

    # Plain slicing replaces the DataLoader (and its worker processes) that
    # was only used to batch an in-memory array.
    chunk_starts = range(0, len(queries), batch_size)
    chunks = [
        cosine_similarity(queries[start:start + batch_size], base)
        for start in tqdm(chunk_starts)
    ]

    return np.concatenate(chunks, axis=0)

In [None]:
def save_checkpoint(model, scheduler, optimizer, epoch, name, loss=None, score=None):
    """Write the training state to ``{OUTPUT_FOLDER}checkpoint-{name}.pt``.

    The file holds the epoch number, the state dicts of model, scheduler and
    optimizer, and the optional loss / score values. An existing file with the
    same name is overwritten.
    """
    state = dict(
        epoch=epoch,
        model=model.state_dict(),
        scheduler=scheduler.state_dict(),
        optimizer=optimizer.state_dict(),
        loss=loss,
        score=score,
    )
    target_file = f"{OUTPUT_FOLDER}checkpoint-{name}.pt"
    torch.save(state, target_file)


def load_checkpoint(model, scheduler, optimizer, name):
    """Restore model and scheduler state from ``{OUTPUT_FOLDER}checkpoint-{name}.pt``.

    Args:
        model: module whose ``load_state_dict`` receives the saved weights.
        scheduler: scheduler whose ``load_state_dict`` receives the saved state.
        optimizer: returned unchanged (see the note below).
        name: checkpoint name, as passed to ``save_checkpoint``.

    Returns:
        ``(model, scheduler, optimizer, epoch)`` where ``epoch`` is the epoch
        number stored in the checkpoint.
    """
    # Deserialize onto the CPU: load_state_dict copies the values into the
    # model's own device, so a checkpoint written on a GPU machine also loads
    # on a CPU-only runtime and is not duplicated in GPU memory.
    checkpoint = torch.load(f"{OUTPUT_FOLDER}checkpoint-{name}.pt", map_location="cpu")

    model.load_state_dict(checkpoint["model"])
    scheduler.load_state_dict(checkpoint["scheduler"])
    # NOTE(review): the optimizer state is saved but deliberately not restored
    # here, so its moment estimates restart from scratch after a resume.
    # optimizer.load_state_dict(checkpoint["optimizer"])

    return model, scheduler, optimizer, checkpoint["epoch"]

In [None]:
def iterate_loader(loader, epochs):
    """Run through ``loader`` ``epochs`` times without reading real images.

    While iterating, ``loader.dataset.fake_load`` is set to ``True`` so the
    dataset returns small random images instead of reading files. It is called
    before resuming from a checkpoint, presumably to consume the same random
    draws (shuffling, augmentation) as the epochs that were already trained.

    The flag is reset to ``False`` even when iteration raises, so a failed
    fast-forward can no longer leave the dataset serving fake images.
    """
    loader.dataset.fake_load = True
    try:
        for i in range(epochs):
            for _ in tqdm(loader, desc=f"Iterating loader {i+1}/{epochs}"):
                pass
    finally:
        loader.dataset.fake_load = False

In [None]:
def train_epoch(args, model, loader, criterion, optimizer, scheduler, epoch):
    """Train ``model`` for one pass over ``loader``.

    Args:
        args: object providing ``device`` and ``epochs`` (the latter is only
            used for the progress bar text).
        model: module called as ``model(images, targets)`` that returns logits.
        loader: iterable of batches with ``image`` and ``target`` tensors.
        criterion: loss called as ``criterion(logits, targets)``.
        optimizer: optimizer stepped once per batch.
        scheduler: optional LR scheduler, stepped once per batch when given.
        epoch: current epoch number (progress bar text only).

    Returns:
        ``(mean_loss, score)``: the mean of the per-batch losses and the
        running top-1 accuracy over all samples seen in this epoch. For an
        empty loader this is ``(nan, 0.0)``.
    """
    losses = []
    # Running counters replace storing every logit row of the epoch and
    # re-scoring all of them after each batch.
    n_correct = 0
    n_seen = 0
    score = 0.0

    model.train()
    t = tqdm(loader)

    for sample in t:
        optimizer.zero_grad()

        images = sample['image'].to(args.device)
        target = sample['target'].to(args.device)

        output = model(images, target)
        loss = criterion(output, target)

        loss.backward()
        optimizer.step()

        if scheduler is not None:
            scheduler.step()

        loss_value = loss.item()
        losses.append(loss_value)

        # argmax over the raw logits; a sigmoid is monotonic and would only
        # risk ties once it saturates
        predicted = output.detach().argmax(dim=1)
        n_correct += (predicted == target).sum().item()
        n_seen += target.numel()
        score = n_correct / n_seen

        t.set_description(f"Epoch {epoch}/{args.epochs} - Train loss:{loss_value:0.4f}, score: {score:0.4f}")

    mean_loss = np.mean(losses) if losses else float("nan")
    return mean_loss, score
        

def find_closest_match(base_df, distance_matrix, n_matches=5):
    """Pick the best-matching hotel codes for every row of ``distance_matrix``.

    For each query row the base images are ranked by descending value (the
    matrix holds similarities, so larger means closer), with ties broken by
    descending ``hotel_id``; the first ``n_matches`` distinct ``hotel_id_code``
    values in that order are kept.

    Args:
        base_df: DataFrame with ``hotel_id`` and ``hotel_id_code`` columns, one
            row per column of ``distance_matrix``.
        distance_matrix: 2-D array-like, one row per query image.
        n_matches: number of distinct hotel codes to keep per query.

    Returns:
        Array with one row of predicted codes per query.
    """
    # Loop-invariant columns are extracted once instead of copying and sorting
    # the whole DataFrame for every query row.
    hotel_codes = base_df["hotel_id_code"].to_numpy()
    # Order-preserving integer rank of hotel_id, negated for a descending
    # tie-break; this also works when hotel_id is not numeric.
    neg_hotel_rank = -pd.Categorical(base_df["hotel_id"]).codes.astype(np.int64)

    preds = []
    N_dist = len(distance_matrix)
    for i in tqdm(range(N_dist), total=N_dist, desc="Getting closest match"):
        neg_scores = -np.asarray(distance_matrix[i], dtype=float)
        # lexsort treats the LAST key as the primary one: score first, hotel rank second
        order = np.lexsort((neg_hotel_rank, neg_scores))
        preds.append(pd.unique(hotel_codes[order])[:n_matches])

    preds = np.array(preds)
    return preds


def calc_metric(y_true, y_pred, n_matches=5):
    """Print and return top-1 accuracy and top-``n_matches`` accuracy.

    Args:
        y_true: 1-D sequence of true class codes.
        y_pred: 2-D array with ``n_matches`` ranked predictions per row.
        n_matches: number of prediction columns compared against the truth.

    Returns:
        ``(acc_top_1, acc_top_5)``.
    """
    # One copy of the truth per prediction column, shape (n_samples, n_matches)
    truth_grid = np.tile(np.reshape(y_true, (-1, 1)), (1, n_matches))

    first_is_hit = y_pred[:, 0] == y_true
    any_is_hit = np.any(y_pred == truth_grid, axis=1)

    acc_top_1 = first_is_hit.mean()
    acc_top_5 = any_is_hit.mean()
    print(f"Accuracy: {acc_top_1:0.4f}, top 5 accuracy: {acc_top_5:0.4f}")
    return acc_top_1, acc_top_5


def test(base_loader, valid_loader, model):
    """Evaluate retrieval accuracy of ``model`` on the validation loader.

    Embeds both loaders, scores every validation embedding against every base
    embedding, picks the closest hotels and prints the accuracy metrics.

    Returns:
        ``(base_embeds, valid_embeds, valid_targets, val_preds, distance_matrix)``.
    """
    # The targets of the base set are not needed here
    _, base_embeds = get_embeds(base_loader, model, "Generating embeds for train")
    valid_targets, valid_embeds = get_embeds(valid_loader, model, "Generating embeds for test")

    distance_matrix = get_distance_matrix(valid_embeds, base_embeds)
    base_frame = base_loader.dataset.data
    val_preds = find_closest_match(base_frame, distance_matrix)
    calc_metric(valid_targets, val_preds)

    results = (base_embeds, valid_embeds, valid_targets, val_preds, distance_matrix)
    return results


def test_closest_match_tta(args, base_loader, valid_df, tta_transforms, model):
    """Evaluate retrieval accuracy with test-time augmentation.

    The validation images are embedded once per entry of ``tta_transforms``
    (a mapping from a label to a transform). The per-transform score matrices
    are merged element-wise with a minimum before the closest hotels are
    selected and the metrics are printed. Nothing is returned.

    NOTE(review): the matrices hold cosine similarities (larger = closer), so
    the minimum keeps the worst score across the augmentations. Confirm this
    is intended rather than a maximum or mean.
    """
    _, base_embeds = get_embeds(base_loader, model, "Generating embeds for train")
    distance_matrix = None

    for key in tta_transforms:
        tta_dataset = HotelTrainDataset(valid_df, tta_transforms[key], data_path=DATA_FOLDER)
        tta_loader = DataLoader(tta_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)
        valid_targets, valid_embeds = get_embeds(tta_loader, model, f"Generating embeds for test {key}")

        distances = get_distance_matrix(valid_embeds, base_embeds)

        # The first transform seeds the matrix; later ones are merged element-wise
        distance_matrix = distances if distance_matrix is None else np.minimum(distance_matrix, distances)

    val_preds = find_closest_match(base_loader.dataset.data, distance_matrix)
    calc_metric(valid_targets, val_preds)

# Prepare data

In [None]:
def sample_data(n_hotels, min_images, max_images):
    """Load ``train.csv`` and keep a subsample of hotels for quick experiments.

    Only hotels whose number of distinct images lies strictly between
    ``min_images`` and ``max_images`` are kept. The survivors get consecutive
    ``hotel_id_code`` values, and only codes below ``n_hotels`` are returned.
    A one-line summary of the subsample is printed.

    Returns:
        The filtered DataFrame with an added ``hotel_id_code`` column.
    """
    data_df = pd.read_csv(PROJECT_FOLDER + "train.csv", parse_dates=["timestamp"])

    def image_count_in_range(group):
        n_images = group["image"].nunique()
        return (n_images > min_images) & (n_images < max_images)

    sample_df = data_df.groupby("hotel_id").filter(image_count_in_range)
    category_codes = sample_df["hotel_id"].astype('category').cat.codes
    sample_df["hotel_id_code"] = category_codes.values.astype(np.int64)
    sample_df = sample_df[sample_df["hotel_id_code"] < n_hotels]

    n_sample_hotels = len(sample_df["hotel_id"].unique())
    n_all_hotels = len(data_df["hotel_id"].unique())
    share = len(sample_df) / len(data_df) * 100
    print(f"Subsample with {n_sample_hotels} hotels out of {n_all_hotels} " +
          f"with total {len(sample_df)} images ({share:0.2f} %)")

    return sample_df

In [None]:
# For testing different settings: train on a subsample of hotels only
# data_df = sample_data(1000, 15, 50)

# For final training: use every row of train.csv and derive an integer class
# code per hotel from the category codes of hotel_id
data_df = pd.read_csv(PROJECT_FOLDER + "train.csv", parse_dates=["timestamp"])
data_df["hotel_id_code"] = data_df["hotel_id"].astype('category').cat.codes.values.astype(np.int64)

# Histogram of the number of images per hotel code
fig = go.Figure()
fig.add_trace(go.Histogram(x=data_df["hotel_id_code"]))
fig.update_xaxes(type="category")
fig.show()

# Train and evaluate

In [None]:
def train_and_validate(args, data_df):
    """Train a HotelIdModel on ``data_df`` and report retrieval accuracy.

    Steps, in order:
      1. Seed all RNGs and hold out ``args.val_samples`` images per
         ``hotel_id`` as the validation set; all other images are training data.
      2. Build the model and three loaders: augmented training, un-augmented
         training ("base", used for the reference embeddings) and validation.
      3. Train with cross-entropy on the ArcMargin logits, using
         Lookahead(AdamW) and a OneCycleLR schedule sized for
         ``args.epochs * len(train_loader)`` steps.
      4. Save a checkpoint after every epoch under the same model name, so
         each save replaces the previous one.
      5. Evaluate after the first epoch and once more after the last epoch.

    Args:
        args: settings object; the attributes read here are ``backbone_name``,
            ``embed_size``, ``n_classes``, ``val_samples``, ``device``,
            ``num_workers``, ``batch_size``, ``lr``, ``epochs`` and
            ``continue_from_checkpoint``.
        data_df: DataFrame with ``hotel_id``, ``image`` and ``hotel_id_code``
            columns.

    Returns:
        None. Metrics are printed and checkpoints are written to OUTPUT_FOLDER.
    """
    # The name doubles as the checkpoint file name, so a resumed run must use
    # the same backbone / embedding / class settings to find its checkpoint.
    model_name = f"arcmargin-model-{args.backbone_name}-{IMG_SIZE}x{IMG_SIZE}-{args.embed_size}embeds-{args.n_classes}hotels"
    print(model_name)
    # SEED and split
    seed_everything(seed=SEED)
    valid_df = data_df.groupby("hotel_id").sample(args.val_samples, random_state=SEED)
    # Rows are excluded from training by image file name
    train_df = data_df[~data_df["image"].isin(valid_df["image"])]

    # create model
    model = HotelIdModel(args.n_classes, args.embed_size, args.backbone_name)
    model = model.to(args.device)

    # train data loader
    train_dataset = HotelTrainDataset(train_df, train_transform, data_path=DATA_FOLDER)
    train_loader = DataLoader(train_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True, pin_memory=False)
    # train without augmentations to generate base embeddings
    base_dataset = HotelTrainDataset(train_df, valid_transform, data_path=DATA_FOLDER)
    base_loader = DataLoader(base_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)
    # valid loader
    valid_dataset = HotelTrainDataset(valid_df, valid_transform, data_path=DATA_FOLDER)
    valid_loader = DataLoader(valid_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

    print(f"Base: {len(base_dataset)}\nValidation: {len(valid_dataset)}")

    criterion = nn.CrossEntropyLoss()
    optimizer = Lookahead(torch.optim.AdamW(model.parameters(), lr=args.lr), k=3)
    # The scheduler is stepped once per batch inside train_epoch, hence
    # steps_per_epoch=len(train_loader)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
                    optimizer,
                    max_lr=args.lr,
                    epochs=args.epochs,
                    steps_per_epoch=len(train_loader),
                    div_factor=10,
                    final_div_factor=1,
                    pct_start=0.1,
                    anneal_strategy="cos",
                )
    
    start_epoch = 1

    if args.continue_from_checkpoint:
        # Restores model and scheduler state (the optimizer state is not
        # restored by load_checkpoint), then runs through the training loader
        # once per finished epoch with fake images before continuing at the
        # next epoch number.
        model, scheduler, optimizer, last_epoch = load_checkpoint(model, scheduler, optimizer, model_name)
        iterate_loader(train_loader, last_epoch)
        start_epoch = start_epoch + last_epoch

    torch.cuda.empty_cache()

    for epoch in range(start_epoch, args.epochs +1):
        train_loss, train_score = train_epoch(args, model, train_loader, criterion, optimizer, scheduler, epoch)
        save_checkpoint(model, scheduler, optimizer, epoch, model_name, train_loss, train_score)
        # Evaluation after the first epoch only; periodic evaluation is disabled
        if (epoch == 1): #  or (epoch % 3) == 0:
            base_embeds, valid_embeds, valid_targets, val_preds, distance_matrix = test(base_loader, valid_loader, model)

    # Final evaluation. NOTE: with args.epochs == 1 this repeats the
    # evaluation that was just run inside the loop.
    base_embeds, valid_embeds, valid_targets, val_preds, distance_matrix = test(base_loader, valid_loader, model)

    # Optional dump of the evaluation artefacts (disabled):
    # output = {"base_embeds": base_embeds,
    #           "valid_embeds": valid_embeds,
    #           "valid_targets": valid_targets,
    #           "val_preds": val_preds,
    #           "distance_matrix": distance_matrix,
    #           "train_df" : train_df,
    #           "valid_df": valid_df,
    #           }

    # torch.save(output, f"{OUTPUT_FOLDER}output-{model_name}.pt")

In [None]:
# %%time 

# class args:
#     epochs = 6
#     lr = 1e-3
#     batch_size = 16
#     num_workers = 2
#     embed_size = 4096
#     val_samples = 1
#     backbone_name="eca_nfnet_l0"
#     n_classes = data_df["hotel_id_code"].nunique()
#     device = ('cuda' if torch.cuda.is_available() else 'cpu')
#     continue_from_checkpoint = False

# train_and_validate(args, data_df)

In [None]:
# %%time 

# class args:
#     epochs = 9
#     lr = 1e-3
#     batch_size = 16
#     num_workers = 2
#     embed_size = 4096
#     val_samples = 1
#     continue_from_checkpoint = True
#     backbone_name="efficientnet_b1"
#     n_classes = data_df["hotel_id_code"].nunique()
#     device = ('cuda' if torch.cuda.is_available() else 'cpu')

# train_and_validate(args, data_df)


# RESULTS
# Iterating loader 1/4: 100%|██████████| 5612/5612 [20:05<00:00,  4.66it/s]
# ...
# Iterating loader 4/4: 100%|██████████| 5612/5612 [19:29<00:00,  4.80it/s]
# Epoch 5/9 - Train loss:2.5940, score: 0.2919: 100%|██████████| 5612/5612 [2:00:37<00:00,  1.29s/it]
# Epoch 6/9 - Train loss:4.4544, score: 0.3838: 100%|██████████| 5612/5612 [2:00:07<00:00,  1.28s/it]
# Epoch 7/9 - Train loss:2.8829, score: 0.4948:  27%|██▋       | 1495/5612 [17:03<1:01:32,  1.11it/s]
# Iterating loader 1/6: 100%|██████████| 5612/5612 [01:21<00:00, 68.73it/s]
# ...
# Iterating loader 6/6: 100%|██████████| 5612/5612 [01:22<00:00, 68.03it/s]
# Epoch 7/9 - Train loss:3.1958, score: 0.4358:  90%|████████▉ | 5042/5612 [1:58:12<20:23,  2.15s/it]
# Iterating loader 1/6: 100%|██████████| 5612/5612 [01:18<00:00, 71.15it/s]
# ...
# Iterating loader 6/6: 100%|██████████| 5612/5612 [01:17<00:00, 72.06it/s]
# Epoch 7/9 - Train loss:4.7988, score: 0.4360: 100%|██████████| 5612/5612 [2:22:07<00:00,  1.52s/it]
# Epoch 8/9 - Train loss:1.2727, score: 0.5257:  88%|████████▊ | 4961/5612 [2:02:48<27:56,  2.57s/it]
# Iterating loader 1/7: 100%|██████████| 5612/5612 [01:23<00:00, 66.99it/s]
# ...
# Iterating loader 7/7: 100%|██████████| 5612/5612 [01:19<00:00, 70.49it/s]
# Epoch 8/9 - Train loss:3.3698, score: 0.4863: 100%|██████████| 5612/5612 [2:22:22<00:00,  1.52s/it]
# Epoch 9/9 - Train loss:3.6118, score: 0.5580:  62%|██████▏   | 3483/5612 [1:09:32<1:00:59,  1.72s/it]
# Iterating loader 1/8: 100%|██████████| 5612/5612 [01:19<00:00, 70.66it/s]
# ...
# Iterating loader 8/8: 100%|██████████| 5612/5612 [01:20<00:00, 69.83it/s]
# Epoch 9/9 - Train loss:4.3291, score: 0.5247: 100%|██████████| 5612/5612 [2:24:09<00:00,  1.54s/it]
# Generating embeds for train: 100%|██████████| 5612/5612 [17:36<00:00,  5.31it/s]
# Generating embeds for test: 100%|██████████| 486/486 [01:41<00:00,  4.80it/s]
# 100%|██████████| 8/8 [01:23<00:00, 10.48s/it]
# Getting closest match: 100%|██████████| 7770/7770 [09:35<00:00, 13.50it/s]
# Accuracy: 0.6802, top 5 accuracy: 0.7979

# Results


|Size|Hotels|Epochs|LR|Model|Embeds|Optimizer|Scheduler|Acc|Acc 5| PL | Comment |
| --- | --- | --- | --- | --- | --- | --- | --- |--- | --- | --- | --- |
|512|7770|6|1e-3|efficientnet_b1|4096|Look3AdamW|OneCycle-10-1|0.6802|0.7979||
|512|7770|6|1e-3|eca_nfnet_l0|1024|Look3AdamW|OneCycle-10-1|0.6368|0.7604|0.630|
|512|7770|6|1e-3|efficientnet_b1|1024|Look3AdamW|OneCycle-10-1|0.5871|0.7151|0.592|
|512|500|9|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.8040|0.8940|
|512|500|6|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.8440|0.9220||cos-m=0.2|
|512|500|6|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.7980|0.9060||cos-m=0.2|
|512|500|2x6|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.8240|0.9140||cos-m=0.2|
|512|500|9|1e-3|ecaresnet50d_pruned|256|Look3AdamW|OneCycle-10-1|0.7780|0.8780|
|512|500|9|1e-3|efficientnet_b0|256|Look3AdamW|OneCycle-10-1|0.7680|0.8720|
|512|500|9|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.7780|0.8780|
|512|500|9|1e-3|efficientnet_b1|1024|Look3AdamW|OneCycle-10-1|0.8040|0.8820|
|256(6x)+512(3x)|500|9|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.7580|0.8660|
|512|500|9|1e-3|efficientnet_b1|256|AdamW|OneCycle-10-1|0.7720|0.8660|
|512|500|9|1e-3|efficientnet_b3|256|Look3AdamW|OneCycle-10-1|0.7520|0.8440|
|512|500|9|1e-3|dla102|256|Look3AdamW|OneCycle-10-1|super slow|
|256|500|9|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-10|0.7500|0.8500|
|256|500|6|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.7800|0.8740||cos-m=0.5|
|256|500|6|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.7880|0.8960||cos-m=0.2|
|256|500|6|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.7580|0.8740||cos-m=0.2-norm embeds|
|256|500|9|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-1|0.7540|0.8580|
|256|500|9|1e-3|eca_nfnet_l0|512|Look3AdamW|OneCycle-10-1|0.7440|0.8420|
|256|500|6|1e-3|eca_nfnet_l0|512|Look3AdamW|OneCycle-10-1|0.7380|0.8460|
|256|500|6|1e-2|eca_nfnet_l0|512|Look3AdamW|OneCycle-10-1||
|256|500|9|1e-3|eca_nfnet_l0+Mish|256|Look3AdamW|OneCycle-10-1|0.7500|0.8300|
|256|500|9|1e-3|eca_nfnet_l0|256|Look3AdamW|OneCycle-10-0.5|0.7300|0.8500|
|256|500|9|1e-3|eca_nfnet_l1|256|Look3AdamW|OneCycle-10-1|0.7560|0.8640|
|256|500|9|1e-3|seresnext26d_32x4d|256|Look3AdamW|OneCycle-10-1|0.6780|0.7820|
|256|500|9|1e-3|nfnet_f0|256|Look3AdamW|OneCycle-10-1|doesn't converge||
|256|500|9|1e-3|swsl_resnet18|256|Look3AdamW|OneCycle-10-1|0.6780|0.7280|
|256|500|9|1e-3|swsl_resnet50|256|Look3AdamW|OneCycle-10-1|0.6400|0.7760|
|256|500|9|1e-3|efficientnet_b0|256|Look3AdamW|OneCycle-10-1|0.6500|0.8020|
|256|500|9|1e-3|efficientnet_b0|256|Look3AdamW|OneCycle-10-1|0.6940|0.8360||cos|
|256|500|9|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.6540|0.7760|
|256|500|6|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.6880|0.8480||cos-m=0.2-norm embeds|
|256|500|6|1e-3|efficientnet_b1|256|Look3AdamW|OneCycle-10-1|0.668|0.8340||cos-m=0.2|
|256|500|9|1e-3|efficientnet_b3|256|Look3AdamW|OneCycle-10-1|0.6320|0.7520|
|256|500|9|1e-3|adv_inception_v3|256|Look3AdamW|OneCycle-10-1|0.5000|0.6700|
|256|500|9|1e-3|ecaresnet50t|256|Look3AdamW|OneCycle-10-1|0.7180|0.8220|
|256|500|9|1e-3|ecaresnet50d_pruned|256|Look3AdamW|OneCycle-10-1|0.6980|0.8340|
|256|500|9|1e-3|ecaresnet101d_pruned|256|Look3AdamW|OneCycle-10-1|0.6680|0.7980|
|256|500|9|1e-3|ecaresnext50t_32x4d|256|Look3AdamW|OneCycle-10-1|0.1240|0.2480|
|256|500|9|1e-3|nf_ecaresnet50|256|Look3AdamW|OneCycle-10-1|doesn't converge|
|256|500|9|1e-3|nf_seresnet50|256|Look3AdamW|OneCycle-10-1|doesn't converge|
|256|500|9|1e-3|ese_vovnet39b_evos|256|Look3AdamW|OneCycle-10-1|doesn't converge|
|256|500|9|1e-3|eca_vovnet39b |256|Look3AdamW|OneCycle-10-1|doesn't converge|
|256|500|9|1e-3|tresnet_m|256|Look3AdamW|OneCycle-10-1|0.7260|0.8120|
|256|500|9|1e-3|vit_small_resnet26d_224|256|Look3AdamW|OneCycle-10-1|0.6060|0.7460|
|256|500|9|1e-3|tv_resnet50|256|Look3AdamW|OneCycle-10-1|0.5720|0.7500|
|256|500|9|1e-3|selecsls42b|256|Look3AdamW|OneCycle-10-1|0.5800|0.7620|
|256|500|9|1e-3|resnet50|256|Look3AdamW|OneCycle-10-1|0.6360|0.7680|
|256|500|9|1e-3|botnet50t_224|256|Look3AdamW|OneCycle-10-1|0.0340|0.1100|
|256|500|9|1e-3|dm_nfnet_f0|256|Look3AdamW|OneCycle-10-1|doesn't converge|
|256|500|9|1e-3|dla60 |256|Look3AdamW|OneCycle-10-1|0.6580|0.8040|
|256|500|9|1e-3|densenet121 |256|Look3AdamW|OneCycle-10-1|0.6120|0.7620|
|256|500|9|1e-3|tf_mixnet_m|256|Look3AdamW|OneCycle-10-1|0.6480|0.7700|
|256|500|9|1e-3|tf_mixnet_l |256|Look3AdamW|OneCycle-10-1|0.6720|0.7920|
|256|500|9|1e-3|dla102|256|Look3AdamW|OneCycle-10-1|0.6640|0.8180|
|256|500|12|1e-3|dla102|256|Look3AdamW|OneCycle-10-1|0.6580|0.8040|
|256|500|9|1e-3||256|Look3AdamW|OneCycle-10-1|||
|256|500|9|1e-3||256|Look3AdamW|OneCycle-10-1|||



 
