In [None]:
# !pip install faiss-gpu

In [None]:
from scipy.io import loadmat
import albumentations as A
import torch
import torchvision.models as models
import cv2
from albumentations.pytorch import ToTensorV2
from torch import nn
import math
import glob
import sys
sys.path.append('../input/d/kozodoi/timm-pytorch-image-models/pytorch-image-models-master/')
import timm
import pandas as pd
from torch.nn.parameter import Parameter
from tqdm import tqdm
import torch.nn.functional as F
from typing import Dict, List, Optional

# import faiss
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import numpy as np

In [None]:
class CFG:
    """Notebook-wide settings, read as ``CFG.<name>``; the class is never instantiated here.

    ``use_pretrained`` and ``device`` used to be assigned twice with identical
    values; each now appears once.
    """

    # ---- data / checkpoint locations ----
    original = '../input/product/Stanford_Online_Products/'  # prefix prepended to the image paths of the test list
    pretrain_model = "../input/resnet-50-adaptive-weighted/resnet50_adaptive_arcface_with_weighted_CosineAnnealingWarmRestarts_CrossEntropyLoss-Copy3.pt"
    path_test = "../input/product/Stanford_Online_Products/Ebay_test.txt"
    path_train = "../input/product-detail/train_info (1).csv"

    # ---- runtime ----
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 512
    worker = 8  # passed to DataLoader as num_workers

    # ---- model config ----
    num_classes= 11318
    input_size = 256  # side length used by the resize / pad transforms
    use_pretrained = True
    model_name = "resnet50" #"efficientnet_b3" #"densenet121"#"efficientnet_b3"resnet50
    embedding_size = 512  # passed to Model as fc_dim
    train = True
    dropout = 0.5
    lambda_=10
    adaptive=True
    metric = 'adaptive_arcface_with_weighted' #"adaptive_arcface_with_weighted" #'adaptive_arcface' # arcface, cosface , softmax 
    use_fc = True
    s = 30
    margin = 0.5
    ls_eps = 0.0
    theta_zero = 0.785

In [None]:
class AdaCos(nn.Module):
    """Cosine-similarity classification head whose scale ``s`` is re-estimated on every forward pass.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes (rows of the weight matrix).
        m: angular margin added to the target-class angle.
        ls_eps: label-smoothing factor; 0 disables smoothing.
        theta_zero: upper bound applied to the median angle when ``s`` is updated.
    """

    def __init__(self, in_features, out_features, m=0.50, ls_eps=0, theta_zero=math.pi/4):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.m = m
        self.ls_eps = ls_eps
        self.theta_zero = theta_zero
        # Starting scale; replaced by a tensor after the first forward pass.
        self.s = math.log(out_features - 1) / math.cos(theta_zero)
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)`` and update ``self.s``."""
        # Cosine between L2-normalised embeddings and L2-normalised class weights.
        cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))

        # Angles are taken from a clamped copy so acos stays inside its domain.
        angle = torch.acos(cos_theta.clamp(-1.0 + 1e-7, 1.0 - 1e-7))
        with_margin = torch.cos(angle + self.m)

        target_mask = torch.zeros_like(cos_theta)
        target_mask.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            target_mask = (1 - self.ls_eps) * target_mask + self.ls_eps / self.out_features

        # Blend: margin logit where the mask is set, plain cosine elsewhere.
        output = cos_theta * (1 - target_mask) + with_margin * target_mask

        # Scale update is kept out of the autograd graph.
        with torch.no_grad():
            non_target_exp = torch.where(
                target_mask < 1,
                torch.exp(self.s * cos_theta),
                torch.zeros_like(cos_theta),
            )
            b_avg = non_target_exp.sum() / input.size(0)
            median_angle = torch.median(angle)
            capped_angle = torch.min(self.theta_zero * torch.ones_like(median_angle), median_angle)
            self.s = torch.log(b_avg) / torch.cos(capped_angle)
        output *= self.s

        return output
class ArcMarginProduct(nn.Module):
    r"""ArcFace head: additive angular margin, ``cos(theta + m)`` on the target class.

        Args:
            in_features: size of each input sample
            out_features: size of each output sample (number of classes)
            s: scale applied to the final logits
            m: angular margin added to the target-class angle
            easy_margin: if True, apply the margin only where ``cos(theta) > 0``
            ls_eps: label-smoothing factor; 0 disables smoothing
        """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Below this cosine, theta + m would exceed pi; a linear penalty is used instead.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``.

        ``label`` holds one class index per sample. The one-hot mask is created
        on the same device as the logits, so the layer runs on CPU as well as GPU.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| marginally above 1; clamp so sqrt never sees a negative.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # Margin logit for the target class, plain cosine for every other class.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output
class AddMarginProduct(nn.Module):
    r"""CosFace head: additive cosine margin, ``cos(theta) - m`` on the target class.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample (number of classes)
        s: scale applied to the final logits
        m: margin subtracted from the target-class cosine
        device: accepted so that callers passing ``device=...`` (as the
            'cosface' branch of ``Model`` does) keep working; it is not needed,
            because the one-hot mask is always created on the logits' device.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.40, device=None):
        super(AddMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.device = device
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``; ``label`` is one class index per sample."""
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        # zeros_like follows the logits' device, so no global configuration is read here.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # Margin logit for the target class, plain cosine for every other class.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

class LiArcFace(nn.Module):
    """Margin head whose logits are linear in the angle: ``s - (2s/pi) * (theta + margin)``.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        m: margin added to the angle of the target class only.
        s: scale; the logit is ``s`` at angle 0 and 0 at angle ``pi/2`` (before the margin).
    """

    def __init__(self, in_features, out_features, m=0.4, s=64.0):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_normal_(self.weight)
        self.m = m
        self.s = s

    def forward(self, input, label):
        """Return logits of shape ``(batch, out_features)``; ``label`` is one class index per sample."""
        W = F.normalize(self.weight)
        input = F.normalize(input)
        cosine = input @ W.t()
        # Keep the argument strictly inside (-1, 1): acos is NaN outside that
        # range and has an infinite derivative at +/-1. Same epsilon as AdaCos.
        theta = torch.acos(torch.clamp(cosine, -1.0 + 1e-7, 1.0 - 1e-7))
        m = torch.zeros_like(theta)
        # scatter_ needs an int64 index; cast so labels of other integer dtypes also work.
        m.scatter_(1, label.view(-1, 1).long(), self.m)
        scale = -2 * self.s / math.pi
        return self.s + scale * (theta + m)

class Arcface_adaptive_margin(nn.Module):
    r"""ArcFace head with one learnable margin per class, ``cos(theta + m[label])``.

        Args:
            in_features: size of each input sample
            out_features: size of each output sample (number of classes)
            s: scale applied to the final logits
            m: initial margin value, replicated once per class
            device: device on which the margins, the pi constant and the one-hot mask are created
            easy_margin: if True, apply the margin only where ``cos(theta) > 0``
            ls_eps: label-smoothing factor; 0 disables smoothing
        """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, device='cuda', easy_margin=False, ls_eps=0.0):
        super().__init__()
        self.device = device
        self.easy_margin = easy_margin
        self.m_update = 0  # counts forward calls
        # Plain scalars are stored as tensors.
        self.in_features = torch.tensor(in_features)
        self.out_features = torch.tensor(out_features)
        self.s = torch.tensor(s)
        self.ls_eps = torch.tensor(ls_eps).double()  # label smoothing
        self.torch_pi = torch.tensor(3.141592, dtype=torch.double, device=self.device)
        # Learnable margins, shape (out_features, 1), float64. Registered before
        # `weight` so the parameter order stays m, weight.
        self.m = Parameter(torch.tensor([[m]] * out_features, requires_grad=True, device=self.device,
                                        dtype=torch.double))
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)`` using each sample's class margin."""
        self.m_update += 1

        # Margin of each sample's target class, shape (batch, 1).
        margin = self.m[label]
        pi_minus_margin = self.torch_pi - margin
        threshold = torch.cos(pi_minus_margin)
        penalty = torch.sin(pi_minus_margin) * margin

        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - cosine ** 2)
        phi = cosine * torch.cos(margin) - sine * torch.sin(margin)

        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > threshold, phi, cosine - penalty)

        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size(), device=self.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features

        # Margin logit where the mask is set, plain cosine elsewhere, then scale.
        output = one_hot * phi + (1.0 - one_hot) * cosine
        return output * self.s

class ArcModule(nn.Module):
    """ArcFace-style margin head that expects already-normalised embeddings.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        s: scale applied to the final logits.
        m: angular margin added to the target-class angle (defaults to ``CFG.margin``).
    """

    def __init__(self, in_features, out_features, s = 10, m = CFG.margin):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_normal_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Below this cosine, theta + m would exceed pi; a linear penalty is used instead.
        self.th = torch.tensor(math.cos(math.pi - m))
        self.mm = torch.tensor(math.sin(math.pi - m) * m)

    def forward(self, inputs, labels):
        """Return scaled logits of shape ``(batch, out_features)``.

        Only the weight matrix is L2-normalised here; ``inputs`` are used as
        given. ``labels`` may be shaped ``(batch,)`` or ``(batch, 1)``.
        """
        cos_th = F.linear(inputs, F.normalize(self.weight))
        cos_th = cos_th.clamp(-1, 1)
        sin_th = torch.sqrt(1.0 - torch.pow(cos_th, 2))
        cos_th_m = cos_th * self.cos_m - sin_th * self.sin_m
        # A masked in-place assignment used to repeat this selection; the single
        # torch.where already covers it.
        cos_th_m = torch.where(cos_th > self.th, cos_th_m, cos_th - self.mm)

        if labels.dim() == 1:
            labels = labels.unsqueeze(-1)
        # Build the mask on the logits' device instead of forcing CUDA.
        onehot = torch.zeros_like(cos_th)
        labels = labels.to(device=cos_th.device, dtype=torch.long)
        onehot.scatter_(1, labels, 1.0)
        outputs = onehot * cos_th_m + (1.0 - onehot) * cos_th
        outputs = outputs * self.s
        return outputs

class Arcface_adaptive_margin(nn.Module):
    r"""ArcFace head with one learnable margin per class, ``cos(theta + m[label])``.

        This definition replaces the earlier class of the same name in this
        notebook, so it must provide ``forward`` itself; without it the
        'adaptive_arcface' head cannot be called.

        Args:
            in_features: size of each input sample
            out_features: size of each output sample (number of classes)
            s: scale applied to the final logits
            m: initial margin value, replicated once per class
            device: device on which the margins and the pi constant are created
            easy_margin: if True, apply the margin only where ``cos(theta) > 0``
            ls_eps: label-smoothing factor; 0 disables smoothing
            file_m: path stored on the instance; this class does not write to it
        """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, device='cuda', easy_margin=False, ls_eps=0.0,file_m="./auto_margin.dat"):
        super(Arcface_adaptive_margin, self).__init__()
        self.in_features = torch.tensor(in_features)
        self.out_features = torch.tensor(out_features)
        self.s = torch.tensor(s)
        self.file_m = file_m
        self.device = device
        # Learnable margins, shape (out_features, 1), float64.
        self.m = Parameter(torch.tensor([[m]] * out_features, requires_grad=True, device=self.device,
                                        dtype=torch.double))  # automatic margin

        self.ls_eps = torch.tensor(ls_eps).type(dtype=torch.double)  # label smoothing
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        self.torch_pi = torch.tensor(3.141592, dtype=torch.double, device=self.device)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)`` using each sample's class margin."""
        m = self.m[label]  # margin of each sample's target class, shape (batch, 1)
        cos_m = torch.cos(m)
        sin_m = torch.sin(m)
        th = torch.cos(torch.sub(self.torch_pi, m))
        mm = torch.mul(torch.sin(torch.sub(self.torch_pi, m)), m)

        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp so rounding error in cosine cannot feed a negative value to sqrt.
        sine = torch.sqrt(torch.sub(1.0, torch.pow(cosine, 2)).clamp(0, 1))
        phi = torch.sub(torch.mul(cosine, cos_m), torch.mul(sine, sin_m))

        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > th, phi, torch.sub(cosine, mm))

        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = torch.add(torch.mul(torch.sub(1, self.ls_eps), one_hot),
                                torch.div(self.ls_eps, self.out_features))

        # Margin logit where the mask is set, plain cosine elsewhere, then scale.
        output = torch.add(torch.mul(one_hot, phi), torch.mul(torch.sub(1.0, one_hot), cosine))
        output = torch.mul(output, self.s)

        return output

# class Arcface_update(nn.Module):

#     def __init__(self, channel_size=CFG.embedding_size, out_feature=CFG.num_classes, dropout=0.5, backbone=CFG.model_name, pretrained=True):
#         super(Arcface_update, self).__init__()
#         self.backbone = timm.create_model(backbone, pretrained=True)
# #         self.backbone.load_state_dict(torch.load(CFG.orginal_pretrain_model, map_location=CFG.device))
#         self.channel_size = channel_size
#         self.out_feature = out_feature
#         self.in_features = self.backbone.classifier.in_features
#         self.margin = ArcModule(in_features=self.channel_size, out_features = self.out_feature)
#         self.bn1 = nn.BatchNorm2d(self.in_features)
#         self.dropout = nn.Dropout2d(dropout, inplace=True)
#         self.fc1 = nn.Linear(self.in_features * 8 * 8 , self.channel_size)
#         self.bn2 = nn.BatchNorm1d(self.channel_size)
        
#     def forward(self, x, labels=None):
#         features = self.backbone.features(x)
#         features = self.bn1(features)
#         features = self.dropout(features)
#         features = features.view(features.size(0), -1)
#         features = self.fc1(features)
#         features = self.bn2(features)
#         features = F.normalize(features)
#         if labels is not None:
#             return self.margin(features, labels)
#         return features

In [None]:
# class Model(nn.Module):

#     def __init__(self,
#                  n_classes,
#                  model_name='efficientnet_b3',
#                  use_fc=False,
#                  fc_dim=512,
#                  dropout=0.0,
#                  metric='softmax',
#                  s=30.0,
#                  margin=0.50,
#                  ls_eps=0.0,
#                  theta_zero=0.785,
#                  pretrained=False):
#         """
#         :param n_classes:
#         :param model_name: name of model from pretrainedmodels
#             e.g. resnet50, resnext101_32x4d, pnasnet5large
#         :param pooling: One of ('SPoC', 'MAC', 'RMAC', 'GeM', 'Rpool', 'Flatten', 'CompactBilinearPooling')
#         :param metric: One of ('arcface', 'cosface', 'softmax')
#         """
#         super(Model, self).__init__()
#         print('Building Model Backbone for {} model'.format(model_name))

#         self.backbone = timm.create_model(model_name, pretrained=True)
# #         self.backbone.load_state_dict(torch.load(CFG.orginal_pretrain_model, map_location=CFG.device))
#         final_in_features = self.backbone.classifier.in_features
        
#         self.backbone.classifier = nn.Identity()
#         self.backbone.global_pool = nn.Identity()
        
#         self.pooling =  nn.AdaptiveAvgPool2d(1)
            
#         self.use_fc = use_fc
#         if use_fc:
#             self.dropout = nn.Dropout(p=dropout)
#             self.fc = nn.Linear(final_in_features, fc_dim)
#             self.bn = nn.BatchNorm1d(fc_dim)
#             self._init_params()
#             final_in_features = fc_dim

#         self.metric = metric
#         if metric == 'arcface':
#             self.final = ArcMarginProduct(final_in_features, n_classes,
#                                           s=s, m=margin, easy_margin=False, ls_eps=ls_eps)
#         elif metric == 'cosface':
#             self.final = AddMarginProduct(final_in_features, n_classes, s=s, m=margin)
#         elif metric == 'adacos':
#             self.final = AdaCos(final_in_features, n_classes, m=margin, theta_zero=theta_zero)
#         elif metric == 'LiArcFace':
#             print("LiArcFace")
#             self.final = LiArcFace(final_in_features, n_classes)
#         elif metric =='adaptive':
#             self.final = Arcface_adaptive_margin(final_in_features, n_classes,
#                                           s=s, m=margin, easy_margin=False, ls_eps=ls_eps)
#         else:
#             self.final = nn.Linear(final_in_features, n_classes)

#     def _init_params(self):
#         nn.init.xavier_normal_(self.fc.weight)
#         nn.init.constant_(self.fc.bias, 0)
#         nn.init.constant_(self.bn.weight, 1)
#         nn.init.constant_(self.bn.bias, 0)

#     def forward(self, x, label):
#         feature = self.extract_feat(x)
#         if self.metric in ('arcface', 'cosface', 'adacos','LiArcFace'):
#             logits = self.final(feature, label)
#         else:
#             logits = self.final(feature)
#         return logits

#     def extract_feat(self, x):
#         batch_size = x.shape[0]
#         x = self.backbone(x)
#         x = self.pooling(x).view(batch_size, -1)

#         if self.use_fc:
#             x = self.dropout(x)
#             x = self.fc(x)
#             x = self.bn(x)

#         return x
# class Arcface_adaptive_margin_with_weighted(nn.Module):
#     r"""Implement of large margin arc distance: :
#         Args:
#             in_features: size of each input sample
#             out_features: size of each output sample
#             s: norm of input feature
#             m: List margin for each class
#             cos(theta + m)
#         """

#     def __init__(self, in_features, out_features, s=30.0, m=0.50, device='cuda', easy_margin=False, ls_eps=0.0,file_m="./auto_margin.dat"):
#         super(Arcface_adaptive_margin_with_weighted, self).__init__()
#         self.in_features = torch.tensor(in_features)
#         self.out_features = torch.tensor(out_features)
#         self.s = torch.tensor(s)
#         self.file_m = file_m
#         self.device = device
#         self.m = Parameter(torch.tensor([[m]] * out_features, requires_grad=True, device=self.device,
#                                         dtype=torch.double))  # automatic margin

#         self.ls_eps = torch.tensor(ls_eps).type(dtype=torch.double)  # label smoothing
#         self.weight = Parameter(torch.FloatTensor(out_features, in_features))
#         nn.init.xavier_uniform_(self.weight)
#         self.easy_margin = easy_margin
#         self.torch_pi = torch.tensor(3.141592, dtype=torch.double, device=self.device)

#     def forward(self, input, label):
#         file_m = open(self.file_m, 'ab')
#         np.savetxt(file_m, self.m.detach().cpu().numpy().reshape(1, -1))
#         file_m.close()
#         m = self.m[label]      # list margin m 
#         cos_m = torch.cos(m)   # cos list margin m 
#         sin_m = torch.sin(m)   # cos(m+theta) =  cos(m) * cos(theta) - sin(m)*sin(theta)
#         th = torch.cos(torch.sub(self.torch_pi, m)) # th =  threshold = Pi-m -> check m+theta < Pi <=> cos(theta) < cos(Pi-m) 
#         mm = torch.mul(torch.sin(torch.sub(self.torch_pi, m)), m)  

#         # --------------------------- cos(theta) & phi(theta) ---------------------------
#         cosine = F.linear(F.normalize(input), F.normalize(self.weight))
#         sine = torch.sqrt(torch.sub(1.0, torch.pow(cosine, 2)))
#         phi = torch.sub(torch.mul(cosine, cos_m), torch.mul(sine, sin_m))
        
#         if self.easy_margin:
#             phi = torch.where(cosine > 0, phi, cosine)
#         else:
#             phi = torch.where(cosine > th, phi, torch.sub(cosine, mm))

#         # --------------------------- convert label to one-hot ---------------------------
#         one_hot = torch.zeros(cosine.size(), device=self.device)
#         one_hot.scatter_(1, label.view(-1, 1).long(), 1)
#         if self.ls_eps > 0:
#             one_hot = torch.add(torch.mul(torch.sub(1, self.ls_eps), one_hot),
#                                 torch.div(self.ls_eps, self.out_features))

#         # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
#         output = torch.add(torch.mul(one_hot, phi)*m, torch.mul(torch.sub(1.0, one_hot), cosine))
#         output = self.s*output

#         return output
# explain https://github.com/siriusdemon/Build-Your-Own-Face-Model/blob/master/recognition/model/metric.py
class Arcface_adaptive_margin_with_weighted(nn.Module):
    r"""ArcFace head with a learnable per-class margin that also weights the target logit.

        The target-class logit is ``cos(theta + m[label]) * m[label]``; every
        other class keeps its plain cosine. The result is multiplied by ``s``.

        Args:
            in_features: size of each input sample
            out_features: size of each output sample (number of classes)
            s: scale applied to the final logits
            m: initial margin value, replicated once per class
            device: device on which the margins and the pi constant are created
            easy_margin: if True, apply the margin only where ``cos(theta) > 0``
            ls_eps: label-smoothing factor; 0 disables smoothing
            file_m: file to which the current margins are appended on every forward call
        """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, device='cuda', easy_margin=False, ls_eps=0.0,file_m="./auto_margin.dat"):
        super(Arcface_adaptive_margin_with_weighted, self).__init__()
        self.in_features = torch.tensor(in_features)
        self.out_features = torch.tensor(out_features)
        self.s = torch.tensor(s)
        self.file_m = file_m
        self.device = device
        # Learnable margins, shape (out_features, 1), float64.
        self.m = Parameter(torch.tensor([[m]] * out_features, requires_grad=True, device=self.device,
                                        dtype=torch.double))  # automatic margin

        self.ls_eps = torch.tensor(ls_eps).type(dtype=torch.double)  # label smoothing
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        self.torch_pi = torch.tensor(3.141592, dtype=torch.double, device=self.device)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``.

        Side effect: appends one text row holding all current margins to ``self.file_m``.
        """
        # The context manager closes the file even if savetxt raises.
        with open(self.file_m, 'ab') as file_m:
            np.savetxt(file_m, self.m.detach().cpu().numpy().reshape(1, -1))

        m = self.m[label]      # margin of each sample's target class, shape (batch, 1)
        cos_m = torch.cos(m)
        sin_m = torch.sin(m)   # cos(m+theta) = cos(m)*cos(theta) - sin(m)*sin(theta)
        th = torch.cos(torch.sub(self.torch_pi, m))  # cosine below which theta + m would exceed pi
        mm = torch.mul(torch.sin(torch.sub(self.torch_pi, m)), m)

        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp so rounding error in cosine cannot feed a negative value to sqrt.
        sine = torch.sqrt(torch.sub(1.0, torch.pow(cosine, 2)).clamp(0, 1))
        phi = torch.sub(torch.mul(cosine, cos_m), torch.mul(sine, sin_m))

        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > th, phi, torch.sub(cosine, mm))

        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = torch.add(torch.mul(torch.sub(1, self.ls_eps), one_hot),
                                torch.div(self.ls_eps, self.out_features))

        # Target logit is additionally multiplied by its own margin (the "weighted" part).
        output = torch.add(torch.mul(one_hot, phi)*m, torch.mul(torch.sub(1.0, one_hot), cosine))
        output = output*self.s

        return output

In [None]:
class Model(nn.Module):
    """timm backbone, optional embedding layer, and a selectable classification head."""

    def __init__(self, n_classes, model_name='efficientnet_b3', use_fc=False, fc_dim=512,\
                 dropout=0.0, metric='softmax', s=30.0, margin=0.50, ls_eps=0.0,\
                 theta_zero=0.785, pretrained=False):
        """
        :param n_classes: number of output classes of the head
        :param model_name: timm model name, e.g. resnet50, efficientnet_b3
        :param use_fc: if True, add dropout -> linear -> batch-norm after pooling
        :param fc_dim: output size of that linear layer (the embedding size)
        :param dropout: dropout probability used when ``use_fc`` is True
        :param metric: One of ('arcface', 'cosface', 'adacos', 'adaptive_arcface',
            'adaptive_arcface_with_weighted'); any other value gives a plain linear head
        :param s: logit scale passed to the margin heads
        :param margin: margin passed to the margin heads
        :param ls_eps: label-smoothing factor passed to the heads that accept it
        :param theta_zero: passed to the 'adacos' head
        :param pretrained: forwarded to ``timm.create_model`` (it used to be ignored)
        """
        super(Model, self).__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Find the backbone's classification layer on the model that was actually
        # built, rather than on the global CFG.model_name.
        if hasattr(self.backbone, 'fc'):
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
        elif hasattr(self.backbone, 'classifier'):
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
        else:
            raise ValueError(
                "Backbone '{}' has neither an 'fc' nor a 'classifier' layer".format(model_name))
        # Pooling is done by self.pooling below, so disable the backbone's own.
        self.backbone.global_pool = nn.Identity()

        self.pooling = nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc
        if use_fc:
            self.dropout = nn.Dropout(p=dropout)
            self.fc = nn.Linear(final_in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self._init_params()
            final_in_features = fc_dim

        self.metric = metric
        if metric == 'arcface':
            self.final = ArcMarginProduct(final_in_features, n_classes,
                                          s=s, m=margin, easy_margin=False, ls_eps=ls_eps)
        elif metric == 'cosface':
            # No `device` keyword here: it is not needed to build this head.
            self.final = AddMarginProduct(final_in_features, n_classes, s=s, m=margin)
        elif metric == 'adacos':
            self.final = AdaCos(final_in_features, n_classes, m=margin, theta_zero=theta_zero)
        elif metric == 'adaptive_arcface':
            self.final = Arcface_adaptive_margin(final_in_features, n_classes, s=s, m=margin,\
                                                 device=CFG.device, easy_margin=False, ls_eps=ls_eps)
        elif metric == 'adaptive_arcface_with_weighted':
            self.final = Arcface_adaptive_margin_with_weighted(final_in_features, n_classes, s=s, m=margin,\
                                                 device=CFG.device, easy_margin=False, ls_eps=ls_eps)
        else:
            self.final = nn.Linear(final_in_features, n_classes)

    def _init_params(self):
        """Xavier-normal weights and zero bias for ``fc``; weight 1 and bias 0 for ``bn``."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, x, label):
        """Return class logits; ``label`` is used only by the margin-based heads."""
        feature = self.extract_feat(x)
        if self.metric in ('arcface', 'cosface', 'adacos','adaptive_arcface','adaptive_arcface_with_weighted'):
            logits = self.final(feature, label)
        else:
            logits = self.final(feature)
        return logits

    def extract_feat(self, x):
        """Return one embedding per image: backbone -> average pool -> optional dropout/fc/bn."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)

        return x

In [None]:
class FocalLoss(torch.nn.Module):
    """Focal loss on top of cross-entropy: ``mean((1 - p_i) ** gamma * ce_i)``.

    ``ce_i`` is the cross-entropy of sample ``i`` and ``p_i = exp(-ce_i)`` is the
    probability assigned to its true class. With ``gamma=0`` this is ordinary
    mean cross-entropy.

    Args:
        gamma: focusing exponent.
        eps: stored on the instance; not used in the computation.
    """

    def __init__(self, gamma=0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
        # Per-sample losses are required: with the default mean reduction the
        # focal factor would be computed from the batch average instead.
        self.ce = torch.nn.CrossEntropyLoss(reduction='none')

    def forward(self, input, target):
        """Return the scalar focal loss for logits ``input`` and class indices ``target``."""
        logp = self.ce(input, target)   # cross-entropy per sample
        p = torch.exp(-logp)            # probability of the true class per sample
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [None]:
class Custom_dataset():
    """Map-style image dataset backed by a DataFrame with ``path`` and ``class_id`` columns."""

    def __init__(self, df, transform = None):
        """
        :param df: DataFrame holding one image per row (columns ``path`` and ``class_id``)
        :param transform: optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``["image"]``
        """
        # reset_index gives positional 0..n-1 labels so .loc[index, ...] works for any input frame.
        self.df = df.reset_index()
        self.transform = transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self,index):
        """Return ``(image, label_tensor)`` for row ``index``.

        :raises FileNotFoundError: if the image at ``path`` cannot be read
        """
        img_path, class_id = self.df.loc[index, 'path'], self.df.loc[index,'class_id']

        sample = cv2.imread(img_path)
        # cv2.imread signals failure by returning None; fail here with the path
        # instead of letting cvtColor raise an unrelated-looking error.
        if sample is None:
            raise FileNotFoundError("Could not read image: {}".format(img_path))
        sample = cv2.cvtColor(sample, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            sample = self.transform(image=sample)["image"]

        return sample, torch.tensor(class_id)

In [None]:
# model = Model(**{'n_classes':CFG.num_classes,
#             'model_name':CFG.model_name,
#             'use_fc':CFG.use_fc,
#             'fc_dim':CFG.embedding_size,
#             'dropout':CFG.dropout,
#             'metric':CFG.metric,
#             's':CFG.s,
#             'margin':CFG.margin,
#             'ls_eps':CFG.ls_eps,
#             'theta_zero':CFG.theta_zero,
#             'pretrained':True
#         }
# )
# model = Arcface_update()
# Build the network from the CFG hyper-parameters. No pretrained timm weights are
# requested here because the checkpoint loaded below supplies the weights.
model = Model(
    n_classes=CFG.num_classes,
    model_name=CFG.model_name,
    use_fc=CFG.use_fc,
    fc_dim=CFG.embedding_size,
    dropout=CFG.dropout,
    metric=CFG.metric,
    s=CFG.s,
    margin=CFG.margin,
    ls_eps=CFG.ls_eps,
    theta_zero=CFG.theta_zero,
    pretrained=False,
)

if CFG.pretrain_model is not None:
    print("load pretrain")
    # NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
    checkpoint = torch.load(CFG.pretrain_model,map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint['model_state_dict'])

# Evaluation preprocessing: scale the longest side to input_size, pad to a square,
# resize, normalise, and convert to a tensor. An earlier plain-resize pipeline
# assigned to the same name was overwritten before use and has been dropped.
trans = A.Compose([
    A.LongestMaxSize(max_size = CFG.input_size ),
    A.PadIfNeeded(min_height=CFG.input_size, min_width=CFG.input_size, p=1),
    A.Resize(CFG.input_size, CFG.input_size),
    A.Normalize(),
    ToTensorV2()
])

In [None]:
# Test list: space-separated file; image paths are made absolute and class ids
# are shifted down by one.
df_test = pd.read_csv(CFG.path_test, delimiter=" ", index_col=None)
df_test = df_test.assign(
    path=CFG.original + df_test['path'],
    class_id=df_test['class_id'] - 1,
)

# df_train = pd.read_csv(CFG.path_train,index_col=None)
# df_train['path'] = CFG.original + df_train['path']
# df_train['class_id']-=1

test = Custom_dataset(df_test, transform=trans)
test_loader = DataLoader(
    dataset=test,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.worker,
    pin_memory=False,
)

# train = Custom_dataset(df_train[df_train['phase']=='train'], transform = trans)
# train_loader = DataLoader(dataset=train, num_workers=CFG.worker, batch_size=CFG.batch_size, shuffle=False, pin_memory = False)


# Move the network to the configured device and switch to inference mode.
model.to(CFG.device)
model.eval()

In [None]:
# df_test = pd.read_csv(CFG.path_test,delimiter = " ",index_col=None)
# df_test

In [None]:
def calc_recall_at_k(T, Y, k):
    """Fraction of samples whose true label appears among their first ``k`` neighbour labels.

    T : [nb_samples] (target labels)
    Y : [nb_samples x K] (labels of the K retrieved neighbours, best first; K >= k is not required)

    Neighbour labels are truncated to integers before comparison. Returns a
    Python float; an empty ``T`` gives 0.0 instead of a division by zero.
    """
    n_targets = len(T)
    if n_targets == 0:
        return 0.0

    targets = torch.as_tensor(T).reshape(-1)
    if torch.is_tensor(Y):
        neighbours = Y
    else:
        # Accept a sequence of rows (lists or tensors), as the loop-based version did.
        neighbours = torch.stack([torch.as_tensor(row) for row in Y])

    top_k = neighbours[:, :k].long()
    targets = targets.to(top_k.device)
    # Like zip(), compare only as many rows as both inputs provide.
    n_rows = min(targets.shape[0], top_k.shape[0])
    # One vectorised comparison replaces a Python loop over every sample.
    hits = (top_k[:n_rows] == targets[:n_rows].unsqueeze(1)).any(dim=1)
    return hits.sum().item() / (1. * n_targets)
def l2_norm(input):
    """Divide every row of a 2-D tensor by its L2 norm.

    1e-12 is added to the squared norm before the square root, so an all-zero
    row stays zero instead of producing NaN. The input is not modified.
    """
    squared_sum = input.pow(2).sum(dim=1)
    row_norm = (squared_sum + 1e-12).sqrt()
    # Column vector of norms broadcasts across each row's elements.
    return (input / row_norm.view(-1, 1)).view(input.size())
def predict_batchwise(model, dataloader):
    """Run ``model.extract_feat`` over every batch and gather the results.

    The first element of each batch is treated as the images and replaced by
    the model's embeddings; every other element (e.g. labels) is collected as
    is. Returns a list with one concatenated tensor per batch element, in batch
    order. The model is put in eval mode for the pass and its previous
    train/eval state is restored afterwards, even if an error occurs.

    Raises:
        ValueError: if the dataloader yields no batches.
    """
    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model_is_training = model.training
    model.eval()

    collected = None  # one list of per-batch tensors for each batch element
    try:
        with torch.no_grad():
            for batch in tqdm(dataloader):
                if collected is None:
                    collected = [[] for _ in batch]
                for i, J in enumerate(batch):
                    # i = 0: sz_batch * images
                    # i = 1: sz_batch * labels
                    if i == 0:
                        J = model.extract_feat(J.to(device))
                    collected[i].append(J)
    finally:
        model.train(model_is_training) # revert to previous training state

    if collected is None:
        raise ValueError("dataloader yielded no batches")

    # Concatenating whole batches gives the same tensors as stacking single samples.
    return [torch.cat(parts) for parts in collected]
def evaluate_cos_SOP(model, dataloader):
    """
    Cosine-similarity retrieval evaluation of `model` on the samples in `dataloader`.

    Every sample is embedded, L2-normalised and used as a query against all samples.
    The top-ranked match is dropped on the assumption that it is the query itself,
    and Recall@k is reported for k in [1, 2, 3, 4, 5, 10, 100, 1000].

    Prints each recall as a percentage and returns the recalls as a list of
    fractions in [0, 1], in the same order as the k values above.
    """
    ks = [1, 2, 3, 4, 5, 10, 100, 1000]
    # Queries are scored against the full set in slices to bound the size of the
    # similarity matrix.
    chunk_size = 10001

    # calculate embeddings with model and get targets
    X, T = predict_batchwise(model, dataloader)
    X = l2_norm(X)

    # A query cannot have more neighbours than there are other samples; asking
    # topk for more would raise on small datasets.
    K = max(0, min(max(ks), X.size(0) - 1))

    # get the labels of the K nearest neighbours (cosine similarity) of every sample
    Y = []
    for xs in torch.split(X, chunk_size, dim=0):
        cos_sim = F.linear(xs, X)
        # Column 0 of the ranking is skipped (assumed to be the query itself).
        neighbour_idx = cos_sim.topk(1 + K)[1][:, 1:]
        # The labels may live on a different device than the embeddings.
        y = T[neighbour_idx.to(T.device)]
        Y.append(y.float().cpu())
    Y = torch.cat(Y, dim=0)

    # calculate recall @ each k
    recall = []
    for k in ks:
        r_at_k = calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        print("R@{} : {:.3f}".format(k, 100 * r_at_k))
    return recall

In [None]:
# Score the test split with the cosine-similarity protocol and echo the returned recall list.
recall = evaluate_cos_SOP(model=model, dataloader=test_loader)
print(recall)

In [None]:
# NOTE(review): `gg` is not assigned anywhere in the visible part of this notebook. If it is
# indeed undefined, evaluating it raises NameError and halts "Run All" at this cell --
# presumably a deliberate stop before the cells that follow. Confirm the intent, or replace
# it with an explicit guard.
gg

In [None]:
# Embed every batch of the training split; the results serve as the retrieval gallery.
# NOTE(review): in the visible source `train_loader` is only defined in a commented-out
# line of the data-loading cell -- re-enable it before running this cell.
# NOTE(review): this uses the plain forward pass `model(image)`, whereas predict_batchwise
# calls `model.extract_feat`; confirm the forward pass returns the embedding wanted here.
gallery_features = []
gallery_label = []
with torch.no_grad():
    for batch in tqdm(train_loader):
        image = batch[0].to(CFG.device)
        targets = batch[1].to(CFG.device)

        output = model(image)
        gallery_features.append(output)
        gallery_label.append(targets)

# Collapse the per-batch lists into one tensor each (samples along dim 0).
gallery_features = torch.cat(gallery_features, dim=0)
gallery_label = torch.cat(gallery_label, dim=0)

In [None]:
# Embed every batch of the test split; the results serve as the retrieval queries.
# NOTE(review): this uses the plain forward pass `model(image)`, whereas predict_batchwise
# calls `model.extract_feat`; confirm the forward pass returns the embedding wanted here.
query_features = []
query_labels = []
with torch.no_grad():
    for batch in tqdm(test_loader):
        image = batch[0].to(CFG.device)
        targets = batch[1].to(CFG.device)

        output = model(image)
        query_features.append(output)
        query_labels.append(targets)

# Collapse the per-batch lists into one tensor each (samples along dim 0).
query_features = torch.cat(query_features, dim=0)
query_labels = torch.cat(query_labels, dim=0)

In [None]:
def _search_gallery(query_features: torch.Tensor,
                    gallery_features: torch.Tensor,
                    num_neighbours: int,
                    cosine: bool) -> np.ndarray:
    """Return, for every query row, the indices of its nearest gallery rows (best first)."""
    # faiss is optional: imported here because the notebook does not import it globally.
    try:
        import faiss
    except ImportError:
        faiss = None

    if faiss is not None and hasattr(faiss, 'StandardGpuResources') and faiss.get_num_gpus() > 0:
        # faiss expects contiguous float32 arrays.
        q_f = np.ascontiguousarray(query_features.cpu().numpy())
        g_f = np.ascontiguousarray(gallery_features.cpu().numpy())

        res = faiss.StandardGpuResources()
        flat_config = faiss.GpuIndexFlatConfig()
        flat_config.device = 0

        index_function = faiss.GpuIndexFlatIP if cosine else faiss.GpuIndexFlatL2
        index = index_function(res, g_f.shape[1], flat_config)
        index.add(g_f)
        return index.search(q_f, num_neighbours)[1]

    # No GPU faiss available: exact brute-force search with torch, processed in
    # slices of queries to bound the size of the score matrix.
    gallery_features = gallery_features.to(query_features.device)
    found = []
    for chunk in torch.split(query_features, 1024, dim=0):
        if cosine:
            scores = chunk @ gallery_features.t()
        else:
            # Negated so that the largest score is the smallest distance.
            scores = -torch.cdist(chunk, gallery_features)
        found.append(scores.topk(num_neighbours, dim=1)[1])
    return torch.cat(found, dim=0).cpu().numpy()


@torch.no_grad()
def recall_at_ks(query_features: torch.Tensor,
                 query_labels: torch.LongTensor,
                 ks: List[int],
                 gallery_features: Optional[torch.Tensor] = None,
                 gallery_labels: Optional[torch.Tensor] = None,
                 cosine: bool = False) -> Dict[int, float]:
    """
    Compute the recall between samples at each k.

    The nearest-neighbour search uses a flat faiss GPU index when faiss with GPU
    support is installed, and an exact torch search otherwise.

    Parameters
    ----------
    query_features : torch.Tensor
        Features for each query sample. shape: (num_queries, num_features)
    query_labels : torch.LongTensor
        Labels corresponding to the query features. shape: (num_queries,)
    ks : List[int]
        Values at which to compute the recall. An empty list yields an empty result.
    gallery_features : torch.Tensor
        Features for each gallery sample. shape: (num_gallery, num_features)
        When both gallery arguments are None the queries are searched against
        themselves and the top match of every query is skipped (assumed to be the
        query itself).
    gallery_labels : torch.LongTensor
        Labels corresponding to the gallery features. shape: (num_gallery,)
    cosine : bool
        Use cosine similarity between samples instead of euclidean distance.

    Returns
    -------
    recalls : Dict[int, float]
        Recall at each k, as a percentage rounded to two decimals.

    Raises
    ------
    ValueError
        If exactly one of gallery_features / gallery_labels is given.
    """
    offset = 0
    if gallery_features is None and gallery_labels is None:
        offset = 1
        gallery_features = query_features
        gallery_labels = query_labels
    elif gallery_features is None or gallery_labels is None:
        raise ValueError('gallery_features and gallery_labels needs to be both None or both Tensors.')

    if not ks:
        return {}

    # The search back-ends work on float32; detach so numpy conversion is always allowed.
    query_features = query_features.detach().float()
    gallery_features = gallery_features.detach().float()
    if cosine:
        # Inner product of unit vectors equals cosine similarity.
        query_features = F.normalize(query_features, p=2, dim=1)
        gallery_features = F.normalize(gallery_features, p=2, dim=1)

    q_l = query_labels.cpu().numpy()
    g_l = gallery_labels.cpu().numpy()

    # Never request more neighbours than the gallery holds: faiss pads missing
    # results with -1, which would silently index the last gallery label.
    num_neighbours = min(max(ks) + offset, gallery_features.shape[0])
    closest_indices = _search_gallery(query_features, gallery_features, num_neighbours, cosine)

    recalls = {}
    for k in ks:
        indices = closest_indices[:, offset:k + offset]
        recalls[k] = (q_l[:, None] == g_l[indices]).any(1).mean()
    return {k: round(float(v) * 100, 2) for k, v in recalls.items()}

In [None]:
# Recall of the test-split queries against the gallery built above, using cosine similarity.
recall_at_ks(
    query_features,
    query_labels,
    ks=[1, 10, 20, 30, 40, 50],
    gallery_features=gallery_features,
    gallery_labels=gallery_label,
    cosine=True,
)