# **2-class image classifier (clothes vs. other)**

In [None]:
import pandas as pd

df = pd.read_csv('../input/shopee-product-matching/test.csv')

# A test.csv with exactly 3 rows takes the `else` branch of this statement,
# which assigns hard-coded class labels; any other row count runs the
# TensorFlow 2-class model defined in this branch.
if df.shape[0] != 3:
    # Install the Keras EfficientNet packages from local Kaggle input directories.
    !pip install /kaggle/input/kerasapplications -q
    !pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

    import os

    import efficientnet.tfkeras as efn
    import numpy as np
    import pandas as pd
    import tensorflow as tf

    def auto_select_accelerator():
        """Pick the tf.distribute strategy to run under.

        Tries to resolve, connect to and initialise a TPU; if that raises
        ValueError, falls back to TensorFlow's current default strategy.

        Returns:
            The selected ``tf.distribute`` strategy object.
        """
        try:
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            selected = tf.distribute.experimental.TPUStrategy(resolver)
            print("Running on TPU:", resolver.master())
        except ValueError:
            selected = tf.distribute.get_strategy()

        replica_count = selected.num_replicas_in_sync
        print(f"Running on {replica_count} replicas")
        return selected


    def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
        """Create the tf.data map function that turns a file path into an image.

        Args:
            with_labels: when true the returned function takes ``(path, label)``
                and passes the label through; otherwise it takes ``path`` only.
            target_size: size handed to ``tf.image.resize``.
            ext: ``'png'``, ``'jpg'`` or ``'jpeg'``; anything else makes the
                returned decoder raise ValueError when it is invoked.

        Returns:
            ``decode_with_labels`` or ``decode`` depending on ``with_labels``.
        """
        def decode(path):
            raw = tf.io.read_file(path)
            if ext in ['jpg', 'jpeg']:
                pixels = tf.image.decode_jpeg(raw, channels=3)
            elif ext == 'png':
                pixels = tf.image.decode_png(raw, channels=3)
            else:
                raise ValueError("Image extension not supported")

            # Scale to [0, 1] floats before resizing.
            scaled = tf.cast(pixels, tf.float32) / 255.0
            return tf.image.resize(scaled, target_size)

        def decode_with_labels(path, label):
            return decode(path), label

        if with_labels:
            return decode_with_labels
        return decode


    def build_augmenter(with_labels=True):
        """Create the tf.data map function that randomly flips an image.

        Args:
            with_labels: when true the returned function takes ``(img, label)``
                and passes the label through; otherwise it takes ``img`` only.

        Returns:
            ``augment_with_labels`` or ``augment`` depending on ``with_labels``.
        """
        def augment(img):
            # Horizontal flip first, then vertical flip, each applied at random.
            flipped = tf.image.random_flip_left_right(img)
            return tf.image.random_flip_up_down(flipped)

        def augment_with_labels(img, label):
            return augment(img), label

        if with_labels:
            return augment_with_labels
        return augment


    def build_dataset(paths, labels=None, bsize=32, cache=True,
                      decode_fn=None, augment_fn=None,
                      augment=True, repeat=True, shuffle=1024, 
                      cache_dir=""):
        """Assemble a batched, prefetched ``tf.data.Dataset`` from image paths.

        Pipeline order: decode -> cache -> augment -> repeat -> shuffle ->
        batch -> prefetch; each optional stage is skipped when its flag is falsy.

        Args:
            paths: image paths fed to ``from_tensor_slices``.
            labels: optional labels; when given, elements are ``(path, label)``.
            bsize: batch size.
            cache: whether to cache decoded elements.
            decode_fn: custom decode function; defaults to ``build_decoder``.
            augment_fn: custom augment function; defaults to ``build_augmenter``.
            augment: whether to apply ``augment_fn``.
            repeat: whether to repeat the dataset.
            shuffle: shuffle buffer size, or a falsy value to disable shuffling.
            cache_dir: value passed to ``Dataset.cache``; created on disk first
                when it is non-empty and ``cache`` is exactly ``True``.

        Returns:
            The resulting ``tf.data.Dataset``.
        """
        has_labels = labels is not None

        if cache is True and cache_dir != "":
            os.makedirs(cache_dir, exist_ok=True)

        decode_fn = build_decoder(has_labels) if decode_fn is None else decode_fn
        augment_fn = build_augmenter(has_labels) if augment_fn is None else augment_fn

        autotune = tf.data.experimental.AUTOTUNE
        source = (paths, labels) if has_labels else paths

        pipeline = tf.data.Dataset.from_tensor_slices(source)
        pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
        if cache:
            pipeline = pipeline.cache(cache_dir)
        if augment:
            pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
        if repeat:
            pipeline = pipeline.repeat()
        if shuffle:
            pipeline = pipeline.shuffle(shuffle)

        return pipeline.batch(bsize).prefetch(autotune)

    COMPETITION_NAME = "shopee-product-matching"
    strategy = auto_select_accelerator()
    BATCH_SIZE = strategy.num_replicas_in_sync * 1024

    IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600)

    load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
    sub_df = pd.read_csv('../input/shopee-product-matching/test.csv')
    test_paths = load_dir + "test_images/" + sub_df['image'] 

    sub_df['clothers'] = 0
    sub_df['other'] = 0
    

    label_cols = sub_df.columns[4:]

    test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[0], IMSIZE[0]))
    dtest = build_dataset(
        test_paths, bsize=BATCH_SIZE, repeat=False, 
        shuffle=False, augment=False, cache=False,
        decode_fn=test_decoder
    )

    with strategy.scope():
        model = tf.keras.models.load_model(
            '../input/shopee-2-class-train-efnb0-2/modelb00.h5'
        )

    model.summary()

    sub_df[label_cols] = model.predict(dtest, verbose=1)
    df = sub_df.copy()
    del sub_df

    df_other = df[df['other'] >= 0.5]
    df_clothes = df[df['other'] < 0.5]
    df_other_len = df_other.shape[0]
    df_other = df_other.append(df_clothes).reset_index(drop=True)
    df = df_other.copy()
    del df_other, df_clothes
else:
    df['clothes'] = [0,1,0]
    df['other'] = [1,0,1]    
    df_other = df[df['other'] >= 0.5]
    df_clothes = df[df['other'] < 0.5]
    df_other = df_other.append(df_clothes).reset_index(drop=True)
    df = df_other.copy()   
    df_other_len = 2


In [None]:
from numba import cuda
import torch
# Select GPU 0 through numba, close its CUDA context, then select it again.
# NOTE(review): presumably this frees GPU memory still held by the TensorFlow
# classifier from the previous cell before the PyTorch models load — confirm.
cuda.select_device(0)
cuda.close()
cuda.select_device(0)

# Setting

In [None]:
import sys, os
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import numpy as np 
import pandas as pd 

import math, random
import cv2
import timm
from tqdm import tqdm 

import albumentations as A 
from albumentations.pytorch.transforms import ToTensorV2

import torch 
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import nn
import torch.nn.functional as F 
import torchvision.models as models
from torch.nn import Parameter

import gc
import cudf, cuml, cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors

import copy
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import transformers
from transformers import (BertTokenizer, BertModel,
                          DistilBertTokenizer, DistilBertModel)

In [None]:
# --- Run mode -------------------------------------------------------------
COMPUTE_CV = False
SAVE_IMGEMBEDDING = False

# --- Image models ---------------------------------------------------------
EFF_B5 = True
ECA_NFNET_L0 = False
MODEL_TESTING_NFNET = False
MY_NFNET = True

# --- Text models ----------------------------------------------------------
BERT = True
DISTILBERT = False
SAVE_DISTILBERT = False # You need to enable internet to download pretrained model

# --- Thresholds -----------------------------------------------------------
EMBEDDING34_TH = 0.30

#df = pd.read_csv('../input/shopee-product-matching/test.csv')
# Anything larger than the 3-row sample test set forces submission mode.
if len(df) > 3:
    COMPUTE_CV = False

print(
    'this submission notebook will compute CV score but commit notebook will not'
    if COMPUTE_CV
    else 'this submission notebook will only be used to submit result'
)

In [None]:
class CFG:
    """Static settings shared by the image models defined in this notebook."""

    # Input resolution, embedding width, loader batch size and RNG seed.
    img_size = 512
    fc_dim = 512
    batch_size = 20
    seed = 2020

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Class counts: the full label set and its clothes / other split.
    classes = 11014
    classes_clothes = 3282
    classes_other = classes - classes_clothes

    # timm backbone names; the numeric suffix pairs with model_path<N> below.
    model_name = 'tf_efficientnet_b5_ns'
    model_name2 = 'eca_nfnet_l0'
    model_name3 = 'efficientnet_b3'
    model_name5 = 'eca_nfnet_l0'
    model_name6 = 'eca_nfnet_l0'

    # Checkpoint locations.
    model_path = '../input/shopee-pytorch-models/arcface_512x512_eff_b5_.pt'
    model_path2 = (
        '../input/shopee-price-match-guarantee-embeddings/arcface_512x512_nfnet_l0(mish)_b24_15.pt'
        if MODEL_TESTING_NFNET or MY_NFNET
        else '../input/shopee-pytorch-models/arcface_512x512_nfnet_l0 (mish).pt'
    )
    model_path3 = '../input/shopee-pytorch-models/arcface_512x512_eff_b3.pt'
    model_path5 = '../input/other-eca-nfnet-l0-training-21ep/arcface_512x512_nfnet_l0(mish)19.pt'
    model_path6 = '../input/clothes-eca-nfnet-l0-training-30ep/arcface_512x512_nfnet_l0(mish)28.pt'

    # ArcFace scale and angular margin.
    scale = 30 
    margin = 0.5

# Utility

In [None]:
def read_dataset(COMPUTE_CV,df = df):
    """Return the frame, a cuDF copy of it, and the per-row image paths.

    Args:
        COMPUTE_CV: when truthy, paths point into ``train_images``; otherwise
            into ``test_images``.
        df: pandas frame with an ``image`` column. The default is the
            module-level ``df`` as it was when this function was defined.

    Returns:
        ``(df, df_cu, image_paths)`` where ``df_cu`` is ``cudf.DataFrame(df)``.
    """
    if COMPUTE_CV:
        image_root = '../input/shopee-product-matching/train_images/'
    else:
        image_root = '../input/shopee-product-matching/test_images/'

    return df, cudf.DataFrame(df), image_root + df['image']

In [None]:
def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, and configures cuDNN for repeatable results.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs, which
    # defeats `deterministic = True`; switch it off explicitly.
    torch.backends.cudnn.benchmark = False
    
# Apply the notebook-wide seed from CFG as soon as the helper is defined.
seed_torch(CFG.seed)

In [None]:
def _unique_matches(row, columns):
    """Concatenate the prediction arrays found under `columns` and de-duplicate them.

    `np.unique` also sorts, so the result is a sorted array of distinct ids.
    """
    return np.unique(np.concatenate([row[column] for column in columns]))


# Prediction columns merged without / with the BERT text matches.
_BASE_COLUMNS = ('image_predictions', 'text_predictions', 'phash_predictions')
_BERT_COLUMNS = _BASE_COLUMNS + ('text_predictions_BERT',)


def combine_predictions(row):
    """Return the distinct image/text/phash matches of `row` as a space-joined string."""
    return ' '.join(_unique_matches(row, _BASE_COLUMNS))


def combine_for_cv(row):
    """Return the distinct image/text/phash matches of `row` as an array."""
    return _unique_matches(row, _BASE_COLUMNS)


def combine_predictions_BERT(row):
    """Like `combine_predictions`, additionally merging `text_predictions_BERT`."""
    return ' '.join(_unique_matches(row, _BERT_COLUMNS))


def combine_for_cv_BERT(row):
    """Like `combine_for_cv`, additionally merging `text_predictions_BERT`."""
    return _unique_matches(row, _BERT_COLUMNS)


In [None]:
def getMetric(col):
    """Build a row-wise F1 scorer comparing `row.target` with `row[col]`.

    Args:
        col: key of the prediction collection inside each row.

    Returns:
        A function ``f1score(row)`` computing
        ``2 * |target ∩ pred| / (|target| + |pred|)``.
    """
    def f1score(row):
        truth, guess = row.target, row[col]
        overlap = np.intersect1d(truth, guess)
        return 2 * len(overlap) / (len(truth) + len(guess))

    return f1score

In [None]:
def get_test_transforms():
    """Return the inference-time albumentations pipeline.

    Steps: resize to ``CFG.img_size`` square, normalise with the library
    defaults, then convert to a torch tensor.
    """
    side = CFG.img_size
    steps = [
        A.Resize(side, side, always_apply=True),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)

In [None]:
class ShopeeDataset(Dataset):
    """Torch dataset that loads images from disk for embedding extraction.

    Args:
        image_paths: sequence of image file paths (pandas Series, NumPy array
            or list).
        transforms: optional callable invoked as ``transforms(image=...)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, image_paths, transforms=None):
        self.image_paths = image_paths
        self.augmentations = transforms

    def __len__(self):
        return len(self.image_paths)

    def _path_at(self, index):
        """Return the path at *position* `index`.

        A pandas Series indexed with ``[]`` performs a label lookup, which
        fails or returns the wrong row when the index is not 0..n-1, so
        Series-like inputs are read through ``.iloc``.
        """
        paths = self.image_paths
        if hasattr(paths, 'iloc'):
            return paths.iloc[index]
        return paths[index]

    def __getitem__(self, index):
        """Return ``(image, dummy_label)`` for the sample at `index`.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path = self._path_at(index)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure with None; fail with the offending
            # path instead of an opaque cvtColor assertion.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations:
            image = self.augmentations(image=image)['image']

        # Labels are unused at inference; a constant keeps the (x, y) batch shape.
        return image, torch.tensor(1)

# Image

## Model

In [None]:
class ArcMarginProduct(nn.Module):
    """ArcFace-style additive angular margin head.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        scale: factor the final cosine logits are multiplied by.
        margin: angular margin (radians) added to the target-class angle.
        easy_margin: if true, apply the margin only where cosine > 0.
        ls_eps: label-smoothing epsilon; 0 disables smoothing.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``.

        Args:
            input: embeddings, one row per sample.
            label: integer class index per sample.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push cosine^2 marginally above 1; clamp so sqrt never
        # sees a negative value and produces NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(min=0.0, max=1.0))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate on the logits' device instead of a hard-coded 'cuda' so the
        # layer also works on CPU.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # Target class gets the margin-adjusted logit, every other class the plain cosine.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale

        return output
    

class ShopeeModel(nn.Module):
    """timm backbone with pooling, an optional FC+BN neck and an ArcFace head.

    ``forward`` returns embeddings only; the ArcFace head (``self.final``) is
    constructed but not applied in ``forward``.

    Args:
        n_classes: number of classes for the ArcFace head.
        model_name: timm model name; must be one of the supported backbones.
        fc_dim: output width of the FC neck.
        margin: ArcFace margin.
        scale: ArcFace scale.
        use_fc: whether ``extract_feat`` applies dropout -> fc -> bn.
        pretrained: forwarded to ``timm.create_model``.

    Raises:
        ValueError: if ``model_name`` is not a supported backbone.
    """

    def __init__(
        self,
        n_classes = CFG.classes,
        model_name = CFG.model_name,
        fc_dim = 512,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = False,
        pretrained = False):


        super(ShopeeModel,self).__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Strip the classifier and pooling so the backbone emits feature maps.
        if model_name == 'resnext50_32x4d':
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif model_name in ('efficientnet_b3', 'tf_efficientnet_b5_ns'):
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif model_name in ('nfnet_f3', 'eca_nfnet_l0'):
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()

        else:
            # Previously an unknown name fell through and crashed later with
            # an UnboundLocalError on final_in_features.
            raise ValueError('Unsupported backbone for ShopeeModel: {}'.format(model_name))

        self.pooling =  nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc

        self.dropout = nn.Dropout(p=0.0)
        self.fc = nn.Linear(final_in_features, fc_dim)
        self.bn = nn.BatchNorm1d(fc_dim)
        self._init_params()
        final_in_features = fc_dim

        self.final = ArcMarginProduct(
            final_in_features,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    def _init_params(self):
        """Initialise the FC neck (Xavier weights, zero bias) and BN (weight 1, bias 0)."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label=None):
        """Return the embedding for `image`; `label` is accepted but unused."""
        feature = self.extract_feat(image)
        #logits = self.final(feature,label)
        return feature

    def extract_feat(self, x):
        """Run the backbone, pool to one vector per sample, optionally apply the neck."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)
        return x
    
    
# class ShopeeModel2(nn.Module):

#     def __init__(
#         self,
#         n_classes = CFG.classes,
#         model_name = CFG.model_name2,
#         fc_dim = 512,
#         margin = CFG.margin,
#         scale = CFG.scale,
#         use_fc = True,
#         pretrained = False):


#         super(ShopeeModel2,self).__init__()
#         print('Building Model Backbone for {} model'.format(model_name))

#         self.backbone = timm.create_model(model_name, pretrained=pretrained)

#         if model_name == 'resnext50_32x4d':
#             final_in_features = self.backbone.fc.in_features
#             self.backbone.fc = nn.Identity()
#             self.backbone.global_pool = nn.Identity()

#         elif model_name == 'efficientnet_b3':
#             final_in_features = self.backbone.classifier.in_features
#             self.backbone.classifier = nn.Identity()
#             self.backbone.global_pool = nn.Identity()

#         elif model_name == 'tf_efficientnet_b5_ns':
#             final_in_features = self.backbone.classifier.in_features
#             self.backbone.classifier = nn.Identity()
#             self.backbone.global_pool = nn.Identity()
        
#         elif model_name == 'eca_nfnet_l0':
#             final_in_features = self.backbone.head.fc.in_features
#             self.backbone.head.fc = nn.Identity()
#             self.backbone.head.global_pool = nn.Identity()

#         self.pooling =  nn.AdaptiveAvgPool2d(1)

#         self.use_fc = use_fc

#         self.dropout = nn.Dropout(p=0.0)
#         self.fc = nn.Linear(final_in_features, fc_dim)
#         self.bn = nn.BatchNorm1d(fc_dim)
#         self._init_params()
#         final_in_features = fc_dim

#         self.final = ArcMarginProduct(
#             final_in_features,
#             n_classes,
#             scale = scale,
#             margin = margin,
#             easy_margin = False,
#             ls_eps = 0.0
#         )

#     def _init_params(self):
#         nn.init.xavier_normal_(self.fc.weight)
#         nn.init.constant_(self.fc.bias, 0)
#         nn.init.constant_(self.bn.weight, 1)
#         nn.init.constant_(self.bn.bias, 0)

#     def forward(self, image, label):
#         feature = self.extract_feat(image)
     
#         return feature

#     def extract_feat(self, x):
#         batch_size = x.shape[0]
#         x = self.backbone(x)
#         x = self.pooling(x).view(batch_size, -1)

#         if self.use_fc:
#             x = self.dropout(x)
#             x = self.fc(x)
#             x = self.bn(x)
#         return x
    
    
class _ShopeeEmbeddingNet(nn.Module):
    """Shared implementation behind ShopeeModel3 / ShopeeModel5 / ShopeeModel6.

    Builds a timm backbone with its classifier and pooling removed, followed by
    adaptive average pooling, an optional dropout -> fc -> bn neck and an
    ArcFace head. Attribute names (``backbone``, ``fc``, ``bn``, ``final``)
    are the same as in the classes this replaces, so ``state_dict`` keys are
    unchanged.

    Raises:
        ValueError: if ``model_name`` is not a supported backbone.
    """

    def __init__(self, model_name, n_classes, fc_dim, margin, scale, use_fc, pretrained):
        super().__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Strip the classifier and pooling so the backbone emits feature maps.
        if model_name == 'resnext50_32x4d':
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif model_name in ('efficientnet_b3', 'tf_efficientnet_b5_ns'):
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif model_name in ('eca_nfnet_l0', 'nfnet_f3'):
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()

        else:
            # Previously an unknown name fell through and crashed later with
            # an UnboundLocalError on final_in_features.
            raise ValueError('Unsupported backbone: {}'.format(model_name))

        self.pooling =  nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc

        self.dropout = nn.Dropout(p=0.0)
        self.fc = nn.Linear(final_in_features, fc_dim)
        self.bn = nn.BatchNorm1d(fc_dim)
        self._init_params()

        self.final = ArcMarginProduct(
            fc_dim,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    def _init_params(self):
        """Initialise the FC neck (Xavier weights, zero bias) and BN (weight 1, bias 0)."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label):
        """Return the embedding for `image`; `label` is accepted but unused."""
        feature = self.extract_feat(image)
        #logits = self.final(feature,label)
        return feature

    def extract_feat(self, x):
        """Run the backbone, pool to one vector per sample, optionally apply the neck."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)
        return x


class ShopeeModel3(_ShopeeEmbeddingNet):
    """Embedding net defaulting to ``CFG.model_name3`` and ``CFG.classes``.

    Note that ``model_name`` is the first positional parameter here, unlike
    ShopeeModel5 / ShopeeModel6 where ``n_classes`` comes first.
    """

    def __init__(
        self, 
        model_name = CFG.model_name3,
        n_classes = CFG.classes,
        fc_dim = 512,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = True,
        pretrained = False):

        super(ShopeeModel3,self).__init__(
            model_name=model_name, n_classes=n_classes, fc_dim=fc_dim,
            margin=margin, scale=scale, use_fc=use_fc, pretrained=pretrained)


class ShopeeModel5(_ShopeeEmbeddingNet):
    """Embedding net defaulting to ``CFG.model_name5`` and ``CFG.classes_other``."""

    def __init__(
        self,
        n_classes = CFG.classes_other,
        model_name = CFG.model_name5,
        fc_dim = 512,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = True,
        pretrained = False):

        super(ShopeeModel5,self).__init__(
            model_name=model_name, n_classes=n_classes, fc_dim=fc_dim,
            margin=margin, scale=scale, use_fc=use_fc, pretrained=pretrained)


class ShopeeModel6(_ShopeeEmbeddingNet):
    """Embedding net defaulting to ``CFG.model_name6`` and ``CFG.classes_clothes``."""

    def __init__(
        self,
        n_classes = CFG.classes_clothes,
        model_name = CFG.model_name6,
        fc_dim = 512,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = True,
        pretrained = False):

        super(ShopeeModel6,self).__init__(
            model_name=model_name, n_classes=n_classes, fc_dim=fc_dim,
            margin=margin, scale=scale, use_fc=use_fc, pretrained=pretrained)

In [None]:
# https://www.kaggle.com/parthdhameliya77/pytorch-eca-nfnet-l0-image-tfidf-inference
class Mish_func(torch.autograd.Function):
    
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written backward pass.

    from: https://github.com/tyunist/memory_efficient_mish_swish/blob/master/mish.py

    Only the input is saved for backward; the intermediate activations are
    recomputed there.
    """
    
    @staticmethod
    def forward(ctx, i):
        result = i * torch.tanh(F.softplus(i))
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        # `saved_tensors` is the supported accessor; `saved_variables` is deprecated.
        (i,) = ctx.saved_tensors

        # d/dx [x * tanh(sp(x))] = tanh(sp(x)) + x * sech^2(sp(x)) * sigmoid(x),
        # where sp = softplus and sech^2 = 1 - tanh^2. Using F.softplus and
        # 1 - tanh^2 avoids the overflow-prone log(1 + exp(x)) / cosh path.
        tanh_sp = torch.tanh(F.softplus(i))
        grad_f = tanh_sp + i * i.sigmoid() * (1.0 - tanh_sp * tanh_sp)

        return grad_output * grad_f 


class Mish(nn.Module):
    """Layer wrapper that applies ``Mish_func`` to its input."""

    def __init__(self, **kwargs):
        # Keyword arguments are accepted and ignored.
        super().__init__()

    def forward(self, input_tensor):
        activated = Mish_func.apply(input_tensor)
        return activated


def replace_activations(model, existing_layer, new_layer):
    """Swap every sub-module whose exact type is `existing_layer` for `new_layer`.

    Walks the module tree recursively and edits it in place. The same
    `new_layer` instance is installed at every matching position.

    Args:
        model: root module to modify.
        existing_layer: module class to look for (exact type match).
        new_layer: module instance to install in its place.

    Returns:
        `model`, for chaining.
    """
    children = model._modules
    for child_name, child in reversed(children.items()):
        has_grandchildren = len(list(child.children())) > 0
        if has_grandchildren:
            children[child_name] = replace_activations(child, existing_layer, new_layer)

        if type(child) == existing_layer:
            children[child_name] = new_layer
    return model

## Embedding

In [None]:
def _embed_images(build_model, weights_path, image_paths):
    """Build one model, load its checkpoint and embed every image in `image_paths`.

    Args:
        build_model: zero-argument callable returning a fresh model. The model
            is created here so that this function holds the only reference
            and can release it before returning.
        weights_path: path of the ``state_dict`` checkpoint to load.
        image_paths: image paths handed to ``ShopeeDataset``.

    Returns:
        One NumPy array with the embeddings of all batches stacked along axis 0.
    """
    model = build_model()
    model.eval()
    # map_location lets checkpoints saved on GPU load wherever CFG.device points.
    model.load_state_dict(torch.load(weights_path, map_location=CFG.device))
    model = model.to(CFG.device)

    image_dataset = ShopeeDataset(image_paths=image_paths,transforms=get_test_transforms())
    image_loader = DataLoader(
        image_dataset,
        batch_size=CFG.batch_size,
        pin_memory=True,
        drop_last=False,
        num_workers=4
    )

    batches = []
    with torch.no_grad():
        for img,label in tqdm(image_loader):
            # Follow CFG.device (as the model does) instead of a hard-coded .cuda().
            feat = model(img.to(CFG.device), label.to(CFG.device))
            batches.append(feat.detach().cpu().numpy())

    # Drop the model before returning so its memory can be reclaimed before
    # the next backbone is built.
    del model
    gc.collect()

    return np.concatenate(batches)


def get_image_embeddings(image_paths, model_name=CFG.model_name, EFF_B5=EFF_B5, nfnet_only=False):
    """Compute image embeddings with each enabled backbone.

    Args:
        image_paths: image paths to embed.
        model_name: backbone name passed to ``ShopeeModel``.
        EFF_B5: whether to run ``ShopeeModel`` (checkpoint ``CFG.model_path``).
        nfnet_only: when true, skip ``ShopeeModel`` and ``ShopeeModel3`` and
            run only ``ShopeeModel5`` and ``ShopeeModel6``.

    Returns:
        ``(image_embeddings1, image_embeddings4, image_embeddings5,
        image_embeddings6)``; the first two are ``None`` when their model is
        skipped.
    """
    image_embeddings1 = None
    if EFF_B5 and not nfnet_only:
        image_embeddings1 = _embed_images(
            lambda: ShopeeModel(model_name = model_name), CFG.model_path, image_paths)
        print(f'image embeddings1 shape is {image_embeddings1.shape}')

    # The ShopeeModel2 branch (CFG.model_path2, SiLU replaced by Mish) is
    # disabled in this notebook, so no "image_embeddings3" is produced.

    image_embeddings4 = None
    if not nfnet_only:
        image_embeddings4 = _embed_images(ShopeeModel3, CFG.model_path3, image_paths)
        # Report the stacked array; the old code printed the last batch's shape.
        print(f'Our image embeddings shape is {image_embeddings4.shape}')

    image_embeddings5 = _embed_images(ShopeeModel5, CFG.model_path5, image_paths)
    print(f'Our image embeddings shape is {image_embeddings5.shape}')

    image_embeddings6 = _embed_images(ShopeeModel6, CFG.model_path6, image_paths)
    print(f'Our image embeddings shape is {image_embeddings6.shape}')

    return image_embeddings1, image_embeddings4, image_embeddings5, image_embeddings6

## Prediction

In [None]:
# Threshold shifting depending on dataset length
# https://www.kaggle.com/c/shopee-product-matching/discussion/234927
def dataset_th(known_th, known_dataset_len, new_dataset_len):
    """Shift a distance threshold linearly with the change in dataset size.

    Args:
        known_th: threshold tuned on a dataset of `known_dataset_len` rows.
        known_dataset_len: row count the threshold was tuned on.
        new_dataset_len: row count of the dataset the threshold is needed for.

    Returns:
        ``known_th`` plus a fixed negative slope times the row-count difference.
    """
    slope_per_row = -2.051562606852219e-06
    extra_rows = new_dataset_len - known_dataset_len
    return (slope_per_row * extra_rows) + known_th

dataset_th(1.7, 34250, 70000)

# **other**

In [None]:
def get_image_predictions_other(df, embeddings1, embeddings34, 
                          EFF_B5=EFF_B5, ECA_NFNET_L0=ECA_NFNET_L0, predictions34_th=0.36):
    """Image-neighbour matches for the leading ``df_other_len`` rows of ``df``.

    Up to two nearest-neighbour searches are run over the full embedding
    matrices and their per-row hits are merged:

    * ``embeddings1`` (only when ``EFF_B5`` is truthy): NearestNeighbors with its
      default metric and a fixed distance cut-off of ``1.7 - 0.2978``.
    * ``embeddings34``: cosine distance with cut-off ``predictions34_th``. A row
      that matches at most one posting has the cut-off relaxed in steps of 0.02
      (only while it stays below 0.5) until more than one posting is found.

    Args:
        df: frame with a ``posting_id`` column; assumed to be row-aligned with
            both embedding matrices.
        embeddings1: embedding matrix for the ``EFF_B5`` search.
        embeddings34: blended embedding matrix for the cosine search.
        EFF_B5: whether to run the ``embeddings1`` search and merge its hits.
        ECA_NFNET_L0: accepted for backward compatibility only. The standalone
            search it used to enable needs embeddings that are not passed to
            this function, so the flag no longer influences the result
            (previously a truthy value together with ``EFF_B5`` raised
            ``NameError`` on an undefined ``predictions3``).
        predictions34_th: initial cosine-distance cut-off.

    Returns:
        A list with one list of posting ids per row in ``range(0, df_other_len)``.

    Note:
        ``df_other_len`` is a notebook-level global defined outside this function.
    """
    # scikit-learn rejects n_neighbors larger than the number of fitted rows, so
    # cap the usual 50 neighbours by the frame length (a 3-row frame still gets 3).
    KNN = min(50, len(df))

    # Only the leading slice of df is scored here.
    rows = range(0, df_other_len)
    posting_id = df['posting_id']

    #--
    predictions1 = []
    if EFF_B5:
        model = NearestNeighbors(n_neighbors = KNN)
        model.fit(embeddings1)
        distances, indices = model.kneighbors(embeddings1)

        threshold = 1.7 - 0.2978
        for k in tqdm(rows):
            idx = np.where(distances[k,] < threshold)[0]
            ids = indices[k,idx]
            predictions1.append(list(posting_id.iloc[ids]))

        # only drops the local references; the caller's array is untouched
        del model, distances, indices, embeddings1
        gc.collect()

    #--
    model = NearestNeighbors(n_neighbors = KNN, metric = 'cosine')
    model.fit(embeddings34)
    distances, indices = model.kneighbors(embeddings34)

    predictions34 = []
    for k in tqdm(rows):
        idx = np.where(distances[k,] < predictions34_th)[0]
        ids = indices[k,idx]
        posting_ids = list(posting_id.iloc[ids].values)
        # Relax the cut-off step by step for rows that only matched themselves
        # (or nothing); stop as soon as a second posting shows up or the
        # cut-off would reach 0.5.
        for ii in np.arange(predictions34_th, 0.54, 0.02):
            if not (ii < 0.5 and len(posting_ids) <= 1):
                break
            idx = np.where(distances[k,] < ii)[0]
            ids = indices[k,idx]
            posting_ids = list(posting_id.iloc[ids].values)
        predictions34.append(posting_ids)

    del model, distances, indices
    gc.collect()

    # combine predictions (i.e. posting ids) of the models & remove the duplicates.
    if EFF_B5:
        predictions = [list(set(a + d)) for a, d in zip(predictions1, predictions34)]
    else:
        predictions = predictions34

    return predictions

# **clothes**

In [None]:
def get_image_predictions_clothes(df, embeddings1, embeddings34, 
                          EFF_B5=EFF_B5, ECA_NFNET_L0=ECA_NFNET_L0, predictions34_th=0.36):
    """Image-neighbour matches for the rows of ``df`` from ``df_other_len`` onwards.

    Up to two nearest-neighbour searches are run over the full embedding
    matrices and their per-row hits are merged:

    * ``embeddings1`` (only when ``EFF_B5`` is truthy): NearestNeighbors with its
      default metric and a fixed distance cut-off of ``1.7 - 0.2978``.
    * ``embeddings34``: cosine distance with cut-off ``predictions34_th``. A row
      that matches at most one posting has the cut-off relaxed in steps of 0.02
      (only while it stays below 0.5) until more than one posting is found.

    Args:
        df: frame with a ``posting_id`` column; assumed to be row-aligned with
            both embedding matrices.
        embeddings1: embedding matrix for the ``EFF_B5`` search.
        embeddings34: blended embedding matrix for the cosine search.
        EFF_B5: whether to run the ``embeddings1`` search and merge its hits.
        ECA_NFNET_L0: accepted for backward compatibility only. The standalone
            search it used to enable needs embeddings that are not passed to
            this function, so the flag no longer influences the result
            (previously a truthy value together with ``EFF_B5`` raised
            ``NameError`` on an undefined ``predictions3``).
        predictions34_th: initial cosine-distance cut-off.

    Returns:
        A list with one list of posting ids per row in
        ``range(df_other_len, df.shape[0])``.

    Note:
        ``df_other_len`` is a notebook-level global defined outside this function.
    """
    # scikit-learn rejects n_neighbors larger than the number of fitted rows, so
    # cap the usual 50 neighbours by the frame length (a 3-row frame still gets 3).
    KNN = min(50, len(df))

    # Only the trailing slice of df is scored here.
    rows = range(df_other_len, df.shape[0])
    posting_id = df['posting_id']

    #--
    predictions1 = []
    if EFF_B5:
        model = NearestNeighbors(n_neighbors = KNN)
        model.fit(embeddings1)
        distances, indices = model.kneighbors(embeddings1)

        threshold = 1.7 - 0.2978
        for k in tqdm(rows):
            idx = np.where(distances[k,] < threshold)[0]
            ids = indices[k,idx]
            predictions1.append(list(posting_id.iloc[ids]))

        # only drops the local references; the caller's array is untouched
        del model, distances, indices, embeddings1
        gc.collect()

    #--
    model = NearestNeighbors(n_neighbors = KNN, metric = 'cosine')
    model.fit(embeddings34)
    distances, indices = model.kneighbors(embeddings34)

    predictions34 = []
    for k in tqdm(rows):
        idx = np.where(distances[k,] < predictions34_th)[0]
        ids = indices[k,idx]
        posting_ids = list(posting_id.iloc[ids].values)
        # Relax the cut-off step by step for rows that only matched themselves
        # (or nothing); stop as soon as a second posting shows up or the
        # cut-off would reach 0.5.
        for ii in np.arange(predictions34_th, 0.54, 0.02):
            if not (ii < 0.5 and len(posting_ids) <= 1):
                break
            idx = np.where(distances[k,] < ii)[0]
            ids = indices[k,idx]
            posting_ids = list(posting_id.iloc[ids].values)
        predictions34.append(posting_ids)

    del model, distances, indices
    gc.collect()

    # combine predictions (i.e. posting ids) of the models & remove the duplicates.
    if EFF_B5:
        predictions = [list(set(a + d)) for a, d in zip(predictions1, predictions34)]
    else:
        predictions = predictions34

    return predictions

# Text

## Embedding and Prediction

In [None]:
#https://www.kaggle.com/cdeotte/part-2-rapids-tfidfvectorizer-cv-0-700#Use-Text-Embeddings

def get_text_predictions(df, df_cu, max_features=25_000):
    """Find postings with similar titles using binary TF-IDF vectors.

    Titles from ``df_cu`` are vectorised, then compared chunk by chunk with a
    matrix product (cosine similarity according to the original comment,
    i.e. assuming the vectoriser emits L2-normalised rows). Each row keeps
    the postings scoring above 0.80; rows with at most one hit retry with the
    bar lowered in 0.02 steps while it stays above 0.5.

    Args:
        df: frame providing ``posting_id``; assumed row-aligned with ``df_cu``.
        df_cu: frame providing the ``title`` column handed to the vectoriser.
        max_features: vocabulary size limit of the vectoriser.

    Returns:
        A list with one array of posting ids per row of ``df``.
    """
    vectorizer = TfidfVectorizer(stop_words=None, binary=True, max_features=max_features)
    tfidf = vectorizer.fit_transform(df_cu['title']).toarray()

    chunk_size = 1024*4
    n_rows = len(df)
    matches = []

    print('Finding similar titles...')
    for start in range(0, n_rows, chunk_size):
        stop = min(start + chunk_size, n_rows)
        print('chunk',start,'to',stop)

        # similarity of every title against the titles of the current chunk
        sims = cupy.matmul(tfidf, tfidf[start:stop].T).T

        for k in range(stop - start):
            row = sims[k,]
            hits = cupy.where(row > 0.80)[0]
            found = df.iloc[cupy.asnumpy(hits)].posting_id.values

            # lower the bar gradually for rows that matched at most one posting
            for bar in np.arange(0.80, 0.50, -0.02):
                if not (bar > 0.5 and found.shape[0] <= 1):
                    break
                hits = cupy.where(row > bar)[0]
                found = df.iloc[cupy.asnumpy(hits)].posting_id.values
            matches.append(found)

    del vectorizer, tfidf
    gc.collect()
    return matches

# Data Load-in and Preparation

In [None]:
# Load the posting metadata via read_dataset (defined outside this part of the
# notebook): `df` is the pandas frame used for all predictions, `df_cu` supplies
# the titles for the TF-IDF step, and `image_paths` feeds the image models.
df, df_cu, image_paths = read_dataset(COMPUTE_CV)
df.head()

In [None]:
# Either compute the image embeddings with the models (submission runs, or when
# SAVE_IMGEMBEDDING is set) or load previously saved CSVs (CV runs).
# NOTE(review): `image_embeddings3` is passed to np.savetxt below but is never
# assigned in this cell -- get_image_embeddings() returns embeddings 1, 4, 5 and
# 6 -- so both of those calls raise NameError unless the name is left over in
# the kernel. Confirm which array was meant to be saved.
if not COMPUTE_CV or SAVE_IMGEMBEDDING:
    # nfnet_only is MODEL_TESTING_NFNET under CV and COMPUTE_CV itself (falsy) otherwise
    image_embeddings1, image_embeddings4, image_embeddings5, image_embeddings6 = get_image_embeddings(image_paths.values, nfnet_only=MODEL_TESTING_NFNET if COMPUTE_CV else COMPUTE_CV)
    if SAVE_IMGEMBEDDING and not MODEL_TESTING_NFNET: 
        np.savetxt('tf_efficientnet_b5_ns.csv', image_embeddings1, delimiter=',')
        np.savetxt('eca_nfnet_l0.csv', image_embeddings3, delimiter=',')
        np.savetxt('efficientnet_b3.csv', image_embeddings4, delimiter=',')
    elif COMPUTE_CV and MODEL_TESTING_NFNET:
        # replace the freshly computed embeddings 1 and 4 with the cached CSV versions
        image_embeddings1 = np.loadtxt('../input/shopee-price-match-guarantee-embeddings/tf_efficientnet_b5_ns.csv', delimiter=',')
        np.savetxt('eca_nfnet_l0_b24_15.csv', image_embeddings3, delimiter=',')
        image_embeddings4 = np.loadtxt('../input/shopee-price-match-guarantee-embeddings/efficientnet_b3.csv', delimiter=',')
else:
    # NOTE(review): this cached path loads embeddings 1 (only if EFF_B5), 3 and 4,
    # but not image_embeddings5 / image_embeddings6, which the blending cell
    # right after this one reads.
    if EFF_B5:
        image_embeddings1 = np.loadtxt('../input/shopee-price-match-guarantee-embeddings/tf_efficientnet_b5_ns.csv', delimiter=',')
    image_embeddings3 = np.loadtxt('../input/shopee-price-match-guarantee-embeddings/eca_nfnet_l0_b24_15.csv', delimiter=',')
    image_embeddings4 = np.loadtxt('../input/shopee-price-match-guarantee-embeddings/efficientnet_b3.csv', delimiter=',')

In [None]:
# Blend embedding 4 with embedding 5 (for the "other" rows) and with embedding 6
# (for the "clothes" rows) using fixed 0.4 / 0.6 weights.
image_embeddings_other = image_embeddings4 * 0.4  + image_embeddings5 * 0.6
image_embeddings_clothes = image_embeddings4 * 0.4  + image_embeddings6 * 0.6
# Drop the un-blended arrays; re-running this cell afterwards needs them recomputed.
del image_embeddings4,image_embeddings5,image_embeddings6

In [None]:
# Show the loaded frame as a visual check before running the predictions.
df

In [None]:
# Image matches are computed separately for the two row ranges of df, each with
# its own blended embedding matrix; both share image_embeddings1 and the
# EMBEDDING34_TH cosine cut-off.
image_predictions_other = get_image_predictions_other(df, image_embeddings1, image_embeddings_other, 
                                          predictions34_th=EMBEDDING34_TH)
image_predictions_clothes = get_image_predictions_clothes(df, image_embeddings1, image_embeddings_clothes, 
                                          predictions34_th=EMBEDDING34_TH)

# TF-IDF title matches for every row of df.
text_predictions = get_text_predictions(df, df_cu)

In [None]:
# Append the "clothes" predictions after the "other" ones so the combined list
# follows df row order (rows [0, df_other_len) first, then the remaining rows).
# NOTE(review): extend() mutates image_predictions_other in place, so running
# this cell twice appends the clothes predictions a second time.
image_predictions_other.extend(image_predictions_clothes)

In [None]:
# Keep a shallow copy of the combined list under its final name, then drop the
# two per-range lists.
# NOTE(review): after the `del`, the prediction cells must be re-run before this
# cell can be executed again.
image_predictions = image_predictions_other.copy()
del image_predictions_other, image_predictions_clothes

# DistilBERT

- [LB with only TEXT data](https://www.kaggle.com/c/shopee-product-matching/discussion/230620)
- [Does using BERT or any transformer Model even makes sense?](https://www.kaggle.com/c/shopee-product-matching/discussion/228741)
- [Indonesian DistilBERT finetuning with ArcMargin](https://www.kaggle.com/moeinshariatnia/indonesian-distilbert-finetuning-with-arcmargin)

In [None]:
if DISTILBERT:
    class CFG:
        # Settings for the DistilBERT text model.
        # NOTE(review): this rebinds the notebook-level name `CFG`, which the
        # image-embedding code also reads (CFG.batch_size, CFG.device,
        # CFG.model_path6); re-running that code after this cell would pick up
        # this class instead.
        DistilBERT = True # if set to False, BERT model will be used
        bert_hidden_size = 768  # default in_features of the ArcMargin head in Model

        batch_size = 64   # DataLoader batch size
        # The following training-style settings are not read by the code in this cell.
        epochs = 30
        num_workers = 4   # DataLoader worker processes
        learning_rate = 1e-5 #3e-5
        scheduler = "ReduceLROnPlateau"
        step = 'epoch'
        patience = 2
        factor = 0.8
        dropout = 0.5
        model_path = "/kaggle/working"
        max_length = 30   # tokenizer truncation length passed to TextDataset
        model_save_name = "model.pt"
        # falls back to CPU when CUDA is unavailable
        device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

        
    # Obtain the tokenizer and encoder: either download them by hub name and save
    # local copies (SAVE_DISTILBERT), or load the previously saved copies from the
    # input dataset.
    if SAVE_DISTILBERT:
        if CFG.DistilBERT:
            model_name='cahya/distilbert-base-indonesian'
            tokenizer = DistilBertTokenizer.from_pretrained(model_name)
            bert_model = DistilBertModel.from_pretrained(model_name)
        else:
            model_name='cahya/bert-base-indonesian-522M'
            tokenizer = BertTokenizer.from_pretrained(model_name)
            bert_model = BertModel.from_pretrained(model_name)
        tokenizer.save_pretrained("./models/tokenizer/")
        bert_model.save_pretrained("./models/bert_model/")
    else:
        # NOTE(review): this branch always loads the DistilBERT classes, regardless
        # of CFG.DistilBERT.
        tokenizer = DistilBertTokenizer.from_pretrained("../input/shopee-price-match-guarantee-embeddings/DistilBERT_pretrained/models/tokenizer")
        bert_model = DistilBertModel.from_pretrained("../input/shopee-price-match-guarantee-embeddings/DistilBERT_pretrained/models/bert_model")


    # Encode label_group as consecutive integer codes; NUM_CLASSES becomes the
    # default output size of the ArcMargin head in Model.
    # NOTE(review): requires a `label_group` column in df -- confirm it exists
    # when running on the test set.
    lbl_encoder = LabelEncoder()
    df['label_code'] = lbl_encoder.fit_transform(df['label_group'])
    NUM_CLASSES = df['label_code'].nunique()


    class TextDataset(torch.utils.data.Dataset):
        """Tokenised ``title`` column served as one dict of tensors per row.

        All titles are tokenised once in the constructor (padded, truncated to
        ``max_length``). Unless ``mode`` is ``"test"``, each item also carries
        the row's ``label_code`` under the ``'labels'`` key.
        """

        def __init__(self, dataframe, tokenizer, mode="train", max_length=None):
            self.dataframe = dataframe
            self.mode = mode
            if mode != "test":
                self.targets = dataframe['label_code'].values
            # force every title to str before tokenising
            titles = dataframe['title'].map(str).tolist()
            self.encodings = tokenizer(titles,
                                       padding=True,
                                       truncation=True,
                                       max_length=max_length)

        def __len__(self):
            return len(self.dataframe)

        def __getitem__(self, idx):
            # one tensor per tokenizer output key, ready to feed to the model
            item = {}
            for key, values in self.encodings.items():
                item[key] = torch.tensor(values[idx])
            # test mode has no targets, so the item stops here
            if self.mode == "test":
                return item
            item['labels'] = torch.tensor(self.targets[idx]).long()
            return item


    # Inference dataset: test mode, so the items carry no 'labels' entry.
    dataset = TextDataset(df, tokenizer, mode='test', max_length=CFG.max_length)
    # shuffle must stay off for inference: get_text_embeddings concatenates the
    # batches in loader order, and that order has to match the rows of df.
    dataloader = DataLoader(dataset, 
                             batch_size=CFG.batch_size, 
                             num_workers=CFG.num_workers, 
                             shuffle=False)
    # Pull one batch as a quick check that tokenisation and collation work.
    batch = next(iter(dataloader))
    
    
    # code from https://github.com/ronghuaiyang/arcface-pytorch/blob/47ace80b128042cd8d2efd408f55c5a3e156b032/models/metrics.py#L10
    class ArcMarginProduct(nn.Module):
        r"""Implement of large margin arc distance: :
            Args:
                in_features: size of each input sample
                out_features: size of each output sample
                s: norm of input feature
                m: margin
                cos(theta + m)

            The forward pass returns scaled logits: for the target class of each
            sample the cosine is replaced by the margin-adjusted ``phi``, every
            other class keeps its plain cosine, and the result is multiplied
            by ``s``.
            """
        def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
            super(ArcMarginProduct, self).__init__()
            self.in_features = in_features
            self.out_features = out_features
            self.s = s
            self.m = m
            # one weight row per class; rows are L2-normalised in forward()
            self.weight = Parameter(torch.FloatTensor(out_features, in_features))
            nn.init.xavier_uniform_(self.weight)

            self.easy_margin = easy_margin
            # constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
            self.cos_m = math.cos(m)
            self.sin_m = math.sin(m)
            # below this cosine, theta + m would exceed pi; a linear penalty is used instead
            self.th = math.cos(math.pi - m)
            self.mm = math.sin(math.pi - m) * m

        def forward(self, input, label):
            """Return ``s``-scaled margin logits of shape ``(len(input), out_features)``.

            Args:
                input: 2-D feature batch with ``in_features`` columns.
                label: integer class index per sample (any shape that flattens
                    to one entry per row of ``input``).
            """
            # --------------------------- cos(theta) & phi(theta) ---------------------------
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
            # clamp guards against tiny negative values from rounding before the sqrt
            sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
            phi = cosine * self.cos_m - sine * self.sin_m
            if self.easy_margin:
                phi = torch.where(cosine > 0, phi, cosine)
            else:
                phi = torch.where(cosine > self.th, phi, cosine - self.mm)
            # --------------------------- convert label to one-hot ---------------------------
            # Allocate the mask next to the logits rather than on a globally
            # configured device, so the layer works wherever its inputs live.
            one_hot = torch.zeros(cosine.size(), device=cosine.device)
            one_hot.scatter_(1, label.view(-1, 1).long(), 1)
            # target class takes phi, all other classes keep the plain cosine
            output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
            output *= self.s

            return output


    class Model(nn.Module):
        """Wraps a BERT-style encoder and an ArcMarginProduct head.

        ``get_bert_features`` yields the hidden state of the first token of each
        sequence; ``forward`` feeds those features and ``batch['labels']`` to the
        margin head.
        """

        def __init__(self, 
                     bert_model, 
                     num_classes=NUM_CLASSES, 
                     last_hidden_size=CFG.bert_hidden_size):
            super(Model, self).__init__()
            self.bert_model = bert_model
            self.arc_margin = ArcMarginProduct(in_features=last_hidden_size,
                                               out_features=num_classes,
                                               s=30.0,
                                               m=0.50,
                                               easy_margin=False)

        def get_bert_features(self, batch):
            """Return the first-token (CLS) hidden state for every sequence in ``batch``."""
            encoded = self.bert_model(input_ids=batch['input_ids'],
                                      attention_mask=batch['attention_mask'])
            # last_hidden_state is (batch_size, seq_length, bert_hidden_dim); keep token 0
            return encoded.last_hidden_state[:, 0, :]

        def forward(self, batch):
            """Return the margin logits for ``batch`` (requires a ``'labels'`` entry)."""
            return self.arc_margin(self.get_bert_features(batch), batch['labels'])

        
    def get_text_embeddings():
        """Embed every title served by the module-level ``dataloader``.

        Loads the fine-tuned weights into ``Model(bert_model)`` and collects the
        first-token features from ``Model.get_bert_features`` for each batch.
        The margin head is bypassed because inference batches carry no labels.

        Returns:
            A 2-D numpy array with one row per dataset item, in the order the
            ``dataloader`` yields them.
        """
        model = Model(bert_model) #the class of the model u created
        model.eval()
        WEIGHTS_FILE = "../input/shopee-price-match-guarantee-embeddings/DistilBERT/model.pt"
        # map_location lets the checkpoint load on whichever device CFG selected
        model.load_state_dict(torch.load(WEIGHTS_FILE, map_location=CFG.device))
        model = model.to(CFG.device)

        embeds = []
        with torch.no_grad():
            # every batch is a dict of tensors keyed by tokenizer output name
            for batch in tqdm(dataloader):
                batch = {key: values.to(CFG.device) for key, values in batch.items()}
                feat = model.get_bert_features(batch)
                embeds.append(feat.detach().cpu().numpy())

        del model
        text_embeddings = np.concatenate(embeds)
        print(f'text_embeddings shape is {text_embeddings.shape}')
        del embeds
        gc.collect()
        return text_embeddings


    # keep the result, following the text_embeddings_BERT naming used by the BERT cell
    text_embeddings_DistilBERT = get_text_embeddings()

# BERT

- [Metric Learning Pipeline : Only Text Sbert](https://www.kaggle.com/tanulsingh077/metric-learning-pipeline-only-text-sbert)
- [Reaching 0.612 with Text Only : Shopee](https://www.kaggle.com/tanulsingh077/reaching-0-612-with-text-only-shopee)

In [None]:
if BERT:
    # DataLoader settings for the sentence-transformer text model.
    NUM_WORKERS = 4
    BATCH_SIZE = 16
    SEED = 42  # not referenced by the code in this cell

    # This cell always targets CUDA (no CPU fallback).
    device = torch.device('cuda')

    ################################################# MODEL ####################################################################

    # Local copy of the transformer backbone; also the source of the tokenizer.
    transformer_model = '../input/sentence-transformer-models/paraphrase-xlm-r-multilingual-v1/0_Transformer'
    TOKENIZER = transformers.AutoTokenizer.from_pretrained(transformer_model)

    ################################################ MODEL PATH ###############################################################

    # Fine-tuned weights loaded by get_BERT_embeddings.
    TEXT_MODEL_PATH = '../input/best-multilingual-model/sentence_transfomer_xlm_best_loss_num_epochs_25_arcface.bin'

    # Keyword arguments splatted into ShopeeNet(...).
    model_params = {
        'n_classes':11014,
        'model_name':transformer_model,
        'use_fc':False,
        'fc_dim':512,
        'dropout':0.3,
    }

    class ShopeeDataset(Dataset):
        """Per-row tokenised titles for the text model.

        Each item is the ``(input_ids, attention_mask)`` pair of one title,
        padded/truncated to 128 tokens by the module-level ``TOKENIZER``.

        Note: this reuses the name ``ShopeeDataset`` that the image pipeline
        earlier in the notebook also uses, so it rebinds that name.
        """

        def __init__(self, csv):
            # reset_index gives the positional lookup below a fresh 0..n-1 index
            self.csv = csv.reset_index()

        def __len__(self):
            return len(self.csv)

        def __getitem__(self, index):
            title = self.csv.iloc[index].title
            encoded = TOKENIZER(title, padding='max_length', truncation=True, max_length=128, return_tensors="pt")
            # return_tensors="pt" adds a leading batch axis of size 1; strip it
            return encoded['input_ids'][0], encoded['attention_mask'][0]


    class ShopeeNet(nn.Module):
        """Transformer text encoder that returns L2-normalised first-token embeddings."""

        def __init__(self,
                     n_classes,
                     model_name='bert-base-uncased',
                     use_fc=False,
                     fc_dim=512,
                     dropout=0.0):
            """
            :param n_classes: accepted for signature compatibility; not used by this class
            :param model_name: name or path handed to ``transformers.AutoModel.from_pretrained``
            :param use_fc: when True, add dropout -> linear -> batch-norm after the encoder
            :param fc_dim: output size of the optional linear layer
            :param dropout: dropout probability used in front of the optional linear layer
            """
            super(ShopeeNet, self).__init__()

            self.transformer = transformers.AutoModel.from_pretrained(model_name)
            final_in_features = self.transformer.config.hidden_size

            self.use_fc = use_fc

            if use_fc:
                self.dropout = nn.Dropout(p=dropout)
                self.fc = nn.Linear(final_in_features, fc_dim)
                self.bn = nn.BatchNorm1d(fc_dim)
                self._init_params()


        def _init_params(self):
            """Initialise the optional projection: Xavier weights, zero biases, unit BN scale."""
            nn.init.xavier_normal_(self.fc.weight)
            nn.init.constant_(self.fc.bias, 0)
            nn.init.constant_(self.bn.weight, 1)
            nn.init.constant_(self.bn.bias, 0)

        def forward(self, input_ids,attention_mask):
            """Return the normalised embedding of every sequence in the batch."""
            feature = self.extract_feat(input_ids,attention_mask)
            return F.normalize(feature)

        def extract_feat(self, input_ids,attention_mask):
            """Return the un-normalised embedding: first-token state, optionally projected."""
            x = self.transformer(input_ids=input_ids,attention_mask=attention_mask)

            # x[0] is the per-token hidden state; keep only the first token
            features = x[0]
            features = features[:,0,:]

            if self.use_fc:
                features = self.dropout(features)
                features = self.fc(features)
                features = self.bn(features)

            return features


    def get_BERT_embeddings(df):
        """Embed the titles of ``df`` with the fine-tuned ShopeeNet text model.

        Args:
            df: frame with a ``title`` column.

        Returns:
            A 2-D numpy array with one embedding row per row of ``df``, in order
            (the loader does not shuffle and keeps the last partial batch).
        """
        model = ShopeeNet(**model_params)
        model.eval()

        # The checkpoint's final entry is dropped before loading (presumably the
        # training-time classification head -- TODO confirm).
        checkpoint = torch.load(TEXT_MODEL_PATH)
        model.load_state_dict(dict(list(checkpoint.items())[:-1]))
        model = model.to(device)

        text_loader = torch.utils.data.DataLoader(
            ShopeeDataset(df),
            batch_size=BATCH_SIZE,
            pin_memory=True,
            drop_last=False,
            num_workers=NUM_WORKERS
        )

        batches = []
        with torch.no_grad():
            for input_ids, attention_mask in tqdm(text_loader):
                feat = model(input_ids.cuda(), attention_mask.cuda())
                batches.append(feat.detach().cpu().numpy())

        del model
        text_embeddings = np.concatenate(batches)
        print(f'Our text embeddings shape is {text_embeddings.shape}')
        del batches
        gc.collect()
        return text_embeddings


    def get_neighbours_cos_sim(df, embeddings, threshold=0.9):
        '''
        Collect, for every row, the posting ids whose similarity exceeds ``threshold``.

        When using cos_sim use normalized features else use normal features

        Similarities are plain dot products, computed chunk by chunk on the GPU.
        The former threshold-search branch was unreachable (``if False:``) and
        has been removed; pass ``threshold`` explicitly to try other values.

        :param df: frame with a ``posting_id`` column, assumed row-aligned with ``embeddings``
        :param embeddings: 2-D array of row embeddings (copied to the GPU)
        :param threshold: a row matches every posting whose similarity is strictly above this
        :return: ``(df, preds)`` where ``preds`` holds one array of posting ids per row
        '''
        embeddings = cupy.array(embeddings)

        preds = []
        CHUNK = 1024*4

        print('Finding similar texts...for threshold :',threshold)
        n_rows = len(embeddings)

        for a in range(0, n_rows, CHUNK):
            b = min(a + CHUNK, n_rows)
            print('chunk',a,'to',b)

            # similarity of every embedding against the rows a..b of this chunk
            cts = cupy.matmul(embeddings,embeddings[a:b].T).T

            for k in range(b-a):
                IDX = cupy.where(cts[k,]>threshold)[0]
                o = df.iloc[cupy.asnumpy(IDX)].posting_id.values
                preds.append(o)

        return df, preds
    
    
    # Embed all titles, then turn the embeddings into per-row match lists; the
    # frame returned by get_neighbours_cos_sim is not needed here.
    text_embeddings_BERT = get_BERT_embeddings(df)
    _, text_predictions_BERT = get_neighbours_cos_sim(df, text_embeddings_BERT)

# Result

In [None]:
# Attach the per-row prediction lists to df.
df['image_predictions'] = image_predictions
df['text_predictions'] = text_predictions
# Postings that share an identical image_phash are matched to each other.
tmp = df.groupby('image_phash').posting_id.agg('unique').to_dict()
df['phash_predictions'] = df.image_phash.map(tmp)
# Merge the prediction columns into the final `matches` value per row; the
# combine_* helpers are defined outside this part of the notebook.
if BERT:
    df['text_predictions_BERT'] = text_predictions_BERT
    df['matches'] = df.apply(combine_predictions_BERT, axis = 1)
else: 
    df['matches'] = df.apply(combine_predictions, axis = 1)
# Re-read test.csv and left-merge on posting_id, presumably so the submission
# follows the row order of test.csv.
# NOTE(review): this rebinds `df` to the merged frame (now holding only the
# test.csv postings plus columns a/b/c), which the CV cell below then reads;
# re-running this cell also operates on the merged frame. The column rename
# assumes test.csv has exactly four columns -- TODO confirm.
df1 = pd.read_csv('../input/shopee-product-matching/test.csv')    
df1.columns = ['posting_id', 'a', 'b', 'c']
df = pd.merge(df1, df, on = 'posting_id', how = 'left')

df[['posting_id', 'matches']].to_csv('submission.csv', index = False)
df[['posting_id', 'matches']].head()

In [None]:
# Local validation: build the ground-truth match list per row from label_group,
# build the predicted match list, and report the mean of the per-row metric.
# combine_for_cv*, and getMetric are defined outside this part of the notebook.
if COMPUTE_CV:
    tmp = df.groupby('label_group').posting_id.agg('unique').to_dict()
    df['target'] = df.label_group.map(tmp) 
    if BERT:
        df['matches_CV'] = df.apply(combine_for_cv_BERT, axis = 1)
    else:
        df['matches_CV'] = df.apply(combine_for_cv, axis = 1)
    MyCVScore = df.apply(getMetric('matches_CV'), axis=1)
    print('CV score =', MyCVScore.mean())
    
    
# Echo the run configuration so the saved output records which flags produced it.
print(f'COMPUTE_CV = {COMPUTE_CV}')
print(f'SAVE_IMGEMBEDDING = {SAVE_IMGEMBEDDING}')
print(f'EFF_B5 = {EFF_B5}')
print(f'ECA_NFNET_L0 = {ECA_NFNET_L0}')
print(f'MODEL_TESTING_NFNET = {MODEL_TESTING_NFNET}')
print(f'MY_NFNET = {MY_NFNET}')
print(f'BERT = {BERT}')
print(f'DISTILBERT = {DISTILBERT}')
print(f'SAVE_DISTILBERT = {SAVE_DISTILBERT}')
print(f'EMBEDDING34_TH = {EMBEDDING34_TH}')