In [None]:
import sys
sys.path.append("../input/timm-pytorch-image-models/pytorch-image-models-master")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import glob
import gc
from tqdm.auto import tqdm
import os
import sys
import time
import random
import math
import re

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform
import timm

from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA, TruncatedSVD
import cupy
import cuml
import cudf
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors
# from cuml.decomposition import PCA, TruncatedSVD
from cuml.manifold import UMAP

from transformers import AutoTokenizer, AutoModel, BertTokenizer, BertModel, DistilBertTokenizer, DistilBertModel
import transformers


from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from gensim.corpora import Dictionary
from gensim.models import Word2Vec

import warnings
warnings.filterwarnings("ignore")

In [None]:
class CFG:
    """Notebook-wide configuration: run flags, feature settings and checkpoint paths."""
    # run flags
    debug = False
    check_ram = False  # when True (and test.csv has 3 rows) the test frame is replaced by 2x train
    calc_cv = True     # switched off later when test.csv does not have exactly 3 rows
    
    # phash
    n_components = 32  # PCA components kept from the 64-bit phash vectors
    
    # tfidf
    max_features = 25000
    tfidf_thresh = 0.75
    
    # word2vec
    epochs = 100
    vector_size = 512  # embedding size
    window = 5
    
    
    # image models
    
    image_model_name1 = "eca_nfnet_l0"
    size1 = 512
    image_pretrained_path1 = "../input/shopee-pretrained/nfnet_5/arcface_512x512_nfnet_l0(mish).pt"
    image_loss_module1 = "curricularface"
#     pca_components = 128
    
    
    image_model_name2 = "swin_small_patch4_window7_224"
    size2 = 224
    # NOTE(review): the file name mentions vit_base_patch16_224 although model 2 is a swin — confirm the path.
    image_pretrained_path2 = "../input/shopee-pretrained/swin1/curricularface_224x224_vit_base_patch16_224.pt"
    image_loss_module2 = "curricularface"
  
    
    image_model_name3 = "vit_base_patch16_224"
    size3 = 224
    image_pretrained_path3 = "../input/shopee-pretrained/vit1/curricularface_224x224_vit_base_patch16_224.pt"
    image_loss_module3 = "curricularface"
    
    
    # bert models
    transformer_model1 = '../input/sentence-transformer-models/paraphrase-xlm-r-multilingual-v1/0_Transformer'
    max_length1 = 64
    bert_pretrained_path1 = "../input/shopee-pretrained/arcface_sbert3/sentence_transfomer_xlm_best_loss_num_epochs_30_arcface.pth"
    bert_loss_module1 = "arcface"
    
    transformer_model2 = "../input/distilbert-base-indonesian"
    max_length2 = 32
    bert_pretrained_path2 = "../input/shopee-pretrained/distilbert2/distilbert_curricularface_30_.pth"
    bert_loss_module2 = "curricularface"
    
    # others
    n_neighbors = 50
    
    thresh = 0.36
    
    
    # margin-head / embedding settings; used as constructor defaults by the model classes in this file
    classes = 11014
    scale = 30
    margin = 0.5
    fc_dim = 512
    

In [None]:
# Root directory of the competition data; every CSV and image path below is built from it.
PATH = "../input/shopee-product-matching/"

# Training metadata. The commented-out alternatives load pre-computed fold files instead.
train_df = pd.read_csv(PATH + "train.csv")
# train_df = pd.read_csv("../input/shopee-cv-splitting-way/train_folds.csv")
# train_df = pd.read_csv("../input/shopee-cv-folds/train_label_group_5folds.csv")
test_df = pd.read_csv(PATH + "test.csv")
sample_submission = pd.read_csv(PATH + "sample_submission.csv")

In [None]:
# Turn the bare image file names into paths below PATH.
train_df["image"] = train_df["image"].apply(lambda x: PATH + "train_images/" + x)
# if not CFG.check_ram:
test_df["image"] = test_df["image"].apply(lambda x: PATH + "test_images/" + x)

# Ground truth for CV: each row receives the array of posting_ids that share its label_group.
tmp = train_df.groupby('label_group').posting_id.agg('unique').to_dict()
train_df['target'] = train_df.label_group.map(tmp)

# Local CV is only kept when test.csv has exactly 3 rows.
# NOTE(review): presumably 3 rows identifies the public placeholder test set — confirm.
if len(test_df) != 3:
    CFG.calc_cv = False

# Memory check: run the pipeline on the training frame stacked twice.
if CFG.check_ram and (len(test_df)==3):
    test_df = pd.concat([train_df, train_df], axis=0).reset_index()
    
# With CV enabled the pipeline runs on the training frame itself (same object, not a copy).
# NOTE(review): when check_ram and calc_cv are both set this overrides the frame built just above.
if CFG.calc_cv:
    test_df = train_df

In [None]:
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

    :param seed: integer seed shared by every generator; also exported as PYTHONHASHSEED.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The individual seeders are independent of each other, so they can be applied in a loop.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(42)

In [None]:
def getMetric(col):
    """Build a row-wise F1 scorer.

    The returned function compares the ground truth in ``row.target`` with the
    predictions stored in ``row[col]`` and returns 2*TP / (2*TP + FP + FN),
    where duplicates on either side are counted once.
    """
    def f1score(row):
        truth, guess = row.target, row[col]
        hits = len(np.intersect1d(truth, guess))
        false_pos = len(np.setdiff1d(guess, truth))
        false_neg = len(np.setdiff1d(truth, guess))
        return 2 * hits / (2 * hits + false_pos + false_neg)
    return f1score

def get_score(df, col):
    """Return the mean row-wise F1 of prediction column ``col`` against ``df.target``."""
    scorer = getMetric(col)
    row_scores = df.apply(scorer, axis=1)
    return row_scores.mean()

In [None]:
def predict(features, df, thresh=0.90, chunk=1024):
    """Threshold pairwise dot products of ``features`` and collect matching posting_ids.

    :param features: 2-D array-like, one row per row of ``df``; moved to the GPU with cupy.
        The dot product equals cosine similarity only if the rows are already L2-normalised.
    :param df: frame with a ``posting_id`` column, positionally aligned with ``features``.
    :param thresh: a pair is a match when its dot product is strictly greater than this.
    :param chunk: number of query rows scored per matrix multiplication.
    :return: list with one array of matched posting_ids per row.
    """
    features = cupy.asarray(features)
    total = len(features)
    pred = []
    for a in range(0, total, chunk):
        b = min(a + chunk, total)
        print('chunk',a,'to',b)

        # (b-a, total) similarity block between this chunk and every row
        cts = cupy.matmul(features, features[a:b].T).T

        for k in range(b - a):
            matched = cupy.where(cts[k] > thresh)[0]
            pred.append(df.iloc[cupy.asnumpy(matched)].posting_id.values)
        del cts
    return pred

def get_neighbor_images(image_embeddings, df, chunk=4096, thresh=0.5, n_neighbors=50, show=False, metric="euclidean"):
    """Match rows through a k-nearest-neighbour search over ``image_embeddings``.

    :param image_embeddings: 2-D embedding matrix, positionally aligned with ``df``.
    :param df: frame with a ``posting_id`` column.
    :param chunk: number of query rows per ``kneighbors`` call.
    :param thresh: neighbours with distance strictly below this value are kept.
    :param n_neighbors: neighbours retrieved per query row.
    :param show: when True, plot diagnostics for the first chunk via ``show_barplot``.
    :param metric: distance metric passed to ``NearestNeighbors``.
    :return: list with one array of matched posting_ids per row.
    """
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
    knn.fit(image_embeddings)

    print('Finding similar images...')
    total = len(image_embeddings)
    preds = []
    for start in range(0, total, chunk):
        stop = min(start + chunk, total)
        print('chunk',start,'to',stop)
        distances, indices = knn.kneighbors(image_embeddings[start:stop,])
        if start == 0 and show:
            show_barplot(distances, indices, df)

        for k in range(stop - start):
            close = np.where(distances[k,]<thresh)[0]
            neighbour_rows = indices[k,close]
            preds.append(df.iloc[neighbour_rows].posting_id.values)

    return preds

def displayDF(train, random=False, COLS=6, ROWS=4, path=""):
    """Show a ROWS x COLS grid of product images with their (line-wrapped) titles.

    :param train: frame with ``image`` (file path) and ``title`` columns.
    :param random: pick rows at random instead of taking them in positional order.
    :param COLS: images per figure row.
    :param ROWS: number of figure rows.
    :param path: prefix prepended to every image path.
    """
    for k in range(ROWS):
        plt.figure(figsize=(20,5))
        for j in range(COLS):
            row = np.random.randint(0,len(train)) if random else COLS*k + j
            name = train["image"].values[row]
            title = train["title"].values[row]
            # break the title after every 20th character (but not after the very first one)
            wrapped = "".join(
                ch + '\n' if (i != 0 and i % 20 == 0) else ch
                for i, ch in enumerate(title)
            )
            img = cv2.imread(path+name)[:, :, ::-1]  # BGR -> RGB
            plt.subplot(1,COLS,j+1)
            plt.title(wrapped)
            plt.axis('off')
            plt.imshow(img)
        plt.show()
        
def show_barplot(distances, indices,  df, num_show=8):
    """Plot neighbour distances for the first query rows and display their nearest items.

    :param distances: (n_queries, n_neighbors) distance matrix (numpy or cupy).
    :param indices: (n_queries, n_neighbors) positional row indices into ``df``.
    :param df: frame with ``image``, ``title`` and ``label_group`` columns.
    :param num_show: maximum number of query rows to visualise.
    """
    distances = cupy.asnumpy(distances)
    indices = cupy.asnumpy(indices)
    n_neighbors = distances.shape[1]

    # Never index past the number of query rows actually available.
    for k in range(min(num_show, len(distances))):
        plt.figure(figsize=(20,3))
        # x-axis follows the real neighbour count instead of a fixed 50
        plt.plot(np.arange(n_neighbors), distances[k,],'o-')
        plt.title('Text Distance From Train Row %i to Other Train Rows'%k,size=16)
        plt.ylabel('Distance to Train Row %i'%k,size=14)
        plt.xlabel('Index Sorted by Distance to Train Row %i'%k,size=14)
        plt.show()

        # kNN indices are positional, so use iloc (consistent with get_neighbor_images)
        cluster = df.iloc[indices[k,:8]]
        if len(cluster) > 0:
            # shrink the 2x4 grid when fewer than 8 neighbours exist
            cols = min(4, len(cluster))
            rows = min(2, len(cluster) // cols)
            displayDF(cluster, random=False, ROWS=rows, COLS=cols)
        print( cluster[['title','label_group']] )

In [None]:
def combine_for_sub(row, cols, count=1):
    """Merge the prediction arrays in ``cols`` into one space-separated submission string.

    An id is kept when it occurs at least ``count`` times across the concatenated arrays.
    """
    pooled = np.concatenate([row[name] for name in cols])
    ids, freq = np.unique(pooled, return_counts=True)
    keep = freq >= count
    return ' '.join(ids[keep])

def combine_for_cv(row, cols, count=1):
    """Merge the prediction arrays in ``cols`` into one sorted array of unique ids.

    An id is kept when it occurs at least ``count`` times across the concatenated arrays.
    """
    pooled = np.concatenate([row[name] for name in cols])
    ids, freq = np.unique(pooled, return_counts=True)
    keep = freq >= count
    return ids[keep]

# text preprocessing

In [None]:
def remove_emoji(title):
    r"""Strip escaped byte sequences (literal ``\xHH`` text) left behind by emoji.

    Every run of one or more consecutive ``\xHH`` groups is removed; all other
    characters, including surrounding spaces, are kept.

    title: str
    example:
    >>> remove_emoji('\\xe2\\x9d\\xa4 RATU \\xe2\\x9d\\xa4 MAYCREATE MOISTURIZING SPRAY')
    ' RATU  MAYCREATE MOISTURIZING SPRAY'
    """
    # A single regex pass handles any number of matches, including exactly one
    # (which previously raised IndexError) and runs that share a common prefix.
    return re.sub(r"(?:\\x[0-9a-fA-F][0-9a-fA-F])+", "", title)
    
def lower_title(title):
    """Return ``title`` converted to lower case."""
    lowered = title.lower()
    return lowered
    
def unify_units(title):
    """Normalise "<number> <unit>" expressions so the number and a canonical unit are glued together.

    A space is appended first so that a unit at the very end of the title is recognised,
    and every replacement re-inserts one space after the unit. Expects a lower-cased title.
    """
    # (pattern, replacement) pairs; they are applied strictly in this order.
    rules = (
        # gram, kg, mg
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)(gram|grm|gr\s|g\s)", r"\1gram "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)kg", r"\1kg "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)mg", r"\1mg "),
        # L, ml
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)(liter|l\s)", r"\1liter "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)ml", r"\1ml "),
        # m, cm, km, mm
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)(meter|m\s)", r"\1meter "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)cm", r"\1cm "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)km", r"\1km "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)mm", r"\1mm "),
        # pcs, pieces
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)(pcs|pieces)", r"\1pcs "),
        # watt
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)watt", r"\1watt "),
        # GB MB TB
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)gb", r"\1gb "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)mb", r"\1mb "),
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)tb", r"\1tb "),
        # ply
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)ply", r"\1ply "),
        # inch
        (r"(([1-9]\d*|0)(\.\d+)?)(\s|)inch", r"\1inch "),
    )

    title += " " # insert space in order to recognize the last units
    for pattern, replacement in rules:
        title = re.sub(pattern, replacement, title)
    return title

def remove_letters(title):
    """Replace punctuation/symbols with spaces and split words joined by '-' or '/'.

    Expects a lower-cased title. '.', ',', '%', '&' and '+' inside words are kept on
    purpose ('.' must survive because it carries decimal points). Leading/trailing
    whitespace is stripped and runs of whitespace are collapsed to a single space.
    """
    title = re.sub('[!"#$\'\\\\()*:;<=>?@[\\]^_`{|}~「」〔〕“”〈〉『』【】＆＊・（）＄＃＠。、？！｀＋￥％]', " ", title)

    # / and - between a letter and a letter/digit become a space.
    # The replacement must be a raw string: "\1" in a normal string is the control
    # character chr(1), not a backreference.
    title = re.sub(r"([a-z0-9])(-|/)([a-z])", r"\1 \3", title)
    title = re.sub(r"([a-z])(-|/)([a-z0-9])", r"\1 \3", title)

    # free-standing separators
    title = re.sub(r"\s(-|/|\+)\s", " ", title)

    # remove sequential space
    title = title.strip()
    title = re.sub(r"\s{2,}", " ", title)
    return title

def convert_comma(title):
    """Turn a decimal comma between two digits into a decimal point ("1,5" -> "1.5")."""
    digit_comma_digit = re.compile(r"([0-9]),([0-9])")
    return digit_comma_digit.sub(r"\1.\2", title)
    
def text_preprocessing(text):
    """Clean a raw title: drop escaped emoji bytes, lower-case, fix decimal commas, unify units.

    ``remove_letters`` is intentionally not part of the pipeline.
    """
    cleaned = remove_emoji(text).lower()
    return unify_units(convert_comma(cleaned))

# TFIDF

In [None]:
def predict_tfidf(df, title_col, max_features=25000, thresh=0.5, norm=False):
    """Match rows by thresholding dot products of binary TF-IDF title vectors.

    :param df: frame with ``posting_id`` and the ``title_col`` column.
    :param title_col: name of the text column to vectorise.
    :param max_features: vocabulary size limit passed to the TF-IDF vectoriser.
    :param thresh: similarity threshold handed to ``predict``.
    :param norm: when True, re-normalise the dense vectors on the host (as float16) first.
    :return: list with one array of matched posting_ids per row.
    """
    df = df.loc[:, ["posting_id", title_col]]
    gf = cudf.DataFrame(df)
    # honour the max_features argument (it used to be hard-coded to 25000)
    model = TfidfVectorizer(stop_words="english", binary=True, max_features=max_features, dtype=np.float32)
    text_embeddings = model.fit_transform(gf[title_col]).toarray()
    print(text_embeddings.shape)
    del model
    gc.collect()
    if norm:
        text_embeddings = cupy.asnumpy(text_embeddings)
        text_embeddings = text_embeddings.astype(np.float16)
        text_embeddings = normalize(text_embeddings)
        text_embeddings = cupy.asarray(text_embeddings)
    print("start predicting")
    preds = predict(text_embeddings, df, thresh=thresh)
    del text_embeddings
    return preds

# word2vec

In [None]:
def get_word2vec_embeddings(titles, vector_size=50, window=5, min_count=1, workers=4, epochs=1500, negative=10, sg=1):
    """Train Word2Vec on the titles and embed each title as the mean of its word vectors.

    :param titles: iterable of whitespace-tokenisable strings.
    :param vector_size, window, min_count, workers, epochs, negative, sg: passed to ``Word2Vec``.
    :return: float array of shape (len(titles), vector_size). A title without any
        in-vocabulary token (empty title, or all tokens dropped by ``min_count``)
        is embedded as the zero vector.

    Side effect: the trained model is saved to "word2vec_pretrained.bin".
    """
    titles_tokens = [title.split() for title in titles]
    print("start word2vec training")
    t0 = time.time()
    model = Word2Vec(titles_tokens, vector_size=vector_size, window=window, min_count=min_count, workers=workers, epochs=epochs, negative=negative, sg=sg)
    print(f"end word2vec training {time.time()-t0:.1f}s")

    vocab = model.wv.key_to_index
    embeddings = []
    for tokens in titles_tokens:
        vectors = [model.wv[w] for w in tokens if w in vocab]
        if vectors:
            embeddings.append(np.mean(vectors, axis=0))
        else:
            # keep the output rectangular instead of appending a NaN scalar
            embeddings.append(np.zeros(vector_size, dtype=np.float32))
    model.save("word2vec_pretrained.bin")
    return np.array(embeddings)

# bert 

In [None]:
class ShopeeTextDataset(Dataset):
    """Dataset that tokenises one title per item.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (first element of the
    tokenizer output, padded/truncated to ``max_length``) and the matching ``label``.
    """

    def __init__(self, title, label, tokenizer=None, max_length=64):
        self.title, self.label = title, label
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        return len(self.title)

    def __getitem__(self, idx):
        text, target = self.title[idx], self.label[idx]
        encoded = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        # the tokenizer returns a batch of size one; unwrap it
        return {
            "input_ids": encoded['input_ids'][0],
            "attention_mask": encoded['attention_mask'][0],
            "label": target,
        }

In [None]:
class ShopeeNet(nn.Module):
    """Transformer text encoder with an optional FC/BN/ReLU neck and a classification head."""

    def __init__(self,
                 n_classes,
                 model_name='bert-base-uncased',
                 pooling='mean_pooling',
                 use_fc=True,
                 fc_dim=512,
                 dropout=0.0,
                 loss_module='softmax',
                 s=30.0,
                 margin=0.50,
                 ls_eps=0.0,
                 theta_zero=0.785,
                 distil=False):
        """
        :param n_classes: number of classes of the head.
        :param model_name: name or path accepted by ``from_pretrained``.
        :param pooling: stored on the module but not used; ``extract_feat`` always takes
            the first token of the last hidden state.
        :param use_fc: add dropout -> linear -> batch norm -> ReLU on top of the encoder.
        :param fc_dim: output size of that linear layer.
        :param dropout: dropout probability of the neck.
        :param loss_module: 'arcface', 'curricularface', or anything else for a plain linear head.
        :param s: scale of the margin heads.
        :param margin: margin of the margin heads.
        :param ls_eps: label smoothing, arcface head only.
        :param theta_zero: accepted for signature compatibility; unused.
        :param distil: load the encoder with ``DistilBertModel`` instead of ``AutoModel``.
        """
        super(ShopeeNet, self).__init__()
        if distil:
            self.transformer = transformers.DistilBertModel.from_pretrained(model_name)
        else:
            self.transformer = transformers.AutoModel.from_pretrained(model_name)
        final_in_features = self.transformer.config.hidden_size

        self.pooling = pooling
        self.use_fc = use_fc

        if use_fc:
            self.dropout = nn.Dropout(p=dropout)
            self.fc = nn.Linear(final_in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self.relu = nn.ReLU()
            self._init_params()
            final_in_features = fc_dim

        self.loss_module = loss_module
        if loss_module == 'arcface':
            self.final = ArcMarginProduct(final_in_features, n_classes,
                                          scale=s, margin=margin, easy_margin=False, ls_eps=ls_eps)
        elif loss_module == "curricularface":
            self.final = CurricularFace(final_in_features, n_classes,
                                           s=s, m=margin)
        else:
            self.final = nn.Linear(final_in_features, n_classes)

    def _init_params(self):
        """Xavier-initialise the neck's linear layer and reset its batch norm to identity."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, input_ids,attention_mask, label=None):
        """Return the embedding when ``label`` is None.

        With a label, margin heads return ``(feature, logits, loss)`` and the
        plain linear head returns ``(feature, logits)``.
        """
        feature = self.extract_feat(input_ids,attention_mask)
        if label is None:
            return feature
        # Both margin heads need the label and return (logits, loss); previously the
        # curricularface head was called without it and raised TypeError.
        if self.loss_module in ('arcface', 'curricularface'):
            logits, loss = self.final(feature, label)
            return feature, logits, loss
        logits = self.final(feature)
        return feature, logits

    def extract_feat(self, input_ids,attention_mask):
        """Encode the batch and return the (optionally projected) first-token representation."""
        x = self.transformer(input_ids=input_ids,attention_mask=attention_mask)

        features = x[0]              # last hidden state
        features = features[:,0,:]   # first token of every sequence
        if self.use_fc:
            features = self.dropout(features)
            features = self.fc(features)
            features = self.bn(features)
            features = self.relu(features)

        return features

In [None]:
def get_bert_embeddings(title, transformer_model, pretrained_path, max_length=64, loss_module="arcface"):
    """Embed titles with a fine-tuned ``ShopeeNet`` checkpoint.

    :param title: array of title strings.
    :param transformer_model: model name/path; a path containing "distilbert" selects the
        DistilBERT tokenizer and encoder.
    :param pretrained_path: state dict loaded into ``ShopeeNet``.
    :param max_length: tokenizer padding/truncation length.
    :param loss_module: head type the checkpoint was trained with.
    :return: numpy array with one embedding row per title.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    distil = "distilbert" in transformer_model
    tokenizer_cls = transformers.DistilBertTokenizer if distil else AutoTokenizer
    tokenizer = tokenizer_cls.from_pretrained(transformer_model)

    # labels are dummies; only the embeddings are needed at inference time
    ds = ShopeeTextDataset(title, np.zeros_like(title), tokenizer, max_length=max_length)
    data_loader = DataLoader(ds, batch_size=32, shuffle=False, pin_memory=False)

    model = ShopeeNet(n_classes=CFG.classes, model_name=transformer_model, pooling="clf",
                     use_fc=True, fc_dim=CFG.fc_dim, loss_module=loss_module, distil=distil)
    state = torch.load(pretrained_path, map_location="cpu")
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    feats = []
    with torch.no_grad():
        for batch in tqdm(data_loader):
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            feats.append(model(ids, mask).detach().cpu().numpy())
    del model
    gc.collect()
    return np.concatenate(feats)

In [None]:
# phash

In [None]:
def phash2bin(phash):
    """Convert a hexadecimal perceptual hash into a 64-character string of '0'/'1'.

    The result is zero-padded on the left. Padding with spaces (format "64b") would
    make ``vectorize_bin`` fail on ``int(' ')`` for every hash whose top bit is 0.
    """
    return format(int(phash, 16), "064b")

def vectorize_bin(bins):
    """Turn an array of digit strings into a 2-D integer array, one column per character.

    bins: np.array
    vectorize_bin(train_df["bin"].values)
    """
    return np.array([[int(ch) for ch in bits] for bits in bins])

def get_phash_embeddings(df, n_components=32):
    """Project the bit vectors of ``df.image_phash`` onto ``n_components`` PCA components.

    Side effect: adds a ``phash_bin`` column to ``df``.
    """
    print("getting phash embeddings")
    df["phash_bin"] = df["image_phash"].apply(phash2bin)
    bit_matrix = vectorize_bin(df["phash_bin"].values)
    print(bit_matrix.shape)
    return PCA(n_components=n_components).fit_transform(bit_matrix)

# Image Dataset

In [None]:
class ShopeeImageDataset(Dataset):
    """Dataset that loads an image and returns it in two variants.

    Each item is ``{"image": ..., "cropped": ...}``: ``image`` is the output of
    ``transforms`` and ``cropped`` the output of ``cc_transforms``. A missing transform
    falls back to the other variant (or to the raw RGB array when both are missing).
    """

    def __init__(self, x, transforms=None, cc_transforms=None):
        """
        x: np.array of image file paths
        transforms: albumentations-style callable taking ``image=`` and returning a dict
        cc_transforms: same interface, used for the centre-cropped variant
        """
        self.x = x
        self.transforms = transforms
        self.cc_transforms = cc_transforms

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        path = self.x[idx]
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None; fail with a useful message
            raise FileNotFoundError(f"could not read image: {path}")
        # BGR -> RGB; made contiguous because the reversed view has negative strides
        image = np.ascontiguousarray(bgr[:, :, ::-1])

        # previously a missing `transforms` left both outputs undefined
        image_ = self.transforms(image=image)["image"] if self.transforms else image
        if self.cc_transforms:
            cropped = self.cc_transforms(image=image)["image"]
        else:
            cropped = image_
        return {"image": image_, "cropped": cropped}

In [None]:
def get_transforms(size, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    """Inference pipeline: resize to ``size`` x ``size``, normalise, convert to a tensor."""
    steps = [
        A.Resize(size, size),
        A.Normalize(mean, std),
        ToTensorV2(),
    ]
    return A.Compose(steps)
def get_cc_transforms(size, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], resized_size=600):
    """Centre-crop pipeline: resize to ``resized_size``, crop the central ``size`` square,
    normalise, convert to a tensor."""
    steps = [
        A.Resize(resized_size, resized_size),
        A.CenterCrop(size, size),
        A.Normalize(mean, std),
        ToTensorV2(),
    ]
    return A.Compose(steps)

# Arcmargin product, CurricularFace

In [None]:
class ArcMarginProduct(nn.Module):
    """ArcFace head: cosine logits with an additive angular margin on the target class.

    ``forward(input, label)`` returns ``(scaled_logits, cross_entropy_loss)``.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0):
        """
        :param in_features: embedding size.
        :param out_features: number of classes.
        :param scale: factor applied to the final logits.
        :param margin: angular margin (radians) added to the target angle.
        :param easy_margin: apply the margin only where cosine > 0.
        :param ls_eps: label-smoothing factor for the one-hot target mask.
        """
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # rounding can push cosine^2 slightly above 1; clamp to keep sqrt free of NaN
        sine = torch.sqrt((1.0 - cosine*cosine).clamp(min=0.0))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + margin)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # build the mask on the logits' own device instead of a hard-coded 'cuda'
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features

        # margin logit for the target class, plain cosine elsewhere
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale
        return output, nn.CrossEntropyLoss()(output,label)

def l2_norm(input, axis = 1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (the norm keeps that dimension)."""
    return torch.div(input, input.norm(2, axis, True))

class CurricularFace(nn.Module):
    """CurricularFace head: margin on the target logit plus re-weighting of hard negatives.

    ``forward(embbedings, label)`` returns ``(scaled_logits, cross_entropy_loss)``.
    """
    def __init__(self, in_features, out_features, s = 30, m = 0.50):
        """
        :param in_features: embedding size.
        :param out_features: number of classes.
        :param s: factor applied to the final logits.
        :param m: angular margin (radians) added to the target angle.
        """
        super(CurricularFace, self).__init__()

        print('Using Curricular Face')

        self.in_features = in_features
        self.out_features = out_features
        self.m = m
        self.s = s
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.threshold = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
        self.kernel = nn.Parameter(torch.Tensor(in_features, out_features))
        # running statistic `t`; stored as a buffer so it is part of the state dict
        self.register_buffer('t', torch.zeros(1))
        nn.init.normal_(self.kernel, std=0.01)

    def forward(self, embbedings, label):
        # cosine between row-normalised embeddings and column-normalised class weights
        embbedings = l2_norm(embbedings, axis = 1)
        kernel_norm = l2_norm(self.kernel, axis = 0)
        cos_theta = torch.mm(embbedings, kernel_norm)
        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        with torch.no_grad():
            origin_cos = cos_theta.clone()  # not read again in this method
        # cosine of each sample to its own class, shape (batch, 1)
        target_logit = cos_theta[torch.arange(0, embbedings.size(0)), label].view(-1, 1)

        sin_theta = torch.sqrt(1.0 - torch.pow(target_logit, 2))
        cos_theta_m = target_logit * self.cos_m - sin_theta * self.sin_m #cos(target+margin)
        # entries whose cosine exceeds the margin-adjusted target cosine
        mask = cos_theta > cos_theta_m
        final_target_logit = torch.where(target_logit > self.threshold, cos_theta_m, target_logit - self.mm)

        hard_example = cos_theta[mask]
        with torch.no_grad():
            # exponential moving average of the mean target cosine (momentum 0.99)
            self.t = target_logit.mean() * 0.01 + (1 - 0.01) * self.t
        # re-weight the masked entries in place, then overwrite the target column
        cos_theta[mask] = hard_example * (self.t + hard_example)
        cos_theta.scatter_(1, label.view(-1, 1).long(), final_target_logit)
        output = cos_theta * self.s
        return output, nn.CrossEntropyLoss()(output,label)

# Image model

In [None]:
class ShopeeModel4(nn.Module):
    """timm image backbone with an optional FC/BN neck and a margin head.

    In training mode ``forward`` returns whatever the head returns; in eval mode it
    returns the embedding from ``extract_feat``.
    """

    def __init__(
        self,
        n_classes = CFG.classes,
        model_name = "eca_nfnet_l0",
        fc_dim = CFG.fc_dim,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = True,
        pretrained = True,
        loss_module="arcface"):
        """
        :param n_classes: number of classes of the head.
        :param model_name: timm model name; selects which classifier attribute is stripped.
        :param fc_dim: output size of the neck's linear layer.
        :param margin: margin of the head.
        :param scale: scale of the head.
        :param use_fc: add dropout -> linear -> batch norm after pooling.
        :param pretrained: passed to ``timm.create_model``.
        :param loss_module: "curricularface" or "arcface".
        """


        super(ShopeeModel4,self).__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Remove the backbone's own classifier (and pooling for the CNN families).
        # NOTE(review): if model_name matches none of these branches, final_in_features
        # is never assigned and the code below that reads it fails.
        if model_name == 'resnext50_32x4d':
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif 'efficientnet' in model_name:
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()
        
        elif 'nfnet' in model_name:
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()
        
        elif ("swin" in model_name) or ("vit" in model_name):
            final_in_features = self.backbone.head.in_features
            self.backbone.head = nn.Identity()
        
        # swin/vit backbones are used without extra pooling; the others get global average pooling
        if ("swin" in model_name) or ("vit" in model_name):
            self.pooling = nn.Identity()
        else:
            self.pooling =  nn.AdaptiveAvgPool2d(1)
#         self.pooling =  nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc

        if use_fc:
            self.dropout = nn.Dropout(p=0.0)
            self.fc = nn.Linear(final_in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self._init_params()
            final_in_features = fc_dim
            
        # NOTE(review): any other loss_module value leaves self.final undefined.
        if loss_module == "curricularface":
            self.final = CurricularFace(final_in_features, 
                                               n_classes, 
                                               s=scale, 
                                               m=margin)
        elif loss_module == "arcface":
            self.final = ArcMarginProduct(final_in_features,
                                            n_classes,
                                            scale = scale,
                                            margin = margin,
                                            easy_margin = False,
                                            ls_eps = 0.0)

    def _init_params(self):
        """Xavier-initialise the neck's linear layer and reset its batch norm to identity."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label=None):
        """Training mode: output of the head for (feature, label). Eval mode: the embedding."""
        feature = self.extract_feat(image)
        if self.training:
            logits = self.final(feature, label)
            return logits
        else:
            return feature

    def extract_feat(self, x):
        """Backbone -> pooling -> flatten to (batch, -1) -> optional dropout/linear/batch norm."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)
        return x

In [None]:
class Mish_func(torch.autograd.Function):

    """Mish activation, x * tanh(softplus(x)), with a hand-written backward pass.

    Only the input is saved for backward; the intermediates are recomputed there.
    from: https://github.com/tyunist/memory_efficient_mish_swish/blob/master/mish.py
    """

    @staticmethod
    def forward(ctx, i):
        result = i * torch.tanh(F.softplus(i))
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        # saved_tensors is the supported accessor; saved_variables is deprecated
        (i,) = ctx.saved_tensors
        v = 1. + i.exp()
        h = v.log()                      # softplus(i)
        grad_gh = 1./h.cosh().pow_(2)    # d tanh(h) / dh = sech^2(h)
        grad_hx = i.sigmoid()            # d softplus(i) / di
        grad_gx = grad_gh *  grad_hx
        grad_f =  torch.tanh(F.softplus(i)) + i * grad_gx
        return grad_output * grad_f


class Mish(nn.Module):
    """Module wrapper around ``Mish_func``; keyword arguments are accepted and ignored."""

    def __init__(self, **kwargs):
        super().__init__()

    def forward(self, input_tensor):
        activated = Mish_func.apply(input_tensor)
        return activated


def replace_activations(model, existing_layer, new_layer):
    """Recursively swap every sub-module whose exact type is ``existing_layer``.

    The model is modified in place and returned. The same ``new_layer`` instance is
    installed at every replaced position.
    """
    for child_name, child in reversed(model._modules.items()):
        has_children = len(list(child.children())) > 0
        if has_children:
            # descend first; the recursive call returns the (mutated) child itself
            model._modules[child_name] = replace_activations(child, existing_layer, new_layer)
        if type(child) == existing_layer:
            model._modules[child_name] = new_layer
    return model

In [None]:
class ShopeeModel(nn.Module):
    """timm backbone (accessed via ``.classifier``/``.global_pool``) with an ArcFace head.

    In training mode ``forward`` returns the head output; in eval mode it returns the
    features from ``extract_features``.
    """

    def __init__(
        self,
        n_classes = CFG.classes,
        model_name = "tf_efficientnet_b7_ns",
        fc_dim = CFG.fc_dim,
        margin = CFG.margin,
        scale = CFG.scale,
        use_fc = True,
        pretrained = True):
        """
        :param n_classes: number of classes of the ArcFace head.
        :param model_name: timm model name; the model must expose ``classifier`` and ``global_pool``.
        :param fc_dim: output size of the neck's linear layer.
        :param margin: ArcFace margin.
        :param scale: ArcFace scale.
        :param use_fc: create the dropout -> linear -> batch norm neck.
        :param pretrained: passed to ``timm.create_model``.
        """

        super(ShopeeModel,self).__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.backbone.classifier.in_features
        # strip the backbone's own classifier and pooling; pooling is re-added below
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling =  nn.AdaptiveAvgPool2d(1)
        self.use_fc = use_fc

        if use_fc:
            self.dropout = nn.Dropout(p=0.1)
            self.classifier = nn.Linear(in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self._init_params()
            in_features = fc_dim

        self.final = ArcMarginProduct(
            in_features,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    def _init_params(self):
        """Xavier-initialise the neck's linear layer and reset its batch norm to identity."""
        nn.init.xavier_normal_(self.classifier.weight)
        nn.init.constant_(self.classifier.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label=None):
        """Training mode: output of the ArcFace head. Eval mode: the extracted features."""
        features = self.extract_features(image)
        if self.training:
            logits = self.final(features, label)
            return logits
        else:
            return features

    def extract_features(self, x):
        """Backbone -> global average pooling -> flatten; the neck runs in training mode only."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        # NOTE(review): the neck is skipped in eval mode, so inference returns the pooled
        # backbone features rather than the fc_dim projection — confirm this is intended.
        if self.use_fc and self.training:
            x = self.dropout(x)
            x = self.classifier(x)
            x = self.bn(x)
        return x

# Image Embeddings

In [None]:
def get_image_embeddings_from_model(x, model, transforms=None, batch_size=16, tta=False, cc_transforms=None):
    """Run ``model`` in eval mode over the images in ``x`` and stack the outputs.

    :param x: array of image paths.
    :param model: module returning one embedding row per input image.
    :param transforms: pipeline producing the "image" variant.
    :param batch_size: DataLoader batch size.
    :param tta: when True, embed the "image" and "cropped" variants and average them.
    :param cc_transforms: pipeline producing the "cropped" variant.
    :return: numpy array with one embedding row per image.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    loader = DataLoader(
        ShopeeImageDataset(x, transforms, cc_transforms),
        shuffle=False,
        batch_size=batch_size,
        pin_memory=False,
        num_workers=2,
    )

    model.eval()
    model.to(device)
    chunks = []
    with torch.no_grad():
        for batch in tqdm(loader):
            image = batch["image"].to(device)
            if tta:
                cropped = batch["cropped"].to(device)
                bs, _, size, _ = image.shape
                # (2, bs, 3, H, W) -> (2*bs, 3, H, W): both variants in one forward pass
                image = torch.stack([image, cropped], 0).view(-1, 3, size, size)
            pred = model(image)
            if tta:
                # undo the stacking and average the two variants
                pred = pred.view(2, bs, -1).mean(0)
            chunks.append(pred.detach().cpu().numpy())
            del pred

    return np.concatenate(chunks, axis=0)

In [None]:
def get_image_embeddings(images, model_name, pretrained_path, size, replace_mish=False, tta=True, loss_module="arcface"):
    """Build an image model, load its weights and embed `images`.

    Args:
        images: forwarded to `get_image_embeddings_from_model` as its input.
        model_name: backbone name. "tf_efficientnet_b7_ns" selects `ShopeeModel`;
            every other name selects `ShopeeModel4`. Names containing "vit" use
            mean/std of 0.5, all others use the ImageNet statistics.
        pretrained_path: path of the state dict loaded into the model.
        size: image size passed to `get_transforms` / `get_cc_transforms`.
        replace_mish: when True, `torch.nn.SiLU` layers are replaced by `Mish`
            (via `replace_activations`) before the weights are loaded.
        tta: when True, a second set of transforms resized to `int(size * 1.2)`
            is built and test-time augmentation is enabled downstream.
        loss_module: forwarded to `ShopeeModel4`.

    Returns:
        The value returned by `get_image_embeddings_from_model`.
    """
    if model_name == "tf_efficientnet_b7_ns":
        model = ShopeeModel(pretrained=False, model_name=model_name)
    else:
        model = ShopeeModel4(pretrained=False, model_name=model_name, loss_module=loss_module)
    if replace_mish:
        existing_layer = torch.nn.SiLU
        new_layer = Mish()
        model = replace_activations(model, existing_layer, new_layer)
        print("replaced mish")

    # Load on CPU first; the model is moved to the target device downstream.
    model.load_state_dict(
        torch.load(pretrained_path, map_location="cpu")
    )
    model.eval()

    if "vit" in model_name:
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    else:
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

    transforms = get_transforms(size, mean=mean, std=std)
    if tta:
        # Fix: the cropped view must be normalised with the same std as the main
        # view (the original passed `std=mean`, which only coincides for "vit").
        cc_transforms = get_cc_transforms(size, mean=mean, std=std, resized_size=int(size * 1.2))
    else:
        cc_transforms = None
    # Fix: honour the caller's `tta` flag instead of always enabling TTA, which
    # would run the TTA path with `cc_transforms=None` when tta=False.
    image_embedding = get_image_embeddings_from_model(
        images, model, transforms=transforms, batch_size=32, tta=tta, cc_transforms=cc_transforms
    )
    return image_embedding


# main

In [None]:
# Inference pipeline: build several embedding sets for `test_df`, concatenate them,
# run a nearest-neighbour match, merge that with the TF-IDF prediction and write
# `submission.csv` (plus `oof.csv` and CV scores when CFG.calc_cv is set).
if __name__ == "__main__":
    print("start")
    
    # NOTE(review): `device` is not referenced again inside this block; it may be read
    # as a module-level global by helpers defined elsewhere — confirm before removing.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # A 3-row frame cannot support the configured neighbour/component counts, so
    # shrink them (presumably this is the tiny placeholder test set — confirm).
    if len(test_df) == 3:
        CFG.n_neighbors = 2
        CFG.n_components = 1
        CFG.pca_components = 1
        
    # ----- text preprocessing ----------
    test_df["preprocessed_title"] = test_df["title"].apply(text_preprocessing)
    titles = test_df["preprocessed_title"].values
    
    # ------- phash ----------
    phash_embeddings = get_phash_embeddings(test_df, n_components=CFG.n_components)
    
    
    # ------ TFIDF -----------
    # The TF-IDF result is stored as its own prediction column and merged with the
    # embedding-based prediction in the "combine" step at the end.
    tfidf_pred = predict_tfidf(test_df, "preprocessed_title", thresh=CFG.tfidf_thresh, max_features=CFG.max_features)
    test_df["tfidf_pred"] = tfidf_pred
    
    if CFG.calc_cv:
        score = get_score(test_df, "tfidf_pred")
        print(f"CV for tfidf is {score}")
        
    del tfidf_pred
    gc.collect()
    
    
    # --------- word2vec -----------

    word2vec_embeddings = get_word2vec_embeddings(titles, vector_size=CFG.vector_size, window=CFG.window, epochs=CFG.epochs)
    
        
    # ------- Image -------------
    # Three image models are embedded independently; only the first one has its
    # SiLU activations swapped for Mish before the weights are loaded.
    images = test_df["image"].values
    image_embeddings1 = get_image_embeddings(images, CFG.image_model_name1, CFG.image_pretrained_path1,
                                 CFG.size1, replace_mish=True, tta=True, loss_module=CFG.image_loss_module1)
    
    image_embeddings2 = get_image_embeddings(images, CFG.image_model_name2, CFG.image_pretrained_path2,
                                 CFG.size2, replace_mish=False, tta=True, loss_module=CFG.image_loss_module2)
    
    image_embeddings3 = get_image_embeddings(images, CFG.image_model_name3, CFG.image_pretrained_path3,
                                 CFG.size3, replace_mish=False, tta=True, loss_module=CFG.image_loss_module3)
    

#     image_embeddings_pca = PCA(n_components=CFG.pca_components).fit_transform(image_embeddings1)
    
    
    # ------- bert ---------------
    bert_embeddings1 = get_bert_embeddings(titles, CFG.transformer_model1, CFG.bert_pretrained_path1, max_length=CFG.max_length1, loss_module=CFG.bert_loss_module1)
    bert_embeddings2 = get_bert_embeddings(titles, CFG.transformer_model2, CFG.bert_pretrained_path2, max_length=CFG.max_length2, loss_module=CFG.bert_loss_module2)

    
    # ---------- concat --------
    # All embedding sets are joined column-wise into one vector per row; every
    # array therefore needs the same number of rows, in the same order as test_df.
    embeddings = np.concatenate([image_embeddings1,
                                 bert_embeddings1,
                                 phash_embeddings,
                                 word2vec_embeddings,
#                                  image_embeddings_pca,
                                 image_embeddings2,
                                 bert_embeddings2,
                                 image_embeddings3,
                                ], axis=1)
    print("embedding shape ",embeddings.shape)
    pred = get_neighbor_images(embeddings, test_df, thresh=CFG.thresh, n_neighbors=CFG.n_neighbors, metric="cosine")
    test_df["pred"] = pred
    
    # NOTE(review): only three arrays are released here; image_embeddings2/3,
    # bert_embeddings2, phash_embeddings and word2vec_embeddings stay referenced.
    del embeddings, image_embeddings1, bert_embeddings1
    gc.collect()
    
    if CFG.calc_cv:
        score = get_score(test_df, "pred")
        print("Score is ", score)
        
    # ---------- combine ------------
    pred_cols = ["tfidf_pred", "pred"]
    if CFG.calc_cv:
        # CV variant of "matches" is scored and dumped to oof.csv; the column is
        # then overwritten below with the submission variant.
        test_df["matches"] = test_df.apply(lambda x: combine_for_cv(x, pred_cols, count=1), axis=1)
        score = get_score(test_df, "matches")
        print("cv score: ", score)
        test_df.to_csv("oof.csv", index=False)
    test_df["matches"] = test_df.apply(lambda x: combine_for_sub(x, pred_cols, count=1), axis=1)
    
    test_df[['posting_id','matches']].to_csv('submission.csv',index=False)

In [None]:
# Sanity check: read back the submission.csv written by the main cell and show its first rows.
pd.read_csv("submission.csv").head()