In [2]:
%load_ext autoreload
%autoreload 2

In [52]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import albumentations
from transformers import *
import geffnet
import cv2
import math
from tqdm import tqdm

In [3]:
from utils import *

In [12]:
# Load the training table and attach, to every row, the ground-truth array of
# posting_ids that share that row's label_group.
train = pd.read_csv(PATH/'train.csv')
postings_by_group = train.groupby('label_group')['posting_id'].unique()
train['target'] = train['label_group'].map(postings_by_group)
print('train shape is', train.shape )
train.head()

train shape is (34250, 6)


Unnamed: 0,posting_id,image,image_phash,title,label_group,target
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,"[train_129225211, train_2278313361]"
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DOUBLE FOAM TAPE",2937985045,"[train_3386243561, train_3423213080]"
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,"[train_2288590299, train_3803689425]"
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Campur - Leher Kancing (DPT001-00) Batik karakter Alhadi,4093212188,"[train_2406599165, train_3342059966]"
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,"[train_3369186413, train_921438619]"


In [17]:
# Baseline prediction: every posting whose image_phash is identical to this row's.
postings_by_phash = train.groupby('image_phash')['posting_id'].unique()
train['phash_pred'] = train['image_phash'].map(postings_by_phash)

In [18]:
# Display the frame (pandas truncates the middle rows) to inspect the new phash_pred column.
train

Unnamed: 0,posting_id,image,image_phash,title,label_group,target,phash_pred
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,"[train_129225211, train_2278313361]",[train_129225211]
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DOUBLE FOAM TAPE",2937985045,"[train_3386243561, train_3423213080]",[train_3386243561]
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,"[train_2288590299, train_3803689425]",[train_2288590299]
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Campur - Leher Kancing (DPT001-00) Batik karakter Alhadi,4093212188,"[train_2406599165, train_3342059966]",[train_2406599165]
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,"[train_3369186413, train_921438619]",[train_3369186413]
...,...,...,...,...,...,...,...
34245,train_4028265689,fff1c07ceefc2c970a7964cfb81981c5.jpg,e3cd72389f248f21,Masker Bahan Kain Spunbond Non Woven 75 gsm 3 ply lapis Bisa Dicuci,3776555725,"[train_2829161572, train_4028265689]",[train_4028265689]
34246,train_769054909,fff401691371bdcb382a0d9075dfea6a.jpg,be86851f72e2853c,MamyPoko Pants Royal Soft - S 70 - Popok Celana,2736479533,"[train_1463059254, train_769054909]",[train_769054909]
34247,train_614977732,fff421b78fa7284284724baf249f522e.jpg,ad27f0d08c0fcbf0,KHANZAACC Robot RE101S 1.2mm Subwoofer Bass Metal Wired Headset,4101248785,"[train_4126022211, train_3926241003, train_2325457554, train_9568348, train_512157627, train_1264798465, train_603410791, train_1369506345, train_614977732]",[train_614977732]
34248,train_3630949769,fff51b87916dbfb6d0f8faa01bee67b8.jpg,e3b13bd1d896c05c,"Kaldu NON MSG HALAL Mama Kamu Ayam Kampung , Sapi Lokal, Jamur (Bkn Alsultan / Biocell)",1663538013,"[train_3419392575, train_1431563868, train_3630949769]",[train_3630949769]


In [21]:

def f1score(row, col):
    """Return the F1 score between ``row.target`` and the predictions in ``row[col]``.

    Computed as 2 * |intersection| / (len(target) + len(prediction)), where the
    intersection is taken with ``np.intersect1d``.
    """
    truth, predicted = row.target, row[col]
    overlap = np.intersect1d(truth, predicted)
    return 2 * len(overlap) / (len(truth) + len(predicted))


In [24]:
# Per-row F1 of the phash_pred column against the ground truth, then the mean over all rows.
train['f1'] = train.apply(lambda row: f1score(row, col='phash_pred'), axis=1)
print('CV score for baseline =',train['f1'].mean())

CV score for baseline = 0.5530933399168149


## B0+Bert model

In [29]:
import os

def get_transforms(img_size=256):
    """Build the image pipeline: resize to 300x300, centre-crop, normalise.

    Args:
        img_size: side length of the square centre crop taken after the resize.

    Returns:
        An ``albumentations.Compose`` applying the three steps in order.
        No random augmentation is included.
    """
    steps = [
        albumentations.Resize(300, 300),
        albumentations.CenterCrop(img_size, img_size, p=1.0),
        albumentations.Normalize(),
    ]
    return albumentations.Compose(steps)


class LandmarkDataset(Dataset):
    """Dataset yielding an image tensor and a tokenised title for each row of a table.

    Args:
        csv: DataFrame with ``title`` and ``filepath`` columns, plus
            ``label_group`` when ``mode`` is not ``'test'``.
        split: stored on the instance; not used inside this class.
        mode: ``'test'`` returns no label, any other value appends the label.
        transforms: callable invoked as ``transforms(image=...)`` that returns a
            dict with an ``'image'`` entry. ``None`` selects
            ``get_transforms(img_size=256)``.
        tokenizer: callable applied to the title; must return ``input_ids`` and
            ``attention_mask`` tensors.
    """

    def __init__(self, csv, split, mode, transforms=None, tokenizer=None):
        self.csv = csv.reset_index()
        self.split = split
        self.mode = mode
        # Resolve the default per instance rather than once at class-definition
        # time, so instances do not share a single pipeline object.
        self.transform = get_transforms(img_size=256) if transforms is None else transforms
        self.tokenizer = tokenizer

    def __len__(self):
        return self.csv.shape[0]

    def __getitem__(self, index):
        row = self.csv.iloc[index]

        text = row.title

        image = cv2.imread(row.filepath)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail with the offending path instead of an opaque TypeError below.
            raise FileNotFoundError(f"could not read image: {row.filepath}")
        # Reverse the channel axis (OpenCV loads BGR, so this yields RGB).
        image = image[:, :, ::-1]

        res0 = self.transform(image=image)
        image0 = res0['image'].astype(np.float32)
        # Move the channel axis first: (H, W, C) -> (C, H, W).
        image = image0.transpose(2, 0, 1)

        # Titles are padded/truncated to exactly 16 tokens.
        text = self.tokenizer(text, padding='max_length', truncation=True, max_length=16, return_tensors="pt")
        # return_tensors="pt" adds a leading batch axis of size 1; drop it.
        input_ids = text['input_ids'][0]
        attention_mask = text['attention_mask'][0]

        if self.mode == 'test':
            return torch.tensor(image), input_ids, attention_mask
        else:
            return torch.tensor(image), input_ids, attention_mask, torch.tensor(row.label_group)

In [34]:
# Load the tokenizer from the 'bert-base-uncased' directory under PATH
# (PATH is not defined in this notebook; presumably it comes from `utils`).
tokenizer = AutoTokenizer.from_pretrained(PATH/'bert-base-uncased')

In [40]:
# Embed the training rows themselves; the dataset runs in 'test' mode so no labels are returned.
df_test = train.copy()
df_test['filepath'] = [str(PATH/'train_images'/name) for name in df_test['image']]
dataset_test = LandmarkDataset(
    df_test, 'test', 'test',
    transforms=get_transforms(img_size=256),
    tokenizer=tokenizer,
)
test_loader = DataLoader(dataset_test, batch_size=16, num_workers=4)

print(len(dataset_test),dataset_test[0])

34250 (tensor([[[ 1.2214,  1.0673,  0.7248,  ..., -0.5767, -0.6109, -0.5938],
         [ 0.9817,  0.8961,  0.8961,  ..., -0.8164, -0.6109, -0.5424],
         [ 0.8104,  1.0844,  1.2728,  ..., -0.9020, -0.6623, -0.5424],
         ...,
         [ 1.3755,  1.3242,  1.5639,  ...,  1.4612,  1.1015,  1.6153],
         [ 1.2043,  1.0844,  1.2043,  ...,  1.5297,  1.5468,  1.1872],
         [ 1.5125,  1.3927,  1.2557,  ...,  1.2214,  1.4440,  1.7009]],

        [[ 0.0126, -0.1625, -0.6702,  ..., -0.5651, -0.4426, -0.4776],
         [-0.4426, -0.5301, -0.4951,  ..., -0.8102, -0.4951, -0.4776],
         [-0.4776, -0.1625,  0.1527,  ..., -0.8978, -0.5651, -0.4601],
         ...,
         [ 0.1352,  0.3102,  0.4678,  ...,  0.3452,  0.0476,  0.5378],
         [-0.4251, -0.4251, -0.4426,  ...,  0.3102,  0.5378, -0.0574],
         [ 0.1702, -0.0399, -0.3375,  ..., -0.1625,  0.2227,  0.5203]],

        [[ 0.4439,  0.3916, -0.1487,  ..., -0.2358, -0.1835, -0.2184],
         [ 0.0082, -0.0441, -0.0615,  

In [45]:
class ArcMarginProduct_subcenter(nn.Module):
    """Cosine-similarity head with ``k`` sub-centres per class.

    Holds ``out_features * k`` weight rows. ``forward`` returns, for each class,
    the largest cosine between the input and that class's ``k`` rows.
    """

    def __init__(self, in_features, out_features, k=3):
        super().__init__()
        self.k = k
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(out_features * k, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weights uniformly in [-1/sqrt(in_features), 1/sqrt(in_features)]."""
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)

    def forward(self, features):
        unit_features = F.normalize(features)
        unit_centres = F.normalize(self.weight)
        # (batch, out_features * k) -> (batch, out_features, k)
        per_subcentre = F.linear(unit_features, unit_centres).view(-1, self.out_features, self.k)
        # Keep only the best-matching sub-centre of every class.
        return per_subcentre.max(dim=2).values
    
sigmoid = torch.nn.Sigmoid()

class Swish(torch.autograd.Function):
    """Swish activation, ``x * sigmoid(x)``, with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, i):
        result = i * sigmoid(i)
        # Only the input is saved; the sigmoid is recomputed in backward.
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        # saved_tensors is the supported accessor (saved_variables is deprecated).
        (i,) = ctx.saved_tensors
        sigmoid_i = sigmoid(i)
        # d/dx [x * s(x)] = s(x) * (1 + x * (1 - s(x)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))

class Swish_module(nn.Module):
    """``nn.Module`` wrapper that applies the custom ``Swish`` autograd function."""

    def forward(self, x):
        activated = Swish.apply(x)
        return activated

    
 
    
class enet_arcface_FINAL(nn.Module):
    """Image + text embedding model with a sub-centre cosine head.

    Features from the ``geffnet`` backbone and from the BERT model are
    concatenated, projected to 512 dimensions and passed through Swish.
    ``forward`` returns the L2-normalised embedding together with the output of
    the ``ArcMarginProduct_subcenter`` head. ``self.dropout`` is created but not
    applied in ``forward``.
    """

    def __init__(self, enet_type, out_dim):
        super().__init__()
        self.bert = AutoModel.from_pretrained(PATH/'bert-base-uncased')
        self.enet = geffnet.create_model(enet_type.replace('-', '_'), pretrained=None)
        # Read the backbone's feature width before its classifier is replaced below.
        fused_dim = self.enet.classifier.in_features + self.bert.config.hidden_size
        self.feat = nn.Linear(fused_dim, 512)
        self.swish = Swish_module()
        self.dropout = nn.Dropout(0.5)
        self.metric_classify = ArcMarginProduct_subcenter(512, out_dim)
        # Make the backbone emit features instead of class logits.
        self.enet.classifier = nn.Identity()

    def forward(self, x,input_ids, attention_mask):
        image_feat = self.enet(x)
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Second element of the BERT output (presumably the pooled output; confirm for the loaded model).
        text_feat = bert_out[1]
        fused = torch.cat([image_feat, text_feat], 1)
        embedding = self.swish(self.feat(fused))
        return F.normalize(embedding), self.metric_classify(embedding)
    
def load_model(model, model_file):
    """Load a checkpoint into ``model``, switch it to eval mode and return it.

    Accepts either a bare state dict or a dict holding one under
    ``"model_state_dict"``. A leading ``module.`` prefix is stripped from every
    key. Loading is strict, so missing or unexpected keys raise.

    Args:
        model: module whose parameters are overwritten in place.
        model_file: path of the checkpoint file.

    Returns:
        The same ``model`` object, in eval mode.
    """
    # Deserialise onto the CPU so a checkpoint saved on a particular GPU loads on
    # any machine; load_state_dict then copies into the model's own parameters.
    # NOTE: torch.load unpickles the file - only use it on trusted checkpoints.
    checkpoint = torch.load(model_file, map_location='cpu')
    if "model_state_dict" in checkpoint:
        checkpoint = checkpoint["model_state_dict"]
    prefix = 'module.'
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint.items()
    }
    model.load_state_dict(state_dict, strict=True)
    print(f"loaded {model_file}")
    model.eval()
    return model

In [46]:
# Checkpoint file, located under PATH, that is passed to load_model below.
WGT = PATH/'b0ns_256_bert_20ep_fold0_epoch27.pth'

In [49]:
# Build the network, move it to the GPU, then load the trained weights
# (load_model also switches the model to eval mode).
model = enet_arcface_FINAL('tf_efficientnet_b0_ns', out_dim=11014)
model = model.cuda()
model = load_model(model, WGT)


loaded /home/slex/data/shopee/b0ns_256_bert_20ep_fold0_epoch27.pth


In [53]:
embeds = []

# Inference only: run every batch through the model and keep the normalised
# embedding (first model output) as a NumPy array.
with torch.no_grad():
    for batch in tqdm(test_loader):
        img, input_ids, attention_mask = (t.cuda() for t in batch)
        feat = model(img, input_ids, attention_mask)[0]
        embeds.append(feat.detach().cpu().numpy())

100%|██████████| 2141/2141 [01:28<00:00, 24.11it/s]


In [67]:
# Stack the per-batch arrays along the row axis into one embedding matrix.
image_embeddings = np.concatenate(embeds, axis=0)
print('image embeddings shape',image_embeddings.shape)

image embeddings shape (34250, 512)


In [68]:
# All-pairs dot products of the embeddings. The model returns F.normalize(x) as
# its first output, so these are cosine similarities. The result is a dense
# n x n matrix (n = number of rows), which is memory-hungry for large n.
cts = image_embeddings @ image_embeddings.T

In [80]:
# Similarity above which two postings are predicted to be the same product.
SIM_THRESHOLD = .5

# Hoisted out of the loop: indexing the raw id array avoids building a new
# DataFrame slice for every row.
posting_ids = df_test.posting_id.values
preds = [posting_ids[np.where(row_sims > SIM_THRESHOLD)[0]] for row_sims in cts]

In [81]:
# Peek at the predicted posting_id arrays for the first five rows.
preds[:5]

[array(['train_129225211', 'train_2278313361'], dtype=object),
 array(['train_3386243561', 'train_3423213080'], dtype=object),
 array(['train_2288590299', 'train_3803689425'], dtype=object),
 array(['train_2406599165', 'train_3342059966'], dtype=object),
 array(['train_3369186413', 'train_921438619'], dtype=object)]

In [83]:
# Store the per-row prediction arrays next to the ground truth, then display the frame.
df_test['preds2']=preds
df_test

Unnamed: 0,posting_id,image,image_phash,title,label_group,target,phash_pred,f1,filepath,preds2
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,"[train_129225211, train_2278313361]",[train_129225211],0.666667,/home/slex/data/shopee/train_images/0000a68812bc7e98c42888dfb1c07da0.jpg,"[train_129225211, train_2278313361]"
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DOUBLE FOAM TAPE",2937985045,"[train_3386243561, train_3423213080]",[train_3386243561],0.666667,/home/slex/data/shopee/train_images/00039780dfc94d01db8676fe789ecd05.jpg,"[train_3386243561, train_3423213080]"
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,"[train_2288590299, train_3803689425]",[train_2288590299],0.666667,/home/slex/data/shopee/train_images/000a190fdd715a2a36faed16e2c65df7.jpg,"[train_2288590299, train_3803689425]"
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Campur - Leher Kancing (DPT001-00) Batik karakter Alhadi,4093212188,"[train_2406599165, train_3342059966]",[train_2406599165],0.666667,/home/slex/data/shopee/train_images/00117e4fc239b1b641ff08340b429633.jpg,"[train_2406599165, train_3342059966]"
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,"[train_3369186413, train_921438619]",[train_3369186413],0.666667,/home/slex/data/shopee/train_images/00136d1cf4edede0203f32f05f660588.jpg,"[train_3369186413, train_921438619]"
...,...,...,...,...,...,...,...,...,...,...
34245,train_4028265689,fff1c07ceefc2c970a7964cfb81981c5.jpg,e3cd72389f248f21,Masker Bahan Kain Spunbond Non Woven 75 gsm 3 ply lapis Bisa Dicuci,3776555725,"[train_2829161572, train_4028265689]",[train_4028265689],0.666667,/home/slex/data/shopee/train_images/fff1c07ceefc2c970a7964cfb81981c5.jpg,[train_4028265689]
34246,train_769054909,fff401691371bdcb382a0d9075dfea6a.jpg,be86851f72e2853c,MamyPoko Pants Royal Soft - S 70 - Popok Celana,2736479533,"[train_1463059254, train_769054909]",[train_769054909],0.666667,/home/slex/data/shopee/train_images/fff401691371bdcb382a0d9075dfea6a.jpg,"[train_1463059254, train_769054909]"
34247,train_614977732,fff421b78fa7284284724baf249f522e.jpg,ad27f0d08c0fcbf0,KHANZAACC Robot RE101S 1.2mm Subwoofer Bass Metal Wired Headset,4101248785,"[train_4126022211, train_3926241003, train_2325457554, train_9568348, train_512157627, train_1264798465, train_603410791, train_1369506345, train_614977732]",[train_614977732],0.200000,/home/slex/data/shopee/train_images/fff421b78fa7284284724baf249f522e.jpg,"[train_3926241003, train_2325457554, train_9568348, train_512157627, train_1264798465, train_603410791, train_614977732]"
34248,train_3630949769,fff51b87916dbfb6d0f8faa01bee67b8.jpg,e3b13bd1d896c05c,"Kaldu NON MSG HALAL Mama Kamu Ayam Kampung , Sapi Lokal, Jamur (Bkn Alsultan / Biocell)",1663538013,"[train_3419392575, train_1431563868, train_3630949769]",[train_3630949769],0.500000,/home/slex/data/shopee/train_images/fff51b87916dbfb6d0f8faa01bee67b8.jpg,"[train_3419392575, train_1431563868, train_3630949769]"


In [84]:
# Per-row F1 of the embedding-based predictions (preds2) against the ground truth.
df_test['b0bert_score'] = df_test.apply(functools.partial(f1score, col='preds2'),axis=1)
# Label corrected: this is the B0+BERT score, not the pHash baseline scored earlier.
print('CV score for B0+BERT =',df_test.b0bert_score.mean())

CV score for baseline = 0.9087361652947048
