In [1]:
import numpy as np # linear algebra
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import math
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader

import transformers
from transformers import get_cosine_schedule_with_warmup
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold

timm_path = "../input/timm-pytorch-image-models/pytorch-image-models-master"
import sys
sys.path.append(timm_path)
import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [2]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f07c0d74550>

In [3]:
class GeM2(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM2,self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'

In [4]:
df  = pd.read_csv("../input/train-folds/train_folds.csv",index_col=0)

In [5]:
transformer_path = '../input/sentence-transformer-models/paraphrase-xlm-r-multilingual-v1/0_Transformer'
tokenizer = transformers.AutoTokenizer.from_pretrained(transformer_path)

In [6]:
train_aug = A.Compose(
    [  

        A.Resize(448,448,p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=120, p=0.8),
        A.RandomBrightness(limit=(0.09, 0.6), p=0.5),
     A.Normalize(p=1.0),
        ToTensorV2(p=1.0)
    ]
)
val_aug = A.Compose(
    [  

        A.Resize(width=448, height=448, p=1.0),
     A.Normalize(p=1.0),
        ToTensorV2(p=1.0)
    ]
)

In [7]:
class Shop(Dataset):
    def __init__(self,df,tokenizer,augs=None,):
        self.df = df
        self.augs = augs
        self.tokenizer = tokenizer
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        img_src = self.df.path.iloc[idx]
        image = cv2.imread(img_src)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.uint8)
        
        p_id =  self.df.posting_id.iloc[idx]
        
        if (self.augs):
            transformed = self.augs(image=image)
            image = transformed['image']
        
        label = self.df.iloc[idx].label_group	
        label =torch.tensor(label, dtype=torch.long)
        
        
        text = str(self.df.title[idx])
        text = " ".join(text.split())
        
        text_input =  self.tokenizer(text,truncation = True , padding='max_length', 
                                      max_length = 40,return_tensors="pt")
        
        input_ids = text_input["input_ids"][0]
        mask = text_input["attention_mask"][0]
        
        return image,label,p_id,input_ids,mask

In [8]:
class ArcModule(nn.Module):
    def __init__(self, in_features, out_features, s = 20, m = 0.35):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.kaiming_normal_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = torch.tensor(math.cos(math.pi - m))
        self.mm = torch.tensor(math.sin(math.pi - m) * m)
        
      

    def forward(self, inputs, labels=None):
        cos_th = F.linear(inputs, F.normalize(self.weight))
        cos_th = cos_th.clamp(-1, 1) 
        sin_th = torch.sqrt(1.0 - torch.pow(cos_th, 2)+ 1e-8)
        cos_th_m = cos_th * self.cos_m - sin_th * self.sin_m
        # print(type(cos_th), type(self.th), type(cos_th_m), type(self.mm))
        cos_th_m = torch.where(cos_th > self.th, cos_th_m, cos_th - self.mm)

        cond_v = cos_th - self.th
        cond = cond_v <= 0
        cos_th_m[cond] = (cos_th - self.mm)[cond]

        if labels.dim() == 1:
            labels = labels.unsqueeze(-1)
        onehot = torch.zeros(cos_th.size()).to(device)
        labels = labels.type(torch.LongTensor).to(device)
        onehot.scatter_(1, labels, 1.0)
        outputs = onehot * cos_th_m + (1.0 - onehot) * cos_th
        outputs = outputs * self.s
        return outputs


In [9]:
class cv_Model(nn.Module):
    def __init__(self,network,output_size =11014 ,pretrained=False):
        super().__init__()
        self.op = output_size
        self.network = network
        self.backbone = timm.create_model(self.network , features_only=True, pretrained=pretrained)
        self.gem2 = GeM2()
        self.fc1 = nn.Linear(2048,512)
        self.do = nn.Dropout(p=0.25)
        self.bn1 = nn.BatchNorm1d(2048)
        self.bn2 = nn.BatchNorm1d(512)
        self.pl = nn.PReLU()
        self.margin = ArcModule(in_features=512, out_features = self.op)
        
        nn.init.kaiming_normal_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)

    def forward(self,x,labels=None):
        y = self.backbone(x)
        #y1 = self.gem1(y[2])
        y2 = self.gem2(y[4])
        
        #y1 = y1.view(x.shape[0],-1)
        y2 = y2.view(x.shape[0],-1)
        y2 = self.bn1(y2)
        
        #concat = torch.cat((y1,y2),dim = 1)
        fc1 = self.do(self.fc1(y2))
        feat1 = self.bn2(fc1)
        feat2 =  feat1
        if labels is not None:
            return self.margin(feat2, labels)
        else:
            return feat2

In [10]:
class NLP_Model(nn.Module):
    def __init__(self,path,output_size =11014):
        super().__init__()
        self.op = output_size
        self.path = path
        self.backbone = transformers.AutoModel.from_pretrained(self.path)
        self.fc1 = nn.Linear(768,224)
        self.do = nn.Dropout(p=0.3)
        self.bn1 = nn.BatchNorm1d(224)
        self.pl = nn.PReLU()
        self.margin = ArcModule(in_features=224, out_features = self.op)
        
        nn.init.kaiming_normal_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)
        
        
    def forward(self,input_ids,attention_mask,labels=None):
        x = self.backbone(input_ids=input_ids,attention_mask=attention_mask)
        x = x[0][:,0,:]
        x = self.do(x)
        feat = self.pl(self.bn1(self.fc1(x)))
        
        if labels is not None:
            return self.margin(feat, labels)
        else:
            return feat

In [11]:
class Model2(nn.Module):
    def __init__(self,output_size =11014):
        super().__init__()
        self.op = output_size
        
        self.m1  = cv_Model('seresnet50')
        WEIGHTS_FILE1 = "../input/serex-0134/image_modelV2_seresnex.pth"
        self.m1.load_state_dict(torch.load(WEIGHTS_FILE1))
        
        self.m2 = NLP_Model(transformer_path)
        WEIGHTS_FILE ='../input/bert-t0/bert_T0_F4.pth'
        self.m2.load_state_dict(torch.load(WEIGHTS_FILE))
        
        self.fc1 = nn.Linear(736,736)
        self.bn1 = nn.BatchNorm1d(736)
        self.do = nn.Dropout(p=0.3)
        
        self.fc2 = nn.Linear(736,512)
        self.bn2 = nn.BatchNorm1d(512)
        
        self.pl = nn.PReLU()
        
        self.margin = ArcModule(in_features=512, out_features = self.op)
                        
    def forward(self,x,input_ids,attention_mask,labels=None):
        x1 = self.m1(x)
        x2 = self.m2(input_ids, attention_mask)
        concat = torch.cat((x1,x2),dim = 1)
        
        
        feat = F.relu(self.bn1(self.fc1(concat)))
        
        feat = self.do(feat)
        
        feat = self.pl(self.bn2(self.fc2(concat)))
        
        
        feat2 =  F.normalize(feat)
        if labels is not None:
            return self.margin(feat2, labels)
        else:
            return feat2                   

In [12]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
        
def train_one_epoch(train_loader,model,optimizer,criterion,e,epochs,scheduler):
    losses = AverageMeter()
    model.train()
    global_step = 0
    loop = tqdm(enumerate(train_loader),total = len(train_loader))
    #image,label,p_id,input_ids,mask
    for step,(image,labels,_,input_ids,mask) in loop:
        image = image.to(device)
        labels= labels.to(device)
        input_ids = input_ids.to(device)
        attention_mask =mask.to(device)   
        logitss = model(image,input_ids, attention_mask,labels)
        batch_size = labels.size(0)
        loss  = criterion(logitss,labels)
        
        losses.update(loss.item(), batch_size)
        
        optimizer.zero_grad()
        loss.backward()
        #torch.nn.utils.clip_grad_norm_(m.parameters(), 1000 )
        optimizer.step()
        scheduler.step() 
        global_step += 1
        
        loop.set_description(f"Epoch {e+1}/{epochs}")
        loop.set_postfix(loss = loss.item() ,stage = 'train')
        
        
    return losses.avg

In [13]:
model = Model2()
model.to(device);

In [14]:
class FocalLoss(nn.Module):

    def __init__(self, gamma=1, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        #print(self.gamma)
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss(reduction="none")

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [15]:
def fit(fold):
  
    df_train = df[(df.fold == 1) | (df.fold == 3) | (df.fold == 4)].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)
    
    train_data = Shop(df_train,tokenizer,augs= train_aug)
    val_data   = Shop(df_valid,tokenizer,augs=val_aug)
    
    train_loader = DataLoader(train_data,shuffle=True,
                        num_workers=4,
                        batch_size=8,
                        drop_last=True,
                            pin_memory=True)
    
    '''val_loader = DataLoader(val_data,shuffle=False,
                        num_workers=4,
                            pin_memory=True,
                        batch_size=32)'''
    criterion= FocalLoss()
    epochs = 7
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4 , weight_decay = 1e-4)
    
    num_train_steps = math.ceil(len(train_loader))
    warmup_epochs = 1
    num_warmup_steps= num_train_steps * warmup_epochs
    num_training_steps=int(num_train_steps * epochs)
    scheduler = get_cosine_schedule_with_warmup(optimizer,num_warmup_steps = num_warmup_steps,num_training_steps =num_training_steps)
    
    best_acc = 0
    loop = range(epochs)
    for e in loop:
        
        train_loss = train_one_epoch(train_loader,model,optimizer,criterion,e,epochs,scheduler)
        print(f'For epoch {e+1}/{epochs}')
        print(f'average train_loss {train_loss}')
        
        '''val_loss = val_one_epoch(val_loader,model,optimizer,criterion,scheduler)
        print(f'avarage val_loss { val_loss }')'''
        
        
    
    
fit(2)

  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 1/7
average train_loss 15.933830038409367


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 2/7
average train_loss 13.782540615102583


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 3/7
average train_loss 12.360589010693202


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 4/7
average train_loss 11.358840351721208


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 5/7
average train_loss 10.654026868558747


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 6/7
average train_loss 10.219490649637033


  0%|          | 0/2568 [00:00<?, ?it/s]

For epoch 7/7
average train_loss 10.034968974805695


In [16]:
torch.save(model.state_dict(),'combo.pth') 
test = df[df.fold == 2].reset_index(drop = True)
a = Shop(test,tokenizer,augs=val_aug)
t_loader =  DataLoader(a,shuffle=False,
                        num_workers=4,
                        batch_size=16,
                        pin_memory=True)
def generate_test_features(test_loader):
    model.eval()
    bar = tqdm(test_loader)
    
    FEAS = []
    

    with torch.no_grad():
        for batch_idx, (image,_,_,input_ids,mask) in enumerate(bar):

            image = image.to(device)
            input_ids = input_ids.to(device)
            attention_mask =mask.to(device)  

            features =  model(image,input_ids, attention_mask)

            FEAS += [features.detach().cpu()]

    FEAS = torch.cat(FEAS).cpu().numpy()
    
    return FEAS
FEAS = generate_test_features(t_loader)

  0%|          | 0/429 [00:00<?, ?it/s]

In [17]:
FEAS

array([[ 0.03935479,  0.04264934, -0.01100215, ..., -0.06859029,
        -0.00113633, -0.03183628],
       [ 0.06802026,  0.06764684, -0.05868232, ...,  0.02336464,
         0.0655977 ,  0.0429031 ],
       [ 0.01821735, -0.01103489, -0.0220452 , ...,  0.04853012,
         0.01040079,  0.0019627 ],
       ...,
       [-0.01461781, -0.04458086,  0.03877342, ..., -0.04186166,
         0.10046479,  0.0208245 ],
       [-0.01461782, -0.04458084,  0.0387734 , ..., -0.04186165,
         0.10046481,  0.02082447],
       [ 0.07055347,  0.02209408, -0.0256315 , ..., -0.04531487,
         0.08611646,  0.00174664]], dtype=float32)

In [18]:
from sklearn import metrics
import gc
preds = []
CHUNK = 900
#te = ar[:,1:]
print('Finding similar titles...')
CTS = len(test)//CHUNK
if len(test)%CHUNK!=0: CTS += 1
for j in range( CTS ):
    
    a = j*CHUNK
    b = (j+1)*CHUNK
    b = min(b,len(test))
    print('chunk',a,'to',b)
    cts = metrics.pairwise_distances(FEAS[a:b],FEAS, metric='cosine')
    cts = 1-cts
    for k in range(b-a):
        IDX = np.where(cts[k,]>0.85)[0]
        o = test.iloc[IDX].posting_id.values
        preds.append(o)

#del te,dx,modelx
_ = gc.collect()

Finding similar titles...
chunk 0 to 900
chunk 900 to 1800
chunk 1800 to 2700
chunk 2700 to 3600
chunk 3600 to 4500
chunk 4500 to 5400
chunk 5400 to 6300
chunk 6300 to 6850


In [19]:
test['preds'] = preds
tmp = test.groupby('image_phash').posting_id.agg('unique').to_dict()
test['preds3'] = test.image_phash.map(tmp)
def getMetric(col):
    def f1score(row):
        n = len( np.intersect1d(row.target,row[col]) )
        return 2*n / (len(row.target)+len(row[col]))
    return f1score
def combine_for_sub(row):
    x = np.concatenate([row.preds, row.preds3])
    return ' '.join( np.unique(x) )

def combine_for_cv(row):
    x = np.concatenate([row.preds, row.preds3])
    return np.unique(x)
tmp = test.groupby('label_group').posting_id.agg('unique').to_dict()
test['target'] = test.label_group.map(tmp)
test['oof'] = test.apply(combine_for_cv,axis=1)
test['f1'] = test.apply(getMetric('oof'),axis=1)
print('CV Score =', test.f1.mean() )

CV Score = 0.6827204598005432
