In [1]:
!pip install nltk
! pip install timm==0.6.7
!pip install transformers
!pip install opencv-python



In [2]:
import sys
import os
import gc
import cv2
import math
import time
import tqdm
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import KFold,StratifiedKFold

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, lr_scheduler, AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import (CosineAnnealingWarmRestarts, CosineAnnealingLR, 
                                      ReduceLROnPlateau)

from sklearn.preprocessing import LabelEncoder

import albumentations as A 
from albumentations.pytorch.transforms import ToTensorV2

import transformers
from transformers import AutoModel, AutoTokenizer
import timm
# albumentations for augs
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

In [30]:
# Settings shared by the BERT cells. `s`, `m`, `ls_eps` and `easy_margin` are
# handed to ArcMarginProduct when Model is constructed.
config = dict(
    # training-loop settings
    learning_rate=0.01,
    train_batch_size=12,
    valid_batch_size=12,
    accumulation_step=4,
    epochs=15,
    nfolds=5,
    seed=42,
    # ArcMarginProduct settings
    s=30.0,
    m=0.5,
    ls_eps=0.0,
    easy_margin=False,
)

In [34]:
MODEL_PATH = 'bert-base-cased'

# Fetch the tokenizer and keep a local copy in ./bert_tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
tokenizer.save_pretrained('bert_tokenizer')

# Build the configuration on its own so the checkpoint weights are loaded only
# once (previously the whole model was instantiated just to read `.config`,
# then instantiated a second time with the modified config).
bert_config = transformers.AutoConfig.from_pretrained(MODEL_PATH)
# Cross-attention layers are not part of the bert-base-cased checkpoint, so
# from_pretrained reports them as newly initialised.
# NOTE(review): as far as I can tell, is_decoder=True makes Hugging Face BERT
# use a causal self-attention mask, while Model.forward reads token position 0.
# Confirm this matches how the saved checkpoint was trained.
bert_config.add_cross_attention = True
bert_config.is_decoder = True
backbone = AutoModel.from_pretrained(MODEL_PATH, config=bert_config)

class Model(nn.Module):
    """BERT backbone followed by dropout and an ArcMarginProduct head.

    `ArcMarginProduct` and `config` must already be defined when an instance
    is created. This notebook defines `Model` in two cells; both definitions
    accept the same `forward(x, label=None)` signature so that the result does
    not depend on which cell ran last.
    """

    def __init__(self, backbone):
        super(Model, self).__init__()
        self.backbone = backbone
        # Hidden width of the backbone, read from the pooler's dense layer.
        self.in_features = self.backbone.pooler.dense.in_features
        self.dropout = nn.Dropout(0.2)
        # 11014 output classes (the same count as model_params['n_classes']).
        self.final = ArcMarginProduct(self.in_features, 11014,
                                      s=config['s'], m=config['m'],
                                      easy_margin=config['easy_margin'],
                                      ls_eps=config['ls_eps'])

    def forward(self, x, label=None):
        """Return the head's output for the first token of each sequence.

        :param x: mapping of tokenizer outputs, unpacked into the backbone call
        :param label: optional class indices; forwarded to the head only when
            given, in which case the head's forward must accept a second argument
        """
        output = self.backbone(**x)
        first_token = output['last_hidden_state'][:, 0, :]
        first_token = self.dropout(first_token)
        if label is None:
            return self.final(first_token)
        return self.final(first_token, label)
# Resolve the device here too: the dedicated device cell sits further down the
# notebook, so `device` may not exist yet when this cell runs on a fresh kernel.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model = Model(backbone)
bert_model.to(device)

Some weights of BertModel were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.encoder.l

Model(
  (backbone): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=

In [35]:
# Same settings as the earlier `config` cell, re-declared here.
config = {
    # training-loop settings
    "learning_rate": 0.01,
    "train_batch_size": 12,
    "valid_batch_size": 12,
    "accumulation_step": 4,
    "epochs": 15,
    "nfolds": 5,
    "seed": 42,

    # values passed to ArcMarginProduct by Model.__init__
    "s": 30.0,
    "m": 0.5,
    "ls_eps": 0.0,
    "easy_margin": False,
}

In [5]:
class ArcMarginProduct(nn.Module):
    """Cosine-similarity head with an optional additive angular margin.

    :param in_features: size of each input feature vector
    :param out_features: number of classes (rows of the weight matrix)
    :param s: factor the output is multiplied by
    :param m: angular margin, in radians
    :param easy_margin: selects which fallback is used where the margin
        formula is not applied (see ``forward``)
    :param ls_eps: label-smoothing amount applied to the one-hot targets
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Below this cosine the margin formula is replaced by `cosine - mm`.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label=None):
        """Return scaled cosine logits, with the margin applied to `label` if given.

        :param input: 2-D batch of feature vectors
        :param label: optional class index per row. When omitted (the way this
            notebook calls the head) no margin is applied to any class.
        :return: tensor of shape (batch, out_features) on the same device as `input`
        """
        # Stay on whatever device the inputs and weights live on rather than
        # forcing 'cuda', so the CPU fallback works.
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| slightly above 1; clamp so sqrt never sees
        # a negative value and produces NaN.
        sine = torch.sqrt(torch.clamp(1.0 - torch.pow(cosine, 2), min=0.0))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        one_hot = torch.zeros_like(cosine)
        if label is not None:
            target = label.view(-1, 1).long().to(cosine.device)
            one_hot.scatter_(1, target, 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features

        # Blend: margin-adjusted value where one_hot is set, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

In [6]:
class Model(nn.Module):
    """BERT backbone followed by dropout and an ArcMarginProduct head.

    `label` is optional in `forward`: get_text_embeddings calls the model with
    a single argument, and the head defined in this notebook can be called
    with the features alone.
    """

    def __init__(self, backbone):
        super(Model, self).__init__()
        self.backbone = backbone
        # Hidden width of the backbone, read from the pooler's dense layer.
        self.in_features = self.backbone.pooler.dense.in_features
        self.dropout = nn.Dropout(0.2)
        # 11014 output classes (the same count as model_params['n_classes']).
        self.final = ArcMarginProduct(self.in_features, 11014,
                                      s=config['s'], m=config['m'],
                                      easy_margin=config['easy_margin'],
                                      ls_eps=config['ls_eps'])

    def forward(self, x, label=None):
        """Return the head's output for the first token of each sequence.

        :param x: mapping of tokenizer outputs, unpacked into the backbone call
        :param label: optional class indices; forwarded to the head only when
            given, in which case the head's forward must accept a second argument
        """
        output = self.backbone(**x)
        first_token = output['last_hidden_state'][:, 0, :]
        first_token = self.dropout(first_token)
        if label is None:
            return self.final(first_token)
        return self.final(first_token, label)

In [7]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
BEST_BERT_MODEL_PATH = "../input/bert/pytorch/bert/1/bert.bin"

# Build the text model and restore the saved weights.
bert_model = Model(backbone)
# NOTE: torch.load unpickles the file, so only point it at trusted checkpoints.
bert_state = torch.load(BEST_BERT_MODEL_PATH, map_location=device)
# strict=False tolerates key mismatches; report them so a checkpoint that does
# not match the model is noticed instead of being silently ignored.
bert_load_report = bert_model.load_state_dict(bert_state, strict=False)
del bert_state
if bert_load_report.missing_keys or bert_load_report.unexpected_keys:
    print(
        "BERT checkpoint loaded with "
        f"{len(bert_load_report.missing_keys)} missing and "
        f"{len(bert_load_report.unexpected_keys)} unexpected keys"
    )

MODEL_PATH = 'bert-base-cased'
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

In [9]:
class EfficientNet(nn.Module):
    """timm image backbone, average pooling and an ArcMarginProduct head.

    :param n_classes: number of output classes of the head
    :param model_name: architecture name handed to ``timm.create_model``
    :param use_fc: when true, insert dropout -> linear -> batch-norm before the head
    :param fc_dim: output width of that optional linear layer
    :param dropout: dropout probability of the optional block
    :param loss_module: stored on the instance only; the head is always
        ArcMarginProduct (the cosface / adacos / linear alternatives are not wired up)
    :param s: passed to ArcMarginProduct as ``s``
    :param margin: passed to ArcMarginProduct as ``m``
    :param ls_eps: passed to ArcMarginProduct as ``ls_eps``
    :param theta_zero: accepted but not used by this class
    :param pretrained: handed to ``timm.create_model``
    """

    def __init__(self, n_classes, model_name='efficientnet_b0', use_fc=False,
                 fc_dim=512, dropout=0.0, loss_module='softmax', s=30.0,
                 margin=0.50, ls_eps=0.0, theta_zero=0.785, pretrained=True):
        super().__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        head_width = self.backbone.classifier.in_features

        # Replace timm's classifier and pooling with identities; pooling is
        # done by self.pooling in extract_feat instead.
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc
        if self.use_fc:
            self.dropout = nn.Dropout(p=dropout)
            self.fc = nn.Linear(head_width, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self._init_params()
            # The head now consumes the projected features.
            head_width = fc_dim

        self.loss_module = loss_module
        self.final = ArcMarginProduct(
            head_width,
            n_classes,
            s=s,
            m=margin,
            easy_margin=False,
            ls_eps=ls_eps,
        )

    def _init_params(self):
        """Initialise the optional linear and batch-norm layers."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, x):
        """Return the head's output for a batch of images."""
        return self.final(self.extract_feat(x))

    def extract_feat(self, x):
        """Return pooled backbone features, projected when ``use_fc`` is set."""
        n_images = x.shape[0]
        pooled = self.pooling(self.backbone(x)).view(n_images, -1)
        if not self.use_fc:
            return pooled
        return self.bn(self.fc(self.dropout(pooled)))

In [10]:
# ---- Image pipeline constants ----
DIM = (512, 512)

NUM_WORKERS = 4
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
EPOCHS = 5
SEED = 42
# (a fixed LR of 3e-4 was tried earlier and is left disabled)

# ---- Backbone ----
model_name = 'efficientnet_b3'  # any of efficientnet_b0-b7

# ---- Metric-loss head and its parameters ----
loss_module = 'arcface'  # alternatives noted by the author: 'cosface', 'adacos'
s = 30.0
m = 0.5
ls_eps = 0.0
easy_margin = False

# Keyword arguments for EfficientNet(**model_params), assembled from the values above.
model_params = dict(
    n_classes=11014,
    model_name=model_name,
    use_fc=False,
    fc_dim=512,
    dropout=0.0,
    loss_module=loss_module,
    s=s,
    margin=m,
    ls_eps=ls_eps,
    theta_zero=0.785,
    pretrained=True,
)

# Learning-rate schedule settings; lr_max scales with the training batch size.
scheduler_params = dict(
    lr_start=1e-5,
    lr_max=1e-5 * TRAIN_BATCH_SIZE,
    lr_min=1e-6,
    lr_ramp_ep=5,
    lr_sus_ep=0,
    lr_decay=0.8,
)

In [11]:
# Read the training table, attach the on-disk path of every image, and replace
# label_group with the integer codes produced by LabelEncoder.
data = pd.read_csv('../input/shopee-product-matching/train.csv')
data['filepath'] = [
    os.path.join('../input/shopee-product-matching', 'train_images', image_name)
    for image_name in data['image']
]
encoder = LabelEncoder()
encoder.fit(data['label_group'])
data['label_group'] = encoder.transform(data['label_group'])

In [12]:
# Keep only the first 2000 rows for the cells that follow.
data = data.iloc[:2000]

In [13]:
class EfficientNetDataset(Dataset):
    """Dataset that yields one RGB image per row of a table with a `filepath` column.

    :param csv: dataframe whose rows describe the images; its index is reset
        so rows can be addressed positionally
    :param transforms: optional callable invoked as ``transforms(image=...)``
        whose result exposes the transformed image under the ``'image'`` key
    """

    def __init__(self, csv, transforms=None):
        self.csv = csv.reset_index()
        self.augmentations = transforms

    def __len__(self):
        return self.csv.shape[0]

    def __getitem__(self, index):
        """Load, colour-convert and (optionally) transform the image at `index`.

        :raises FileNotFoundError: if the file cannot be read by OpenCV
        """
        row = self.csv.iloc[index]

        image = cv2.imread(row.filepath)
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail with the offending path instead of an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {row.filepath}")
        # OpenCV loads BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations:
            image = self.augmentations(image=image)['image']

        return image

In [14]:
def get_valid_transforms():
    """Build the preprocessing pipeline: resize to DIM, normalize, convert to tensor."""
    height, width = DIM[0], DIM[1]
    steps = [
        albumentations.Resize(height, width, always_apply=True),
        albumentations.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

In [15]:
# Wrap the dataframe in a dataset that applies the preprocessing pipeline.
eff_dataset = EfficientNetDataset(
    csv=data,
    transforms=get_valid_transforms(),
)

In [16]:
# Inference loader: keep dataset order and every sample, so that row i of the
# stacked model outputs corresponds to row i of `data`. With drop_last=True a
# final partial batch would be discarded and the outputs would fall out of
# step with the dataframe.
eff_dataset_loader = torch.utils.data.DataLoader(
    eff_dataset,
    batch_size=8,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)

In [17]:
BEST_EFF_MODEL_PATH = "../input/efficientnet/pytorch/efficientnet/1/model_efficientnet_b3_IMG_SIZE_512_EPOCH_4_arcface.bin"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network from model_params, restore the saved weights (mapped onto
# `device`), then move the model there.
eff_model = EfficientNet(**model_params)
eff_model.load_state_dict(
    torch.load(BEST_EFF_MODEL_PATH, map_location=device)
)
eff_model.to(device)
print("Loaded EfficientNet model")

Building Model Backbone for efficientnet_b3 model
Loaded EfficientNet model


In [18]:
def eval_fn(data_loader, model, device):
    """Run `model` over every batch of `data_loader` with gradients disabled.

    The model is switched to eval mode and left in that mode.

    :param data_loader: iterable yielding batches that expose ``.to(device)``
    :param model: callable module applied to each batch
    :param device: device each batch is moved to before the forward pass
    :return: list with one model output per batch, in iteration order
    """
    model.eval()
    outputs = []
    with torch.no_grad():
        for batch in data_loader:
            outputs.append(model(batch.to(device)))
    return outputs

In [19]:
# Stack the per-batch model outputs along the batch axis and copy them to a
# NumPy array on the host.
embedded_eff = (
    torch.cat(eval_fn(eff_dataset_loader, eff_model, device), dim=0)
    .detach()
    .cpu()
    .numpy()
)

In [20]:
from sklearn.neighbors import NearestNeighbors

def get_image_neighbors(df, embeddings, KNN=50, threshold=4.5):
    """For every row, collect the posting_ids of its close neighbours.

    :param df: dataframe with a `posting_id` column, row-aligned with `embeddings`
    :param embeddings: 2-D array with one vector per row of `df`
    :param KNN: maximum number of neighbours examined per row; capped at the
        number of rows so small inputs do not make `kneighbors` raise
    :param threshold: neighbours at a distance below this value are kept
        (distance is NearestNeighbors' default metric)
    :return: ``(df, predictions)`` where ``predictions[i]`` is an array of the
        posting_ids matched to row ``i``
    """
    n_samples = embeddings.shape[0]
    if n_samples == 0:
        # Nothing to index; NearestNeighbors cannot be fitted on empty input.
        return df, []

    model = NearestNeighbors(n_neighbors=min(KNN, n_samples))
    model.fit(embeddings)
    distances, indices = model.kneighbors(embeddings)

    posting_ids = df['posting_id'].values
    predictions = [
        posting_ids[indices[k, distances[k] < threshold]]
        for k in range(n_samples)
    ]

    # Release the index and the neighbour matrices before returning.
    del model, distances, indices
    gc.collect()
    return df, predictions

In [21]:
# Neighbour search over the EfficientNet outputs.
df, image_predictions = get_image_neighbors(df=data, embeddings=embedded_eff)

In [40]:
def get_text_embeddings(data):
    """Run `bert_model` on every title of `data`, one title at a time.

    Uses the notebook-level `tokenizer`, `bert_model` and `device`. The model
    is moved to `device` and switched to eval mode (and left there) so that
    dropout does not make the outputs vary from call to call.

    :param data: dataframe with a `title` column
    :return: list with one model output per title, in row order
    """
    # Move and configure the model once, not once per title.
    bert_model.to(device)
    bert_model.eval()

    outputs = []
    with torch.no_grad():
        for title in data.title.values:
            encoded = tokenizer(title, return_tensors="pt", padding='max_length', truncation=True)
            encoded = encoded.to(device)
            outputs.append(bert_model(encoded))
    return outputs

In [42]:
# Stack the per-title model outputs along the batch axis and copy them to a
# NumPy array on the host.
embedded_bert = (
    torch.cat(get_text_embeddings(data), dim=0)
    .detach()
    .cpu()
    .numpy()
)

In [43]:
# Neighbour search over the BERT outputs. This previously passed the image
# embeddings, which made text_predictions a copy of image_predictions.
# NOTE(review): get_image_neighbors applies its 4.5 distance cut-off here as
# well; confirm that value suits the text model's outputs.
df, text_predictions = get_image_neighbors(data, embedded_bert)