In [1]:
!pip install nltk
# !pip install transformers[torch]
# !pip install accelerate -U

! pip install timm==0.6.7
!pip install transformers
!pip install opencv-python

Collecting timm==0.6.7
  Downloading timm-0.6.7-py3-none-any.whl (509 kB)
[K     |████████████████████████████████| 509 kB 6.7 MB/s eta 0:00:01
Installing collected packages: timm
Successfully installed timm-0.6.7


In [2]:
# Preliminaries
from tqdm import tqdm
import math
import random
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Visuals and CV2
import cv2

# albumentations for augs
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

#torch
import torch
import timm
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim import Adam, lr_scheduler
from torch.optim.lr_scheduler import _LRScheduler

In [3]:
DIM = (512,512)

NUM_WORKERS = 4
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
EPOCHS = 5
SEED = 42
#LR = 3e-4

device = torch.device('cuda')


################################################# MODEL ####################################################################

model_name = 'efficientnet_b3' #efficientnet_b0-b7

################################################ Metric Loss and its params #######################################################
loss_module = 'arcface' #'cosface' #'adacos'
s = 30.0
m = 0.5
ls_eps = 0.0
easy_margin = False



scheduler_params = {
        "lr_start": 1e-5,
        "lr_max": 1e-5 * TRAIN_BATCH_SIZE,
        "lr_min": 1e-6,
        "lr_ramp_ep": 5,
        "lr_sus_ep": 0,
        "lr_decay": 0.8,
    }

############################################## Model Params ###############################################################
model_params = {
    'n_classes':11014,
    'model_name':'efficientnet_b3',
    'use_fc':False,
    'fc_dim':512,
    'dropout':0.0,
    'loss_module':loss_module,
    's':30.0,
    'margin':0.50,
    'ls_eps':0.0,
    'theta_zero':0.785,
    'pretrained':True
}

In [4]:
def seed_torch(seed=42):
  random.seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.backends.cudnn.deterministic = True

seed_torch(SEED)


In [5]:
class AverageMeter(object):
  def __init__(self):
      self.reset()

  def reset(self):
      self.val = 0
      self.avg = 0
      self.sum = 0
      self.count = 0

  def update(self, val, n=1):
      self.val = val
      self.sum += val * n
      self.count += n
      self.avg = self.sum / self.count

In [6]:
def fetch_scheduler(optimizer):
  if SCHEDULER =='ReduceLROnPlateau':
      scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=factor, patience=patience, verbose=True, eps=eps)
  elif SCHEDULER =='CosineAnnealingLR':
      scheduler = CosineAnnealingLR(optimizer, T_max=T_max, eta_min=min_lr, last_epoch=-1)
  elif SCHEDULER =='CosineAnnealingWarmRestarts':
      scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=1, eta_min=min_lr, last_epoch=-1)
  return scheduler

In [7]:
def fetch_loss():
  loss = nn.CrossEntropyLoss()
  return loss

In [8]:
def get_train_transforms():
  return albumentations.Compose(
      [
          albumentations.Resize(DIM[0],DIM[1],always_apply=True),
          albumentations.HorizontalFlip(p=0.5),
          albumentations.VerticalFlip(p=0.5),
          albumentations.Rotate(limit=120, p=0.8),
          albumentations.RandomBrightness(limit=(0.09, 0.6), p=0.5),
          #albumentations.Cutout(num_holes=8, max_h_size=8, max_w_size=8, fill_value=0, always_apply=False, p=0.5),
          #albumentations.ShiftScaleRotate(
            #  shift_limit=0.25, scale_limit=0.1, rotate_limit=0
          #),
          albumentations.Normalize(),
          ToTensorV2(p=1.0),
      ]
  )

def get_valid_transforms():
  return albumentations.Compose(
      [
          albumentations.Resize(DIM[0],DIM[1],always_apply=True),
          albumentations.Normalize(),
      ToTensorV2(p=1.0)
      ]
  )

In [9]:
class EfficientDataset(Dataset):
  def __init__(self, csv, transforms=None):

      self.csv = csv.reset_index()
      self.augmentations = transforms

  def __len__(self):
      return self.csv.shape[0]

  def __getitem__(self, index):
      row = self.csv.iloc[index]

      text = row.title

      image = cv2.imread(row.filepath)
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

      if self.augmentations:
          augmented = self.augmentations(image=image)
          image = augmented['image']


      return image,torch.tensor(row.label_group)

In [25]:
class EfficientNet(nn.Module):

  def __init__(self,
                n_classes,
                model_name='efficientnet_b0',
                use_fc=False,
                fc_dim=512,
                dropout=0.0,
                loss_module='softmax',
                s=30.0,
                margin=0.50,
                ls_eps=0.0,
                theta_zero=0.785,
                pretrained=True):
      """
      :param n_classes:
      :param model_name: name of model from pretrainedmodels
          e.g. resnet50, resnext101_32x4d, pnasnet5large
      :param pooling: One of ('SPoC', 'MAC', 'RMAC', 'GeM', 'Rpool', 'Flatten', 'CompactBilinearPooling')
      :param loss_module: One of ('arcface', 'cosface', 'softmax')
      """
      super(EfficientNet, self).__init__()
      print('Building Model Backbone for {} model'.format(model_name))

      self.backbone = timm.create_model(model_name, pretrained=pretrained)
      final_in_features = self.backbone.classifier.in_features

      self.backbone.classifier = nn.Identity()
      self.backbone.global_pool = nn.Identity()

      self.pooling =  nn.AdaptiveAvgPool2d(1)

      self.use_fc = use_fc
      if use_fc:
          self.dropout = nn.Dropout(p=dropout)
          self.fc = nn.Linear(final_in_features, fc_dim)
          self.bn = nn.BatchNorm1d(fc_dim)
          self._init_params()
          final_in_features = fc_dim

      self.loss_module = loss_module
#       if loss_module == 'arcface':
      self.final = ArcMarginProduct(final_in_features, n_classes,
                                    s=s, m=margin, easy_margin=False, ls_eps=ls_eps)
#       elif loss_module == 'cosface':
#           self.final = AddMarginProduct(final_in_features, n_classes, s=s, m=margin)
#       elif loss_module == 'adacos':
#           self.final = AdaCos(final_in_features, n_classes, m=margin, theta_zero=theta_zero)
#       else:
#           self.final = nn.Linear(final_in_features, n_classes)

  def _init_params(self):
      nn.init.xavier_normal_(self.fc.weight)
      nn.init.constant_(self.fc.bias, 0)
      nn.init.constant_(self.bn.weight, 1)
      nn.init.constant_(self.bn.bias, 0)

  def forward(self, x, label):
      feature = self.extract_feat(x)
      if self.loss_module in ('arcface', 'cosface', 'adacos'):
          logits = self.final(feature, label)
      else:
          logits = self.final(feature)
      return logits

  def extract_feat(self, x):
      batch_size = x.shape[0]
      x = self.backbone(x)
      x = self.pooling(x).view(batch_size, -1)

      if self.use_fc:
          x = self.dropout(x)
          x = self.fc(x)
          x = self.bn(x)

      return x

In [26]:
# class AdaCos(nn.Module):
#   def __init__(self, in_features, out_features, m=0.50, ls_eps=0, theta_zero=math.pi/4):
#       super(AdaCos, self).__init__()
#       self.in_features = in_features
#       self.out_features = out_features
#       self.theta_zero = theta_zero
#       self.s = math.log(out_features - 1) / math.cos(theta_zero)
#       self.m = m
#       self.ls_eps = ls_eps  # label smoothing
#       self.weight = Parameter(torch.FloatTensor(out_features, in_features))
#       nn.init.xavier_uniform_(self.weight)

#   def forward(self, input, label):
#       # normalize features
#       x = F.normalize(input)
#       # normalize weights
#       W = F.normalize(self.weight)
#       # dot product
#       logits = F.linear(x, W)
#       # add margin
#       theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7))
#       target_logits = torch.cos(theta + self.m)
#       one_hot = torch.zeros_like(logits)
#       one_hot.scatter_(1, label.view(-1, 1).long(), 1)
#       if self.ls_eps > 0:
#           one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
#       output = logits * (1 - one_hot) + target_logits * one_hot
#       # feature re-scale
#       with torch.no_grad():
#           B_avg = torch.where(one_hot < 1, torch.exp(self.s * logits), torch.zeros_like(logits))
#           B_avg = torch.sum(B_avg) / input.size(0)
#           theta_med = torch.median(theta)
#           self.s = torch.log(B_avg) / torch.cos(torch.min(self.theta_zero * torch.ones_like(theta_med), theta_med))
#       output *= self.s

#       return output

In [27]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature
            m: margin
            cos(theta + m)
        """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

In [28]:
# class AddMarginProduct(nn.Module):
#     r"""Implement of large margin cosine distance: :
#     Args:
#         in_features: size of each input sample
#         out_features: size of each output sample
#         s: norm of input feature
#         m: margin
#         cos(theta) - m
#     """

#     def __init__(self, in_features, out_features, s=30.0, m=0.40):
#         super(AddMarginProduct, self).__init__()
#         self.in_features = in_features
#         self.out_features = out_features
#         self.s = s
#         self.m = m
#         self.weight = Parameter(torch.FloatTensor(out_features, in_features))
#         nn.init.xavier_uniform_(self.weight)

#     def forward(self, input, label):
#         # --------------------------- cos(theta) & phi(theta) ---------------------------
#         cosine = F.linear(F.normalize(input), F.normalize(self.weight))
#         phi = cosine - self.m
#         # --------------------------- convert label to one-hot ---------------------------
#         one_hot = torch.zeros(cosine.size(), device='cuda')
#         # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot
#         one_hot.scatter_(1, label.view(-1, 1).long(), 1)
#         # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
#         output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
#         output *= self.s
#         # print(output)

#         return output

In [29]:
class ShopeeScheduler(_LRScheduler):
    def __init__(self, optimizer, lr_start=5e-6, lr_max=1e-5,
                 lr_min=1e-6, lr_ramp_ep=5, lr_sus_ep=0, lr_decay=0.8,
                 last_epoch=-1):
        self.lr_start = lr_start
        self.lr_max = lr_max
        self.lr_min = lr_min
        self.lr_ramp_ep = lr_ramp_ep
        self.lr_sus_ep = lr_sus_ep
        self.lr_decay = lr_decay
        super(ShopeeScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)

        if self.last_epoch == 0:
            self.last_epoch += 1
            return [self.lr_start for _ in self.optimizer.param_groups]

        lr = self._compute_lr_from_epoch()
        self.last_epoch += 1

        return [lr for _ in self.optimizer.param_groups]

    def _get_closed_form_lr(self):
        return self.base_lrs

    def _compute_lr_from_epoch(self):
        if self.last_epoch < self.lr_ramp_ep:
            lr = ((self.lr_max - self.lr_start) /
                  self.lr_ramp_ep * self.last_epoch +
                  self.lr_start)

        elif self.last_epoch < self.lr_ramp_ep + self.lr_sus_ep:
            lr = self.lr_max

        else:
            lr = ((self.lr_max - self.lr_min) * self.lr_decay**
                  (self.last_epoch - self.lr_ramp_ep - self.lr_sus_ep) +
                  self.lr_min)
        return lr

In [30]:
def train_fn(dataloader,model,criterion,optimizer,device,scheduler,epoch):
    model.train()
    loss_score = AverageMeter()

    tk0 = tqdm(enumerate(dataloader), total=len(dataloader))
    for bi,d in tk0:

        batch_size = d[0].shape[0]


        images = d[0]
        targets = d[1]

        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        output = model(images,targets)

        loss = criterion(output,targets)

        loss.backward()
        optimizer.step()

        loss_score.update(loss.detach().item(), batch_size)
        tk0.set_postfix(Train_Loss=loss_score.avg,Epoch=epoch,LR=optimizer.param_groups[0]['lr'])

    if scheduler is not None:
            scheduler.step()

    return loss_score

In [31]:
def eval_fn(data_loader,model,criterion,device):

    loss_score = AverageMeter()

    model.eval()
    tk0 = tqdm(enumerate(data_loader), total=len(data_loader))

    with torch.no_grad():

        for bi,d in tk0:
            batch_size = d[0].size()[0]

            image = d[0]
            targets = d[1]

            image = image.to(device)
            targets = targets.to(device)

            output = model(image,targets)

            loss = criterion(output,targets)

            loss_score.update(loss.detach().item(), batch_size)
            tk0.set_postfix(Eval_Loss=loss_score.avg)

    return loss_score

In [32]:
import pandas as pd
from sklearn.model_selection import KFold

def create_folds(data, num_splits):
  # Khởi tạo KFold với số fold là 5
  kf = KFold(n_splits=num_splits, shuffle=True, random_state=42)

  # Thêm cột fold vào DataFrame và gán giá trị cho từng fold
  for fold, (train_index, val_index) in enumerate(kf.split(data), 1):
    data.loc[val_index, 'fold'] = int(fold)
  return data

In [33]:
data = pd.read_csv('../input/shopee-product-matching/train.csv')
data['filepath'] = data['image'].apply(lambda x: os.path.join('../input/shopee-product-matching', 'train_images', x))
data = create_folds(data, num_splits=5)

In [34]:
data.head()

Unnamed: 0,posting_id,image,image_phash,title,label_group,filepath,fold
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,../input/shopee-product-matching/train_images/...,1.0
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,../input/shopee-product-matching/train_images/...,4.0
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,../input/shopee-product-matching/train_images/...,4.0
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,../input/shopee-product-matching/train_images/...,2.0
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,../input/shopee-product-matching/train_images/...,1.0


In [35]:
encoder = LabelEncoder()
data['label_group'] = encoder.fit_transform(data['label_group'])
data = data.head(2000)

In [36]:
def run():

  train = data[data['fold']!=1].reset_index(drop=True)
  valid = data[data['fold']==1].reset_index(drop=True)
  # Defining DataSet
  train_dataset = EfficientDataset(
      csv=train,
      transforms=get_train_transforms(),
  )

  valid_dataset = EfficientDataset(
      csv=valid,
      transforms=get_valid_transforms(),
  )

  train_loader = torch.utils.data.DataLoader(
      train_dataset,
      batch_size=TRAIN_BATCH_SIZE,
      pin_memory=True,
      drop_last=True,
      num_workers=NUM_WORKERS
  )

  valid_loader = torch.utils.data.DataLoader(
      valid_dataset,
      batch_size=VALID_BATCH_SIZE,
      num_workers=NUM_WORKERS,
      shuffle=False,
      pin_memory=True,
      drop_last=False,
  )

  # Defining Device
  if torch.cuda.is_available():
    device = torch.device("cuda")
  else:
    device=torch.device('cpu')

  # Defining Model for specific fold
  model = EfficientNet(**model_params)
  # model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device))
  model.to(device)

  #DEfining criterion
  criterion = fetch_loss()
  criterion.to(device)

  optimizer = torch.optim.Adam(model.parameters(), lr=scheduler_params['lr_start'])

  #Defining LR SCheduler
  scheduler = ShopeeScheduler(optimizer,**scheduler_params)

  # TRAIN LOOP
  best_loss = 10000
  for epoch in range(EPOCHS):
      train_loss = train_fn(train_loader, model,criterion, optimizer, device,scheduler=scheduler,epoch=epoch)

      valid_loss = eval_fn(valid_loader, model, criterion,device)
      print(f'val_loss_{epoch}: {valid_loss.avg}')
      if valid_loss.avg < best_loss:
          best_loss = valid_loss.avg
          torch.save(model.state_dict(),f'model_{model_name}_IMG_SIZE_{DIM[0]}_{loss_module}.bin')
          print('best model found for epoch {}'.format(epoch))
      torch.save(model.state_dict(),f'model_{model_name}_IMG_SIZE_{DIM[0]}_EPOCH_{epoch}_{loss_module}.bin')

In [37]:
run()

Building Model Backbone for efficientnet_b3 model


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_ra2-cf984f9c.pth
100%|██████████| 101/101 [00:57<00:00,  1.74it/s, Epoch=0, LR=1e-5, Train_Loss=24] 
100%|██████████| 24/24 [00:05<00:00,  4.53it/s, Eval_Loss=23.8]


val_loss_0: 23.84964332879368
best model found for epoch 0


100%|██████████| 101/101 [00:56<00:00,  1.78it/s, Epoch=1, LR=7e-5, Train_Loss=22] 
100%|██████████| 24/24 [00:04<00:00,  4.94it/s, Eval_Loss=23.8]


val_loss_1: 23.847656967123246
best model found for epoch 1


100%|██████████| 101/101 [00:56<00:00,  1.79it/s, Epoch=2, LR=0.00013, Train_Loss=20.1]
100%|██████████| 24/24 [00:04<00:00,  4.86it/s, Eval_Loss=23.5]


val_loss_2: 23.457274245219814
best model found for epoch 2


100%|██████████| 101/101 [00:56<00:00,  1.79it/s, Epoch=3, LR=0.000128, Train_Loss=18] 
100%|██████████| 24/24 [00:05<00:00,  4.79it/s, Eval_Loss=23.1]


val_loss_3: 23.133458000561586
best model found for epoch 3


100%|██████████| 101/101 [00:56<00:00,  1.79it/s, Epoch=4, LR=8.24e-5, Train_Loss=16] 
100%|██████████| 24/24 [00:05<00:00,  4.63it/s, Eval_Loss=22.9]


val_loss_4: 22.92686398160053
best model found for epoch 4
