In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.nn.modules import CrossEntropyLoss

from torchvision import transforms
from sklearn.metrics import accuracy_score, f1_score

from utils import ShopeeTrainDataset, ShopeeImageDataset, DistancePredict, get_metric, ShopeeScheduler, validate, NDCG

import timm

import os
from tqdm.notebook import tqdm
import math

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(1)

In [3]:
class config:
    PATH = "./data/"

    model_name = "eca_nfnet_l0"

    threshold = 0.7
    
    epoch = 15
    batch_size = 16
    num_workers=8
    prefetch_factor =8
    report_every_batch = 20
    
    backbone_lr = 1e-4
    arcface_lr = 1e-2
    lr = 1e-3

    gamma = 0.1
    step_size = 5

    margin=0.3
    margin_set = None
    
    backbone_scheduler_params = {
        "lr_start": 1e-5,
        "lr_max": 1e-4,     # 1e-5 * 32 (if batch_size(=32) is different then)
        "lr_min": 1e-6,
        "lr_warmup_ep": 4,
        "lr_sus_ep": 0,
        "lr_decay": 0.8,
        "step_size": 1,
    }
    scheduler_params = {
        "lr_start": 1e-4,
        "lr_max": 1e-3,     # 1e-5 * 32 (if batch_size(=32) is different then)
        "lr_min": 1e-5,
        "lr_warmup_ep": 4,
        "lr_sus_ep": 0,
        "lr_decay": 0.8,
        "step_size": 1,
    }

In [4]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# 建立一个filehandler来把日志记录在文件里，级别为debug以上
fh = logging.FileHandler("log/train_{}.log".format(config.model_name))
fh.setLevel(logging.DEBUG)
# 建立一个streamhandler来把日志打在CMD窗口上，级别为error以上
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# 设置日志格式
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(lineno)s %(message)s",datefmt="%Y-%m-%d %H:%M:%S")
ch.setFormatter(formatter)
fh.setFormatter(formatter)
#将相应的handler添加在logger对象中
logger.addHandler(ch)
logger.addHandler(fh)

In [5]:
class ArcFace(nn.Module):
    """ NN module for projecting extracted embeddings onto the sphere surface """
    
    def __init__(self, in_features, out_features, s=30, m=0.5):
        super(ArcFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        self.arc_min = math.cos(math.pi - self.m)
        self.margin_min = math.sin(math.pi - self.m) * self.m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
    
    def _update_margin(self, new_margin):
        self.m = new_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        self.arc_min = math.cos(math.pi - self.m)
        self.margin_min = math.sin(math.pi - self.m) * self.m

    def forward(self, embedding, label):
        cos = F.linear(F.normalize(embedding), F.normalize(self.weight))
        sin = torch.sqrt(1.0 - torch.pow(cos, 2)).clamp(0, 1)
        phi = cos * self.cos_m - sin * self.sin_m
        phi = torch.where(cos > self.arc_min, phi, cos - self.margin_min)

        one_hot = torch.zeros(cos.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        logits = one_hot * phi + (1.0 - one_hot) * cos
        logits *= self.s
        return logits

In [6]:
class Model(nn.Module):
    def __init__(self, model_name, n_classes, margin=0.5, fc_dim=1024):
        super(Model, self).__init__()
        logger.info("Building Model Backbone for {} model".format(model_name))
        self.model_name = model_name
        self.backbone = timm.create_model(model_name, pretrained=True)
        
        if "eca_nfnet" in model_name:
            feat_size = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()
                
        elif "efficientnet" in model_name:
            feat_size = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()
        
        self.pooling =  nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(p=0.1)
        self.fc = nn.Linear(feat_size, fc_dim)
        self.bn = nn.BatchNorm1d(fc_dim)
        self.margin = ArcFace(fc_dim, n_classes, m=margin)
        self._init_params()

    def _init_params(self):
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, x, labels=None):
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)
        
        x = self.dropout(x)
        x = self.fc(x)
        x = self.bn(x)
        x = F.normalize(x,dim=1)
        if labels is not None:
            return self.margin(x,labels)
        else:
            return x

In [7]:
def read_dataset(name="train"):
    assert name in {"train", "test"}
    df = pd.read_csv(config.PATH + '{}.csv'.format(name))
    df["image_path"] = config.PATH + '{}_images/'.format(name) + df['image']

    return df

In [8]:
train = read_dataset("train")
label_group_dict = train.groupby("label_group").posting_id.agg("unique").to_dict()
train['target'] = train.label_group.map(label_group_dict)
train.head()


Unnamed: 0,posting_id,image,image_phash,title,label_group,image_path,target
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,./data/train_images/0000a68812bc7e98c42888dfb1...,"[train_129225211, train_2278313361]"
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,./data/train_images/00039780dfc94d01db8676fe78...,"[train_3386243561, train_3423213080]"
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,./data/train_images/000a190fdd715a2a36faed16e2...,"[train_2288590299, train_3803689425]"
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,./data/train_images/00117e4fc239b1b641ff08340b...,"[train_2406599165, train_3342059966]"
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,./data/train_images/00136d1cf4edede0203f32f05f...,"[train_3369186413, train_921438619]"


In [9]:
n_classes = len(train["label_group"].unique())
num = int(0.2 * n_classes)
np.random.seed(1)
test_group = np.random.choice(train["label_group"].unique(), num)
#test_group
df_test = train[train["label_group"].isin(test_group)]
df_train = train[~train["label_group"].isin(test_group)]
df_test.reset_index(drop=True, inplace=True)
print(len(df_train), len(df_test))

28194 6056


In [10]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

train_dataset = ShopeeTrainDataset(df_train, transform = transform)
test_dataset = ShopeeImageDataset(df_test, transform = transform)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df['label_class'] = self.df['label_group'].map(class_mapping)


In [11]:
n_classes = len(train_dataset.df['label_group'].unique())
n_classes

9024

In [12]:
def get_image_feature(model, dataloader):
    image_features = []
    with torch.no_grad():
        for images in tqdm(dataloader):
            images = images.to(device)
            features = model(images)
            image_features.append(features)
            del images
    image_features = torch.cat(image_features, axis=0)

    torch.cuda.empty_cache()   
    return image_features

In [13]:
def train_fn(data_loader, model, criterion, optimizers, schedulers):
    model.train()
    fin_loss = 0.0
    
    for batch_id, (images, labels) in enumerate(tqdm(data_loader)):
            images, labels = images.to(device), labels.to(device)
            output = model(images, labels)
            loss = criterion(output, labels)
            for opt in optimizers:
                opt.zero_grad()
            loss.backward()
            for opt in optimizers:
                opt.step()
            fin_loss += loss.item() 

            if batch_id % config.report_every_batch == 0:
                logger.debug("Batch: {}/{}  loss: {:4f}".format((batch_id+1), len(data_loader), loss.item()))
        
    for scheduler in schedulers:
        scheduler.step()

    torch.cuda.empty_cache()

    return fin_loss / len(data_loader)

def eval_fn(data_loader, model, df):
    model.eval()

    with torch.no_grad():
        image_features = get_image_feature(model, data_loader)
        ndcg = NDCG(image_features, df)

        del image_features
        torch.cuda.empty_cache()

        return ndcg

In [14]:
def run_train(model_name, max_epochs, threshold, margin, margin_set = None):
    if not os.path.exists("model/{model_name}".format(model_name=model_name)):
        os.makedirs("model/{model_name}".format(model_name=model_name))
    
    model = Model(model_name, n_classes=n_classes, margin=margin).to(device)
    backbone_params = model.backbone.parameters()
    backbone_params_id = list(map(id, model.backbone.parameters()))
    arcface_params = model.margin.parameters()
    arcface_params_id = list(map(id, model.margin.parameters()))
    other_params = filter(lambda p: id(p) not in backbone_params_id and id(p) not in arcface_params_id, model.parameters())

    params = [
        {"params": backbone_params, "lr": config.backbone_lr},
        {"params": arcface_params, "lr": config.arcface_lr},
        {"params": other_params}
    ]

    optimizer = optim.AdamW(params, lr=config.lr)
    scheduler  = optim.lr_scheduler.StepLR(optimizer, step_size=config.step_size, gamma=config.gamma)

    criterion = CrossEntropyLoss()

    for epoch in range(max_epochs):
        # update margin
        if margin_set is not None and epoch in margin_set:
            margin = margin_set[epoch]
            model.margin._update_margin(margin)
            logger.info("Epoch: {}  margin: {:2f}".format(epoch, margin))
        # Train
        logger.info("-----Epoch: {} Train-----".format(epoch))
        train_avg_loss = train_fn(train_dataloader, model, criterion, [optimizer], [scheduler])
        logger.info("Epoch: {}  avg loss: {:4f}".format(epoch, train_avg_loss))

        # eval
        logger.info("-----Epoch: {} Validation-----".format(epoch))
        ndcg = eval_fn(test_dataloader, model, df_test)
        logger.info("Epoch: {}  NDCG: {:4f}".format(epoch, ndcg.mean()))
        
        torch.save(model.state_dict(), "model/{model_name}/{model_name}_epoch_{epoch}.pt".format(model_name = model_name, epoch=epoch))

In [15]:
run_train(model_name = config.model_name, max_epochs = config.epoch, threshold = config.threshold, margin = config.margin, margin_set=config.margin_set)

2021-11-09 06:22:45 - root - INFO - 4 Building Model Backbone for eca_nfnet_l0 model
2021-11-09 06:22:46 - timm.models.helpers - INFO - 183 Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l0_ra2-e3e9ac50.pth)
2021-11-09 06:22:48 - root - INFO - 30 -----Epoch: 0 Train-----


  0%|          | 0/1763 [00:00<?, ?it/s]

In [None]:
torch.cuda.empty_cache()