In [52]:
import torch
import timm
import cv2
import os
import swifter

import numpy as np
import pandas as pd 
import torch.nn as nn

import torch.nn.functional as F

import albumentations as A
from albumentations.pytorch import ToTensorV2

from timm.scheduler import CosineLRScheduler

from torch.utils.data import Dataset, DataLoader

from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import precision_recall_curve, auc, average_precision_score

from IPython.display import display

from PIL import Image
from tqdm import tqdm

import glob
import random
from gc import collect as garbage_collector

from dataclasses import dataclass
from safetensors.torch import load_file

In [53]:
class TrunkModel(nn.Module):
    """Backbone network created through ``timm.create_model``.

    The model is requested with ``num_classes=0``; timm documents this as
    building the network without its classifier layer, so ``forward`` returns
    whatever feature tensor the timm model produces in that configuration.

    Args:
        model_name: timm model identifier forwarded to ``timm.create_model``.
        pretrained: forwarded to ``timm.create_model``. Defaults to ``True``
            (the value that used to be hard-coded); pass ``False`` when the
            weights are loaded from a checkpoint right after construction.
    """

    def __init__(self, model_name, pretrained=True):
        super().__init__()
        self.trunk_model = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            num_classes=0
        )

        # The head-removal helper below is deliberately not invoked here.
        print('----TRUNK----')
        print(self.trunk_model)

    def __delete_mlp_head(self):
        """Remove the first ``nn.Linear`` sub-module of the trunk, if any.

        Stores the removed layer's ``in_features`` in
        ``self.mlp_module_in_features``; the attribute is ``None`` when the
        trunk contains no linear sub-module, in which case nothing is deleted.
        """
        self.mlp_module_in_features = None

        for name, module in self.trunk_model.named_modules():
            # ``name`` is empty for the trunk itself, which cannot be detached.
            if name and isinstance(module, nn.Linear):
                break
        else:
            return

        self.mlp_module_in_features = module.in_features

        # ``named_modules`` yields dotted paths for nested layers, so walk down
        # to the direct parent before deleting the leaf attribute.
        *parent_path, leaf_name = name.split('.')
        parent = self.trunk_model
        for part in parent_path:
            parent = getattr(parent, part)
        delattr(parent, leaf_name)

        print(
            "MLP head was deleted"
        )

    def forward(self, x):
        """Run ``x`` through the timm model and return its output."""
        return self.trunk_model(x)


class HeadModel(nn.Module):
    """Projection head assembled from an iterable of layers.

    Args:
        mlp_architecture: iterable of modules; they are wrapped in a fresh
            ``nn.Sequential``. The first layer must expose ``weight``/``bias``
            (it gets Xavier-normal weights and a zero bias) and the second one
            must expose ``weight``/``bias`` as well (set to ones and zeros).
    """

    def __init__(self, mlp_architecture):
        super().__init__()
        layers = list(mlp_architecture)
        self.head = nn.Sequential(*layers)
        self._init_params()
        print('----HEAD----')
        print(self.head)

    def _init_params(self):
        """Re-initialise the first two layers of the head in place."""
        projection = self.head[0]
        nn.init.xavier_normal_(projection.weight)
        nn.init.zeros_(projection.bias)

        norm = self.head[1]
        nn.init.ones_(norm.weight)
        nn.init.zeros_(norm.bias)

    def forward(self, x):
        """Apply the sequential head to ``x``."""
        return self.head(x)
class EmbeddingModel(nn.Module):
    """Runs a trunk, then a head, and L2-normalises the result along dim 1."""

    def __init__(self, trunk, head):
        super().__init__()
        self.trunk, self.head = trunk, head

    def forward(self, x):
        """Return unit-length embeddings for the batch ``x``."""
        features = self.trunk(x)
        embedding = self.head(features)
        return F.normalize(embedding, p=2, dim=1)

class KakUchitToBlya(Dataset):
    """Dataset yielding image pairs together with their ids and label.

    Every item is a dict with keys ``image1``, ``image2``, ``id1``, ``id2``
    and ``labels``. When a pair cannot be loaded or transformed, both images
    are replaced by zero tensors and the label by ``FAILED_PAIR_LABEL`` so
    that downstream code can mask the pair out.

    Args:
        dataframe: frame with the columns ``variantid_1``, ``variantid_2``,
            ``base_title_image``, ``cand_title_image`` and ``is_double``.
        transforms: callable used as ``transforms(image=img)['image']``, or a
            falsy value to return the raw RGB arrays.
        path_to_images: prefix prepended to ``<image name>.jpg``.
        img_size: side length of the zero placeholder images. ``None``
            (default) keeps the previous behaviour of reading the module-level
            ``IMG_SIZE`` at access time.
    """

    # Sentinel label marking a pair whose images could not be prepared.
    FAILED_PAIR_LABEL = 255

    def __init__(self, dataframe, transforms, path_to_images, img_size=None):
        self.data = self.create_image_names_pairs(dataframe)
        self.transforms = transforms
        self.path = path_to_images
        self.img_size = img_size

    @staticmethod
    def create_image_names_pairs(df):
        """Return one ``(id1, id2, image1_name, image2_name, label)`` tuple per row."""
        return list(zip(df.variantid_1, df.variantid_2, df.base_title_image, df.cand_title_image, df.is_double))

    def __len__(self):
        return len(self.data)

    def _load_image(self, image_name):
        """Read ``<path><image_name>.jpg`` as RGB and apply the transforms."""
        file_path = self.path + image_name + '.jpg'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'cannot read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image

    def __getitem__(self, idx):
        id1, id2, img1, img2, target = self.data[idx]

        try:
            image1 = self._load_image(img1)
            image2 = self._load_image(img2)
        except Exception:
            # Best-effort fallback for broken or missing pairs. ``Exception``
            # (not a bare ``except``) lets KeyboardInterrupt / SystemExit
            # propagate so a long run can still be stopped.
            size = IMG_SIZE if self.img_size is None else self.img_size
            image1 = torch.zeros((3, size, size))
            image2 = torch.zeros((3, size, size))
            target = self.FAILED_PAIR_LABEL

        return {
            "image1": image1,
            "image2": image2,
            "id1": id1,
            "id2": id2,
            "labels": torch.tensor(target, dtype=torch.long)
        }

class OnlineContrastiveLoss(nn.Module):
    """Contrastive loss on cosine distance with three selectable strategies.

    Pairs whose label equals ``IGNORE_LABEL`` (255) are dropped before the
    loss is computed. The remaining labels are expected to be 0 or 1.

    Strategies:
        ``'online'``: only selected pairs contribute. Label-0 pairs are kept
            when their distance is below the largest label-1 distance, and
            label-1 pairs when their distance is above the smallest label-0
            distance; if the other group has at most one pair, the mean of
            the pair's own group is used as the threshold instead.
        ``'common'``: the classic contrastive loss over all pairs,
            ``0.5 * (y * d**2 + (1 - y) * relu(margin - d)**2)``.
        ``'combined'``: the average of the two, each mean-reduced.

    Args:
        margin: distance below which label-0 pairs are penalised.
        strategy: one of ``STRATEGIES``.

    Raises:
        ValueError: if ``strategy`` is not one of ``STRATEGIES``.
    """

    IGNORE_LABEL = 255
    STRATEGIES = ('online', 'common', 'combined')

    def __init__(
        self, margin: float = 0.5, strategy='online'
    ) -> None:

        super().__init__()
        if strategy not in self.STRATEGIES:
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {self.STRATEGIES}"
            )
        self.margin = margin
        self.distance_metric = self._cosine_distance
        self.strategy = strategy

    @staticmethod
    def _cosine_distance(x, y):
        """Cosine distance ``1 - cos(x, y)`` computed row by row."""
        return 1 - F.cosine_similarity(x, y)

    def _online_loss(self, distances, labels, size_average):
        """Loss over the selected label-0 / label-1 pairs only."""
        negs = distances[labels == 0]
        poss = distances[labels == 1]

        negative_pairs = negs[negs < (poss.max() if len(poss) > 1 else negs.mean())]
        positive_pairs = poss[poss > (negs.min() if len(negs) > 1 else poss.mean())]

        positive_loss = positive_pairs.pow(2).sum()
        negative_loss = F.relu(self.margin - negative_pairs).pow(2).sum()
        loss = positive_loss + negative_loss
        if size_average:
            # The epsilon keeps the division finite when no pair was selected.
            loss = loss / (len(negative_pairs) + len(positive_pairs) + 1e-8)
        return loss

    def _common_loss(self, distances, labels, size_average):
        """Classic contrastive loss over every pair."""
        targets = labels.float()
        losses = 0.5 * (
            targets * distances.pow(2)
            + (1 - targets) * F.relu(self.margin - distances).pow(2)
        )
        # ``mean`` of an empty tensor is NaN; fall back to ``sum`` (0.0) so a
        # batch in which every pair was ignored does not poison the gradients.
        if size_average and losses.numel() > 0:
            return losses.mean()
        return losses.sum()

    def forward(self, embedding1, embedding2, labels, size_average=True):
        """Compute the loss for a batch of embedding pairs.

        Args:
            embedding1: embeddings of the first item of every pair.
            embedding2: embeddings of the second item of every pair.
            labels: per-pair labels; entries equal to ``IGNORE_LABEL`` are skipped.
            size_average: average (``True``) or sum (``False``) the per-pair
                terms. Ignored by the ``'combined'`` strategy, which always
                averages.

        Returns:
            A scalar tensor with the loss.
        """
        keep = labels != self.IGNORE_LABEL
        labels = labels[keep]
        distances = self.distance_metric(embedding1[keep], embedding2[keep])

        if self.strategy == 'online':
            return self._online_loss(distances, labels, size_average)
        if self.strategy == 'common':
            return self._common_loss(distances, labels, size_average)
        if self.strategy == 'combined':
            online = self._online_loss(distances, labels, True)
            common = self._common_loss(distances, labels, True)
            return (online + common) * 0.5
        # Reached only if ``self.strategy`` was changed after construction.
        raise ValueError(
            f"Unknown strategy {self.strategy!r}; expected one of {self.STRATEGIES}"
        )

In [54]:
def get_merged_df_from_path(path, columns_for_filter=None):
    """Read every ``<path>*.parquet`` file and concatenate them into one frame.

    Args:
        path: prefix that is concatenated with ``'*.parquet'`` to build the
            glob pattern, so a directory must end with a path separator.
        columns_for_filter: optional list of columns to keep; a falsy value
            returns all columns.

    Returns:
        A single DataFrame with a fresh ``RangeIndex``. Files are read in
        sorted name order so the row order does not depend on the filesystem.

    Raises:
        ValueError: if no file matches the pattern.
    """
    pattern = path + '*.parquet'
    parquet_files = sorted(glob.glob(pattern))
    if not parquet_files:
        raise ValueError(f'No parquet files match the pattern {pattern}')

    frames = [pd.read_parquet(parquet, engine='pyarrow') for parquet in parquet_files]
    all_data_df = pd.concat(frames, ignore_index=True)

    return all_data_df[columns_for_filter] if columns_for_filter else all_data_df

In [55]:
# Notebook-wide settings
# NOTE(review): SEED is not referenced by any of the visible cells (the shuffle
# before the fold split hard-codes random_state=42) — wire it in or drop it.
# The literal itself is widely read as a hate symbol; consider a neutral value.
SEED = 1488

# --------- MODEL ----------
# timm model identifier handed to TrunkModel.
MODEL_NAME = 'resnet50.a1_in1k'
# Output width of the head's Linear / BatchNorm1d layers.
FC_DIM = 768

# --------- TRAINING ---------
# NOTE(review): N_EPOCH and CURRENT_FOLD are not used by the visible cells.
N_EPOCH = 1
# Side length used by A.Resize and by the dataset's zero placeholder images.
IMG_SIZE = 224
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
CURRENT_FOLD = 1

# --------- DATA -----------
# Absolute, machine-specific locations; each is used as a string prefix, hence
# the trailing separator.
PATH_TO_TRAIN_PARQUETS = r'C:\avito\tables\train/'
PATH_TO_TRAIN_IMAGES = r'C:\avito\images\train\images/'

PATH_TO_TEST_PARQUETS = r'C:\avito\tables\test/'
PATH_TO_TEST_IMAGES = r'C:\avito\images\test\images/'

PATH_TO_TXT = './train_images_zip_paths.txt'

# Columns kept when the training parquets are loaded.
COLUMNS = ['base_item_id',
           'cand_item_id',
           'base_subcategory_name',
           'cand_subcategory_name',
           'group_id', 
           'action_date', 
           'base_title_image', 
           'cand_title_image', 
           'is_double']


In [56]:
def _build_pipeline():
    """Resize to IMG_SIZE x IMG_SIZE, normalise, convert to a CHW tensor."""
    steps = [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(),
        ToTensorV2(),
    ]
    return A.Compose(steps)


# Augmentations (horizontal/vertical flip, rotation) are switched off, so the
# train and test pipelines consist of the same steps; each gets its own
# Compose instance.
train_transforms = _build_pipeline()
test_transforms = _build_pipeline()


In [57]:
# Load all training parquets restricted to COLUMNS, then rename the two
# item-id columns to the variantid_* names used by the dataset class.
df = get_merged_df_from_path(PATH_TO_TRAIN_PARQUETS, COLUMNS)
df = df.rename(
    columns={
        'base_item_id': 'variantid_1',
        'cand_item_id': 'variantid_2',
    }
)

In [62]:
# Fix the row order: sort by the id pair first, then shuffle with a fixed seed.
n_rows = len(df)
df = df.sort_values(by=['variantid_1', 'variantid_2'])
df = df.sample(n=n_rows, random_state=42)

# Group-aware stratified split: stratified on is_double, grouped by group_id.
sgkf = StratifiedGroupKFold(n_splits=5)

# One entry per fold, keyed by the fold number as a string; the index arrays
# produced by the splitter are positional (to be used with .iloc).
fold_mapping = {
    str(fold_number): {'train_idxs': [], 'val_idxs': []}
    for fold_number in range(sgkf.n_splits)
}

fold_splits = sgkf.split(df, df['is_double'], groups=df['group_id'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    fold_mapping[str(fold)].update(train_idxs=train_idx, val_idxs=val_idx)

In [10]:
# Backbone.
trunk_model = TrunkModel(MODEL_NAME)

# Projection head: Linear -> BatchNorm1d on top of the trunk's feature vector.
trunk_feature_dim = trunk_model.trunk_model.num_features
head_model_architecture = nn.Sequential(
    nn.Linear(trunk_feature_dim, FC_DIM),
    nn.BatchNorm1d(FC_DIM),
)
head_model = HeadModel(head_model_architecture)

# Full embedding network, moved to the selected device.
model = EmbeddingModel(trunk_model, head_model)
model = model.to(DEVICE)

INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/resnet50.a1_in1k)
INFO:timm.models._hub:[timm/resnet50.a1_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


----TRUNK----
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [None]:
# Out-of-fold inference: for every fold, load that fold's checkpoint and score
# the fold's validation pairs with the cosine similarity of their embeddings.
# Rows start as NaN so that anything left unfilled cannot be mistaken for a
# genuine similarity of 0.
oof_preds = np.full(len(df), np.nan)

# The number of folds comes from the split itself instead of a repeated literal.
for fold in range(len(fold_mapping)):

    w = load_file(f'./resnet/resnet_fold{fold}.safetensors')

    model.load_state_dict(w)
    model.eval()

    val_idx = fold_mapping[str(fold)]['val_idxs']
    val_dataset = KakUchitToBlya(dataframe=df.iloc[val_idx],
                                 transforms=test_transforms,
                                 path_to_images=PATH_TO_TRAIN_IMAGES)
    # NOTE(review): batch_size=1 is slow; the loop body below is written for
    # arbitrary batch sizes, so this can be raised if memory allows.
    val_loader = DataLoader(val_dataset,
                            shuffle=False,
                            batch_size=1)

    fold_preds = []
    with torch.no_grad():
        for inputs in tqdm(val_loader):

            img1 = inputs.pop('image1').to(DEVICE)
            img2 = inputs.pop('image2').to(DEVICE)
            labels = inputs.pop('labels').to(DEVICE)

            assert img1.shape == img2.shape

            # A single forward pass for both sides of every pair: the first
            # half of the output belongs to image1, the second to image2.
            outputs = model(torch.cat([img1, img2], dim=0))
            emb1, emb2 = torch.chunk(outputs, 2)

            batch_cossim = F.cosine_similarity(emb1, emb2)
            # 255 is the dataset's marker for pairs whose images failed to load.
            batch_cossim[labels == 255] = np.nan

            fold_preds.extend(batch_cossim.tolist())

    assert len(val_idx) == len(fold_preds)

    # val_idx holds row positions in df, which match positions in oof_preds.
    oof_preds[val_idx] = fold_preds


In [10]:
# Attach the out-of-fold cosine similarities. oof_preds is ordered by row
# position in df; entries are NaN for pairs flagged with the 255 label.
df['cossims_resnet'] = oof_preds

In [13]:
# Persist the pair ids together with their similarity score.
output_columns = ['variantid_1', 'variantid_2', 'cossims_resnet']
df[output_columns].to_parquet('./cossim_resnet.parquet')