In [1]:
import os
import gc
import cv2
import time
import copy
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torch.nn.functional as F

import albumentations as A
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Single source of hyperparameters and runtime settings read by the cells below.
config = dict(
    seed=42,                 # passed to seed_everything and to StratifiedKFold
    n_folds=3,               # number of CV folds == number of saved checkpoints
    num_workers=2,           # DataLoader worker processes
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    img_size=380,            # side length used by the image cache and by A.Resize
    batch_size=16,
    lr_head=1e-3,            # learning rate while only the classifier is trained
    lr_body=1e-5,            # learning rate once the whole network is unfrozen
    epochs_warmup=1,         # head-only epochs before fine-tuning
    epochs_tune=25,          # upper bound on fine-tuning epochs (also cosine T_max)
    model_name='efficientnet_b4',
    grad_accum=1,            # batches accumulated per optimizer step
)

In [3]:
# Directory that holds both CSV files and the image files. Defaults to the
# Kaggle mount this notebook was written for; set the AVITO_DATA_DIR
# environment variable to run the notebook against a copy stored elsewhere.
PATH = os.environ.get('AVITO_DATA_DIR', '/kaggle/input/aaa-ml-comp/avito-auto-moderation')
TRAIN_FILE = 'train_v2.csv'              # labelled training table
TEST_FILE = 'sample_submission_v2.csv'   # test image list, reused as the submission template

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook can draw from.

    Covers Python's ``random`` module, NumPy's global generator, and PyTorch
    (CPU and all CUDA devices), and asks cuDNN for deterministic kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import: ``random`` is not part of the notebook's import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs; keep it off.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs up front so the cells below start from a fixed random state.
seed_everything(config['seed'])

In [5]:
# Load the labelled training table and report its size before cleaning.
df_full = pd.read_csv(os.path.join(PATH, TRAIN_FILE))
print(f"—Ä–∞–∑–º–µ—Ä –¥–æ –æ—á–∏—Å—Ç–∫–∏: {len(df_full)}")

# Remove duplicate / junk images, identified by the numeric part of the file name.
trash_ids = [224, 278, 669, 148, 399, 613]
files_to_drop = ["{}.jpg".format(idx) for idx in trash_ids]
_keep_mask = ~df_full['image'].isin(files_to_drop)
df_full = df_full.loc[_keep_mask].reset_index(drop=True)

# Correct labels that were found to be wrong:
# listed false negatives become class 1, listed false positives become class 0.
false_negatives_ids = [24, 82, 102, 253, 346, 427, 455, 511, 828, 936]
false_positives_ids = [98, 248, 542, 670, 820, 925]
fix_to_class_1 = ["{}.jpg".format(idx) for idx in false_negatives_ids]
fix_to_class_0 = ["{}.jpg".format(idx) for idx in false_positives_ids]

for _files, _new_label in ((fix_to_class_1, 1), (fix_to_class_0, 0)):
    df_full.loc[df_full['image'].isin(_files), 'label'] = _new_label

print(f"—Ä–∞–∑–º–µ—Ä –ø–æ—Å–ª–µ –æ—á–∏—Å—Ç–∫–∏: {len(df_full)}")

—Ä–∞–∑–º–µ—Ä –¥–æ –æ—á–∏—Å—Ç–∫–∏: 1143
—Ä–∞–∑–º–µ—Ä –ø–æ—Å–ª–µ –æ—á–∏—Å—Ç–∫–∏: 1137


In [None]:
def load_images_to_ram(df, folder_path, size=380):
    """Read every image listed in ``df['image']`` into an in-memory dict.

    Images are converted from OpenCV's BGR order to RGB. Any image whose
    height or width exceeds ``size`` is resized to ``size`` x ``size``;
    smaller images are stored unchanged.

    Files that OpenCV cannot read (missing or corrupt) are skipped and listed
    in a printed warning instead of crashing inside ``cv2.cvtColor``; such
    names are simply absent from the returned dict.

    Args:
        df: DataFrame with an ``image`` column of file names.
        folder_path: Directory that contains the image files.
        size: Maximum side length kept in the cache.

    Returns:
        dict mapping file name -> RGB image array as returned by OpenCV.
    """
    cache = {}
    unreadable = []
    print(f"–∑–∞–≥—Ä—É–∑–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π –≤ –ø–∞–º—è—Ç—å (size: {size})...")
    for name in tqdm(df['image'].values):
        path = os.path.join(folder_path, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            unreadable.append(name)
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Check both sides: a wide but short image must be shrunk as well.
        if max(img.shape[:2]) > size:
            img = cv2.resize(img, (size, size))
        cache[name] = img
    if unreadable:
        # print() rather than warnings.warn(): the notebook silences all warnings.
        print(f"warning: {len(unreadable)} image(s) could not be read and were skipped, e.g. {unreadable[:10]}")
    return cache

In [7]:
# Read the test table and stack it with the training table so that every
# image referenced anywhere in the notebook is listed in one frame.
df_test = pd.read_csv(os.path.join(PATH, TEST_FILE))
all_images_df = pd.concat([df_full, df_test])

# One row per distinct file name, so each image is read from disk only once.
unique_images = (
    all_images_df['image']
    .drop_duplicates()
    .to_frame()
    .reset_index(drop=True)
)
images_cache = load_images_to_ram(unique_images, PATH, size=config['img_size'])

–∑–∞–≥—Ä—É–∑–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π –≤ –ø–∞–º—è—Ç—å (size: 380)...


  0%|          | 0/1518 [00:00<?, ?it/s]

In [8]:
# augmentations
def get_transforms(data_type, img_size):
    """Build the albumentations pipeline for one data split.

    Every pipeline starts with a resize to ``img_size`` x ``img_size`` and
    ends with normalisation plus conversion to a tensor. Only the ``'train'``
    pipeline inserts random augmentations in between.

    Args:
        data_type: ``'train'`` for the augmented pipeline; any other value
            (valid / test) yields the deterministic one.
        img_size: Target side length in pixels.

    Returns:
        An ``A.Compose`` pipeline.
    """
    resize = A.Resize(img_size, img_size)
    to_model_input = [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]

    # valid / test: no randomness at all
    if data_type != 'train':
        return A.Compose([resize, *to_model_input])

    # At most one of these image-quality degradations is applied per sample.
    degradation = A.OneOf([
        A.MotionBlur(blur_limit=5, p=0.5),
        A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
        A.ISONoise(p=0.5),
        A.ImageCompression(quality_lower=60, quality_upper=90, p=0.5),
    ], p=0.5)

    random_augs = [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=15, p=0.5),
        degradation,
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    ]
    return A.Compose([resize, *random_augs, *to_model_input])

In [9]:
# dataset class
class AvitoDataset(Dataset):
    """Dataset that serves images from an in-memory cache.

    Args:
        df: DataFrame with an ``image`` column (file names) and, optionally,
            a ``label`` column.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``['image']``.
        cache: Mapping of file name -> image array.
        fallback_size: Side length of the all-black placeholder image returned
            for file names that are missing from ``cache``. Defaults to 380,
            the value that used to be hard-coded.

    Items are ``(image, label)`` with a float32 label tensor when ``df`` has
    a ``label`` column, otherwise just ``image``.
    """

    def __init__(self, df, transform=None, cache=None, fallback_size=380):
        self.df = df
        self.file_names = df['image'].values
        self.labels = df['label'].values if 'label' in df.columns else None
        self.transform = transform
        self.cache = cache
        self.fallback_size = fallback_size

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        # Take the image from the cache; build the black placeholder only on a
        # miss instead of allocating it on every single access.
        image = self.cache.get(file_name)
        if image is None:
            image = np.zeros((self.fallback_size, self.fallback_size, 3), dtype=np.uint8)

        if self.transform:
            image = self.transform(image=image)['image']

        if self.labels is not None:
            return image, torch.tensor(self.labels[idx], dtype=torch.float32)
        return image

In [None]:
# model
class UniversalModel(nn.Module):
    """torchvision backbone with a single-logit head for binary classification.

    Args:
        model_name: Name of a model constructor in ``torchvision.models``
            (for example ``'efficientnet_b4'``). The backbone must expose its
            head as ``.classifier``, either an ``nn.Linear`` or an
            ``nn.Sequential`` whose last layer is an ``nn.Linear``.
        pretrained: Load torchvision's ``'DEFAULT'`` weights when True,
            otherwise start from random initialisation.

    Raises:
        ValueError: If ``model_name`` is not a constructor in
            ``torchvision.models`` or the backbone's head has an unsupported
            layout.
    """

    def __init__(self, model_name, pretrained=True):
        super().__init__()

        # Honour model_name instead of always building EfficientNet-B4.
        builder = getattr(models, model_name, None) if isinstance(model_name, str) else None
        if not callable(builder):
            raise ValueError(f"unknown torchvision model: {model_name!r}")
        self.model = builder(weights='DEFAULT' if pretrained else None)

        # Replace the final linear layer with a 1-output layer for the binary task.
        head = getattr(self.model, 'classifier', None)
        if isinstance(head, nn.Sequential) and len(head) > 0 and isinstance(head[-1], nn.Linear):
            head[-1] = nn.Linear(head[-1].in_features, 1)
        elif isinstance(head, nn.Linear):
            self.model.classifier = nn.Linear(head.in_features, 1)
        else:
            raise ValueError(
                f"model {model_name!r} has no '.classifier' ending in nn.Linear; "
                "cannot attach a binary head"
            )

    def forward(self, x):
        """Return the raw logits produced by the wrapped backbone."""
        return self.model(x)

    def unfreeze(self):
        """Mark every parameter of the backbone as trainable."""
        for param in self.model.parameters():
            param.requires_grad = True

In [11]:
# mixup augmentation
def mixup_data(x, y, alpha=0.4):
    """Blend each sample of a batch with a randomly chosen partner sample.

    Args:
        x: Input batch tensor; the first dimension is the batch dimension.
        y: Targets, indexable along the same batch dimension.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            coefficient is drawn from. For ``alpha <= 0`` the coefficient is
            fixed to 1, so the inputs are returned unmixed.

    Returns:
        ``(mixed_x, y_a, y_b, lam)``: the blended inputs, the original
        targets, the partners' targets and the mixing coefficient.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    # Random pairing of samples inside the batch.
    partner = torch.randperm(x.size(0)).to(x.device)
    blended = lam * x + (1 - lam) * x[partner]
    return blended, y, y[partner], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the loss against both mixup targets, weighted by ``lam``.

    Args:
        criterion: Callable ``criterion(pred, target)`` returning a loss.
        pred: Model output for the mixed batch.
        y_a: Targets of the original samples (weight ``lam``).
        y_b: Targets of the partner samples (weight ``1 - lam``).
        lam: Mixing coefficient.

    Returns:
        ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [12]:
def bootstrap_auc(y_true, y_pred, n_bootstraps=1000, seed=42):
    """Mean ROC AUC over bootstrap resamples of the given predictions.

    Each resample draws ``len(y_pred)`` indices with replacement. Resamples
    that contain a single class are skipped, because ROC AUC is undefined
    for them.

    Args:
        y_true: Ground-truth labels (any array-like).
        y_pred: Predicted scores (any array-like of the same length).
        n_bootstraps: Number of resamples to draw.
        seed: Seed of the private ``RandomState`` used for resampling.

    Returns:
        The mean AUC over the valid resamples, or ``nan`` when the input is
        empty or no resample contained both classes.
    """
    # Accept plain lists too: the fancy indexing below needs ndarrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_samples = len(y_pred)
    if n_samples == 0:
        return float('nan')

    rng = np.random.RandomState(seed)
    scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue
        scores.append(roc_auc_score(y_true[indices], y_pred[indices]))

    # Be explicit instead of letting np.mean([]) emit a warning and return nan.
    if not scores:
        return float('nan')
    return np.mean(scores)

In [13]:
class EarlyStopping:
    """Track the best validation score, checkpoint on improvement, and flag
    when training should stop.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` is set to True.
        path: File the model's ``state_dict`` is saved to on each improvement.

    Attributes:
        best_score: Highest score seen so far (starts at ``-inf``).
        counter: Consecutive calls without improvement.
        early_stop: Becomes True once ``counter`` reaches ``patience``.
    """

    def __init__(self, patience=5, path='checkpoint.pth'):
        self.path = path
        self.patience = patience
        self.best_score = -np.inf
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_auc, model):
        """Record one validation result.

        Returns:
            True when ``val_auc`` is strictly greater than the best score so
            far (the model is then checkpointed), otherwise False.
        """
        # No strict improvement: count the stale epoch and maybe raise the flag.
        if not (val_auc > self.best_score):
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return False

        # New best: remember it, checkpoint the weights, restart the counter.
        self.best_score = val_auc
        torch.save(model.state_dict(), self.path)
        self.counter = 0
        return True

In [14]:
def train_fold(fold_n, train_idx, val_idx, df, images_cache):
    """Train and validate one cross-validation fold.

    The fold runs in two phases: an optional warm-up that trains only the
    classifier head, followed by fine-tuning of the whole network with mixup,
    gradient clipping, a cosine learning-rate schedule and early stopping.
    After every fine-tuning epoch the model is scored on the validation split
    (bootstrap AUC of predictions averaged over the image and its horizontal
    flip). The best weights are saved to ``model_fold{fold_n}.pth``.

    Hyperparameters are read from the module-level ``config`` dict.

    Args:
        fold_n: Zero-based fold index (used for logging and the checkpoint name).
        train_idx: Positional row indices of the training split.
        val_idx: Positional row indices of the validation split.
        df: DataFrame with ``image`` and ``label`` columns.
        images_cache: Mapping of file name -> image array.

    Returns:
        The best validation AUC reached in this fold, as a float.

    Raises:
        ValueError: If the training split contains no positive samples.
    """
    print(f"\n===== fold {fold_n+1} =====")

    train_ds = AvitoDataset(df.iloc[train_idx], get_transforms('train', config['img_size']), images_cache)
    val_ds = AvitoDataset(df.iloc[val_idx], get_transforms('valid', config['img_size']), images_cache)

    train_loader = DataLoader(train_ds, batch_size=config['batch_size'], shuffle=True, num_workers=config['num_workers'])
    val_loader = DataLoader(val_ds, batch_size=config['batch_size'], shuffle=False, num_workers=config['num_workers'])

    model = UniversalModel(config['model_name']).to(config['device'])

    # class weighting for the loss
    n_pos = df.iloc[train_idx]['label'].sum()
    n_neg = len(train_idx) - n_pos
    if n_pos == 0:
        # n_neg / 0 would give an infinite pos_weight and a NaN loss.
        raise ValueError(f"fold {fold_n}: training split has no positive samples")
    pos_weight = torch.tensor([n_neg/n_pos]).float().to(config['device'])
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    es = EarlyStopping(patience=5, path=f"model_fold{fold_n}.pth")

    # warmup (train the head only)
    if config['epochs_warmup'] > 0:
        print("warmup...")
        # freeze everything except the classifier
        for param in model.model.parameters():
            param.requires_grad = False
        for param in model.model.classifier.parameters():
            param.requires_grad = True

        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=config['lr_head'])

        model.train()
        for epoch in range(config['epochs_warmup']):
            for imgs, labels in tqdm(train_loader, leave=False):
                imgs, labels = imgs.to(config['device']), labels.to(config['device']).view(-1, 1)
                optimizer.zero_grad()
                loss = criterion(model(imgs), labels)
                loss.backward()
                optimizer.step()

    # fine-tuning (unfreeze everything)
    print("fine-tuning...")
    model.unfreeze()

    optimizer = torch.optim.AdamW(model.parameters(), lr=config['lr_body'], weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['epochs_tune'])

    # Guard against grad_accum <= 0, which would divide by zero below.
    accum_steps = max(1, int(config['grad_accum']))
    n_batches = len(train_loader)

    for epoch in range(config['epochs_tune']):
        model.train()
        optimizer.zero_grad()

        running_loss = 0
        for step, (imgs, labels) in enumerate(tqdm(train_loader, leave=False)):
            imgs = imgs.to(config['device'])
            labels = labels.to(config['device']).view(-1, 1)

            # mixup
            inputs, targets_a, targets_b, lam = mixup_data(imgs, labels, alpha=0.4)
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

            # gradient accumulation: scale only what is back-propagated so the
            # logged loss below stays comparable across grad_accum settings
            (loss / accum_steps).backward()

            # Also step on the last batch, otherwise the gradients of a trailing
            # partial group would be thrown away by the next epoch's zero_grad().
            if (step + 1) % accum_steps == 0 or (step + 1) == n_batches:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                optimizer.zero_grad()

            running_loss += loss.item()

        # validation
        model.eval()
        preds, targets = [], []
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs = imgs.to(config['device'])
                # average the predictions for the original and its horizontal flip
                out1 = model(imgs)
                out2 = model(torch.flip(imgs, [3]))
                p = (torch.sigmoid(out1) + torch.sigmoid(out2)) / 2
                preds.extend(p.cpu().view(-1).numpy())
                targets.extend(labels.numpy())

        val_auc = bootstrap_auc(np.array(targets), np.array(preds))
        scheduler.step()

        is_best = es(val_auc, model)
        print(f"epoch {epoch+1} | loss: {running_loss/len(train_loader):.4f} | val auc: {val_auc:.4f} {'üèÜ' if is_best else ''}")

        if es.early_stop:
            print("early stopping")
            break

    best_auc = float(es.best_score)

    # cleanup
    del model, optimizer, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()

    return best_auc

In [15]:
# Stratified, shuffled K-fold split so each fold keeps the class balance;
# every fold is trained independently and writes its own checkpoint.
skf = StratifiedKFold(
    n_splits=config['n_folds'],
    shuffle=True,
    random_state=config['seed'],
)

fold_splits = skf.split(df_full, df_full['label'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_fold(
        fold_n=fold,
        train_idx=train_idx,
        val_idx=val_idx,
        df=df_full,
        images_cache=images_cache,
    )


===== fold 1 =====


Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 74.5M/74.5M [00:00<00:00, 242MB/s]


warmup...


  0%|          | 0/48 [00:00<?, ?it/s]

fine-tuning...


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 1 | loss: 1.0809 | val auc: 0.9635 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 2 | loss: 1.0648 | val auc: 0.9673 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 3 | loss: 1.0650 | val auc: 0.9655 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 4 | loss: 1.0424 | val auc: 0.9699 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 5 | loss: 1.0317 | val auc: 0.9716 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 6 | loss: 1.0370 | val auc: 0.9721 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 7 | loss: 1.0141 | val auc: 0.9719 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 8 | loss: 0.9915 | val auc: 0.9750 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 9 | loss: 0.9970 | val auc: 0.9740 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 10 | loss: 1.0016 | val auc: 0.9749 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 11 | loss: 0.9722 | val auc: 0.9746 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 12 | loss: 0.9726 | val auc: 0.9769 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 13 | loss: 0.9653 | val auc: 0.9776 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 14 | loss: 0.9355 | val auc: 0.9775 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 15 | loss: 0.9261 | val auc: 0.9772 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 16 | loss: 0.9527 | val auc: 0.9771 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 17 | loss: 0.9440 | val auc: 0.9784 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 18 | loss: 0.9285 | val auc: 0.9778 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 19 | loss: 0.9361 | val auc: 0.9776 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 20 | loss: 0.9367 | val auc: 0.9787 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 21 | loss: 0.9211 | val auc: 0.9776 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 22 | loss: 0.9281 | val auc: 0.9778 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 23 | loss: 0.9333 | val auc: 0.9771 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 24 | loss: 0.9144 | val auc: 0.9787 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 25 | loss: 0.9114 | val auc: 0.9780 
early stopping

===== fold 2 =====
warmup...


  0%|          | 0/48 [00:00<?, ?it/s]

fine-tuning...


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 1 | loss: 1.0533 | val auc: 0.9448 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 2 | loss: 1.0490 | val auc: 0.9458 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 3 | loss: 1.0364 | val auc: 0.9489 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 4 | loss: 0.9959 | val auc: 0.9499 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 5 | loss: 1.0229 | val auc: 0.9500 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 6 | loss: 0.9936 | val auc: 0.9506 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 7 | loss: 1.0043 | val auc: 0.9524 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 8 | loss: 0.9497 | val auc: 0.9514 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 9 | loss: 0.9715 | val auc: 0.9524 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 10 | loss: 0.9408 | val auc: 0.9506 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 11 | loss: 0.9243 | val auc: 0.9519 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 12 | loss: 0.9506 | val auc: 0.9473 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 13 | loss: 0.9165 | val auc: 0.9502 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 14 | loss: 0.9180 | val auc: 0.9500 
early stopping

===== fold 3 =====
warmup...


  0%|          | 0/48 [00:00<?, ?it/s]

fine-tuning...


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 1 | loss: 1.0818 | val auc: 0.9371 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 2 | loss: 1.0637 | val auc: 0.9400 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 3 | loss: 1.0560 | val auc: 0.9476 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 4 | loss: 1.0428 | val auc: 0.9481 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 5 | loss: 1.0203 | val auc: 0.9526 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 6 | loss: 1.0297 | val auc: 0.9522 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 7 | loss: 1.0164 | val auc: 0.9547 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 8 | loss: 1.0065 | val auc: 0.9564 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 9 | loss: 1.0088 | val auc: 0.9552 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 10 | loss: 0.9816 | val auc: 0.9550 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 11 | loss: 0.9833 | val auc: 0.9602 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 12 | loss: 0.9586 | val auc: 0.9592 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 13 | loss: 0.9602 | val auc: 0.9621 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 14 | loss: 0.9548 | val auc: 0.9601 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 15 | loss: 0.9481 | val auc: 0.9583 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 16 | loss: 0.9592 | val auc: 0.9596 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 17 | loss: 0.9457 | val auc: 0.9623 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 18 | loss: 0.9162 | val auc: 0.9630 üèÜ


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 19 | loss: 0.9194 | val auc: 0.9598 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 20 | loss: 0.9096 | val auc: 0.9619 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 21 | loss: 0.9244 | val auc: 0.9596 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 22 | loss: 0.9249 | val auc: 0.9611 


  0%|          | 0/48 [00:00<?, ?it/s]

epoch 23 | loss: 0.9145 | val auc: 0.9623 
early stopping


In [16]:
# Make sure every test image is cached. Checking only the first row would miss
# a partially filled cache, and the dataset would then silently substitute
# blank images for the missing ones.
missing_test = df_test[~df_test['image'].isin(list(images_cache))].drop_duplicates(subset='image')
if len(missing_test) > 0:
    print("–¥–æ–≥—Ä—É–∂–∞–µ–º —Ç–µ—Å—Ç –≤ –∫—ç—à...")
    test_cache = load_images_to_ram(missing_test, PATH, size=config['img_size'])
    images_cache.update(test_cache)

# dataset and loader for the test set (deterministic transforms, fixed order)
test_ds = AvitoDataset(df_test, get_transforms('valid', config['img_size']), images_cache)
test_loader = DataLoader(
    test_ds,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=True
)

# Accumulator for the per-fold predictions, one slot per test row.
final_preds = np.zeros(len(df_test))

In [18]:
# Start from zeros inside this cell: the loop accumulates in place and the
# average divides in place, so relying on an array created by another cell
# would corrupt the result whenever this cell is re-run.
final_preds = np.zeros(len(df_test))

for fold in range(config['n_folds']):
    weights_path = f"model_fold{fold}.pth"
    print(f"–∑–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–∏: {weights_path}...")

    # build the architecture (no pretrained download; the checkpoint supplies the weights)
    model = UniversalModel(config['model_name'], pretrained=False)

    # load this fold's best checkpoint
    state_dict = torch.load(weights_path, map_location=config['device'])
    model.load_state_dict(state_dict)

    model.to(config['device'])
    model.eval()

    fold_preds = []

    # prediction loop: average the probabilities of the image and its horizontal flip
    with torch.no_grad():
        for imgs in tqdm(test_loader, desc=f"fold {fold+1}", leave=False):
            imgs = imgs.to(config['device'])

            out1 = model(imgs)
            out2 = model(torch.flip(imgs, [3]))

            p = (torch.sigmoid(out1) + torch.sigmoid(out2)) / 2

            fold_preds.extend(p.cpu().view(-1).numpy())

    final_preds += np.array(fold_preds)

    # free the model before the next fold is loaded
    del model, state_dict
    torch.cuda.empty_cache()
    gc.collect()


# Ensemble = plain mean over the folds.
final_preds /= config['n_folds']

submission = df_test[['image']].copy()
submission['score'] = final_preds

submission.to_csv('submission.csv', index=False)

–∑–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–∏: model_fold0.pth...


fold 1:   0%|          | 0/24 [00:00<?, ?it/s]

–∑–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–∏: model_fold1.pth...


fold 2:   0%|          | 0/24 [00:00<?, ?it/s]

–∑–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–∏: model_fold2.pth...


fold 3:   0%|          | 0/24 [00:00<?, ?it/s]