In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import gc

import cv2
import timm
import torch
from sklearn.model_selection import StratifiedKFold

import albumentations as A
from albumentations.pytorch import ToTensorV2

from tqdm import tqdm
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingLR
import os
import warnings
warnings.filterwarnings('ignore')

import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score



In [2]:
class CFG:
    SEED = 42
    IMG_SIZE = 512
    ROOT_DIR = '/kaggle/input/UBC-OCEAN'
    TMA_TRAIN_PATH = '/kaggle/input/UBC-OCEAN/train_images'
    NTMA_TRAIN_PATH = '/kaggle/input/UBC-OCEAN/train_thumbnails'
    #MODEL_NAME = 'tf_efficientnet_b0_ns'
    MODEL_NAME = 'tf_efficientnetv2_s_in21ft1k'
    #MODEL_NAME = 'resnet50.ra_in1k'
    #MODEL_NAME = 'efficientnetv2_s'
    #MODEL_NAME = 'vgg16_bn'
    PRE_TRAIN = True
    #CHECKPOINT_PATH = '/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b0/1/tf_efficientnet_b0_aa-827b6e33.pth'
    CHECKPOINT_PATH = None
    TRAIN_BATCH_SIZE = 16
    VALID_BATCH_SIZE = 32
    NUM_WORKERS = 2
    NUM_CLASSES = 5
    LR = 1e-4
    ETA_MIN = 1e-6
    T_MAX = 60
    WEIGHT_DECAY = 1e-3
    NUM_EPOCHS = 30
    DEVICE = 'cuda:0'
    IS_CLASS_WEIGHT = True
    IS_EMA = False
    IS_AWP = True
    AWP_START = 12
    ADV_LR = 1
    ADV_EPS = 0.0001

# EMA

In [3]:
class EMA(object):
    '''
        apply expontential moving average to a model. This should have same function as the `tf.train.ExponentialMovingAverage` of tensorflow.
        usage:
            model = resnet()
            model.train()
            ema = EMA(model, 0.9999)
            ....
            for img, lb in dataloader:
                loss = ...
                loss.backward()
                optim.step()
                ema.update_params() # apply ema
            evaluate(model)  # evaluate with original model as usual
            ema.apply_shadow() # copy ema status to the model
            evaluate(model) # evaluate the model with ema paramters
            ema.restore() # resume the model parameters
        args:
            - model: the model that ema is applied
            - alpha: each parameter p should be computed as p_hat = alpha * p + (1. - alpha) * p_hat
            - buffer_ema: whether the model buffers should be computed with ema method or just get kept
        methods:
            - update_params(): apply ema to the model, usually call after the optimizer.step() is called
            - apply_shadow(): copy the ema processed parameters to the model
            - restore(): restore the original model parameters, this would cancel the operation of apply_shadow()
    '''
    def __init__(self, model, alpha, buffer_ema=True):
        self.step = 0
        self.model = model
        self.alpha = alpha
        self.buffer_ema = buffer_ema
        self.shadow = self.get_model_state()
        self.backup = {}
        self.param_keys = [k for k, _ in self.model.named_parameters()]
        self.buffer_keys = [k for k, _ in self.model.named_buffers()]

    def update_params(self):
        decay = min(self.alpha, (self.step + 1) / (self.step + 10))
        state = self.model.state_dict()
        for name in self.param_keys:
            self.shadow[name].copy_(
                decay * self.shadow[name]
                + (1 - decay) * state[name]
            )
        for name in self.buffer_keys:
            if self.buffer_ema:
                self.shadow[name].copy_(
                    decay * self.shadow[name]
                    + (1 - decay) * state[name]
                )
            else:
                self.shadow[name].copy_(state[name])
        self.step += 1

    def apply_shadow(self):
        self.backup = self.get_model_state()
        self.model.load_state_dict(self.shadow)

    def restore(self):
        self.model.load_state_dict(self.backup)

    def get_model_state(self):
        return {
            k: v.clone().detach()
            for k, v in self.model.state_dict().items()
        }

# AMP

In [4]:
class AWP:
    def __init__(self, model, criterion, optimizer, adv_param="weight", adv_lr=1, adv_eps=0.0001):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.backup = {}
        self.backup_eps = {}

    def attack_backward(self, inputs, labels):
        if self.adv_lr == 0:
            return
        self._save()
        self._attack_step()

        y_preds = self.model(inputs)

        adv_loss = self.criterion(y_preds, labels)
        self.optimizer.zero_grad()
        return adv_loss

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                norm1 = torch.norm(param.grad)
                norm2 = torch.norm(param.data.detach())
                if norm1 != 0 and not torch.isnan(norm1):
                    # 在损失函数之前获得梯度
                    r_at = self.adv_lr * param.grad / (norm1 + e) * (norm2 + e)
                    param.data.add_(r_at)
                    param.data = torch.min(
                        torch.max(param.data, self.backup_eps[name][0]), self.backup_eps[name][1]
                    )

    def _save(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                if name not in self.backup:
                    self.backup[name] = param.data.clone()
                    grad_eps = self.adv_eps * param.abs().detach()
                    self.backup_eps[name] = (
                        self.backup[name] - grad_eps,
                        self.backup[name] + grad_eps,
                    )

    def _restore(self,):
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data = self.backup[name]
        self.backup = {}
        self.backup_eps = {}

# EDA

In [5]:
train_df = pd.read_csv('/kaggle/input/UBC-OCEAN/train.csv')
train_df

Unnamed: 0,image_id,label,image_width,image_height,is_tma
0,4,HGSC,23785,20008,False
1,66,LGSC,48871,48195,False
2,91,HGSC,3388,3388,True
3,281,LGSC,42309,15545,False
4,286,EC,37204,30020,False
...,...,...,...,...,...
533,65022,LGSC,53355,46675,False
534,65094,MC,55042,45080,False
535,65300,HGSC,75860,27503,False
536,65371,HGSC,42551,41800,False


In [6]:
def get_train_file_path(image_id):
    if os.path.exists(f"{CFG.NTMA_TRAIN_PATH}/{image_id}_thumbnail.png"):
        return f"{CFG.NTMA_TRAIN_PATH}/{image_id}_thumbnail.png"
    else:
        return f"{CFG.TMA_TRAIN_PATH}/{image_id}.png"

In [7]:
train_df['image_path'] = train_df['image_id'].apply(get_train_file_path)
train_df

Unnamed: 0,image_id,label,image_width,image_height,is_tma,image_path
0,4,HGSC,23785,20008,False,/kaggle/input/UBC-OCEAN/train_thumbnails/4_thu...
1,66,LGSC,48871,48195,False,/kaggle/input/UBC-OCEAN/train_thumbnails/66_th...
2,91,HGSC,3388,3388,True,/kaggle/input/UBC-OCEAN/train_images/91.png
3,281,LGSC,42309,15545,False,/kaggle/input/UBC-OCEAN/train_thumbnails/281_t...
4,286,EC,37204,30020,False,/kaggle/input/UBC-OCEAN/train_thumbnails/286_t...
...,...,...,...,...,...,...
533,65022,LGSC,53355,46675,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65022...
534,65094,MC,55042,45080,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65094...
535,65300,HGSC,75860,27503,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65300...
536,65371,HGSC,42551,41800,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65371...


In [8]:
encoder = LabelEncoder()
train_df['label_id'] = encoder.fit_transform(train_df['label'])

print(train_df['label_id'].value_counts())

with open("label_encoder.pkl", "wb") as fp:
    joblib.dump(encoder, fp)

label_id
2    222
1    124
0     99
3     47
4     46
Name: count, dtype: int64


In [9]:
train_df['label'].value_counts()

label
HGSC    222
EC      124
CC       99
LGSC     47
MC       46
Name: count, dtype: int64

In [10]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 538 entries, 0 to 537
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   image_id      538 non-null    int64 
 1   label         538 non-null    object
 2   image_width   538 non-null    int64 
 3   image_height  538 non-null    int64 
 4   is_tma        538 non-null    bool  
 5   image_path    538 non-null    object
 6   label_id      538 non-null    int64 
dtypes: bool(1), int64(4), object(2)
memory usage: 25.9+ KB


In [11]:
train_df[['image_width', 'image_height']].describe()

Unnamed: 0,image_width,image_height
count,538.0,538.0
mean,48859.533457,29729.460967
std,20040.989927,10762.899796
min,2964.0,2964.0
25%,34509.0,22089.5
50%,48160.0,29732.0
75%,64143.75,37880.75
max,105763.0,50155.0


# Outlier Detection
If your dataset contains information related to outlier detection, perform a dedicated EDA for this aspect:<br>
Visualize outliers using scatter plots or box plots.<br>
Apply statistical methods or machine learning techniques to identify outliers.<br>

# class weights

In [12]:
# class weights
from sklearn.utils.class_weight import compute_class_weight
class_weights=compute_class_weight(class_weight='balanced', classes = np.array([0.0,1.0,2.0,3.0,4.0]), y = train_df['label_id'].values)
#class_weights = np.array([1.0868686868686868,0.867741935483871,0.4846846846846847,2.2893617021276595,2.3391304347826085])
print(class_weights)
class_weights=torch.tensor(class_weights,dtype=torch.float32)

[1.08686869 0.86774194 0.48468468 2.2893617  2.33913043]


# Data Aug

In [13]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
"""
train_transform = A.Compose([
    #A.HorizontalFlip(p=0.5),
    #A.VerticalFlip(p=0.5),
    
    #A.GaussNoise(p = 0.2),
    #A.OneOf([
    #    A.MotionBlur(p=0.2),
    #    A.MedianBlur(blur_limit=3, p=0.1),
    #    A.Blur(blur_limit=3, p=0.1),
    #], p=0.2),
    #A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.2),

    #A.RandomBrightnessContrast(p=0.2),
    A.Resize(height=CFG.IMG_SIZE, width=CFG.IMG_SIZE, p=1.0),
    A.ShiftScaleRotate(shift_limit=0.1, 
        scale_limit=0.15, 
        rotate_limit=60, 
        p=0.5),
    A.HueSaturationValue(
        hue_shift_limit=0.2, 
        sat_shift_limit=0.2, 
        val_shift_limit=0.2, 
        p=0.5
    ),
    A.RandomBrightnessContrast(
        brightness_limit=(-0.1,0.1), 
        contrast_limit=(-0.1, 0.1), 
        p=0.5
    ),
    
    # A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.5),
    A.Normalize(
        mean=mean,
        std=std,
        max_pixel_value=255.0,
        p=1.0
    ),
    ToTensorV2(p=1.0),
])
"""
train_transform = A.Compose([
    A.Resize(512, 512),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.75),
    A.ShiftScaleRotate(p=0.75),
    A.OneOf([
            A.GaussNoise(var_limit=[10, 50]),
            A.GaussianBlur(),
            A.MotionBlur(),
            ], p=0.4),
    A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
    A.CoarseDropout(max_holes=1, max_width=int(512* 0.3), max_height=int(512* 0.3), mask_fill_value=0, p=0.5),
    
    # A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.5),
    A.Normalize(
        mean=mean,
        std=std,
        max_pixel_value=255.0,
        p=1.0
    ),
    ToTensorV2(p=1.0),
])

valid_transform = A.Compose([
    A.Resize(height=CFG.IMG_SIZE, width=CFG.IMG_SIZE, p=1.0),
    A.Normalize(
        mean=mean,
        std=std,
        max_pixel_value=255.0,
        p=1.0
    ),
    ToTensorV2(p=1.0),
])

In [14]:
import random
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)

In [15]:
# 计算平均损失
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

# Model

In [16]:
import torch.nn as nn
import torch.nn.functional as F
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

In [17]:
class UBCModel(nn.Module):
    def __init__(self, CFG):
        super(UBCModel, self).__init__()
        self.model = timm.create_model(CFG.MODEL_NAME, pretrained=CFG.PRE_TRAIN, checkpoint_path=CFG.CHECKPOINT_PATH)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, CFG.NUM_CLASSES)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

    
class UBCModel_ResNet(nn.Module):
    def __init__(self, CFG):
        super(UBCModel_ResNet, self).__init__()
        self.model = timm.create_model(CFG.MODEL_NAME, pretrained=CFG.PRE_TRAIN, checkpoint_path=CFG.CHECKPOINT_PATH)

        in_features = self.model.fc.in_features
        self.model.fc = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, CFG.NUM_CLASSES)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

# Dataset

In [18]:
from PIL import Image
class UBCDataset(Dataset):
    def __init__(self, CFG, df, transform):
        self.image_paths = df['image_path'].values
        self.image_ids = df['image_id'].values
        self.labels = df['label_id'].values
        self.transform = transform
        
    def __getitem__(self, index):
        image_id = self.image_ids[index]
        image_path = self.image_paths[index]
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img = Image.open(image_path)
        #img = np.array(img)
        label = self.labels[index]
        
        if self.transform:
            img = self.transform(image=img)["image"]
        
        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long),
            'image_id': image_id
        }
    
    def __len__(self):
        return len(self.image_paths)

# Metric Learning ?

In [19]:
import torch.nn.functional as F

def linear_combination(x, y, epsilon):
    return epsilon * x + (1 - epsilon) * y


def reduce_loss(loss, reduction='mean'):
    return loss.mean() if reduction == 'mean' else loss.sum() if reduction == 'sum' else loss


class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, weight, epsilon: float = 0.1, reduction='mean'):
        super().__init__()
        self.epsilon = epsilon
        self.reduction = reduction
        self.weight = weight

    def forward(self, preds, target):
        n = preds.size()[-1]
        log_preds = F.log_softmax(preds, dim=-1)
        log_preds = torch.mul(self.weight.to("cuda"), log_preds)
        loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction)
        nll = F.nll_loss(log_preds, target, reduction=self.reduction)
        return linear_combination(loss / n, nll, self.epsilon)

# Train

In [20]:
def train_one_epoch(CFG, model, epoch, train_loader, criterion, optimizer, scheduler, ema, awp, train_loss=None):
    model.train()
    print(f"Epoch {epoch}")
    train_losses = Averager()
    train_accs = Averager()
    all_preds = []
    all_labels = []
        
    for batch in tqdm(train_loader):
        imgs, labels, image_id = batch['image'], batch['label'], batch['image_id']
        imgs, labels = imgs.to(CFG.DEVICE, dtype=torch.float), labels.to(CFG.DEVICE, dtype=torch.long)
            
        logits = model(imgs)
            
        loss = criterion(logits, labels)
        
        optimizer.step()
        optimizer.zero_grad()
        loss.backward()
        
        if CFG.AWP_START <= epoch and CFG.IS_AWP:
            loss = awp.attack_backward(imgs, labels)
            loss.backward()
            awp._restore()
        
        if CFG.IS_EMA:
            ema.update_params()
            
        train_losses.send(loss.item())
        if train_loss is not None:
            train_loss[image_id] += loss.item()
        
        _, preds = torch.max(model.softmax(logits), 1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
        train_accs.send((logits.argmax(dim=-1) == labels).float().mean())
    
    print(f"epoch{epoch} LR : {optimizer.param_groups[0]['lr']}")
    scheduler.step()
    train_loss = train_losses.value
    train_acc = train_accs.value
    bl_acc_score = balanced_accuracy_score(all_labels, all_preds)
    print(f"[ Train | {epoch:03d}/{CFG.NUM_EPOCHS:03d}] loss = {train_loss} acc = {train_acc} bl_acc = {bl_acc_score}")

# Valid

In [21]:
def valid_one_epoch(CFG, model, epoch, criterion, valid_loader):
    model.eval()
    valid_accs = Averager()
    valid_losses = Averager()
    all_preds=[]
    all_labels=[]
    with torch.no_grad():
        for batch in tqdm(valid_loader):
            imgs, labels = batch['image'], batch['label']
            imgs, labels = imgs.to(CFG.DEVICE, dtype=torch.float), labels.to(CFG.DEVICE,dtype=torch.long)

            logits = model(imgs)
            
            loss = criterion(logits, labels)
            _, preds = torch.max(model.softmax(logits), 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            valid_accs.send((logits.argmax(dim=-1) == labels).float().mean())
            valid_losses.send(loss.item())
        valid_acc = valid_accs.value
        valid_loss = valid_losses.value
        bl_acc_score = balanced_accuracy_score(all_labels, all_preds)
        print(f"[Valid | {epoch} ] loss = {valid_loss} acc = {valid_acc} bl_acc = {bl_acc_score}")
        return valid_acc

# skf

In [22]:
skf = StratifiedKFold(n_splits=5)

for fold, ( _, val_) in enumerate(skf.split(X=train_df, y=train_df.label)):
      train_df.loc[val_ , "kfold"] = int(fold)

train_df

Unnamed: 0,image_id,label,image_width,image_height,is_tma,image_path,label_id,kfold
0,4,HGSC,23785,20008,False,/kaggle/input/UBC-OCEAN/train_thumbnails/4_thu...,2,0.0
1,66,LGSC,48871,48195,False,/kaggle/input/UBC-OCEAN/train_thumbnails/66_th...,3,0.0
2,91,HGSC,3388,3388,True,/kaggle/input/UBC-OCEAN/train_images/91.png,2,0.0
3,281,LGSC,42309,15545,False,/kaggle/input/UBC-OCEAN/train_thumbnails/281_t...,3,0.0
4,286,EC,37204,30020,False,/kaggle/input/UBC-OCEAN/train_thumbnails/286_t...,1,0.0
...,...,...,...,...,...,...,...,...
533,65022,LGSC,53355,46675,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65022...,3,4.0
534,65094,MC,55042,45080,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65094...,4,4.0
535,65300,HGSC,75860,27503,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65300...,2,4.0
536,65371,HGSC,42551,41800,False,/kaggle/input/UBC-OCEAN/train_thumbnails/65371...,2,4.0


In [None]:
%%time

for fold in range(5):
    train_df_fold = train_df[train_df.kfold != fold].reset_index(drop=True)
    valid_df_fold = train_df[train_df.kfold == fold].reset_index(drop=True)
    seed = 3407
    print(f"Random Seed = {seed}")
    set_seed(seed)
    train_data = UBCDataset(CFG, train_df_fold, train_transform)
    valid_data = UBCDataset(CFG, valid_df_fold, valid_transform)
    print(f"---------Starting flod-{fold}---------")
    print(f"train samples : {len(train_data)}")
    print(f"valid samples : {len(valid_data)}")
    train_loader = DataLoader(train_data, batch_size=CFG.TRAIN_BATCH_SIZE, num_workers=CFG.NUM_WORKERS)
    valid_loader = DataLoader(valid_data, batch_size=CFG.VALID_BATCH_SIZE, num_workers=CFG.NUM_WORKERS)
    
    model = UBCModel(CFG)
    
    model = model.to(CFG.DEVICE)
    ema = EMA(model, 0.96)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG.LR, weight_decay=CFG.WEIGHT_DECAY )
    scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_MAX, eta_min=CFG.ETA_MIN)
    # multistepLR = MultiStepLR(optimzer, milestones = [5,10,15,20], gamma = 0.8)
    
    #criterion = nn.CrossEntropyLoss(weight = class_weights.to(CFG.DEVICE))
    criterion = LabelSmoothingCrossEntropy(weight = class_weights.to(CFG.DEVICE))
    
    awp = AWP(model, criterion, optimizer, adv_lr=CFG.ADV_LR, adv_eps=CFG.ADV_EPS)
    
    best_acc = 0.640
    for epoch in range(CFG.NUM_EPOCHS):
        if CFG.AWP_START == epoch and CFG.IS_AWP:
            print(f"----------------Starting AWP----------------")
        train_one_epoch(CFG, model, epoch, train_loader, criterion, optimizer, scheduler, ema, awp)
        if epoch % 1 == 0:
            if CFG.IS_EMA:
                ema.apply_shadow()
                valid_acc = valid_one_epoch(CFG, model, epoch, criterion, valid_loader)
                ema.restore()
            else:
                valid_acc = valid_one_epoch(CFG, model, epoch, criterion, valid_loader)
                
            if valid_acc >= best_acc:
                best_acc = valid_acc
                save_path = f'/kaggle/working/model-fold-{fold}-Acc{valid_acc:.4f}.bin'
                torch.save(model.state_dict(), save_path)
                print(f"Saving fold{fold} model acc={valid_acc}")
    print(f"------------------------fold-{fold} Best Acc = {best_acc}------------------------")
                
    # 10 outlier
    #out = sorted(range(len(train_loss)), key = lambda sub: train_loss[sub])[-15:]
    #for x in out:
    #    outlier.add(x)

Random Seed = 42
---------Starting flod-0---------
train samples : 430
valid samples : 108


Downloading model.safetensors:   0%|          | 0.00/86.5M [00:00<?, ?B/s]

Epoch 0


100%|██████████| 27/27 [01:01<00:00,  2.27s/it]


epoch0 LR : 0.0001
[ Train | 000/025] loss = 1.8754949790460091 acc = 0.2113095223903656 bl_acc = 0.2357556856964032


100%|██████████| 4/4 [00:16<00:00,  4.07s/it]


[Valid | 0 ] loss = 1.7652809023857117 acc = 0.2630208432674408 bl_acc = 0.272
Epoch 1


100%|██████████| 27/27 [00:51<00:00,  1.92s/it]


epoch1 LR : 9.99321619703514e-05
[ Train | 001/025] loss = 1.4854390886094835 acc = 0.40112435817718506 bl_acc = 0.4265067537179915


100%|██████████| 4/4 [00:14<00:00,  3.53s/it]


[Valid | 1 ] loss = 1.6620244681835175 acc = 0.1979166716337204 bl_acc = 0.27999999999999997
Epoch 2


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


epoch2 LR : 9.972883382072952e-05
[ Train | 002/025] loss = 1.3782101692976776 acc = 0.4728836119174957 bl_acc = 0.5371443183749104


100%|██████████| 4/4 [00:14<00:00,  3.50s/it]


[Valid | 2 ] loss = 1.5272804200649261 acc = 0.390625 bl_acc = 0.40044444444444444
Epoch 3


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


epoch3 LR : 9.939057285945931e-05
[ Train | 003/025] loss = 1.2259455256991916 acc = 0.5710978507995605 bl_acc = 0.6258237709021931


100%|██████████| 4/4 [00:14<00:00,  3.53s/it]


[Valid | 3 ] loss = 1.3678054213523865 acc = 0.4609375 bl_acc = 0.4826666666666667
Epoch 4


100%|██████████| 27/27 [00:51<00:00,  1.92s/it]


epoch4 LR : 9.891830623632337e-05
[ Train | 004/025] loss = 1.1269715980247215 acc = 0.6061508059501648 bl_acc = 0.675349505101422


100%|██████████| 4/4 [00:14<00:00,  3.55s/it]


[Valid | 4 ] loss = 1.2850109338760376 acc = 0.5572916865348816 bl_acc = 0.6124444444444445
Epoch 5


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


epoch5 LR : 9.831332840130886e-05
[ Train | 005/025] loss = 1.0810828032316986 acc = 0.6382275223731995 bl_acc = 0.6938506447285905


100%|██████████| 4/4 [00:14<00:00,  3.54s/it]


[Valid | 5 ] loss = 1.226390689611435 acc = 0.5755208730697632 bl_acc = 0.6328888888888888
Epoch 6


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


epoch6 LR : 9.75772975566101e-05
[ Train | 006/025] loss = 0.9577178336955883 acc = 0.7156084775924683 bl_acc = 0.7827399741344119


100%|██████████| 4/4 [00:14<00:00,  3.67s/it]


[Valid | 6 ] loss = 1.2028038501739502 acc = 0.5260416865348816 bl_acc = 0.5906666666666667
Epoch 7


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


epoch7 LR : 9.671223111161148e-05
[ Train | 007/025] loss = 0.9445859767772533 acc = 0.6997354626655579 bl_acc = 0.7802103474209077


100%|██████████| 4/4 [00:14<00:00,  3.62s/it]


[Valid | 7 ] loss = 1.2237242758274078 acc = 0.625 bl_acc = 0.6637777777777777
Epoch 8


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


epoch8 LR : 9.572050015330873e-05
[ Train | 008/025] loss = 0.8529603503368519 acc = 0.7741401791572571 bl_acc = 0.8391842064771975


100%|██████████| 4/4 [00:14<00:00,  3.67s/it]


[Valid | 8 ] loss = 1.2221446931362152 acc = 0.59375 bl_acc = 0.6531111111111111
Epoch 9


100%|██████████| 27/27 [00:51<00:00,  1.92s/it]


epoch9 LR : 9.46048229473242e-05
[ Train | 009/025] loss = 0.8050905466079712 acc = 0.8035714030265808 bl_acc = 0.858121780317153


100%|██████████| 4/4 [00:14<00:00,  3.58s/it]


[Valid | 9 ] loss = 1.2598552405834198 acc = 0.6302083730697632 bl_acc = 0.6668888888888889
Epoch 10


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


epoch10 LR : 9.336825748732971e-05
[ Train | 010/025] loss = 0.7916055387920804 acc = 0.8389549851417542 bl_acc = 0.8852505072595633


100%|██████████| 4/4 [00:14<00:00,  3.53s/it]


[Valid | 10 ] loss = 1.4130527675151825 acc = 0.5 bl_acc = 0.5706666666666667
Epoch 11


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


epoch11 LR : 9.201419311329847e-05
[ Train | 011/025] loss = 0.7590303134035181 acc = 0.867394208908081 bl_acc = 0.9016564355298984


100%|██████████| 4/4 [00:14<00:00,  3.58s/it]


[Valid | 11 ] loss = 1.291176587343216 acc = 0.5911458730697632 bl_acc = 0.638
----------------Starting AWP----------------
Epoch 12


100%|██████████| 27/27 [00:55<00:00,  2.07s/it]


epoch12 LR : 9.054634122155987e-05
[ Train | 012/025] loss = 0.7927086684438918 acc = 0.8928571939468384 bl_acc = 0.9172169153187457


100%|██████████| 4/4 [00:14<00:00,  3.54s/it]


[Valid | 12 ] loss = 1.2850150167942047 acc = 0.5677083730697632 bl_acc = 0.5693333333333334
Epoch 13


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


epoch13 LR : 8.896872509212005e-05
[ Train | 013/025] loss = 0.7744541565577189 acc = 0.89682537317276 bl_acc = 0.9276392922323883


100%|██████████| 4/4 [00:14<00:00,  3.51s/it]


[Valid | 13 ] loss = 1.3268731832504272 acc = 0.6276041865348816 bl_acc = 0.6233333333333333
Epoch 14


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


epoch14 LR : 8.7285668861131e-05
[ Train | 014/025] loss = 0.7580333639074255 acc = 0.8882275223731995 bl_acc = 0.928521806098337


100%|██████████| 4/4 [00:14<00:00,  3.60s/it]


[Valid | 14 ] loss = 1.2130828201770782 acc = 0.625 bl_acc = 0.6464444444444444
Epoch 15


100%|██████████| 27/27 [00:56<00:00,  2.08s/it]


epoch15 LR : 8.550178566873408e-05
[ Train | 015/025] loss = 0.7370247575971816 acc = 0.9117063879966736 bl_acc = 0.9370178369929949


100%|██████████| 4/4 [00:13<00:00,  3.49s/it]


[Valid | 15 ] loss = 1.2226802706718445 acc = 0.6067708730697632 bl_acc = 0.6408888888888888
Epoch 16


100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


epoch16 LR : 8.362196501476347e-05
[ Train | 016/025] loss = 0.6754307172916554 acc = 0.9533730149269104 bl_acc = 0.963853886828401


100%|██████████| 4/4 [00:13<00:00,  3.50s/it]


[Valid | 16 ] loss = 1.2268767058849335 acc = 0.6458333730697632 bl_acc = 0.6702222222222222
Epoch 17


100%|██████████| 27/27 [00:56<00:00,  2.08s/it]


epoch17 LR : 8.165135935696695e-05
[ Train | 017/025] loss = 0.669260718204357 acc = 0.9626322984695435 bl_acc = 0.9774696113679164


100%|██████████| 4/4 [00:14<00:00,  3.52s/it]


[Valid | 17 ] loss = 1.2145839929580688 acc = 0.6354166865348816 bl_acc = 0.6755555555555556
Epoch 18


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


epoch18 LR : 7.959536998847741e-05
[ Train | 018/025] loss = 0.6622873368086638 acc = 0.9510582089424133 bl_acc = 0.9639930168109123


100%|██████████| 4/4 [00:13<00:00,  3.46s/it]


[Valid | 18 ] loss = 1.2131368219852448 acc = 0.6223958730697632 bl_acc = 0.6577777777777778
Epoch 19


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


epoch19 LR : 7.745963223324383e-05
[ Train | 019/025] loss = 0.6298814702917028 acc = 0.9699074029922485 bl_acc = 0.9817497003937682


100%|██████████| 4/4 [00:14<00:00,  3.51s/it]


[Valid | 19 ] loss = 1.277357280254364 acc = 0.5703125 bl_acc = 0.6264444444444444
Epoch 20


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


epoch20 LR : 7.524999999999999e-05
[ Train | 020/025] loss = 0.63655596309238 acc = 0.9722222089767456 bl_acc = 0.9837699024139702


100%|██████████| 4/4 [00:14<00:00,  3.53s/it]


[Valid | 20 ] loss = 1.2292322218418121 acc = 0.6432291865348816 bl_acc = 0.6764444444444444
Epoch 21


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


epoch21 LR : 7.297252973710755e-05
[ Train | 021/025] loss = 0.6432670972965382 acc = 0.9652777910232544 bl_acc = 0.9734338344507837


100%|██████████| 4/4 [00:13<00:00,  3.39s/it]


[Valid | 21 ] loss = 1.1632819175720215 acc = 0.6588541865348816 bl_acc = 0.696
Saving fold0 model acc=0.6588541865348816
Epoch 22


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


epoch22 LR : 7.06334638322521e-05
[ Train | 022/025] loss = 0.621836108190042 acc = 0.9603174924850464 bl_acc = 0.9767184828330505


100%|██████████| 4/4 [00:13<00:00,  3.45s/it]


[Valid | 22 ] loss = 1.2213743329048157 acc = 0.6067708730697632 bl_acc = 0.6484444444444444
Epoch 23


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


epoch23 LR : 6.823921350249236e-05
[ Train | 023/025] loss = 0.6478649090837549 acc = 0.9811508059501648 bl_acc = 0.9808319954211393


100%|██████████| 4/4 [00:13<00:00,  3.43s/it]


[Valid | 23 ] loss = 1.1692599654197693 acc = 0.6666666865348816 bl_acc = 0.6973333333333332
Saving fold0 model acc=0.6666666865348816
Epoch 24


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


epoch24 LR : 6.579634122155988e-05
[ Train | 024/025] loss = 0.6181418807418259 acc = 0.9715608954429626 bl_acc = 0.9827470153151339


100%|██████████| 4/4 [00:13<00:00,  3.46s/it]


[Valid | 24 ] loss = 1.1835213601589203 acc = 0.6197916865348816 bl_acc = 0.6437777777777777
------------------------fold-0 Best Acc = 0.6666666865348816------------------------
Random Seed = 42
---------Starting flod-1---------
train samples : 430
valid samples : 108
Epoch 0


  7%|▋         | 2/27 [00:04<00:47,  1.89s/it]

In [None]:
#outlier

In [None]:
#for x in outlier:
#    train_out_index = train_df.loc[train_df['image_id'] == x].index
#    train_df = train_df.drop(train_out_index)
#train_df