# About
* **version 1**: naive approach
* **version 2**: For comparing with [Chris's EfficientNetB2 Starter](https://www.kaggle.com/code/cdeotte/efficientnetb2-starter-lb-0-57), I added **log transform** and **LR scheduling**.

## Experimental Settings


### data augmentation¶
* implemented by [albumentations](https://albumentations.ai/)
* Train
    * Resize
* Val, Test
    * Resize
    
### learning settings
* CV Strategy: Stratified Group KFold (K=5)
    * y: `expert_consensus`
    * group: `patient_id`
* max epochs: 9
* data:
    * input image size: 1x512x512
    * batch size: 32
* loss: [KLDivLoss](https://pytorch.org/docs/stable/generated/torch.nn.KLDivLoss.html)
* optimizer: AdamW
    * learning rate: 1.0e-03
    * weight decay: 1.0e-02
    
* lr scheduler: OneCycleLR
    * max lr: 1.0e-03
    * min lr: 1.0e-04
 


# Prepare stage

In [1]:
import sys
import os
import gc
import copy
import yaml
import random
import shutil
from time import time
import typing as tp
from pathlib import Path

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedGroupKFold

import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler
from torch.cuda import amp

import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2
import logging
from datetime import datetime
import functools

In [2]:
# setting about environment and data path
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

ROOT = Path.cwd().parent
INPUT = ROOT / "input"
OUTPUT = ROOT / "output"
SRC = ROOT / "src"

#input from competitions file
DATA = INPUT / "hms-harmful-brain-activity-classification"
TRAIN_SPEC = DATA / "train_spectrograms"
TEST_SPEC = DATA / "test_spectrograms"

TMP = ROOT / "tmp"
TRAIN_SPEC_SPLIT = TMP / "train_spectrograms_split"
TEST_SPEC_SPLIT = TMP / "test_spectrograms_split"
TMP.mkdir(exist_ok=True)
TRAIN_SPEC_SPLIT.mkdir(exist_ok=True)
TEST_SPEC_SPLIT.mkdir(exist_ok=True)

In [3]:
# Setting for training
RANDAM_SEED = 1086
CLASSES = ["seizure_vote", "lpd_vote", "gpd_vote", "lrda_vote", "grda_vote", "other_vote"]
N_CLASSES = len(CLASSES)
FOLDS = [0, 1, 2, 3, 4] 
N_FOLDS = len(FOLDS)

In [4]:
#Here is the wrapper defination for logging running time

log_filename = 'fixed_training_log.log'

logging.basicConfig(filename=log_filename, level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

def log_time(func):
    """warpper for logging running time"""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time()
        result = func(*args, **kwargs)
        end_time = time()
        logging.info(f"{func.__name__} took {end_time - start_time:.4f} seconds.")
        print(f"{func.__name__} took {end_time - start_time:.4f} seconds.")
        return result

    return wrapper

## Read Data, Split Folds, Split Spectrograms

In [5]:
#Read data
@log_time
def load_data(DATA):
    train = pd.read_csv(DATA / "train.csv")
    # convert vote to probability format
    train[CLASSES] /= train[CLASSES].sum(axis=1).values[:, None]
    print(train.shape)
    return train

In [6]:
train = load_data(DATA)
# print(train.head())

(106800, 15)
load_data took 0.2596 seconds.


In [7]:
# Test stage code only, would cancel after finetuning
# only Use the spectrogram_sub_i=0 data instead of full data for training faster
train = train.groupby("spectrogram_id").head(1).reset_index(drop=True)
print(train.shape)
# print(train.head())

(11138, 15)


In [8]:
# Split the data using StratifiedGroupKFold
@log_time
def fold_train_data(train):
    sgkf = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDAM_SEED)

    # initialized fold num to -1
    # fold based on expert_consensus(output label)
    train["fold"] = -1

    for fold_id, (_, val_idx) in enumerate(
        sgkf.split(train, y=train["expert_consensus"], groups=train["patient_id"])
    ):
        train.loc[val_idx, "fold"] = fold_id
    return train

In [9]:
train = fold_train_data(train)

fold_train_data took 0.8952 seconds.


In [10]:
# Check on fold result, see if its balanced
train.groupby("fold")[CLASSES].sum()

Unnamed: 0_level_0,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,407.87897,240.84782,262.474513,142.304068,286.40759,800.087038
1,360.427388,231.931854,193.738,173.763906,333.566517,1166.572336
2,441.934721,328.255479,237.291923,163.192668,355.493987,926.831222
3,425.68598,195.568155,182.017264,148.850582,259.828026,864.049993
4,392.391708,234.916737,120.355588,129.112045,258.598367,873.625556


### split sepectogram files

In [11]:
# Reading spectrogram parquet file based on spectrogram_id, save into npy file
@log_time
def reading_spectrogram(train):
    for spec_id, df in tqdm(train.groupby("spectrogram_id")):
        spec = pd.read_parquet(TRAIN_SPEC / f"{spec_id}.parquet")
        #Transform as hz, time style
        spec_arr = spec.fillna(0).values[:, 1:].T.astype("float32")  # (Hz, Time) = (400, 300)
        for spec_offset, label_id in df[
            ["spectrogram_label_offset_seconds", "label_id"]
        ].astype(int).values:
            spec_offset = spec_offset // 2
            split_spec_arr = spec_arr[:, spec_offset: spec_offset + 300]
            np.save(TRAIN_SPEC_SPLIT / f"{label_id}.npy" , split_spec_arr)
    return

In [12]:
reading_spectrogram(train)

  0%|          | 0/11138 [00:00<?, ?it/s]

reading_spectrogram took 501.6524 seconds.


## Def Model, Dataset, Metric

In [13]:
# def model structure
class HMSHBACSpecModel(nn.Module):

    def __init__(
            self,
            model_name: str,
            pretrained: bool,
            in_channels: int,
            num_classes: int,
        ):
        super().__init__() # Call the initialization method of the parent class (nn.Module)
        self.model = timm.create_model(
            model_name=model_name, pretrained=pretrained,
            num_classes=num_classes, in_chans=in_channels)

    def forward(self, x):
        h = self.model(x)      

        return h

In [14]:
# # Here is possible list for initializing model
# timm.list_models()

### dataset

In [15]:
# def filepath and label style 
FilePath = tp.Union[str, Path]
Label = tp.Union[int, float, np.ndarray]

# Define a custom dataset class for spectrogram images
# initializing from torch.utils.data.Dataset
class HMSHBACSpecDataset(torch.utils.data.Dataset):

    def __init__(
        self,
        image_paths: tp.Sequence[FilePath],
        labels: tp.Sequence[Label],
        transform: A.Compose,
    ):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index: int):
        img_path = self.image_paths[index]
        label = self.labels[index]

        img = np.load(img_path)  # shape: (Hz, Time) = (400, 300)
        
        # log transform
        img = np.clip(img,np.exp(-4), np.exp(8)) #clipping values for stability
        img = np.log(img)
        
        # normalize per image
        eps = 1e-6
        img_mean = img.mean(axis=(0, 1))
        img = img - img_mean
        img_std = img.std(axis=(0, 1))
        img = img / (img_std + eps)

        img = img[..., None] # shape: (Hz, Time) -> (Hz, Time, Channel)
        img = self._apply_transform(img)

        return {"data": img, "target": label}

    def _apply_transform(self, img: np.ndarray):
        """apply transform to image and mask"""
        transformed = self.transform(image=img)
        img = transformed["image"]
        return img

### loss

In [16]:
#Combine original metric with logits
class KLDivLossWithLogits(nn.KLDivLoss):

    def __init__(self):
        super().__init__(reduction="batchmean")

    def forward(self, y, t):
        y = nn.functional.log_softmax(y,  dim=1)
        loss = super().forward(y, t)

        return loss


class KLDivLossWithLogitsForVal(nn.KLDivLoss):
    
    def __init__(self):
        """"""
        super().__init__(reduction="batchmean")
        self.log_prob_list  = []
        self.label_list = []

    def forward(self, y, t):
        y = nn.functional.log_softmax(y, dim=1)
        self.log_prob_list.append(y.numpy())
        self.label_list.append(t.numpy())
        
    def compute(self):
        log_prob = np.concatenate(self.log_prob_list, axis=0)
        label = np.concatenate(self.label_list, axis=0)
        final_metric = super().forward(
            torch.from_numpy(log_prob),
            torch.from_numpy(label)
        ).item()
        self.log_prob_list = []
        self.label_list = []
        
        return final_metric

# Training

In [18]:
#training config
class CFG:
    model_name = "efficientnet_b2"
    img_size = 512
    max_epoch = 9
    batch_size = 16
    lr = 1.0e-03
    weight_decay = 1.0e-02
    es_patience =  5
    seed = 1086
    deterministic = True
    enable_amp = False
    device = "cuda"

In [19]:
def set_random_seed(seed: int = 42, deterministic: bool = False):
    """Set seeds"""
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # torch.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = deterministic  # type: ignore
    
def to_device(
    tensors: tp.Union[tp.Tuple[torch.Tensor], tp.Dict[str, torch.Tensor]],
    device: torch.device, *args, **kwargs
):
    if isinstance(tensors, tuple):
        return (t.to(device, *args, **kwargs) for t in tensors)
    elif isinstance(tensors, dict):
        return {
            k: t.to(device, *args, **kwargs) for k, t in tensors.items()}
    else:
        return tensors.to(device, *args, **kwargs)

In [20]:
def get_path_label(val_fold, train_all: pd.DataFrame):
    """Get file path and target info."""
    
    train_idx = train_all[train_all["fold"] != val_fold].index.values
    val_idx   = train_all[train_all["fold"] == val_fold].index.values
    img_paths = []
    labels = train_all[CLASSES].values
    for label_id in train_all["label_id"].values:
        img_path = TRAIN_SPEC_SPLIT / f"{label_id}.npy"
        img_paths.append(img_path)

    train_data = {
        "image_paths": [img_paths[idx] for idx in train_idx],
        "labels": [labels[idx].astype("float32") for idx in train_idx]}

    val_data = {
        "image_paths": [img_paths[idx] for idx in val_idx],
        "labels": [labels[idx].astype("float32") for idx in val_idx]}
    
    return train_data, val_data, train_idx, val_idx

# resize images and convert to tensors
def get_transforms(CFG):
    train_transform = A.Compose([
        A.Resize(p=1.0, height=CFG.img_size, width=CFG.img_size),
        ToTensorV2(p=1.0)
    ])
    val_transform = A.Compose([
        A.Resize(p=1.0, height=CFG.img_size, width=CFG.img_size),
        ToTensorV2(p=1.0)
    ])
    return train_transform, val_transform

In [21]:
@log_time
def train_one_fold(CFG, val_fold, train_all, output_path):
    """Main"""
    torch.backends.cudnn.benchmark = True
    set_random_seed(CFG.seed, deterministic=CFG.deterministic)
    device = torch.device(CFG.device)
    
    train_path_label, val_path_label, _, _ = get_path_label(val_fold, train_all)
    train_transform, val_transform = get_transforms(CFG)
    
    train_dataset = HMSHBACSpecDataset(**train_path_label, transform=train_transform)
    val_dataset = HMSHBACSpecDataset(**val_path_label, transform=val_transform)
    
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=True, drop_last=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=False, drop_last=False)
    
    model = HMSHBACSpecModel(
        model_name=CFG.model_name, pretrained=True, num_classes=6, in_channels=1)
    model.to(device)
    
    optimizer = optim.AdamW(params=model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    #Use dynamic learning rate
    scheduler = lr_scheduler.OneCycleLR(
        optimizer=optimizer, epochs=CFG.max_epoch,
        pct_start=0.0, steps_per_epoch=len(train_loader),
        max_lr=CFG.lr, div_factor=25, final_div_factor=4.0e-01
    )
    
    loss_func = KLDivLossWithLogits()
    loss_func.to(device)
    loss_func_val = KLDivLossWithLogitsForVal()
    
    use_amp = CFG.enable_amp
    scaler = amp.GradScaler(enabled=use_amp)
    
    best_val_loss = 1.0e+09
    best_epoch = 0
    train_loss = 0
    
    for epoch in range(1, CFG.max_epoch + 1):
        epoch_start = time()
        model.train()
        for batch in tqdm(train_loader):
            batch = to_device(batch, device)
            x, t = batch["data"], batch["target"]
                
            optimizer.zero_grad()
            with amp.autocast(use_amp):
                y = model(x)
                loss = loss_func(y, t)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            train_loss += loss.item()
            
        train_loss /= len(train_loader)
            
        model.eval()
        for batch in val_loader:
            x, t = batch["data"], batch["target"]
            x = to_device(x, device)
            with torch.no_grad(), amp.autocast(use_amp):
                y = model(x)
            y = y.detach().cpu().to(torch.float32)
            loss_func_val(y, t)
        val_loss = loss_func_val.compute()        
        if val_loss < best_val_loss:
            best_epoch = epoch
            best_val_loss = val_loss
            print("save model")
            torch.save(model.state_dict(), str(output_path / f'snapshot_epoch_{epoch}.pth'))
        
        elapsed_time = time() - epoch_start
        print(f"[epoch {epoch}] train loss: {train_loss: .6f}, val loss: {val_loss: .6f}, elapsed_time: {elapsed_time: .3f}")
        logging.info(f"[epoch {epoch}] train loss: {train_loss: .6f}, val loss: {val_loss: .6f}, elapsed_time: {elapsed_time: .3f}")
        if epoch - best_epoch > CFG.es_patience:
            print("Early Stopping!")
            logging.info("Early Stopping!")
            break
            
        train_loss = 0
            
    return val_fold, best_epoch, best_val_loss

In [None]:
@log_time
def fold_training():
    score_list = []
    for fold_id in FOLDS:
        output_path = Path(f"fold{fold_id}")
        output_path.mkdir(exist_ok=True)
        print(f"[fold{fold_id}]")
        print('best_score now:',train_one_fold(CFG, fold_id, train, output_path))
        score_list.append(train_one_fold(CFG, fold_id, train, output_path))
    return score_list
score_list = fold_training()

[fold0]


  0%|          | 0/562 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.915872, val loss:  0.818980, elapsed_time:  227.422


  0%|          | 0/562 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.668964, val loss:  0.705246, elapsed_time:  227.421


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 3] train loss:  0.577018, val loss:  0.751587, elapsed_time:  227.108


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 4] train loss:  0.493835, val loss:  0.756671, elapsed_time:  227.338


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 5] train loss:  0.396719, val loss:  0.710771, elapsed_time:  227.297


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 6] train loss:  0.271711, val loss:  0.785533, elapsed_time:  227.256


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 7] train loss:  0.176244, val loss:  0.732991, elapsed_time:  227.404


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 8] train loss:  0.125580, val loss:  0.774059, elapsed_time:  227.630
Early Stopping!
train_one_fold took 1819.5308 seconds.
best_score now: (0, 2, 0.7052463889122009)


  0%|          | 0/562 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.915872, val loss:  0.818980, elapsed_time:  227.718


  0%|          | 0/562 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.668964, val loss:  0.705246, elapsed_time:  227.720


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 3] train loss:  0.577018, val loss:  0.751587, elapsed_time:  227.297


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 4] train loss:  0.493835, val loss:  0.756671, elapsed_time:  227.428


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 5] train loss:  0.396719, val loss:  0.710771, elapsed_time:  227.689


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 6] train loss:  0.271711, val loss:  0.785533, elapsed_time:  227.790


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 7] train loss:  0.176244, val loss:  0.732991, elapsed_time:  227.736


  0%|          | 0/562 [00:00<?, ?it/s]

[epoch 8] train loss:  0.125580, val loss:  0.774059, elapsed_time:  227.801
Early Stopping!
train_one_fold took 1821.8493 seconds.
[fold1]


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.958338, val loss:  0.683623, elapsed_time:  222.142


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.665999, val loss:  0.628296, elapsed_time:  222.150


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 3] train loss:  0.574089, val loss:  0.682199, elapsed_time:  222.060


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 4] train loss:  0.482817, val loss:  0.623946, elapsed_time:  222.199


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 5] train loss:  0.373078, val loss:  0.649681, elapsed_time:  221.962


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 6] train loss:  0.253731, val loss:  0.677414, elapsed_time:  222.031


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 7] train loss:  0.162571, val loss:  0.648688, elapsed_time:  222.260


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 8] train loss:  0.115097, val loss:  0.669197, elapsed_time:  222.201


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 9] train loss:  0.085109, val loss:  0.681203, elapsed_time:  222.007
train_one_fold took 1999.6349 seconds.
best_score now: (1, 4, 0.6239461898803711)


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.958338, val loss:  0.683623, elapsed_time:  222.278


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.665999, val loss:  0.628296, elapsed_time:  222.263


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 3] train loss:  0.574089, val loss:  0.682199, elapsed_time:  222.134


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 4] train loss:  0.482817, val loss:  0.623946, elapsed_time:  222.336


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 5] train loss:  0.373078, val loss:  0.649681, elapsed_time:  222.183


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 6] train loss:  0.253731, val loss:  0.677414, elapsed_time:  222.051


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 7] train loss:  0.162571, val loss:  0.648688, elapsed_time:  221.933


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 8] train loss:  0.115097, val loss:  0.669197, elapsed_time:  221.906


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 9] train loss:  0.085109, val loss:  0.681203, elapsed_time:  221.947
train_one_fold took 1999.7741 seconds.
[fold2]


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.942228, val loss:  0.799586, elapsed_time:  221.910


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.664017, val loss:  0.725259, elapsed_time:  222.097


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 3] train loss:  0.570795, val loss:  0.702234, elapsed_time:  222.209


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 4] train loss:  0.484407, val loss:  0.703310, elapsed_time:  221.993


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 5] train loss:  0.383956, val loss:  0.712684, elapsed_time:  221.901


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 6] train loss:  0.261547, val loss:  0.796136, elapsed_time:  222.012


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 7] train loss:  0.173173, val loss:  0.805693, elapsed_time:  221.917


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 8] train loss:  0.117944, val loss:  0.799426, elapsed_time:  222.139


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 9] train loss:  0.090277, val loss:  0.800140, elapsed_time:  222.278
Early Stopping!
train_one_fold took 1999.1106 seconds.
best_score now: (2, 3, 0.7022339105606079)


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.942228, val loss:  0.799586, elapsed_time:  221.975


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.664017, val loss:  0.725259, elapsed_time:  222.042


  0%|          | 0/542 [00:00<?, ?it/s]

save model
[epoch 3] train loss:  0.570795, val loss:  0.702234, elapsed_time:  221.995


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 4] train loss:  0.484407, val loss:  0.703310, elapsed_time:  221.929


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 5] train loss:  0.383956, val loss:  0.712684, elapsed_time:  222.143


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 6] train loss:  0.261547, val loss:  0.796136, elapsed_time:  222.012


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 7] train loss:  0.173173, val loss:  0.805693, elapsed_time:  221.912


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 8] train loss:  0.117944, val loss:  0.799426, elapsed_time:  221.904


  0%|          | 0/542 [00:00<?, ?it/s]

[epoch 9] train loss:  0.090277, val loss:  0.800140, elapsed_time:  221.672
Early Stopping!
train_one_fold took 1998.6132 seconds.
[fold3]


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.952333, val loss:  0.968985, elapsed_time:  228.698


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.685139, val loss:  0.732771, elapsed_time:  228.731


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 3] train loss:  0.589345, val loss:  0.700295, elapsed_time:  228.670


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 4] train loss:  0.496642, val loss:  0.704378, elapsed_time:  228.662


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 5] train loss:  0.389884, val loss:  0.728412, elapsed_time:  228.545


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 6] train loss:  0.270265, val loss:  0.699552, elapsed_time:  228.733


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 7] train loss:  0.172215, val loss:  0.723998, elapsed_time:  228.424


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 8] train loss:  0.120982, val loss:  0.705256, elapsed_time:  228.443


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 9] train loss:  0.088474, val loss:  0.733891, elapsed_time:  228.548
train_one_fold took 2058.8083 seconds.
best_score now: (3, 6, 0.6995522379875183)


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.952333, val loss:  0.968985, elapsed_time:  228.383


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 2] train loss:  0.685139, val loss:  0.732771, elapsed_time:  228.470


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 3] train loss:  0.589345, val loss:  0.700295, elapsed_time:  228.416


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 4] train loss:  0.496642, val loss:  0.704378, elapsed_time:  228.428


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 5] train loss:  0.389884, val loss:  0.728412, elapsed_time:  228.332


  0%|          | 0/566 [00:00<?, ?it/s]

save model
[epoch 6] train loss:  0.270265, val loss:  0.699552, elapsed_time:  228.482


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 7] train loss:  0.172215, val loss:  0.723998, elapsed_time:  228.318


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 8] train loss:  0.120982, val loss:  0.705256, elapsed_time:  228.453


  0%|          | 0/566 [00:00<?, ?it/s]

[epoch 9] train loss:  0.088474, val loss:  0.733891, elapsed_time:  228.344
train_one_fold took 2056.3011 seconds.
[fold4]


  0%|          | 0/570 [00:00<?, ?it/s]

save model
[epoch 1] train loss:  0.949160, val loss:  0.771900, elapsed_time:  229.549


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 2] train loss:  0.673729, val loss:  0.802574, elapsed_time:  229.662


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 3] train loss:  0.579039, val loss:  0.819735, elapsed_time:  229.604


  0%|          | 0/570 [00:00<?, ?it/s]

save model
[epoch 4] train loss:  0.485732, val loss:  0.740581, elapsed_time:  229.791


  0%|          | 0/570 [00:00<?, ?it/s]

save model
[epoch 5] train loss:  0.388928, val loss:  0.719802, elapsed_time:  229.742


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 6] train loss:  0.266366, val loss:  0.756384, elapsed_time:  229.714


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 7] train loss:  0.179283, val loss:  0.802945, elapsed_time:  229.644


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 8] train loss:  0.124417, val loss:  0.801406, elapsed_time:  229.603


  0%|          | 0/570 [00:00<?, ?it/s]

[epoch 9] train loss:  0.092961, val loss:  0.820564, elapsed_time:  229.477
train_one_fold took 2067.8081 seconds.
best_score now: (4, 5, 0.71980220079422)


  0%|          | 0/570 [00:00<?, ?it/s]

# Inference Out Of Fold

## Copy best models

In [1]:
print(score_list)

NameError: name 'score_list' is not defined

In [None]:
best_log_list = []
for (fold_id, best_epoch, _) in score_list:
    
    exp_dir_path = Path(f"fold{fold_id}")
    best_model_path = exp_dir_path / f"snapshot_epoch_{best_epoch}.pth"
    copy_to = f"./best_model_fold{fold_id}.pth"
    shutil.copy(best_model_path, copy_to)
    
    for p in exp_dir_path.glob("*.pth"):
        p.unlink()

## Inference OOF

In [3]:
@log_time
def run_inference_loop(model, loader, device):
    model.to(device)
    model.eval()
    pred_list = []
    with torch.no_grad():
        for batch in tqdm(loader):
            x = to_device(batch["data"], device)
            y = model(x)
            pred_list.append(y.softmax(dim=1).detach().cpu().numpy())
        
    pred_arr = np.concatenate(pred_list)
    del pred_list
    return pred_arr

NameError: name 'log_time' is not defined

In [None]:
label_arr = train[CLASSES].values
oof_pred_arr = np.zeros((len(train), N_CLASSES))
score_list = []

for fold_id in range(N_FOLDS):
    print(f"\n[fold {fold_id}]")
    device = torch.device(CFG.device)

    # # get_dataloader
    _, val_path_label, _, val_idx = get_path_label(fold_id, train)
    _, val_transform = get_transforms(CFG)
    val_dataset = HMSHBACSpecDataset(**val_path_label, transform=val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=False, drop_last=False)
    
    # # get model
    model_path = f"./best_model_fold{fold_id}.pth"
    model = HMSHBACSpecModel(
        model_name=CFG.model_name, pretrained=False, num_classes=6, in_channels=1)
    model.load_state_dict(torch.load(model_path, map_location=device))
    
    # # inference
    val_pred = run_inference_loop(model, val_loader, device)
    oof_pred_arr[val_idx] = val_pred
    
    del val_idx, val_path_label
    del model, val_loader
    torch.cpu.empty_cache()
    gc.collect()

## Calculate OOF score

In [None]:
import sys
sys.path.append('/kaggle/input/kaggle-kl-div')
from kaggle_kl_div import score

true = train[["label_id"] + CLASSES].copy()

oof = pd.DataFrame(oof_pred_arr, columns=CLASSES)
oof.insert(0, "label_id", train["label_id"])

cv_score = score(solution=true, submission=oof, row_id_column_name='label_id')
print('CV Score KL-Div for ResNet34d',cv_score)