In [1]:
import numpy as np
import pandas as pd
import polars as pl
import os
import random
import time
import datetime
import warnings
import yaml
import gc

from pathlib import Path
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler

import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import KFold, StratifiedGroupKFold, GroupKFold

import SimpleITK as sitk

import timm
import transformers
import nibabel as nib

import pydicom

import cv2
from PIL import Image

warnings.simplefilter("ignore")

# Seeding

In [2]:
def seed_everything(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: The seed value applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # processes); the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every GPU, not just the current one
    torch.backends.cudnn.deterministic = True
    # The auto-tuner may select a different convolution algorithm on each run,
    # which breaks run-to-run reproducibility, so it has to be disabled.
    torch.backends.cudnn.benchmark = False

# Splits

In [3]:
def split_fold(conf, df_):
    """Return a copy of ``df_`` with a ``fold`` column from a grouped K-fold split.

    Rows are grouped by ``study_id`` so that all rows of one study land in the
    same validation fold. An existing ``fold`` column is overwritten.

    Args:
        conf: Config object; ``conf.fold_num`` is the number of folds.
        df_: polars DataFrame containing a ``study_id`` column.

    Returns:
        A new polars DataFrame with an ``Int8`` ``fold`` column.
    """
    frame = df_.clone()
    fold_of_row = np.zeros(frame.height)

    group_splitter = GroupKFold(n_splits=conf.fold_num)
    splits = group_splitter.split(X=frame, groups=frame['study_id'])

    # Each row belongs to exactly one validation split; that index is its fold.
    for fold_id, (_train_rows, valid_rows) in enumerate(splits):
        fold_of_row[valid_rows] = fold_id

    return frame.with_columns(fold=pl.Series(fold_of_row).cast(pl.Int8))

# Dataset

In [4]:
class LSDCCoordsTrainDataset(Dataset):
    """Dataset yielding one image slice and its normalised keypoint coordinates.

    Each sample is identified by a ``study_id_level`` key. All rows of ``df``
    sharing that key contribute one ``(x, y)`` pair each; the image is loaded
    from the ``series_path`` / ``instance_number`` of the first such row.

    Args:
        conf: Config object; ``image_size`` and ``in_chans`` are read.
        df: polars DataFrame with the columns ``study_id_level``,
            ``series_path``, ``instance_number``, ``x`` and ``y``.
        series_study_id_level: Indexable collection of ``study_id_level`` keys,
            one per sample.
        transforms: Optional callable invoked as ``transforms(image=image)``
            and returning a mapping with an ``'image'`` entry.
    """

    def __init__(self, conf, df, series_study_id_level, transforms=None):
        super().__init__()
        self.conf = conf
        self.transforms = transforms
        self.series_study_id_level = series_study_id_level
        self.df = df

    def __len__(self):
        """Number of samples (one per ``study_id_level`` key)."""
        return len(self.series_study_id_level)

    def __getitem__(self, idx):
        """Build the sample dict for position ``idx``.

        Returns:
            dict with ``'ids'`` (the sample key), ``'images'`` (image scaled by
            1/255) and ``'labels'`` (flattened ``[x0, y0, x1, y1, ...]``
            coordinates divided by the source image width / height).
        """
        sample_key = self.series_study_id_level[idx]
        rows = self.df.filter(pl.col('study_id_level') == sample_key)
        series_path = rows['series_path'].item(0)
        # NOTE(review): only the first row's slice is used even if the rows of
        # this key reference different instance numbers - confirm intent.
        ins_num = rows['instance_number'].item(0)
        # Work on a private float copy so the in-place rescale below can never
        # write into (or be blocked by) the buffer exported by polars.
        coords = rows.select(['x', 'y']).to_numpy().astype(np.float64)

        # The slice is identical for every channel, so load and resize it once
        # and broadcast it, instead of repeating the disk read per channel.
        slice_2d = np.load(f'{series_path}/{ins_num}.npy')
        # Taken from the loaded array itself, so it is defined for any
        # ``in_chans`` (it used to be set only while filling channel 1).
        src_height, src_width = slice_2d.shape[:2]

        side = self.conf.image_size
        resized = cv2.resize(slice_2d, (side, side))
        image = np.zeros((side, side, self.conf.in_chans))
        image[:, :, :] = resized[:, :, None]

        coords[:, 0] = coords[:, 0] / src_width   # rescale x
        coords[:, 1] = coords[:, 1] / src_height  # rescale y
        coords = coords.flatten()  # (x0, y0, x1, y1, ...)

        # Presumably pixel values are stored in [0, 255]; verify against the
        # dataset-creation notebook.
        if self.transforms is not None:
            transformed = self.transforms(image=image)
            image = transformed['image'] / 255.
        else:
            image = image / 255.

        batch = {}
        batch['ids'] = sample_key
        batch['images'] = image
        batch['labels'] = coords

        return batch

    def _read_dcm(self, path):
        """Read a DICOM file and return its pixel array (unused by ``__getitem__``)."""
        dicom = pydicom.dcmread(path)
        data = dicom.pixel_array
        return data

In [5]:
# df = data_preprocess(CONF, train_df)

In [6]:
# dataset = LSDCCoordsTrainDataset(CONF, df, df['study_id_level'].unique())
# data = dataset[0]

In [7]:
def get_transforms(conf, types):
    """Return the albumentations pipeline for the requested split.

    Args:
        conf: Config object (only referenced by the disabled augmentations).
        types: ``'train'`` or ``'valid'``; any other key raises ``KeyError``.

    Returns:
        An ``A.Compose`` pipeline. Both splits currently only convert the image
        to a tensor; the augmentations are kept below but disabled.
    """
    train_pipeline = A.Compose([
        # Disabled augmentations:
        # A.Resize(conf.image_size, conf.image_size, interpolation=cv2.INTER_LINEAR),
        # A.HorizontalFlip(p=0.5),
        # A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        # A.OneOf([
        #     A.GridDistortion(num_steps=5, distort_limit=0.05, p=1.0),
        #     A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
        # ], p=0.25),
        ToTensorV2(),
    ])

    valid_pipeline = A.Compose([
        # Disabled:
        # A.Resize(conf.image_size ,conf.image_size, interpolation=cv2.INTER_LINEAR),
        ToTensorV2(),
    ])

    pipelines = {'train': train_pipeline, 'valid': valid_pipeline}
    return pipelines[types]

# Model

In [8]:
class LSDCCoordsModel(nn.Module):
    """Thin wrapper around a timm backbone used as a coordinate regressor.

    Args:
        conf: Config object; ``backbone``, ``in_chans`` and ``num_class`` are
            forwarded to ``timm.create_model``.
        pretrained: Whether to load pretrained backbone weights.
    """

    def __init__(self, conf, pretrained=True):
        super().__init__()
        backbone_options = dict(
            pretrained=pretrained,
            features_only=False,
            in_chans=conf.in_chans,
            num_classes=conf.num_class,
            global_pool='avg',
        )
        self.backbone = timm.create_model(conf.backbone, **backbone_options)

    def forward(self, inputs):
        """Return the raw backbone outputs for ``inputs`` (no activation applied)."""
        return self.backbone(inputs)

# Utils

In [9]:
# https://www.kaggle.com/code/yasufuminakama/fb3-deberta-v3-base-baseline-train/notebook
class Averager:
    """Tracks the latest value and the weighted running mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, running sum, sample count and mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        weighted = val * n
        self.val = val
        self.count += n
        self.sum += weighted
        self.avg = self.sum / self.count

    def get_average(self):
        """Weighted mean of everything recorded since the last reset."""
        return self.avg

    def get_value(self):
        """Most recently recorded value."""
        return self.val

In [10]:
class TimerError(Exception):
    """A custom exception used to report errors in use of Timer class"""


class Timer:
    """Stopwatch based on ``time.perf_counter`` with optional split times.

    Attributes:
        split_time: Elapsed seconds recorded by each ``split()`` call, in order.
    """

    def __init__(self):
        self.split_time = []
        self._start_time = None  # None means "not running"

    def start(self):
        """Start a new timer.

        Raises:
            TimerError: If the timer is already running.
        """
        if self._start_time is not None:
            raise TimerError("Timer is running. Use .stop() to stop it")

        self._start_time = time.perf_counter()

    def stop(self):
        """Stop the timer and report the elapsed time.

        Returns:
            Seconds elapsed since ``start()``. Callers that ignore the return
            value behave exactly as before.

        Raises:
            TimerError: If the timer is not running.
        """
        if self._start_time is None:
            raise TimerError("Timer is not running. Use .start() to start it")

        elapsed = time.perf_counter() - self._start_time
        self._start_time = None
        return elapsed

    def get_time(self):
        """Return the seconds elapsed so far without stopping the timer.

        Raises:
            TimerError: If the timer is not running.
        """
        if self._start_time is None:
            raise TimerError("Timer is not running. Use .start() to start it")

        return time.perf_counter() - self._start_time

    def split(self):
        """Append the seconds elapsed so far to ``split_time``.

        Raises:
            TimerError: If the timer is not running.
        """
        if self._start_time is None:
            raise TimerError("Timer is not running. Use .start() to start it")

        self.split_time.append(time.perf_counter() - self._start_time)

    def get_split_time(self, idx):
        """Return the ``idx``-th recorded split, in seconds."""
        return self.split_time[idx]

    @staticmethod
    def formatting(second):
        """Format a duration in seconds as ``H:MM:SS`` (rounded to whole seconds)."""
        return str(datetime.timedelta(seconds=round(second)))

In [11]:
def extract_config(conf_):
    """Dump the public attributes of ``conf_`` to ``<save_path>/config.yaml``.

    Args:
        conf_: Config class or object. Every entry of ``vars(conf_)`` whose
            name does not start with ``_`` is written; ``conf_.save_path`` is
            the output directory.
    """
    config_dict = {k: v for k, v in vars(conf_).items() if not k.startswith('_')}

    # Join with Path (as the checkpoint and log writers do) so the file lands
    # inside save_path whether or not save_path ends with a separator.
    with open(Path(conf_.save_path, 'config.yaml'), 'w') as file:
        yaml.dump(config_dict, file)

    print('Extracted config')

def data_preprocess(conf, df_):
    """Return a copy of ``df_`` with a ``series_path`` column added.

    ``series_path`` is ``<conf.images_root>/<study_id>/<series_id>``.

    Args:
        conf: Config object; ``conf.images_root`` is the image root directory.
        df_: polars DataFrame with ``study_id`` and ``series_id`` columns.

    Returns:
        A new polars DataFrame; the input frame is not modified.
    """
    # A ``series_id_condition`` key (series_id + '_' + condition) used to be
    # built here as well; it is currently disabled.
    path_parts = [
        pl.lit(conf.images_root),
        pl.col('study_id'),
        pl.col('series_id'),
    ]
    series_path = pl.concat_str(path_parts, separator='/').alias('series_path')

    return df_.clone().with_columns(series_path)

def get_dataloader(conf, df, train_dataset, valid_dataset):
    """Build the training and validation ``DataLoader`` pair.

    Args:
        conf: Config object; ``batch_size`` and ``num_workers`` are read.
        df: Frame handed to ``LSDCCoordsTrainDataset`` for sample lookup.
        train_dataset: ``study_id_level`` keys of the training samples.
        valid_dataset: ``study_id_level`` keys of the validation samples.

    Returns:
        ``(train_loader, valid_loader)``. The training loader shuffles and
        drops the last incomplete batch; the validation loader does neither.
        Both use the default collate function.
    """
    train_samples = LSDCCoordsTrainDataset(
        conf, df, train_dataset, get_transforms(conf, types='train')
    )
    valid_samples = LSDCCoordsTrainDataset(
        conf, df, valid_dataset, get_transforms(conf, types='valid')
    )

    # Options shared by both loaders.
    common = dict(
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        pin_memory=True,
    )

    train_loader = DataLoader(train_samples, shuffle=True, drop_last=True, **common)
    valid_loader = DataLoader(valid_samples, shuffle=False, drop_last=False, **common)

    return train_loader, valid_loader

def get_model(conf):
    """Instantiate the coordinate-regression model described by ``conf``."""
    return LSDCCoordsModel(conf)

def get_criterion(conf):
    """Return the loss module named by ``conf.criterion``.

    Raises:
        KeyError: If ``conf.criterion`` is not a supported name (only ``'mse'``).
    """
    available = dict(mse=nn.MSELoss())
    return available[conf.criterion]

def get_scheduler(conf, samples_per_epoch):
    """Look up the LR-scheduler factory and its keyword arguments.

    Args:
        conf: Config object; ``scheduler``, ``num_epochs`` and
            ``warmup_ratios`` are read.
        samples_per_epoch: Multiplied by ``conf.num_epochs`` to obtain the
            total number of scheduler steps (the caller in this notebook
            passes ``len(train_loader)``, i.e. batches per epoch).

    Returns:
        ``(factory, hparams)`` where ``factory(optimizer, **hparams)`` builds
        the scheduler.

    Raises:
        KeyError: If ``conf.scheduler`` is not one of ``'cosine_warmup'``,
            ``'linear_warmup'`` or ``'constant_warmup'``.
    """
    total_steps = samples_per_epoch * conf.num_epochs
    warmup_steps = int(total_steps * conf.warmup_ratios)

    registry = {
        'cosine_warmup': (
            transformers.get_cosine_schedule_with_warmup,
            {'num_warmup_steps': warmup_steps, 'num_training_steps': total_steps},
        ),
        'linear_warmup': (
            transformers.get_linear_schedule_with_warmup,
            {'num_warmup_steps': warmup_steps, 'num_training_steps': total_steps},
        ),
        # The constant schedule has no decay phase, so it takes no total length.
        'constant_warmup': (
            transformers.get_constant_schedule_with_warmup,
            {'num_warmup_steps': warmup_steps},
        ),
    }

    factory, hparams = registry[conf.scheduler]
    return factory, hparams

def get_optimizer(conf):
    """Return the optimizer class (not an instance) named by ``conf.optimizer``.

    Raises:
        KeyError: If ``conf.optimizer`` is not a supported name (only ``'adamw'``).
    """
    available = dict(adamw=optim.AdamW)
    return available[conf.optimizer]

# Trainer

In [12]:
class Trainer:
    """Runs training and validation for one fold and keeps the best checkpoint.

    Args:
        debug_run: If true, every loop stops after its first iteration and
            nothing is written to the log file.
        fold: Fold index, used in log messages and the checkpoint name.
        conf: Config object; ``lr``, ``optim_eps``, ``optim_betas1``,
            ``optim_betas2``, ``apex``, ``exp``, ``num_epochs``,
            ``verbose_step`` and ``save_path`` are read.
        device: Device the model and batches are moved to.
        model: The ``nn.Module`` to train.
        optimizer: Optimizer class, instantiated here with the model parameters.
        scheduler: Scheduler factory called as
            ``scheduler(optimizer, **scheduler_hparams)``.
        scheduler_hparams: Keyword arguments for ``scheduler``.
        criterion: Loss callable ``criterion(outputs, labels)``.
    """

    def __init__(
        self, debug_run, fold, conf, device, model, optimizer, scheduler, scheduler_hparams, criterion
    ):
        self.debug_run = debug_run
        self.current_fold = fold
        self.device = device
        self.model = model
        self.optimizer = optimizer(model.parameters(), lr=conf.lr, eps=conf.optim_eps, betas=(conf.optim_betas1, conf.optim_betas2))
        self.scheduler = scheduler(self.optimizer, **scheduler_hparams)
        self.criterion = criterion

        # ``apex`` toggles PyTorch's native mixed precision (autocast + GradScaler).
        self.apex = conf.apex
        self.scaler = GradScaler(enabled=self.apex)
        self.exp_num = conf.exp
        self.num_epochs = conf.num_epochs
        self.verbose_step = conf.verbose_step
        self.save_path = conf.save_path

        # Lower is better. Start at +inf so the first finite validation loss is
        # always checkpointed, however large it is.
        self.best_score = float('inf')

        self.record_cols = ['fold', 'epoch', 'train_loss', 'valid_loss', 'score']
        self._record_rows = []
        self.record = pd.DataFrame(columns=self.record_cols)

    def fit(self, train_loader, valid_loader):
        """Train for ``num_epochs`` epochs, validating after each one.

        Returns:
            A pandas DataFrame with one row per completed epoch and the columns
            ``fold``, ``epoch``, ``train_loss``, ``valid_loss`` and ``score``
            (all stored as floats).
        """
        self.model.to(self.device)

        self.log(f'exp: {self.exp_num}')
        self.log(f'--- FOLD {self.current_fold} ---')

        for epoch in range(self.num_epochs):
            self.current_epoch = epoch

            train_loss = self._train_fn(train_loader)

            valid_loss, _labels, _outputs = self._valid_fn(valid_loader)

            this_epoch_score = self._eval_fn(valid_loss)

            # Collect plain rows and rebuild the frame, rather than
            # concatenating onto an initially empty DataFrame every epoch.
            self._record_rows.append([
                self.current_fold, self.current_epoch, train_loss, valid_loss, this_epoch_score
            ])
            self.record = pd.DataFrame(self._record_rows, columns=self.record_cols, dtype=float)

            self.log(f'-- [Fold: {self.current_fold}, Epoch: {self.current_epoch + 1}] DONE --\n')

            if self.debug_run: break

        return self.record

    def _train_fn(self, train_loader):
        """Run one training epoch and return the sample-weighted mean loss."""
        self.log('TRAINL_LOOP')
        self.model.train()
        total_loss = Averager()
        timer = Timer()
        timer.start()

        for step, batch in enumerate(train_loader):

            inputs = batch['images'].to(self.device, dtype=torch.float)
            labels = batch['labels'].to(self.device, dtype=torch.float)

            batchsize = labels.shape[0]

            with autocast(enabled=self.apex):
                outputs = self.model(inputs)
                # Targets are normalised coordinates, so squash outputs to (0, 1).
                outputs = outputs.sigmoid()
                loss = self.criterion(outputs, labels)

            total_loss.update(loss.item(), batchsize)

            # LR used for this step. get_last_lr() is the supported accessor;
            # get_lr() is meant to be called only from within scheduler.step().
            current_lr = self.scheduler.get_last_lr()[0]

            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()

            self.scheduler.step()
            self.optimizer.zero_grad()

            if step % self.verbose_step == 0 or step == (len(train_loader) - 1):
                self.log(
                    f'[TRAIN_F{self.current_fold}], ' + \
                    f'E: {self.current_epoch + 1}/{self.num_epochs}, ' + \
                    f'S: {str(step).zfill(len(str(len(train_loader))))}/{len(train_loader)}, ' + \
                    f'L: {total_loss.get_average():.5f}, ' + \
                    f'LR: {current_lr:.8f}, ' + \
                    f'T: {Timer.formatting(timer.get_time())}'
                )

            if self.debug_run: break
            # end of train loop
        timer.stop()

        return total_loss.get_average()

    def _valid_fn(self, valid_loader):
        """Run one validation pass.

        Returns:
            ``(mean_loss, labels, outputs)`` where ``labels`` and ``outputs``
            are CPU tensors concatenated over all validation batches.
        """
        self.log("\nVALID_LOOP")
        self.model.eval()

        outputs_list = []
        labels_list = []

        total_loss = Averager()
        timer = Timer()
        timer.start()

        for step, batch in enumerate(valid_loader):

            inputs = batch['images'].to(self.device, dtype=torch.float)
            labels = batch['labels'].to(self.device, dtype=torch.float)
            batchsize = labels.shape[0]

            with torch.no_grad():
                outputs = self.model(inputs)
                outputs = outputs.sigmoid()
                loss = self.criterion(outputs, labels)

            total_loss.update(loss.item(), batchsize)

            labels_list.append(labels.detach().cpu())
            outputs_list.append(outputs.detach().cpu())

            if step % self.verbose_step == 0 or step == (len(valid_loader) - 1):
                self.log(
                    f'[VALID_F{self.current_fold}], ' + \
                    f'E: {self.current_epoch + 1}/{self.num_epochs}, ' + \
                    f'S: {str(step).zfill(len(str(len(valid_loader))))}/{len(valid_loader)}, ' + \
                    f'L: {total_loss.get_average():.5f}, ' + \
                    f'T: {Timer.formatting(timer.get_time())}'
                )
            if self.debug_run: break
            # end of the valid loop

        labels_list = torch.concat(labels_list)
        outputs_list = torch.concat(outputs_list)

        return total_loss.get_average(), labels_list, outputs_list

    def _eval_fn(self, score):
        """Checkpoint the model if ``score`` beats the best so far (lower is better).

        Returns:
            ``score`` unchanged.
        """
        if self.best_score > score:
            self.best_score = score

            file_name = f'best_score_fold{self.current_fold}.pt'

            self.model.eval()
            torch.save({
                'model_state_dict': self.model.state_dict(),
                'exp': self.exp_num,
                'fold': self.current_fold,
                'epoch': self.current_epoch,
            }, Path(self.save_path, file_name))

            self.log(f'\n-> [SAVED] Fold: {self.current_fold}, Epoch: {self.current_epoch + 1}, score: {self.best_score}\n')

        return score

    def log(self, msg):
        """Print ``msg`` and, outside debug runs, append it to ``train.log``."""
        print(msg)
        if not self.debug_run:
            with open(Path(self.save_path, 'train.log'), mode='a+', encoding='utf-8') as log:
                log.write(f'{msg}\n')

# Config

In [13]:
class CONF:
    """Experiment configuration, read as class attributes throughout the notebook."""

    # --- experiment / paths ---
    exp = 'expCA001'
    seed = 42
    images_root = '/kaggle/input/rsna24-lsdc-npy/train_images'
    save_path = '/kaggle/working/'

    # --- cross-validation ---
    fold_num = 5
    train_fold_list = [0]  # folds that are actually trained

    # --- model / input ---
    backbone = 'tf_efficientnet_b0.ns_jft_in1k'
    in_chans = 3
    image_size = 320  # images are resized to image_size x image_size
    # Number of regressed values; presumably two (x, y) points per sample -
    # TODO confirm against the label layout.
    num_class = 4

    # --- optimisation ---
    apex = True  # enables autocast / GradScaler mixed precision in the Trainer
    criterion = 'mse'
    optimizer = 'adamw'
    lr = 3e-3
    optim_eps = 1e-6
    optim_betas1 = 0.9
    optim_betas2 = 0.999
    scheduler = 'cosine_warmup'
    warmup_ratios = 0.1  # fraction of all scheduler steps spent warming up
    num_epochs = 10

    # --- data loading / logging ---
    batch_size = 8
    num_workers = 0
    verbose_step = 200  # log every N steps

# Load data

In [14]:
# Keep only the annotations that belong to Axial T2 series.
train_df = (
    pl.read_csv('/kaggle/input/rsna24-lsdc-create-dataset/merged_train.csv')
    .filter(pl.col('series_description').eq('Axial T2'))
)

# Rows per series (the count column is named 'level'); keep the series whose
# row count is a multiple of 10.
# NOTE(review): presumably 5 levels x 2 sides per series - confirm whether
# counts of 20, 30, ... are really meant to pass as well.
five_lvl = (
    train_df
    .group_by('series_id')
    .len('level')
    .filter((pl.col('level') % 10).eq(0))
)
train_df = train_df.filter(pl.col('series_id').is_in(five_lvl.get_column('series_id')))
train_df

row_id,study_id,series_id,instance_number,series_description,condition,labels,level,x,y,name,study_id_level,fold
str,i64,i64,i64,str,str,i64,str,f64,f64,str,str,i64
"""100206310_left_subarticular_st…",100206310,1012284084,20,"""Axial T2""","""left_subarticular_stenosis""",9,"""l1_l2""",180.355677,165.0342,"""100206310_1012284084_0020""","""100206310_l1_l2""",1
"""100206310_left_subarticular_st…",100206310,1012284084,28,"""Axial T2""","""left_subarticular_stenosis""",9,"""l2_l3""",177.729138,160.218878,"""100206310_1012284084_0028""","""100206310_l2_l3""",1
"""100206310_left_subarticular_st…",100206310,1012284084,37,"""Axial T2""","""left_subarticular_stenosis""",10,"""l3_l4""",173.351573,158.467852,"""100206310_1012284084_0037""","""100206310_l3_l4""",1
"""100206310_left_subarticular_st…",100206310,1012284084,46,"""Axial T2""","""left_subarticular_stenosis""",11,"""l4_l5""",168.536252,156.27907,"""100206310_1012284084_0046""","""100206310_l4_l5""",1
"""100206310_left_subarticular_st…",100206310,1012284084,55,"""Axial T2""","""left_subarticular_stenosis""",10,"""l5_s1""",167.660739,157.154583,"""100206310_1012284084_0055""","""100206310_l5_s1""",1
…,…,…,…,…,…,…,…,…,…,…,…,…
"""992674144_right_subarticular_s…",992674144,1614310972,2,"""Axial T2""","""right_subarticular_stenosis""",12,"""l1_l2""",287.716814,353.699115,"""992674144_1614310972_0002""","""992674144_l1_l2""",2
"""992674144_right_subarticular_s…",992674144,1614310972,5,"""Axial T2""","""right_subarticular_stenosis""",12,"""l2_l3""",296.778761,344.637168,"""992674144_1614310972_0005""","""992674144_l2_l3""",2
"""992674144_right_subarticular_s…",992674144,1614310972,10,"""Axial T2""","""right_subarticular_stenosis""",12,"""l3_l4""",296.778761,333.309735,"""992674144_1614310972_0010""","""992674144_l3_l4""",2
"""992674144_right_subarticular_s…",992674144,1614310972,14,"""Axial T2""","""right_subarticular_stenosis""",12,"""l4_l5""",300.176991,331.044248,"""992674144_1614310972_0014""","""992674144_l4_l5""",2


# Training

In [15]:
def run_training(conf, df, debug_run=True):
    """Train the configured folds and write the cross-validation summaries.

    Seeds the RNGs, dumps the config, adds ``series_path`` and ``fold`` to
    ``df``, trains one ``Trainer`` per fold in ``conf.train_fold_list``, then
    displays and saves ``cv_record.csv`` (every epoch) and
    ``best_eopch_record.csv`` (lowest-score epoch of each trained fold) in the
    current working directory.

    Args:
        conf: Config class or object (see ``CONF``).
        df: polars DataFrame with the training annotations.
        debug_run: If true, train only fold 0 with batch size 2 and stop every
            loop after one iteration. These overrides apply only for the
            duration of this call; ``conf`` is restored afterwards.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    seed_everything(seed=conf.seed)

    extract_config(conf)
    df = data_preprocess(conf, df)
    df = split_fold(conf, df)

    # ``conf`` is normally the shared CONF class, so the debug overrides below
    # would otherwise leak into every later run in the same kernel.
    saved_settings = (conf.batch_size, conf.train_fold_list)
    if debug_run:
        conf.batch_size = 2
        conf.train_fold_list = [0]

    trained_folds = []
    fold_records = []
    try:
        for fold in conf.train_fold_list:
            model = get_model(conf)

            train_dataset = df.filter(pl.col('fold') != fold)['study_id_level'].unique(maintain_order=True)
            valid_dataset = df.filter(pl.col('fold') == fold)['study_id_level'].unique(maintain_order=True)

            train_loader, valid_loader = get_dataloader(conf, df, train_dataset, valid_dataset)
            optimizer = get_optimizer(conf)
            scheduler, scheduler_hparams = get_scheduler(conf, len(train_loader))
            criterion = get_criterion(conf)

            trainer = Trainer(
                debug_run, fold, conf, device, model, optimizer, scheduler, scheduler_hparams, criterion
            )
            fold_records.append(trainer.fit(train_loader, valid_loader))
            trained_folds.append(fold)

            if debug_run: break
    finally:
        conf.batch_size, conf.train_fold_list = saved_settings

    # Concatenate once instead of growing a frame inside the loop.
    cv_record = pd.concat(fold_records, axis=0).reset_index(drop=True)

    # Only folds that were actually trained have rows to pick a best epoch from.
    best_epoch_idx = [cv_record[cv_record['fold'] == i]['score'].idxmin() for i in trained_folds]
    best_epoch_record = cv_record[cv_record.index.isin(best_epoch_idx)].reset_index(drop=True)

    cv_record[['fold', 'epoch']] = cv_record[['fold', 'epoch']].astype(int)
    best_epoch_record[['fold', 'epoch']] = best_epoch_record[['fold', 'epoch']].astype(int)

    display(cv_record)
    display(best_epoch_record)

    cv_record.to_csv('cv_record.csv', index=False)
    # NOTE(review): the file name is misspelled ('eopch'); kept unchanged in
    # case downstream notebooks read it under this name.
    best_epoch_record.to_csv('best_eopch_record.csv', index=False)

In [16]:
run_training(CONF, train_df, debug_run=False)

Extracted config


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

exp: expCA001
--- FOLD 0 ---
TRAINL_LOOP
[TRAIN_F0], E: 1/10, S: 000/735, L: 0.08597, LR: 0.00000000, T: 0:00:04
[TRAIN_F0], E: 1/10, S: 200/735, L: 0.03268, LR: 0.00081633, T: 0:00:56
[TRAIN_F0], E: 1/10, S: 400/735, L: 0.01799, LR: 0.00163265, T: 0:01:40
[TRAIN_F0], E: 1/10, S: 600/735, L: 0.01242, LR: 0.00244898, T: 0:02:23
[TRAIN_F0], E: 1/10, S: 734/735, L: 0.01037, LR: 0.00299592, T: 0:02:51

VALID_LOOP
[VALID_F0], E: 1/10, S: 000/185, L: 0.00359, T: 0:00:01
[VALID_F0], E: 1/10, S: 184/185, L: 0.00177, T: 0:00:33

-> [SAVED] Fold: 0, Epoch: 1, score: 0.0017742383541634022

-- [Fold: 0, Epoch: 1] DONE --

TRAINL_LOOP
[TRAIN_F0], E: 2/10, S: 000/735, L: 0.00089, LR: 0.00300000, T: 0:00:00
[TRAIN_F0], E: 2/10, S: 200/735, L: 0.00061, LR: 0.00299324, T: 0:00:26
[TRAIN_F0], E: 2/10, S: 400/735, L: 0.00058, LR: 0.00297302, T: 0:00:52
[TRAIN_F0], E: 2/10, S: 600/735, L: 0.00052, LR: 0.00293951, T: 0:01:18
[TRAIN_F0], E: 2/10, S: 734/735, L: 0.00049, LR: 0.00290978, T: 0:01:35

VALID_LOO

Unnamed: 0,fold,epoch,train_loss,valid_loss,score
0,0,0,0.010374,0.001774,0.001774
1,0,1,0.000487,0.000781,0.000781
2,0,2,0.000263,0.000647,0.000647
3,0,3,0.000196,0.000321,0.000321
4,0,4,0.000149,9.1e-05,9.1e-05
5,0,5,0.000118,0.000111,0.000111
6,0,6,9.2e-05,8.7e-05,8.7e-05
7,0,7,6.1e-05,0.000109,0.000109
8,0,8,4.4e-05,8.3e-05,8.3e-05
9,0,9,3.4e-05,7.9e-05,7.9e-05


Unnamed: 0,fold,epoch,train_loss,valid_loss,score
0,0,9,3.4e-05,7.9e-05,7.9e-05
