In [1]:
import json
import joblib
from tqdm import tqdm
from pathlib import Path
from timeit import default_timer as timer

import pandas as pd
import numpy as np
from io import BytesIO
from PIL import Image
import h5py

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from timm import create_model
from torch.optim.lr_scheduler import OneCycleLR

from accelerate import Accelerator

from isic_helper import DotDict
from isic_helper import get_folds
from isic_helper import compute_auc, compute_pauc
from isic_helper import set_seed
from isic_helper import time_to_str

In [2]:
# Run configuration shared by the cells below.
cfg = DotDict()
cfg.infer = False  # not read by any cell visible in this notebook
cfg.cpu = False  # forwarded to Accelerator(cpu=...)
cfg.mixed_precision = "fp16"  # forwarded to Accelerator(mixed_precision=...)
cfg.tta = True  # when True, validation predictions are averaged over flipped/rotated views

cfg.pos_ratio = 0.1  # passed to make_over_sample() to size the resampled dev set

cfg.image_size = 64  # side length given to A.Resize in both augmentation pipelines
cfg.lr = 5e-4  # OneCycleLR max_lr; the Adam optimizer itself is created with cfg.lr / 5
cfg.num_epochs = 1
cfg.seed = 2022  # passed to set_seed() at the start of every fold
cfg.train_batch_size = 256
cfg.train_num_worker = 2  # DataLoader num_workers for the dev split
cfg.val_batch_size = 256
cfg.val_num_worker = 2  # DataLoader num_workers for the validation split
cfg.log_every = 10  # not read by any cell visible in this notebook

cfg.models_output_dir = "models"  # root directory for the checkpoints written by accelerator.save_state
cfg.model_name = "resnet18_v1"  # used in checkpoint paths, the OOF column name and the OOF CSV file name

In [3]:
# Where the competition data is read from and where checkpoints are written.
INPUT_PATH = Path("../input/isic-2024-challenge/")
MODELS_OUTPUT_PATH = Path(f"{cfg.models_output_dir}")
# parents=True so that a nested cfg.models_output_dir (e.g. "runs/models")
# does not fail with FileNotFoundError on a fresh working directory.
MODELS_OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

train_metadata = pd.read_csv(INPUT_PATH / "train-metadata.csv", low_memory=False)
# Opened read-only and deliberately left open: ISICDataset reads the encoded
# image bytes from this handle lazily, one item at a time.
train_images = h5py.File(INPUT_PATH / "train-image.hdf5", mode="r")

# Join the output of get_folds() onto the metadata. The inner join keeps only
# rows present in both frames; the "fold" column read further down is
# presumably supplied by folds_df — confirm against isic_helper.get_folds.
folds_df = get_folds()
train_metadata = train_metadata.merge(folds_df, on=["isic_id", "patient_id"], how="inner")
# Debug toggle: uncomment to run on a 5% sample.
# train_metadata = train_metadata.sample(frac=0.05, random_state=cfg.seed).reset_index(drop=True)
print(f"Train data size: {train_metadata.shape}")

Train data size: (401059, 57)


In [4]:
# Names of the metadata columns the rest of the notebook refers to.
id_column, target_column, group_column = "isic_id", "final_target", "patient_id"

# Column views of train_metadata; they share its index.
y_train = train_metadata[target_column]
folds = train_metadata["fold"]
groups = train_metadata[group_column]
train_ids = train_metadata[id_column]

In [5]:
# One Accelerator instance is shared by every fold; device choice and
# precision come straight from cfg.
accelerator = Accelerator(
    mixed_precision=cfg.mixed_precision,
    cpu=cfg.cpu,
)

In [6]:
# Display the device the Accelerator selected for this run.
accelerator.device

device(type='cuda')

In [7]:
def dev_augment(image_size):
    """Build the augmentation pipeline applied to training (dev) images.

    Args:
        image_size: Side length handed to ``A.Resize``; the output is square.

    Returns:
        An ``A.Compose`` that applies, in order: random transpose/flips,
        brightness/contrast jitter, one blur-or-noise transform, one geometric
        distortion, CLAHE, hue/saturation jitter, shift-scale-rotate, the
        resize, and finally ``ToTensorV2``.
    """
    # Exactly one of these is picked whenever the group fires (p=0.7).
    blur_or_noise = A.OneOf([
        A.MotionBlur(blur_limit=5),
        A.MedianBlur(blur_limit=5),
        A.GaussianBlur(blur_limit=5),
        A.GaussNoise(var_limit=(5.0, 30.0)),
    ], p=0.7)

    # Same idea for the geometric distortions.
    distortion = A.OneOf([
        A.OpticalDistortion(distort_limit=1.0),
        A.GridDistortion(num_steps=5, distort_limit=1.),
        A.ElasticTransform(alpha=3),
    ], p=0.7)

    steps = [
        A.Transpose(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.75),
        blur_or_noise,
        distortion,
        A.CLAHE(clip_limit=4.0, p=0.7),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
        A.Resize(image_size, image_size),
        # A Cutout step used to follow the resize; it is intentionally disabled.
        ToTensorV2(),
    ]
    return A.Compose(steps, p=1.)

def val_augment(image_size):
    """Build the deterministic pipeline applied to validation images.

    Only a resize to ``image_size`` x ``image_size`` followed by ``ToTensorV2``.
    No ``A.Normalize`` step is applied here; ``ISICNet.forward`` divides the
    pixel values by 255 instead.

    Args:
        image_size: Side length handed to ``A.Resize``.

    Returns:
        The composed ``A.Compose`` transform.
    """
    resize = A.Resize(image_size, image_size)
    to_tensor = ToTensorV2()
    return A.Compose([resize, to_tensor], p=1.)

class ISICDataset(Dataset):
    """Map-style dataset that decodes one image per metadata row.

    Args:
        metadata: DataFrame with one row per sample. Rows are looked up by
            position and must contain the ``id_column`` column (and the
            ``target_column`` column unless ``infer`` is set).
        images: Container indexed by image id whose entries yield the encoded
            image bytes via ``[()]`` (the open HDF5 file in this notebook).
        augment: Callable invoked as ``augment(image=ndarray)["image"]``.
        infer: When True, no ``"target"`` entry is added to the returned record.
    """

    def __init__(self, metadata, images, augment, infer=False):
        self.metadata = metadata
        self.images = images
        self.augment = augment
        self.infer = infer
        # Cached once so __len__ does not touch the frame again.
        self.length = len(metadata)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        row = self.metadata.iloc[index]

        # Encoded bytes -> PIL image -> ndarray, then through the augmentation.
        encoded = self.images[row[id_column]][()]
        pixels = np.array(Image.open(BytesIO(encoded)))
        record = {"image": self.augment(image=pixels)["image"]}

        if self.infer:
            return record

        record["target"] = torch.tensor(row[target_column]).float()
        return record

class ISICNet(nn.Module):
    """timm backbone with a single-logit linear head and multi-sample dropout.

    Args:
        arch: timm model name passed to ``create_model``.
        pretrained: Whether ``create_model`` should load pretrained weights.
        infer: Stored on the instance; not read by ``forward``.
    """

    def __init__(self, arch="resnet18", pretrained=False, infer=False):
        super().__init__()
        self.infer = infer
        # num_classes=0 and global_pool='' are requested from timm so that
        # pooling and classification are done explicitly in forward().
        self.model = create_model(model_name=arch, pretrained=pretrained, in_chans=3,  num_classes=0, global_pool='')
        self.classifier = nn.Linear(self.model.num_features, 1)

        self.dropouts = nn.ModuleList([nn.Dropout(0.5) for _ in range(5)])

    def forward(self, batch):
        """Return one logit per sample for ``batch["image"]``.

        The images are cast to float and divided by 255 before the backbone.
        In training mode the head is applied once per dropout module and the
        logits are averaged; in eval mode the head is applied once without
        dropout.
        """
        pixels = batch["image"].float() / 255

        feature_map = self.model(pixels)
        pooled = F.adaptive_avg_pool2d(feature_map, 1).reshape(len(pixels), -1)

        if not self.training:
            return self.classifier(pooled)

        # Dropout modules are visited in order so the RNG stream is unchanged.
        summed = sum(self.classifier(drop(pooled)) for drop in self.dropouts)
        return summed / len(self.dropouts)

In [8]:
def make_over_sample(train_index, target, pos_ratio):
    """Resample training indices: oversample positives, subsample negatives.

    Args:
        train_index: Index labels of the rows available for training.
        target: Binary labels. A ``pd.Series`` is aligned to ``train_index``
            by label; any other sequence must hold one value per entry of
            ``train_index``.
        pos_ratio: Controls the number of positives drawn,
            ``int(pos_ratio * n_total / (1 - pos_ratio))``.

    Returns:
        A shuffled 1-D array of index labels. Positives are drawn with
        replacement up to the desired count (or kept as-is if there are
        already enough). Negatives are the *unique* labels among
        ``3 * n_desired_pos`` draws with replacement, so the final positive
        share is not ``pos_ratio`` itself (roughly 0.28 in this notebook's
        run with ``pos_ratio=0.1``).

    Note:
        Draws come from NumPy's global RNG, so seed it beforehand for
        reproducible output.
    """
    # Use the labels that were passed in; previously the global `y_train`
    # was read here and the `target` argument was silently ignored.
    target_series = pd.Series(target, index=train_index)

    # Separate positive and negative indices
    pos_indices = target_series[target_series == 1].index
    neg_indices = target_series[target_series == 0].index

    # Calculate the number of positive samples needed
    n_pos = len(pos_indices)
    n_neg = len(neg_indices)
    n_total = n_pos + n_neg
    n_desired_pos = int((pos_ratio * n_total) / (1 - pos_ratio))

    # Oversample positive indices (RNG call order is kept: positives first)
    if n_desired_pos > n_pos:
        pos_indices_oversampled = np.random.choice(pos_indices, size=n_desired_pos, replace=True)
    else:
        pos_indices_oversampled = pos_indices.to_numpy()

    # Subsample negatives; np.unique removes repeated draws
    neg_indices_undersampled = np.unique(
        np.random.choice(neg_indices, size=n_desired_pos * 3, replace=True)
    )

    # Combine both classes and shuffle in place
    oversampled_indices = np.concatenate([neg_indices_undersampled, pos_indices_oversampled])
    np.random.shuffle(oversampled_indices)
    return oversampled_indices

In [9]:
# Cross-validated training. For each fold: resample the dev split, train for
# cfg.num_epochs, score the held-out fold (optionally with test-time
# augmentation), checkpoint after every epoch, and keep the validation
# predictions of the epoch with the best pAUC as that fold's OOF predictions.
best_num_epochs = {}
val_auc_scores = {}
val_pauc_scores = {}
all_folds = np.sort(folds.unique())
oof_predictions = np.zeros(train_metadata.shape[0])

# Views of a validation batch whose sigmoid outputs are averaged. The identity
# view is always used; with cfg.tta the two flips and three rotations over
# dims 2 and 3 are added, in the same order as the original unrolled code.
tta_views = [lambda img: img]
if cfg.tta:
    tta_views += [
        lambda img: torch.flip(img, dims=[2]),
        lambda img: torch.flip(img, dims=[3]),
        lambda img: torch.rot90(img, 1, dims=[2, 3]),
        lambda img: torch.rot90(img, 2, dims=[2, 3]),
        lambda img: torch.rot90(img, 3, dims=[2, 3]),
    ]

for fold in all_folds:
    set_seed(cfg.seed)

    # Drop whatever a previous fold (or an earlier run of this cell) registered
    # on the shared Accelerator. Without this, old models/optimizers stay alive
    # on the GPU and save_state() below would also write their weights into
    # this fold's checkpoint directory.
    accelerator.free_memory()

    print(f"Running fold: {fold}")
    dev_index = folds[folds != fold].index
    val_index = folds[folds == fold].index

    oversampled_dev_index = make_over_sample(dev_index, y_train, cfg.pos_ratio)

    dev_metadata = train_metadata.loc[oversampled_dev_index, :].reset_index(drop=True)
    print(f"Dev data size after oversampling: {dev_metadata.shape}")
    val_metadata = train_metadata.loc[val_index, :].reset_index(drop=True)
    print(f"Val data size: {val_metadata.shape}")

    print(dev_metadata[target_column].value_counts(normalize=True))

    dev_dataset = ISICDataset(dev_metadata, train_images, augment=dev_augment(image_size=cfg.image_size))
    val_dataset = ISICDataset(val_metadata, train_images, augment=val_augment(image_size=cfg.image_size))

    dev_dataloader = DataLoader(dev_dataset, shuffle=True, batch_size=cfg.train_batch_size, num_workers=cfg.train_num_worker, pin_memory=True)
    val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=cfg.val_batch_size, num_workers=cfg.val_num_worker, drop_last=False, pin_memory=True)

    net = ISICNet(pretrained=True)
    net = net.to(accelerator.device)

    optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr / 5)
    lr_scheduler = OneCycleLR(optimizer=optimizer, max_lr=cfg.lr, epochs=cfg.num_epochs, steps_per_epoch=len(dev_dataloader))

    net, optimizer, dev_dataloader, val_dataloader, lr_scheduler = accelerator.prepare(
        net, optimizer, dev_dataloader, val_dataloader, lr_scheduler
    )

    print("Ready to train")

    overall_step = 0
    starting_epoch = 0
    # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
    best_pauc_score = -np.inf
    best_auc_score = -np.inf
    best_epoch = None
    best_val_preds = None

    for epoch in range(starting_epoch, cfg.num_epochs):
        net.train()
        for batch in tqdm(dev_dataloader, total=len(dev_dataloader)):
            # We could avoid this line since we set the accelerator with `device_placement=True`.
            batch = {k: v.to(accelerator.device) for k, v in batch.items()}
            optimizer.zero_grad()
            outputs = net(batch)
            loss = F.binary_cross_entropy_with_logits(outputs, batch["target"].unsqueeze(1))
            accelerator.backward(loss)
            optimizer.step()
            # OneCycleLR is stepped per batch (steps_per_epoch=len(dev_dataloader)).
            lr_scheduler.step()

        net.eval()
        val_preds = []
        val_y = []
        with torch.no_grad():
            for batch in tqdm(val_dataloader, total=len(val_dataloader)):
                # We could avoid this line since we set the accelerator with `device_placement=True`.
                batch = {k: v.to(accelerator.device) for k, v in batch.items()}
                images = batch["image"]
                val_y_batch = accelerator.gather_for_metrics(batch["target"])

                # Sum the per-view probabilities, then divide by the view count.
                view_sum = 0
                for view in tta_views:
                    batch["image"] = view(images)
                    preds = torch.sigmoid(net(batch))
                    preds = accelerator.gather_for_metrics(preds)
                    view_sum += preds.cpu().numpy().reshape(-1)

                val_preds.append(view_sum / len(tta_views))
                val_y.append(val_y_batch.cpu().numpy().reshape(-1))

        val_preds = np.concatenate(val_preds)
        val_y = np.concatenate(val_y)
        auc = compute_auc(val_y, val_preds)
        pauc = compute_pauc(val_y, val_preds, min_tpr=0.80)

        # Model selection is driven by pAUC; the AUC of that same epoch is recorded.
        if pauc >= best_pauc_score:
            best_auc_score = auc
            best_pauc_score = pauc
            best_epoch = epoch
            best_val_preds = val_preds
        print(f"Epoch pauc: {pauc} | Best auc: {best_auc_score} | Best pauc: {best_pauc_score} | Best epoch: {best_epoch}")

        # Checkpoint every epoch under <models_output_dir>/fold_<k>/model_<name>_epoch_<e>.
        output_dir = f"fold_{fold}/model_{cfg.model_name}_epoch_{epoch}"
        if cfg.models_output_dir is not None:
            output_dir = Path(f"{cfg.models_output_dir}/{output_dir}")
        accelerator.save_state(output_dir)

    best_num_epochs[f"fold_{fold}"] = best_epoch
    val_auc_scores[f"fold_{fold}"] = best_auc_score
    val_pauc_scores[f"fold_{fold}"] = best_pauc_score

    # val_index holds index labels of train_metadata; they are used here as
    # positions into oof_predictions, which assumes train_metadata has a
    # default 0..n-1 index — TODO confirm if the sampling line above is enabled
    # without reset_index.
    oof_predictions[val_index] = best_val_preds
    print("\n")
#     break

Running fold: 1
Dev data size after oversampling: (126580, 57)
Val data size: (80214, 57)
final_target
0    0.718368
1    0.281632
Name: proportion, dtype: float64




model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Ready to train


100%|██████████| 495/495 [14:32<00:00,  1.76s/it]
100%|██████████| 314/314 [00:49<00:00,  6.37it/s]


Epoch pauc: 0.15405812351542783 | Best auc: 0.9342812122551818 | Best pauc: 0.15405812351542783 | Best epoch: 0


Running fold: 2
Dev data size after oversampling: (126587, 57)
Val data size: (80212, 57)
final_target
0    0.718383
1    0.281617
Name: proportion, dtype: float64




Ready to train


100%|██████████| 495/495 [12:40<00:00,  1.54s/it]
100%|██████████| 314/314 [00:46<00:00,  6.74it/s]


Epoch pauc: 0.15756396497405303 | Best auc: 0.9366411421126023 | Best pauc: 0.15756396497405303 | Best epoch: 0


Running fold: 3
Dev data size after oversampling: (126579, 57)
Val data size: (80211, 57)
final_target
0    0.718366
1    0.281634
Name: proportion, dtype: float64




Ready to train


100%|██████████| 495/495 [12:17<00:00,  1.49s/it]
100%|██████████| 314/314 [00:46<00:00,  6.69it/s]


Epoch pauc: 0.13751996730354044 | Best auc: 0.9215612559433928 | Best pauc: 0.13751996730354044 | Best epoch: 0


Running fold: 4
Dev data size after oversampling: (126579, 57)
Val data size: (80211, 57)
final_target
0    0.718366
1    0.281634
Name: proportion, dtype: float64




Ready to train


100%|██████████| 495/495 [12:46<00:00,  1.55s/it]
100%|██████████| 314/314 [00:46<00:00,  6.70it/s]


Epoch pauc: 0.15354856422607976 | Best auc: 0.9362398759531267 | Best pauc: 0.15354856422607976 | Best epoch: 0


Running fold: 5
Dev data size after oversampling: (126586, 57)
Val data size: (80211, 57)
final_target
0    0.718381
1    0.281619
Name: proportion, dtype: float64




Ready to train


100%|██████████| 495/495 [12:41<00:00,  1.54s/it]
100%|██████████| 314/314 [00:46<00:00,  6.74it/s]


Epoch pauc: 0.14213351073071784 | Best auc: 0.922324648093058 | Best pauc: 0.14213351073071784 | Best epoch: 0




In [10]:
# Put the out-of-fold predictions next to the id, patient group, fold and
# label of every training row, write them to CSV, and preview the first rows.
oof_columns = {
    id_column: train_ids,
    group_column: groups,
    "fold": folds,
    target_column: y_train,
    f"oof_{cfg.model_name}": oof_predictions,
}
oof_preds_df = pd.DataFrame(oof_columns)
oof_preds_df.to_csv(f"oof_preds_{cfg.model_name}.csv")
oof_preds_df.head()

Unnamed: 0,isic_id,patient_id,fold,final_target,oof_resnet18_v1
0,ISIC_0015670,IP_1235828,4,0,0.057194
1,ISIC_0015845,IP_8170065,1,0,0.328083
2,ISIC_0015864,IP_6724798,5,0,0.011359
3,ISIC_0015902,IP_4111386,2,0,0.018475
4,ISIC_0024200,IP_8313778,1,0,0.157407


In [11]:
# Best epoch index per fold (0-based, as produced by range() in the training loop).
best_num_epochs

{'fold_1': 0, 'fold_2': 0, 'fold_3': 0, 'fold_4': 0, 'fold_5': 0}

In [12]:
# Validation AUC per fold, taken at the epoch with the best pAUC.
val_auc_scores

{'fold_1': 0.9342812122551818,
 'fold_2': 0.9366411421126023,
 'fold_3': 0.9215612559433928,
 'fold_4': 0.9362398759531267,
 'fold_5': 0.922324648093058}

In [13]:
# Best validation pAUC per fold (computed with min_tpr=0.80 in the training loop).
val_pauc_scores

{'fold_1': 0.15405812351542783,
 'fold_2': 0.15756396497405303,
 'fold_3': 0.13751996730354044,
 'fold_4': 0.15354856422607976,
 'fold_5': 0.14213351073071784}

In [14]:
# Two summaries of cross-validation quality:
#   *_oof - metric computed once over all out-of-fold predictions pooled together
#   *_avg - mean of the per-fold metrics recorded during training
oof_labels = oof_preds_df[target_column]
oof_scores = oof_preds_df[f"oof_{cfg.model_name}"]

cv_auc_oof = compute_auc(oof_labels, oof_scores)
cv_pauc_oof = compute_pauc(oof_labels, oof_scores, min_tpr=0.8)

cv_auc_avg = np.mean([*val_auc_scores.values()])
cv_pauc_avg = np.mean([*val_pauc_scores.values()])

In [15]:
# Report the pooled (OOF) and fold-averaged (AVG) CV metrics, one per line.
cv_report_lines = [
    f"CV AUC OOF: {cv_auc_oof}",
    f"CV PAUC OOF: {cv_pauc_oof}",
    f"CV AUC AVG: {cv_auc_avg}",
    f"CV PAUC AVG: {cv_pauc_avg}",
]
for report_line in cv_report_lines:
    print(report_line)

CV AUC OOF: 0.9296521006265026
CV PAUC OOF: 0.14841819159902828
CV AUC AVG: 0.9302096268714724
CV PAUC AVG: 0.14896482614996379


In [16]:
# Snapshot of the run: the public (non-underscore) attributes of cfg plus
# every score computed above, written as a single JSON document.
params = {name: value for name, value in vars(cfg).items() if not name.startswith("_")}

metadata = dict(
    params=params,
    best_num_epochs=best_num_epochs,
    val_auc_scores=val_auc_scores,
    val_pauc_scores=val_pauc_scores,
    cv_auc_oof=cv_auc_oof,
    cv_pauc_oof=cv_pauc_oof,
    cv_auc_avg=cv_auc_avg,
    cv_pauc_avg=cv_pauc_avg,
)

with open("run_metadata.json", "w") as f:
    json.dump(metadata, f)