In [1]:
import json
import joblib
from tqdm import tqdm
from pathlib import Path

import pandas as pd
import numpy as np

import h5py
from io import BytesIO
from PIL import Image

import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F

from timm import create_model

import albumentations as A
from albumentations.pytorch import ToTensorV2

from accelerate import Accelerator

from isic_helper import DotDict, get_folds

In [2]:
# Backbone architecture name and training-run version; both are used to build
# the artifact directory below and the file names read inside it.
model_name = "resnet18"
version = "v5"
# Directory holding the trained run's artifacts. Underscores in the model name
# are replaced by hyphens in the directory name.
path = f"/kaggle/input/isic-scd-{model_name.replace('_', '-')}-{version}-train"

In [3]:
# Column names shared across the notebook.
id_column = "isic_id"  # per-image identifier; also the key used to look up image bytes
target_column = "target"  # name of the prediction column written to the submission
group_column = "patient_id"  # NOTE(review): not referenced in the cells visible here

In [4]:
# Maps raw diagnosis strings to short class codes, with one table per top-level
# key. The keys look like ISIC dataset years (presumably 2024 / 2020 / 2019 --
# confirm against the training notebook). Diagnoses with no dedicated class are
# mapped to "unknown".
label_mapping = {
    # Title-cased diagnosis names.
    "2024": {
        "Hidradenoma": "unknown",
        "Lichen planus like keratosis": "BKL",
        "Pigmented benign keratosis": "BKL",
        "Seborrheic keratosis": "BKL",
        "Solar lentigo": "BKL",
        "Nevus": "NV",
        "Angiofibroma": "unknown",
        "Dermatofibroma": "DF",
        "Fibroepithelial polyp": "unknown",
        "Scar": "unknown",
        "Hemangioma": "unknown",
        "Trichilemmal or isthmic-catagen or pilar cyst": "unknown",
        "Lentigo NOS": "BKL",
        "Verruca": "unknown",
        "Solar or actinic keratosis": "AKIEC",
        "Atypical intraepithelial melanocytic proliferation": "unknown",
        "Atypical melanocytic neoplasm": "unknown",
        "Basal cell carcinoma": "BCC",
        "Squamous cell carcinoma in situ": "SCC",
        "Squamous cell carcinoma, Invasive": "SCC",
        "Squamous cell carcinoma, NOS": "SCC",
        "Melanoma in situ": "MEL",
        "Melanoma Invasive": "MEL",
        "Melanoma metastasis": "MEL",
        "Melanoma, NOS": "MEL"
    },
    # Lower-case diagnosis names.
    "2020": {
        "nevus": "NV",
        "melanoma": "MEL",
        "seborrheic keratosis": "BKL",
        "lentigo NOS": "BKL",
        "lichenoid keratosis": "BKL",
        "other": "unknown",
        "solar lentigo": "BKL",
        "scar": "unknown",
        "cafe-au-lait macule": "unknown",
        "atypical melanocytic proliferation": "unknown",
        "pigmented benign keratosis": "BKL"
    },
    # Lower-case diagnosis names; the only table that produces the "VASC" code.
    "2019": {
        "nevus": "NV",
        "melanoma": "MEL",
        "seborrheic keratosis": "BKL",
        "pigmented benign keratosis": "BKL",
        "dermatofibroma": "DF",
        "squamous cell carcinoma": "SCC",
        "basal cell carcinoma": "BCC",
        "vascular lesion": "VASC",
        "actinic keratosis": "AKIEC",
        "solar lentigo": "BKL",
    },
}


# Sorted, de-duplicated class codes gathered from every mapping table. A code's
# position in this array is the class index used by the model output.
all_labels = np.unique(
    [code for year in ("2024", "2020", "2019") for code in label_mapping[year].values()]
)
label2idx = dict(zip(all_labels, range(len(all_labels))))

# Output columns whose probabilities are summed into the binary malignancy score.
malignant_labels = ["BCC", "MEL", "SCC"]
malignant_idx = [label2idx[name] for name in malignant_labels]

# Size of the classifier head; the mapping tables above yield nine distinct codes.
out_dim = 9

In [5]:
def val_augment(image_size):
    """Build the deterministic evaluation transform.

    The pipeline resizes the image to ``image_size`` x ``image_size`` and then
    converts it to a tensor with ``ToTensorV2``.

    Args:
        image_size: Target height and width passed to ``A.Resize``.

    Returns:
        An ``A.Compose`` pipeline that is always applied (``p=1.0``).
    """
    steps = [
        A.Resize(image_size, image_size),
        ToTensorV2(),
    ]
    return A.Compose(steps, p=1.0)


class ISICDataset(Dataset):
    """Dataset that decodes one image per metadata row.

    Args:
        metadata: Table indexed positionally via ``.iloc``; every row needs an
            ``"isic_id"`` entry and, unless ``infer`` is true, a ``"label"`` entry.
        images: Container indexed by ``isic_id`` whose entries return the
            encoded image bytes when read with ``[()]`` (in this notebook an
            open HDF5 file is passed).
        augment: Callable invoked as ``augment(image=array)`` that returns a
            mapping with the transformed tensor under ``"image"``.
        infer: When true, items are the image tensor only (no label).
    """

    def __init__(self, metadata, images, augment, infer=False):
        self.metadata = metadata
        self.images = images
        self.augment = augment
        self.infer = infer
        # Cached once so __len__ does not re-measure the metadata table.
        self.length = len(metadata)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        record = self.metadata.iloc[index]

        # Decode the stored bytes into an array, then apply the transform.
        encoded = self.images[record["isic_id"]][()]
        pixels = np.array(Image.open(BytesIO(encoded)))
        tensor = self.augment(image=pixels)["image"]

        # Scale pixel values by 1/255 after casting to float.
        data = tensor.float().div(255)

        if self.infer:
            return data

        return data, torch.tensor(record["label"]).long()

    
class ISICNet(nn.Module):
    """timm backbone followed by average pooling and a linear classifier.

    The backbone is created with ``num_classes=0`` and ``global_pool=""``;
    pooling and classification are performed in ``forward``.

    Args:
        model_name: Architecture name passed to ``create_model``.
        out_dim: Number of output logits.
        pretrained: Passed through to ``create_model``.
        infer: Stored on the instance; not read by this class.
    """

    def __init__(self, model_name, out_dim, pretrained=True, infer=False):
        super().__init__()
        self.infer = infer
        backbone_kwargs = dict(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=3,
            num_classes=0,
            global_pool="",
        )
        self.model = create_model(**backbone_kwargs)
        self.classifier = nn.Linear(self.model.num_features, out_dim)

        # Five independent dropout layers, used only in training mode.
        self.dropouts = nn.ModuleList([nn.Dropout(0.5) for _ in range(5)])

    def forward(self, data):
        """Return logits of shape ``(len(data), out_dim)`` for a batch."""
        features = self.model(data)
        pooled = F.adaptive_avg_pool2d(features, 1).reshape(len(data), -1)

        if not self.training:
            return self.classifier(pooled)

        # Training: average the classifier output over every dropout mask.
        summed = 0
        for dropout in self.dropouts:
            summed += self.classifier(dropout(pooled))
        return summed / len(self.dropouts)
    

def get_trans(img, iteration):
    """Return the test-time-augmentation view of ``img`` for ``iteration``.

    Dimensions 2 and 3 of ``img`` are the ones transformed. For
    ``iteration >= 6`` those two dimensions are swapped first. The view is then
    chosen by ``iteration % 6``:

    * 0 -- unchanged
    * 1 -- flipped along dim 2
    * 2 -- flipped along dim 3
    * 3, 4, 5 -- rotated by one, two or three quarter turns in dims (2, 3)

    Args:
        img: Tensor with at least four dimensions.
        iteration: Index of the augmentation to apply.

    Returns:
        The transformed tensor.
    """
    if iteration >= 6:
        img = img.transpose(2, 3)

    views = {
        0: lambda t: t,
        1: lambda t: torch.flip(t, dims=[2]),
        2: lambda t: torch.flip(t, dims=[3]),
        3: lambda t: torch.rot90(t, 1, dims=[2, 3]),
        4: lambda t: torch.rot90(t, 2, dims=[2, 3]),
        5: lambda t: torch.rot90(t, 3, dims=[2, 3]),
    }
    view = views.get(iteration % 6)
    return None if view is None else view(img)

    
def predict(model, test_dataloader, accelerator, out_dim, n_tta, malignant_idx, log_interval=50):
    """Run test-time-augmented inference and return one malignancy score per sample.

    For every batch the model is evaluated on ``n_tta`` views produced by
    ``get_trans``; the softmax probabilities of the views are averaged, gathered
    through ``accelerator.gather`` and concatenated over all batches.

    Args:
        model: Module mapping an image batch to logits of shape ``(batch, out_dim)``.
        test_dataloader: Iterable of image batches (tensors) supporting ``len``.
        accelerator: Object exposing ``device`` and ``gather``.
        out_dim: Number of classes the model outputs.
        n_tta: Number of augmented views averaged per batch.
        malignant_idx: Column indices summed into the score when ``out_dim == 9``.
        log_interval: A progress line is printed every ``log_interval`` steps.

    Returns:
        1-D numpy array. With ``out_dim == 9`` it is the sum of the
        ``malignant_idx`` probability columns; otherwise it is probability
        column 1.
    """
    model.eval()
    test_probs = []
    total_steps = len(test_dataloader)
    with torch.no_grad():
        for step, data in enumerate(test_dataloader):
            # Allocate the accumulator directly on the target device.
            probs = torch.zeros((data.shape[0], out_dim), device=accelerator.device)
            for idx in range(n_tta):
                probs += model(get_trans(data, idx)).softmax(1)
            probs /= n_tta

            test_probs.append(accelerator.gather(probs))

            if step % log_interval == 0:
                print(
                    f"Step: {step + 1}/{total_steps}"
                )

    test_probs = torch.cat(test_probs).cpu().numpy()
    if out_dim == 9:
        binary_probs = test_probs[:, malignant_idx].sum(1)
    else:
        # Previously read the undefined name `val_probs`, raising NameError.
        binary_probs = test_probs[:, 1]
    return binary_probs

In [6]:
# Root directory of the competition data.
INPUT_PATH = Path("../input/isic-2024-challenge/")

train_metadata = pd.read_csv(INPUT_PATH / "train-metadata.csv", low_memory=False)
test_metadata = pd.read_csv(INPUT_PATH / "test-metadata.csv")

# get_folds() is a project helper (isic_helper); the merge below requires it to
# provide "isic_id" and "patient_id", and a "fold" column is read from the
# merged frame later on. The inner join keeps only training rows that have a
# matching row in folds_df.
folds_df = get_folds()
train_metadata = train_metadata.merge(folds_df, on=["isic_id", "patient_id"], how="inner")
print(f"Train data size: {train_metadata.shape}")
print(f"Test data size: {test_metadata.shape}")

# Opened read-only and left open: the handle is passed to the dataset, which
# reads image bytes from it during inference. It is not closed in the cells
# visible here.
test_images = h5py.File(INPUT_PATH / "test-image.hdf5", mode="r")

Train data size: (401059, 57)
Test data size: (3, 44)


In [7]:
def get_dnn_predictions(train, test, test_images, model_name, version, path):
    """Predict on ``test`` with every fold checkpoint and average the scores.

    Run parameters (image size, batch size, mixed-precision mode, number of TTA
    views) are read from ``<model_name>_<version>_run_metadata.json`` under
    ``path``. For each distinct value of ``train["fold"]`` a fresh ``ISICNet``
    is built, its weights are restored from ``path / "models/fold_<fold>"`` and
    ``predict`` is run on the test set. The per-fold scores are averaged into
    the target column.

    Relies on the notebook-level names ``id_column``, ``target_column``,
    ``out_dim`` and ``malignant_idx``.

    Args:
        train: Frame with a ``"fold"`` column; only its distinct values are used.
        test: Frame with the id column; one prediction is produced per row.
        test_images: Image container handed to ``ISICDataset``.
        model_name: Backbone name, also part of the metadata file name.
        version: Run version, part of the metadata file name.
        path: ``pathlib.Path`` of the directory holding the run artifacts.

    Returns:
        Frame with the id column and the fold-averaged target column.
    """
    with open(path / f"{model_name}_{version}_run_metadata.json", "r") as f:
        run_metadata = json.load(f)
    print(run_metadata)
    params = run_metadata["params"]

    test_dataset = ISICDataset(
        test, test_images, augment=val_augment(params["image_size"]), infer=True
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=params["val_batch_size"],
        shuffle=False,
        num_workers=2,
        drop_last=False,
        pin_memory=True,
    )

    all_folds = np.unique(train["fold"])
    # Use the `test` argument (not the notebook-level `test_metadata`) so the ids
    # always belong to the rows that were actually predicted.
    test_predictions_df = pd.DataFrame({id_column: test[id_column]})
    for fold in all_folds:
        # A new Accelerator per fold, so only this fold's model is registered
        # with it when load_state() is called.
        accelerator = Accelerator(
            mixed_precision=params["mixed_precision"],
        )

        model = ISICNet(model_name=model_name, out_dim=out_dim, pretrained=False, infer=True)
        model = model.to(accelerator.device)

        # Prepare the original DataLoader each time and keep the result under a
        # separate name, so later folds never re-prepare an already prepared
        # loader.
        model, fold_dataloader = accelerator.prepare(model, test_dataloader)
        model_filepath = path / f"models/fold_{fold}"
        accelerator.load_state(model_filepath)

        test_predictions_df[f"fold_{fold}"] = predict(
            model,
            fold_dataloader,
            accelerator,
            out_dim=out_dim,
            n_tta=params["n_tta"],
            malignant_idx=malignant_idx,
        )
    test_predictions_df[target_column] = test_predictions_df[[f"fold_{fold}" for fold in all_folds]].mean(axis=1)
    return test_predictions_df[[id_column, target_column]]

In [8]:
# Run every fold checkpoint on the test set and average the per-fold scores.
test_preds_df = get_dnn_predictions(train_metadata, test_metadata, test_images, model_name, version, Path(path))

{'params': {'mixed_precision': 'fp16', 'image_size': 64, 'train_batch_size': 256, 'val_batch_size': 512, 'num_workers': 4, 'learning_rate': 0.001, 'num_epochs': 6, 'n_tta': 6, 'seed': 2022}, 'best_num_epochs': {'fold_1': 6, 'fold_2': 6, 'fold_3': 6, 'fold_4': 6, 'fold_5': 2}, 'val_auc_scores': {'fold_1': 0.9586380401484536, 'fold_2': 0.9532613001427737, 'fold_3': 0.9402818509690383, 'fold_4': 0.9495382561056271, 'fold_5': 0.9375429374306242}, 'val_pauc_scores': {'fold_1': 0.17233685365759183, 'fold_2': 0.1685326437192062, 'fold_3': 0.15481711197913414, 'fold_4': 0.16303521733161946, 'fold_5': 0.16015732818548134}, 'cv_auc_oof': 0.9470367334571145, 'cv_pauc_oof': 0.16271825095694034, 'cv_auc_avg': 0.9478524769593033, 'cv_pauc_avg': 0.1637758309746066, 'cv_auc_std': 0.007899703488679211, 'cv_pauc_std': 0.006161478619758375}
Step: 1/1
Step: 1/1
Step: 1/1
Step: 1/1
Step: 1/1


In [9]:
# Preview the first rows of the averaged predictions.
test_preds_df.head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.001117
1,ISIC_0015729,0.000133
2,ISIC_0015740,8e-05


In [10]:
# Summary statistics of the predicted scores, as a quick sanity check of their range.
test_preds_df[target_column].describe()

count    3.000000
mean     0.000443
std      0.000584
min      0.000080
25%      0.000106
50%      0.000133
75%      0.000625
max      0.001117
Name: target, dtype: float64

In [11]:
# Preview exactly the two columns that are written to the submission file.
test_preds_df[[id_column, target_column]].head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.001117
1,ISIC_0015729,0.000133
2,ISIC_0015740,8e-05


In [12]:
# Write the submission file (id and target columns only, without the index).
test_preds_df[[id_column, target_column]].to_csv("submission.csv", index=False)