In [20]:
from timm import create_model
import torch

In [21]:
from urllib.request import urlopen
from PIL import Image

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

In [93]:
model = create_model(
    model_name="eva02_small_patch14_336",
#     model_name="resnet18",
    pretrained=True,
    in_chans=3,
    num_classes=0,
#     global_pool="",
)

In [94]:
model

Eva(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (rope): RotaryEmbeddingCat()
  (blocks): ModuleList(
    (0-11): 12 x EvaBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): EvaAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=False)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): GluMlp(
        (fc1): Linear(in_features=384, out_features=2048, bias=True)
        (act): SiLU()
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (fc2): Linear(in_features=1024, out_features=384, bias=True)
        (drop2): Dr

In [95]:
img = torch.randn((3, 336, 336)).unsqueeze(0)

In [96]:
output = model(img)

In [97]:
output.shape

torch.Size([1, 384])

In [84]:
bs = len(img)

In [85]:
bs

1

In [86]:
import torch.nn.functional as F

In [87]:
# pool = F.adaptive_avg_pool2d(output, 1).reshape(bs, -1)

In [88]:
pool = F.adaptive_avg_pool1d(output, 1).reshape(bs, -1)

In [89]:
pool.shape

torch.Size([1, 577])

In [None]:
img = torch.randn((3, ))

In [1]:
!pip install -q libauc

In [4]:
from libauc.losses import pAUC_DRO_Loss

In [10]:
criterion = pAUC_DRO_Loss(data_len=32, gamma=0.9, Lambda=1.0)

In [11]:
import torch

In [36]:
preds = torch.randn(32, 1, requires_grad=True).to("cuda")

In [37]:
target = torch.tensor([0]*30 + [1]*2).long().to("cuda")

In [38]:
index = torch.randint(32, (32,), requires_grad=False).to("cuda")

In [39]:
loss = loss_fn(preds, target, index)

In [40]:
loss

tensor(17.7397, device='cuda:0', grad_fn=<MeanBackward0>)

In [38]:
import json
import time
from pathlib import Path
from io import BytesIO

import albumentations as A
import h5py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate import Accelerator
from accelerate.utils import (
    DistributedDataParallelKwargs,
    ProjectConfiguration,
    set_seed,
)
from safetensors import safe_open
from albumentations.pytorch import ToTensorV2
from PIL import Image
from sklearn.metrics import auc, roc_curve, roc_auc_score
from timm import create_model
from torch.utils.data import DataLoader, Dataset
from libauc.sampler import DualSampler
from libauc.losses import pAUCLoss
from libauc.optimizers import SOPAs

from isic_helper import DotDict, get_folds

In [43]:
def dev_augment(image_size):
    transform = A.Compose(
        [
            A.Transpose(p=0.5),
            A.VerticalFlip(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(
                brightness_limit=0.2, contrast_limit=0.2, p=0.75
            ),
            A.OneOf(
                [
                    A.MotionBlur(blur_limit=(5, 7)),
                    A.MedianBlur(blur_limit=(5, 7)),
                    A.GaussianBlur(blur_limit=(5, 7)),
                    A.GaussNoise(var_limit=(5.0, 30.0)),
                ],
                p=0.7,
            ),
            A.OneOf(
                [
                    A.OpticalDistortion(distort_limit=1.0),
                    A.GridDistortion(num_steps=5, distort_limit=1.0),
                    A.ElasticTransform(alpha=3),
                ],
                p=0.7,
            ),
            A.CLAHE(clip_limit=4.0, p=0.7),
            A.HueSaturationValue(
                hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5
            ),
            A.ShiftScaleRotate(
                shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85
            ),
            A.Resize(image_size, image_size),
            A.CoarseDropout(
                max_height=int(image_size * 0.375),
                max_width=int(image_size * 0.375),
                max_holes=1,
                min_holes=1,
                p=0.7,
            ),
            A.Normalize(),
            ToTensorV2(),
        ],
        p=1.0,
    )
    return transform


def val_augment(image_size):
    transform = A.Compose(
        [A.Resize(image_size, image_size), A.Normalize(), ToTensorV2()], p=1.0
    )
    return transform


class ISICDataset(Dataset):
    def __init__(self, metadata, images, augment, infer=False):
        self.metadata = metadata
        self.images = images
        self.augment = augment
        self.length = len(self.metadata)
        self.infer = infer
        
        if not infer:
            self.targets = metadata["target"].values
            # for loss function
            self.pos_indices = np.flatnonzero(self.targets == 1)
            self.pos_index_map = {}
            for i, idx in enumerate(self.pos_indices):
                self.pos_index_map[idx] = i

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        row = self.metadata.iloc[index]
        image = np.array(Image.open(BytesIO(self.images[row["isic_id"]][()])))
        if self.augment is not None:
            image = self.augment(image=image)["image"].float()
        if self.infer:
            return image, index
        else:
            label = torch.tensor(row["target"]).float()
            return image, label, index


class ISICNet(nn.Module):
    def __init__(
        self,
        model_name,
        pretrained=True
    ):
        super(ISICNet, self).__init__()
        self.model = create_model(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=3,
            num_classes=0,
            global_pool="",
        )
        in_dim = self.model.num_features
        self.classifier = nn.Linear(in_dim, 1)
        self.dropouts = nn.ModuleList([nn.Dropout(0.5) for _ in range(5)])

    def forward(self, images):
        x = self.model(images)
        bs = len(images)
        pool = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
        if self.training:
            logits = 0
            for i in range(len(self.dropouts)):
                logits += self.classifier(self.dropouts[i](pool))
            logits = logits / len(self.dropouts)
        else:
            logits = self.classifier(pool)
        return logits

    
def train_epoch(
    epoch,
    model,
    optimizer,
    criterion,
    dev_dataloader,
    lr_scheduler,
    accelerator,
    log_interval=100,
):
    model.train()
    train_loss = []
    total_steps = len(dev_dataloader)
    for step, (images, labels, index) in enumerate(dev_dataloader):
        optimizer.zero_grad()
        logits = model(images)
        probs = torch.sigmoid(logits)
        loss = criterion(probs, labels.unsqueeze(1), index)
        accelerator.backward(loss)
        optimizer.step()
#         lr_scheduler.step()

        loss_value = accelerator.gather(loss).item()
        train_loss.append(loss_value)
        smooth_loss = sum(train_loss[-100:]) / min(len(train_loss), 100)
        if (step == 0) or ((step + 1) % log_interval == 0):
            print(
                f"Epoch: {epoch} | Step: {step + 1}/{total_steps} |"
                f" Loss: {loss_value:.5f} | Smooth loss: {smooth_loss:.5f}"
            )
    train_loss = np.mean(train_loss)
    return train_loss


def get_trans(img, iteration):
    if iteration >= 6:
        img = img.transpose(2, 3)
    if iteration % 6 == 0:
        return img
    elif iteration % 6 == 1:
        return torch.flip(img, dims=[2])
    elif iteration % 6 == 2:
        return torch.flip(img, dims=[3])
    elif iteration % 6 == 3:
        return torch.rot90(img, 1, dims=[2, 3])
    elif iteration % 6 == 4:
        return torch.rot90(img, 2, dims=[2, 3])
    elif iteration % 6 == 5:
        return torch.rot90(img, 3, dims=[2, 3])


def val_epoch(
    epoch,
    model,
    criterion,
    val_dataloader,
    accelerator,
    n_tta,
    log_interval=100,
):
    model.eval()
    val_probs = []
    val_labels = []
    val_loss = []
    total_steps = len(val_dataloader)
    with torch.no_grad():
        for step, (images, labels, index) in enumerate(val_dataloader):
            logits = 0
            probs = 0
            for i in range(n_tta):
                logits_iter = model(get_trans(images, i))
                logits += logits_iter
                probs += torch.sigmoid(logits_iter)
            logits /= n_tta
            probs /= n_tta

            labels = labels.unsqueeze(1)
            loss = criterion(probs, labels, index)
            val_loss.append(loss.detach().cpu().numpy())

            probs, labels = accelerator.gather((probs, labels))
            val_probs.append(probs)
            val_labels.append(labels)

            if (step == 0) or ((step + 1) % log_interval == 0):
                print(f"Epoch: {epoch} | Step: {step + 1}/{total_steps}")

    val_loss = np.mean(val_loss)
    val_probs = torch.cat(val_probs).cpu().numpy()
    val_labels = torch.cat(val_labels).cpu().numpy()
    val_auc = compute_auc(val_labels, val_probs)
    val_pauc = compute_pauc(val_labels, val_probs, min_tpr=0.8)
    return (
        val_loss,
        val_auc,
        val_pauc,
        val_probs,
        val_labels
    )

def compute_auc(y_true, y_pred) -> float:
    """
    Compute the Area Under the Receiver Operating Characteristic Curve (ROC AUC).

    Args:
        y_true: ground truth of 1s and 0s
        y_pred: predictions of scores ranging [0, 1]

    Returns:
        Float value range [0, 1]
    """
    return roc_auc_score(y_true, y_pred)


def compute_pauc(y_true, y_pred, min_tpr: float = 0.80) -> float:
    """
    2024 ISIC Challenge metric: pAUC

    Given a solution file and submission file, this function returns the
    partial area under the receiver operating characteristic (pAUC)
    above a given true positive rate (TPR) = 0.80.
    https://en.wikipedia.org/wiki/Partial_Area_Under_the_ROC_Curve.

    (c) 2024 Nicholas R Kurtansky, MSKCC

    Args:
        min_tpr:
        y_true: ground truth of 1s and 0s
        y_pred: predictions of scores ranging [0, 1]

    Returns:
        Float value range [0, max_fpr]
    """

    # rescale the target. set 0s to 1s and 1s to 0s (since sklearn only has max_fpr)
    v_gt = abs(y_true - 1)

    # flip the submissions to their compliments
    v_pred = -1.0 * y_pred

    max_fpr = abs(1 - min_tpr)

    # using sklearn.metric functions: (1) roc_curve and (2) auc
    fpr, tpr, _ = roc_curve(v_gt, v_pred, sample_weight=None)
    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected min_tpr in range [0, 1), got: %r" % min_tpr)

    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, "right")
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

    #     # Equivalent code that uses sklearn's roc_auc_score
    #     v_gt = abs(np.asarray(solution.values)-1)
    #     v_pred = np.array([1.0 - x for x in submission.values])
    #     max_fpr = abs(1-min_tpr)
    #     partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    #     # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
    #     # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
    #     partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return partial_auc


In [30]:
INPUT_DIR = Path("/kaggle/input")
WEIGHTS_DIR = Path(".")

args = DotDict()

args.data_dir = INPUT_DIR / "isic-2024-challenge"
args.model_name = "efficientnet_b0"
args.version = "v1"
args.model_identifier = f"deep_auc_{args.model_name}_{args.version}"
args.model_dir = WEIGHTS_DIR / args.model_identifier
args.model_dir.mkdir(parents=True, exist_ok=True)
args.pretrained_model_dir = INPUT_DIR / f"isic-scd-{args.model_name.replace('_', '-')}-{args.version}-train"
args.logging_dir = "logs"
args.fold = 1

args.mixed_precision = "fp16"
args.image_size = 64
args.batch_size = 64
args.num_workers = 4
args.init_lr = 1e-3
args.gamma = 500
args.margin = 1.0
args.weight_decay = 1e-5
args.num_epochs = 10
args.n_tta: int = 6
args.seed = 2022
args.debug = True

In [49]:
train_metadata = pd.read_csv(args.data_dir / "train-metadata.csv", low_memory=False)
test_metadata = pd.read_csv(args.data_dir / "test-metadata.csv")

folds_df = get_folds()
train_metadata = train_metadata.merge(folds_df, on=["isic_id", "patient_id"], how="inner")
print(f"Train data size: {train_metadata.shape}")
print(f"Test data size: {test_metadata.shape}")

train_images = h5py.File(args.data_dir / "train-image.hdf5", mode="r")
test_images = h5py.File(args.data_dir / "test-image.hdf5", mode="r")

Train data size: (401059, 57)
Test data size: (3, 44)


In [50]:
if args.debug:
    train_metadata = train_metadata.sample(frac=0.05, random_state=args.seed)

In [51]:
train_metadata["target"].value_counts()

target
0    20040
1       13
Name: count, dtype: int64

In [52]:
logging_dir = Path(args.model_dir, args.logging_dir)
accelerator_project_config = ProjectConfiguration(
    project_dir=args.model_dir, logging_dir=str(logging_dir)
)
kwargs = DistributedDataParallelKwargs()
accelerator = Accelerator(
    mixed_precision=args.mixed_precision,
    project_config=accelerator_project_config,
    kwargs_handlers=[kwargs],
)
print(accelerator.state)

if args.seed is not None:
    set_seed(args.seed)

dev_index = train_metadata[train_metadata["fold"] != args.fold].index
val_index = train_metadata[train_metadata["fold"] == args.fold].index

dev_metadata = train_metadata.loc[dev_index, :].reset_index(drop=True)
val_metadata = train_metadata.loc[val_index, :].reset_index(drop=True)

dev_dataset = ISICDataset(
    dev_metadata, train_images, augment=dev_augment(args.image_size)
)
val_dataset = ISICDataset(
    val_metadata, train_images, augment=val_augment(args.image_size)
)

sampling_rate = 0.5
sampler = DualSampler(dev_dataset, args.batch_size, sampling_rate=sampling_rate)

dev_dataloader = DataLoader(
    dev_dataset,
    batch_size=args.batch_size,
    sampler=sampler,
    num_workers=args.num_workers,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers,
    drop_last=False,
    pin_memory=True,
)

model = ISICNet(
    model_name=args.model_name, pretrained=False,
)
model = model.to(accelerator.device)

criterion = pAUCLoss("1w", data_len=len(dev_dataset), margin=args.margin, gamma=args.gamma)
optimizer = SOPAs(model.parameters(), 
                 mode='adam',
                 lr=args.init_lr, 
                 weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    pct_start=1 / args.num_epochs,
    max_lr=args.init_lr * 10,
    div_factor=10,
    epochs=args.num_epochs,
    steps_per_epoch=len(dev_dataloader),
)

model_filepath = args.pretrained_model_dir / f"models/fold_{args.fold}/model.safetensors"
tensors = {}
with safe_open(model_filepath, framework="pt", device="cpu") as f:
    for key in f.keys():
        if "classifier" not in key:
            tensors[key] = f.get_tensor(key)
msg = model.load_state_dict(tensors, strict=False)
print(msg)
model.classifier.reset_parameters()

(
    model,
    optimizer,
    dev_dataloader,
    val_dataloader,
    lr_scheduler
) = accelerator.prepare(
    model, optimizer, dev_dataloader, val_dataloader, lr_scheduler
)

best_val_auc = 0
best_val_pauc = 0
best_val_loss = 0
best_epoch = 0
best_val_probs = None

for epoch in range(1, args.num_epochs + 1):
    print(f"Fold {args.fold} | Epoch {epoch}")
    start_time = time.time()

    train_loss = train_epoch(
        epoch,
        model,
        optimizer,
        criterion,
        dev_dataloader,
        lr_scheduler,
        accelerator,
    )
    (
        val_loss,
        val_auc,
        val_pauc,
        val_probs,
        val_targets,
    ) = val_epoch(
        epoch,
        model,
        criterion,
        val_dataloader,
        accelerator,
        args.n_tta,
    )

    if val_pauc > best_val_pauc:
        print(
            f"pAUC: {best_val_pauc:.5f} --> {val_pauc:.5f}, saving model..."
        )
        best_val_pauc = val_pauc
        best_val_auc = val_auc
        best_val_loss = val_loss
        best_epoch = epoch
        best_val_probs = binary_probs
        output_dir = f"{args.model_dir}/models/fold_{args.fold}"
        accelerator.save_state(output_dir)
    else:
        print(
            f"pAUC: {best_val_pauc:.5f} --> {val_pauc:.5f}, skipping model save..."
        )
    print(
        f"Fold: {args.fold} | Epoch: {epoch} |"
        f" Train loss: {train_loss:.5f} | Val loss: {val_loss:.5f}"
        f" Val AUC: {val_auc:.5f} | Val pAUC: {val_pauc:.5f}"
    )
    elapsed_time = time.time() - start_time
    elapsed_mins = int(elapsed_time // 60)
    elapsed_secs = int(elapsed_time % 60)
    print(f"Epoch {epoch} took {elapsed_mins}m {elapsed_secs}s")

print(
    f"Fold: {args.fold} | "
    f"Best Val pAUC: {best_val_pauc} | Best AUC: {best_val_auc} |"
    f" Best loss: {best_val_loss} |"
    f" Best epoch: {best_epoch}"
)
oof_df = pd.DataFrame(
    {
        "isic_id": val_metadata["isic_id"],
        "patient_id": val_metadata["patient_id"],
        "fold": args.fold,
        "target": val_metadata["target"],
        f"oof_{args.model_identifier}": best_val_probs,
    }
)
oof_df.to_csv(
    f"{args.model_dir}/oof_preds_{args.model_identifier}_fold_{args.fold}.csv",
    index=False,
)

fold_metadata = {
    "fold": args.fold,
    "best_epoch": best_epoch,
    "best_val_auc": best_val_auc,
    "best_val_pauc": best_val_pauc,
    "best_val_loss": float(best_val_loss),
}
with open(f"{args.model_dir}/models/fold_{args.fold}/metadata.json", "w") as f:
    json.dump(fold_metadata, f)
print(f"Finished training fold {args.fold}")


Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: fp16

_IncompatibleKeys(missing_keys=['classifier.weight', 'classifier.bias'], unexpected_keys=[])
Fold 1 | Epoch 1


ValueError: could not broadcast input array from shape (20,) into shape (32,)