In [1]:
import h5py
from io import BytesIO
from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import wandb
import copy
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchvision
from torchvision.transforms import v2

from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

!pip install torcheval
from torcheval.metrics import BinaryAUROC

Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.2/179.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torcheval
Successfully installed torcheval-0.0.7


In [2]:
meta1 = pd.read_csv('/kaggle/input/isic-2018-jpg-224x224-resized/train-metadata.csv')
meta2 = pd.read_csv('/kaggle/input/isic-2019-jpg-224x224-resized/train-metadata.csv')
meta3 = pd.read_csv('/kaggle/input/isic-2020-jpg-224x224-resized/train-metadata.csv')

In [3]:
meta1 = meta1[meta1['target'] == 1][['isic_id', 'target']]
meta2 = meta2[meta2['target'] == 1][['isic_id', 'target']]
meta3 = meta3[meta3['target'] == 1][['isic_id', 'target']]

meta1['year'] = 18
meta2['year'] = 19
meta3['year'] = 20

meta = pd.concat([meta1, meta2, meta3])

meta = meta.reset_index(drop=True)

In [4]:
remove = []
for idx, row in meta.iterrows():
    if not os.path.isfile(f'/kaggle/input/isic-20{row["year"]}-jpg-224x224-resized/train-image/image/{row["isic_id"]}.jpg'):
        remove.append(idx)
        
meta = meta.drop(index=remove)

In [5]:
train_metadata = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
train_metadata = train_metadata[['isic_id', 'target']]
train_metadata['year'] = 24

  train_metadata = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')


In [6]:
train_metadata = pd.concat([train_metadata, meta])
train_metadata = train_metadata.reset_index(drop=True)

In [7]:
# Image dataset for training

class ImageDataset(Dataset):
    def __init__(self, dataframe, augmentation=None):
        self.dataframe = dataframe
        self.targets = dataframe['target'].values
        self.augmentation = augmentation
        
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        
        row = self.dataframe.iloc[idx]
        
        if row['year'] == 24:
            with Image.open(f'/kaggle/input/isic-2024-challenge/train-image/image/{row["isic_id"]}.jpg') as img:
                image = np.array(img)
        else:
            with Image.open(f'/kaggle/input/isic-20{row["year"]}-jpg-224x224-resized/train-image/image/{row["isic_id"]}.jpg') as img:
                image = np.array(img)

        target = self.targets[idx]
            
        if self.augmentation:
            image = self.augmentation(image=image)['image']
            
        return image, target

In [8]:
class customModel(torch.nn.Module):
    def __init__(self, pretrainedModel):
        super(customModel, self).__init__()
        self.pretrainedModel = pretrainedModel
        
        self.classifier = torch.nn.Sequential(
            torch.nn.Dropout(p=0.25),
            torch.nn.Linear(1000, 1),
            torch.nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.pretrainedModel(x)
        x = self.classifier(x)
        return x

In [9]:
if torch.cuda.is_available():
    print("GPU is available")
    device = torch.device("cuda")
else:
    print("GPU is not available, using CPU instead")
    device = torch.device("cpu")

GPU is available


In [10]:
EPOCHS = 10
batchSize = 128
learningRate = 0.0005
imgSize = 224
imgsPerSample = 150_000

In [11]:
# From 2020 winners
transforms_train_w = A.Compose([
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.OneOf([
        A.MotionBlur(blur_limit=5),
        A.MedianBlur(blur_limit=5),
        A.GaussianBlur(blur_limit=5),
        A.GaussNoise(var_limit=(5.0, 30.0)),
    ], p=0.7),

    A.OneOf([
        A.OpticalDistortion(distort_limit=1.0),
        A.GridDistortion(num_steps=5, distort_limit=1.),
        A.ElasticTransform(alpha=3),
    ], p=0.7),

    A.CLAHE(clip_limit=4.0, p=0.7),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
    A.Resize(imgSize, imgSize, interpolation=2),
    A.CoarseDropout(max_height=int(imgSize * 0.375), max_width=int(imgSize * 0.375), max_holes=1, p=0.3),    
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

transforms_train = A.Compose([
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.OneOf([
        A.MotionBlur(blur_limit=5),
        A.MedianBlur(blur_limit=5),
        A.GaussianBlur(blur_limit=5),
        A.GaussNoise(var_limit=(5.0, 30.0)),
    ], p=0.7),
    A.CLAHE(clip_limit=4.0, p=0.7),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
    A.Resize(imgSize, imgSize, interpolation=2),  
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

transforms_test = A.Compose([
    A.Resize(imgSize, imgSize, interpolation=2),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

train = ImageDataset(train_metadata, augmentation=transforms_train_w)



In [12]:
class_counts = train_metadata['target'].value_counts().to_dict()
class_weights = {cls: 1.0 / count for cls, count in class_counts.items()}
sample_weights = [class_weights[target] for target in train_metadata['target']]

class_weights

{0.0: 2.4958444190422947e-06, 1.0: 0.00016231131309852296}

In [13]:
class_counts

{0.0: 400666, 1.0: 6161}

In [14]:
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=imgsPerSample, replacement=True)

trainDataloader = DataLoader(train, batch_size=batchSize, sampler=sampler)

pretrainedModel = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
model = customModel(pretrainedModel)
model = model.to(device)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 85.6MB/s]


In [15]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
wandb_key = user_secrets.get_secret("wandb")
wandb.login(key=wandb_key)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [16]:
run = wandb.init(
    project="kaggle-skin-cancer",

    config={
    "learning_rate": learningRate,
    "architecture": "EfficientNet_b0",
    "epochs": EPOCHS,
    }
)

[34m[1mwandb[0m: Currently logged in as: [33moskarkuuse[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.17.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.17.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240807_090450-ohmzhce1[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mhappy-salad-25[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/oskarkuuse/kaggle-skin-cancer[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/oskarkuuse/kaggle-skin-cancer/runs/ohmzhce1[0m


In [17]:
best_model = None
best_loss = np.inf
batch_idx = 0
metric = BinaryAUROC()
metric = metric.to(device)

for epoch in range(1, EPOCHS + 1):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    for images, labels in trainDataloader:
        
        labels = labels.float()
        
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        
        outputs = model(images).squeeze(dim=1)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        loss_val = loss.item()
        
        epoch_loss += loss_val
        epoch_acc += sum((outputs.detach().cpu() >= 0.5) == labels.cpu()).item() / batchSize
        
        metric.update(outputs, labels)
        
        wandb.log({"Batch loss": loss_val})
        
        batch_idx += 1
    
    wandb.log({"epoch": epoch, "BCEloss": epoch_loss / len(trainDataloader)})
    wandb.log({"epoch": epoch, "accuracy": epoch_acc / len(trainDataloader)})
    
    wandb.log({"epoch": epoch, "AUC": metric.compute().item()})
    metric.reset()
    
#     if epoch_loss / len(trainDataloader) < best_loss:
#         best_loss = epoch_loss / len(trainDataloader)
#         best_model = copy.deepcopy(model.state_dict())

    torch.save(model.state_dict(), f'model_params_epoch_{epoch}.pt')
    
run.finish()

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:        AUC ▁▄▅▆▆▇▇███
[34m[1mwandb[0m:    BCEloss █▅▄▄▄▃▂▂▁▁
[34m[1mwandb[0m: Batch loss ▆█▇▇▃▇▅▅▄▂▆▄▅▃▄█▄▂▃▃▅▂▇▂▂▆▃▄▃▃▅▂▁▆▄▄▁▆▂▇
[34m[1mwandb[0m:   accuracy ▁▄▄▅▅▆▇▇▇█
[34m[1mwandb[0m:      epoch ▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇███
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:        AUC 0.99796
[34m[1mwandb[0m:    BCEloss 0.0488
[34m[1mwandb[0m: Batch loss 0.06475
[34m[1mwandb[0m:   accuracy 0.98209
[34m[1mwandb[0m:      epoch 10
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mhappy-salad-25[0m at: [34m[4mhttps://wandb.ai/oskarkuuse/kaggle-skin-cancer/runs/ohmzhce1[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/oskarkuuse/kaggle-skin-cancer[0m
[34m[1mwandb[0m: Synced 4 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other

In [18]:
# if best_model != None:
#     torch.save(best_model, 'model_best_params.pt')

In [19]:
# torch.save(model.state_dict(), 'model_last_params.pt')