In [11]:
import os
import pandas as pd
from torch.utils.data import Dataset
import glob
from pathlib import Path

In [2]:
# True when the KAGGLE_KERNEL_RUN_TYPE environment variable is present; the
# notebook uses this as its "running on Kaggle" switch.
IN_KAGGLE = os.environ.get("KAGGLE_KERNEL_RUN_TYPE") is not None

In [3]:
# Dataset root and SelfClean score file: Kaggle input mounts when running on
# Kaggle, paths relative to the notebook otherwise.
ROOT_DIR, SCORE_PATH = (
    (
        "/kaggle/input/isic-2024-challenge",
        "/kaggle/input/isic-2024-challenge-selfclean-scores/ISIC_2024_Challenge_SelfClean_Scores.csv",
    )
    if IN_KAGGLE
    else (
        "../data/ISIC_2024",
        "../data/ISIC_2024/ISIC_2024_Challenge_SelfClean_Scores.csv",
    )
)
# Sub-directory of ROOT_DIR that holds the training JPEGs.
TRAIN_DIR = "train-image/image"

In [8]:

class ISICDataset(Dataset):
    """Image/label dataset backed by a metadata DataFrame.

    Each sample is read lazily from the path stored in the ``file_path``
    column and paired with the label stored in the ``target`` column.

    Args:
        df: Metadata frame; must contain ``file_path`` and ``target`` columns.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, df: pd.DataFrame, transform=None):
        self.df = df
        # Cache both columns as arrays so item access is positional and does
        # not depend on the frame's index labels.
        self.file_names = df["file_path"].values
        self.targets = df["target"].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing frame."""
        return len(self.df)

    def __getitem__(self, index: int):
        """Return ``(image, target)`` for the sample at position ``index``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it. The target is returned as a Python ``int``.
        """
        img_path = self.file_names[index]
        target = self.targets[index]

        # The context manager closes the file handle deterministically, even
        # if decoding raises. convert() yields a separate, fully loaded image,
        # so it stays usable after the source is closed.
        with Image.open(img_path) as source:
            img = source.convert("RGB")

        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. an empty container) is not silently skipped.
        if self.transform is not None:
            img = self.transform(img)

        return img, int(target)

In [12]:

# Build the training metadata frame, keeping only rows whose image file is
# actually present in the training image directory.
train_dir = Path(ROOT_DIR) / TRAIN_DIR
df = pd.read_csv(Path(ROOT_DIR) / "train-metadata.csv")

# Render both the on-disk listing and the expected per-row paths through
# pathlib so they use the same separators and compare equal on every platform
# (glob returns OS-native separators, a hand-built "/" path may not match).
train_images = sorted(
    str(Path(found)) for found in glob.glob(str(train_dir / "*.jpg"))
)
df["file_path"] = df["isic_id"].apply(
    lambda image_id: str(train_dir / f"{image_id}.jpg")
)
df = df[df["file_path"].isin(train_images)].reset_index(drop=True)

# Human-readable label; values other than 0/1 are left unchanged.
df["target_name"] = df["target"].replace({0: "Benign", 1: "Malignant"})

dataset = ISICDataset(df=df)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '../data/ISIC_2024/train-metadata.csv'

In [13]:
# Side length (pixels) images are resized to, and the DataLoader batch size.
IMG_SIZE = 224
BATCH_SIZE = 32

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

def load_model(model_name="efficientnet_v2_m"):
    """Build a single-logit classifier together with its optimizer and loss.

    The selected torchvision architecture has the final ``Linear`` layer of
    its ``classifier`` replaced by a one-output layer, and is moved to
    ``DEVICE``.

    Args:
        model_name: One of ``"mobilenet_v3_small"``, ``"efficientnet_v2_m"``,
            ``"efficientnet_b0"`` or ``"vgg16"``. Only ``"vgg16"`` is created
            with pretrained ImageNet weights; the others are constructed
            without pretrained weights.

    Returns:
        Tuple ``(model, optimizer, criterion)`` where ``optimizer`` is Adam
        with ``lr=0.001`` over all model parameters and ``criterion`` is
        ``BCEWithLogitsLoss``.

    Raises:
        ValueError: If ``model_name`` is not a supported architecture.
    """
    # name -> (zero-argument constructor, index of the final Linear layer
    # inside ``model.classifier``). Constructors are wrapped in lambdas so
    # nothing is instantiated until a valid name has been selected.
    builders = {
        "mobilenet_v3_small": (lambda: models.mobilenet_v3_small(), 3),
        "efficientnet_v2_m": (lambda: models.efficientnet_v2_m(weights=None), 1),
        "efficientnet_b0": (lambda: models.efficientnet_b0(weights=None), 1),
        # ``weights=`` replaces the deprecated ``pretrained=True`` flag;
        # IMAGENET1K_V1 is the checkpoint that flag used to load.
        "vgg16": (
            lambda: models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1),
            6,
        ),
    }

    # Fail early with a clear message instead of an UnboundLocalError further
    # down when no branch matched.
    if model_name not in builders:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {sorted(builders)}"
        )

    build, head_index = builders[model_name]
    model = build()

    # Swap the original classification head for a single-logit output.
    old_head = model.classifier[head_index]
    model.classifier[head_index] = torch.nn.Linear(old_head.in_features, 1)

    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = BCEWithLogitsLoss()
    return model, optimizer, criterion

In [14]:
# Show which torch device was selected for this session.
DEVICE

device(type='cpu')

In [None]:
# Dataset/loader yielding resized, un-normalised tensors; intended for
# estimating the mean and std of the training data.
train_dataset = ISICDataset(
    df=train_data,
    transform=transforms.Compose(
        [
            transforms.Resize((IMG_SIZE, IMG_SIZE)),
            transforms.ToTensor(),
        ]
    ),
)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# mean = 0.0
# for images, _,_ in train_loader:
#     batch_samples = images.size(0) # batch size (the last batch can have smaller size!)        
#     images = images.view(batch_samples, images.size(1), -1) 
#     mean += images.mean(2).sum(0)  
# train_data_mean = mean / len(train_loader.dataset)

# var = 0.0
# for images, _,_ in train_loader:
#     batch_samples = images.size(0)
#     images = images.view(batch_samples, images.size(1), -1)
#     var += ((images - train_data_mean.unsqueeze(1))**2).sum([0,2])
# train_data_std = torch.sqrt(var / (len(train_loader.dataset)*IMG_SIZE*IMG_SIZE))
# print(f"mean {train_data_mean}, std {train_data_std}")

In [None]:


def train(model, train_loader, optimizer, criterion):
    """Run one training epoch and report the summed loss and pAUC.

    Each batch may be ``(inputs, targets)`` or ``(inputs, <extra>, targets)``:
    the first element is fed to the model and the last is used as the target.
    This keeps the loop compatible with the two-element samples returned by
    ``ISICDataset`` as well as with three-element batches.

    Args:
        model: Network producing one logit per sample.
        train_loader: Iterable of batches as described above.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss taking ``(logits, float targets of shape [batch, 1])``.

    Returns:
        Tuple ``(total_loss, pauc)``: the sum of the per-batch loss values
        (not averaged) and the result of ``compute_pauc`` over all targets
        and sigmoid probabilities seen during the epoch.
    """
    total_loss = 0
    all_targets = []
    all_probs = []

    model.train()
    for batch in train_loader:
        # First element = inputs, last element = targets; anything in
        # between is ignored.
        inputs = batch[0].to(DEVICE)
        # BCEWithLogitsLoss needs float targets shaped like the logits,
        # i.e. [batch, 1].
        targets = batch[-1].to(DEVICE).unsqueeze(1).float()

        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Probabilities are taken from the logits computed before the step.
        probs = torch.sigmoid(output).detach().cpu().numpy()
        all_targets.extend(targets.detach().cpu().numpy().flatten())
        all_probs.extend(probs.flatten())

    pauc = compute_pauc(np.array(all_targets), np.array(all_probs))
    return total_loss, pauc

def val(model, val_loader, criterion):
    """Evaluate the model on a loader without updating its weights.

    Each batch may be ``(inputs, targets)`` or ``(inputs, <extra>, targets)``:
    the first element is fed to the model and the last is used as the target.
    This keeps the loop compatible with the two-element samples returned by
    ``ISICDataset`` as well as with three-element batches.

    Args:
        model: Network producing one logit per sample.
        val_loader: Iterable of batches as described above.
        criterion: Loss taking ``(logits, float targets of shape [batch, 1])``.

    Returns:
        Tuple ``(total_loss, pauc, all_probs)``: the sum of the per-batch loss
        values (not averaged), the result of ``compute_pauc`` over all
        targets and probabilities, and the flat list of sigmoid
        probabilities in loader order.
    """
    total_loss = 0
    all_targets = []
    all_probs = []

    model.eval()
    with torch.no_grad():
        for batch in val_loader:
            # First element = inputs, last element = targets; anything in
            # between is ignored.
            inputs = batch[0].to(DEVICE)
            # BCEWithLogitsLoss needs float targets shaped like the logits,
            # i.e. [batch, 1].
            targets = batch[-1].to(DEVICE).unsqueeze(1).float()

            output = model(inputs)
            total_loss += criterion(output, targets).item()

            probs = torch.sigmoid(output).cpu().numpy()
            all_targets.extend(targets.cpu().numpy().flatten())
            all_probs.extend(probs.flatten())

    pauc = compute_pauc(np.array(all_targets), np.array(all_probs))
    return total_loss, pauc, all_probs



In [None]:
# Experiment configuration. EXP_NAME encodes these settings and is used as the
# checkpoint sub-directory name.
EXP_ID = 1
MODEL_NAME = "efficientnet_v2_m"
NUM_EPOCHS = 30
# BATCH_SIZE = 32
NOTE = "with_external_db"
EXP_NAME = "{:03}_{}_{}_{}_{}".format(EXP_ID, MODEL_NAME, NUM_EPOCHS, BATCH_SIZE, NOTE)  # you can name your experiment whatever you like
SAVE_PATH = "/kaggle/working"
CHECKPOINT_DIR = f"{SAVE_PATH}/{EXP_NAME}"

# train_trans = transforms.Compose([    
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),       
#     transforms.ToTensor(),
#     transforms.Normalize(mean=train_data_mean, std=train_data_std),
# ])
# val_trans =  transforms.Compose([    
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),  
#     transforms.ToTensor(),
#     transforms.Normalize(mean=train_data_mean, std=train_data_std),
# ])

# NOTE(review): within this cell, train_trans / val_trans are only defined in
# the commented-out block above, and train_data / val_data are not defined
# here either. Confirm another cell defines them, otherwise the next lines
# raise NameError on a fresh kernel.
trn_dataset = ISICDataset(train_data, transform=train_trans)
val_dataset = ISICDataset(val_data, transform=val_trans)

train_loader = DataLoader(trn_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"{len(trn_dataset)}, {len(val_dataset)}")
# Build the architecture named in MODEL_NAME so the model always matches the
# name baked into EXP_NAME and the checkpoint path.
model, optimizer, criterion = load_model(MODEL_NAME)

best_val_loss, best_val_pauc = 100, 0

## training loop

for epoch in range(NUM_EPOCHS):
    train_loss, train_pauc = train(model, train_loader, optimizer, criterion)
    val_loss, val_pauc, _ = val(model, val_loader, criterion)

    log_line = (
        f"Epoch {epoch}, train_loss {train_loss:.4f}, train_pauc {train_pauc:.2f}, "
        f"val_loss {val_loss:.4f}, val_pauc {val_pauc:.2f}"
    )

    # Keep only the weights of the epoch with the highest validation pAUC.
    if val_pauc > best_val_pauc:
        best_val_pauc = val_pauc
        os.makedirs(CHECKPOINT_DIR, exist_ok=True)
        torch.save(model.state_dict(), f"{CHECKPOINT_DIR}/best_all.pth")
        log_line += f" --> Best val_pauc {val_pauc:.2f} at epoch {epoch}"

    print(log_line)


