In [1]:
import json
from pathlib import Path
from pprint import pprint

import h5py
import numpy as np
import pandas as pd

from torch.utils.data import DataLoader
from accelerate import Accelerator

from isic_helper import get_folds

In [2]:
# Run configuration: which trained run to load for inference.
model_name = "efficientnet_b0"  # also passed to ISICNet(model_name=...) below
version = "v2"
mode = "pretrain"  # read as a global inside get_dnn_predictions to locate the run metadata file
# Directory of the attached Kaggle dataset for this run; underscores in the
# model name become hyphens in the directory name. This directory is added to
# sys.path and is also where the run metadata JSON and fold checkpoints are read from.
path = f"/kaggle/input/isic-scd-{model_name.replace('_', '-')}-{version}-{mode}"

In [3]:
import sys

# Make the modules shipped with the run directory (dataset, models, engine)
# importable. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if path not in sys.path:
    sys.path.append(path)

In [4]:
from dataset import test_augment, ISICDataset
from models import ISICNet
from engine import predict

In [5]:
# Column names shared by the cells below.
id_column = "isic_id"  # identifier column; key of the prediction/submission frame
target_column = "target"  # name of the prediction column written to the submission
group_column = "patient_id"  # NOTE(review): presumably the grouping key used for fold assignment — confirm in isic_helper

In [6]:
INPUT_PATH = Path("../input/isic-2024-challenge/")

train_metadata = pd.read_csv(INPUT_PATH / "train-metadata.csv", low_memory=False)
test_metadata = pd.read_csv(INPUT_PATH / "test-metadata.csv")

# Attach the fold assignment to the training metadata. The join keys reuse the
# column-name constants defined above instead of repeating the string literals.
# The merge is an inner join, so any training row without a matching entry in
# folds_df is dropped; the printed shape makes that visible.
folds_df = get_folds()
train_metadata = train_metadata.merge(folds_df, on=[id_column, group_column], how="inner")
print(f"Train data size: {train_metadata.shape}")
print(f"Test data size: {test_metadata.shape}")

# The HDF5 handle is intentionally left open: it is passed to
# get_dnn_predictions further down, which hands it to the dataset.
test_images = h5py.File(INPUT_PATH / "test-image.hdf5", mode="r")

Train data size: (401059, 57)
Test data size: (3, 44)


In [7]:
def get_dnn_predictions(train, test, test_images, model_name, version, path, folds=(1,)):
    """Predict the target for every row of ``test`` using saved fold checkpoints.

    The run's metadata JSON is read from ``path`` to recover the inference
    parameters (``image_size``, ``mixed_precision``, ``n_tta``). For each
    requested fold a fresh model is built, its checkpoint is loaded from
    ``path / "models/fold_<fold>"`` and predictions are produced with
    ``predict``. The per-fold predictions are averaged into the final target.

    Note: the metadata filename also depends on the notebook-level global
    ``mode``, which is not a parameter of this function.

    Args:
        train: Training metadata. Currently unused; kept so existing callers
            keep working. NOTE(review): it presumably carries a ``fold``
            column, so ``folds=np.unique(train["fold"])`` would ensemble
            every fold — confirm before relying on it.
        test: Test metadata frame; must contain the ``id_column`` column.
        test_images: Image store handed to ``ISICDataset`` unchanged.
        model_name: Architecture name, used for the metadata filename and
            passed to ``ISICNet``.
        version: Run version string, used for the metadata filename.
        path: ``pathlib.Path`` of the run directory.
        folds: Fold identifiers whose checkpoints are ensembled. Defaults to
            ``(1,)``, i.e. only fold 1.

    Returns:
        DataFrame with two columns, ``id_column`` and ``target_column``.

    Raises:
        ValueError: If ``folds`` is empty.
    """
    all_folds = list(folds)
    if not all_folds:
        raise ValueError("folds must contain at least one fold identifier")

    with open(path / f"{model_name}_{version}_{mode}_run_metadata.json", "r") as f:
        run_metadata = json.load(f)
    params = run_metadata["params"]
    pprint(params)

    test_dataset = ISICDataset(
        test, test_images, augment=test_augment(params["image_size"]), infer=True
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=512,
        shuffle=False,  # keep row order so predictions line up with the ids
        num_workers=4,
        drop_last=False,  # every test row needs a prediction
        pin_memory=True,
    )

    # Take the ids from the `test` argument (not the notebook global) so the
    # result always matches the frame that was actually predicted on.
    test_predictions_df = pd.DataFrame({id_column: test[id_column]})
    for fold in all_folds:
        # A fresh Accelerator per fold keeps load_state from seeing models
        # registered for earlier folds.
        accelerator = Accelerator(
            mixed_precision=params["mixed_precision"],
        )

        model = ISICNet(model_name=model_name, pretrained=False)
        model = model.to(accelerator.device)

        # Bind the prepared loader to its own name so the next fold prepares
        # the original DataLoader rather than an already wrapped one.
        model, fold_dataloader = accelerator.prepare(model, test_dataloader)
        model_filepath = path / f"models/fold_{fold}"
        accelerator.load_state(model_filepath)

        test_predictions_df[f"fold_{fold}"] = predict(
            model, fold_dataloader, accelerator, n_tta=params["n_tta"]
        )

    fold_columns = [f"fold_{fold}" for fold in all_folds]
    test_predictions_df[target_column] = test_predictions_df[fold_columns].mean(axis=1)
    return test_predictions_df[[id_column, target_column]]

In [8]:
# Run inference on the test set; `path` is a plain string above, so it is
# wrapped in Path because get_dnn_predictions joins it with the `/` operator.
test_preds_df = get_dnn_predictions(train_metadata, test_metadata, test_images, model_name, version, Path(path))

{'batch_size': 64,
 'debug': True,
 'ext': '2020,2019',
 'image_size': 128,
 'init_lr': 3e-05,
 'mixed_precision': 'fp16',
 'n_tta': 8,
 'num_epochs': 15,
 'num_workers': 8,
 'only_malignant': True,
 'seed': 2022}
Step: 1/1


In [9]:
# Preview the first rows of the predictions (id and averaged target).
test_preds_df.head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.41866
1,ISIC_0015729,0.535086
2,ISIC_0015740,0.389089


In [10]:
# Summary statistics of the predicted target, as a quick sanity check of its range.
test_preds_df[target_column].describe()

count    3.000000
mean     0.447612
std      0.077184
min      0.389089
25%      0.403875
50%      0.418660
75%      0.476873
max      0.535086
Name: target, dtype: float64

In [11]:
# Preview exactly the columns that get written to the submission file.
# get_dnn_predictions already returns only these two columns, so this shows
# the same rows as the earlier head() call.
test_preds_df[[id_column, target_column]].head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.41866
1,ISIC_0015729,0.535086
2,ISIC_0015740,0.389089


In [12]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
test_preds_df[[id_column, target_column]].to_csv("submission.csv", index=False)