In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
from io import BytesIO
import os
import sys
from torchvision import transforms
from torch.utils.data import Dataset
import torch._dynamo

# torch._dynamo.config.suppress_errors = True
sys.path.append("./ISIC2024/")
import timm
from IPython.display import clear_output
import h5py
import io
from tqdm import tqdm
from classification import *
from lightning.pytorch import Trainer
from torch.utils.data import DataLoader


# Let PyTorch trade some float32 matmul precision for speed (the "high"
# setting; see the torch.set_float32_matmul_precision documentation).
torch.set_float32_matmul_precision("high")

# Every test-set artefact lives under the same data directory.
_DATA_DIR = "./dataset/data2024"
TEST_METADATA = f"{_DATA_DIR}/test-metadata.csv"  # per-image metadata table
TEST_HDF5 = f"{_DATA_DIR}/test-image.hdf5"  # encoded images keyed by isic_id
TEST_SUBMISSION = f"{_DATA_DIR}/sample_submission.csv"  # submission template
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
# def read_images_from_hdf5(file_path):
#     images = {}
#     try:
#         with h5py.File(file_path, "r") as file:
#             for key in tqdm(file.keys(), desc="Reading Files"):
#                 try:
#                     image_data = file[key][()]
#                     image = Image.open(io.BytesIO(image_data))
#                     images[key] = image
#                 except Exception as e:
#                     print(f"Error! from {key}: {e}")
#     except Exception as e:
#         print(f"Error occured while reading files : {e}")

#     return images

class ISIC_test_image(Dataset):
    """Map-style dataset that decodes ISIC test images stored in one HDF5 file.

    Each item is read from the HDF5 file under its ``isic_id`` key, decoded
    with PIL, resized, and converted with ``transforms.ToTensor``. Only the
    image is returned (no label, no id).

    Args:
        hdf5_path: Path of the HDF5 file. Only the path is stored; the file
            is opened and closed again on every item access.
        metadata: Table indexed as ``metadata["isic_id"].values``; the order
            of that column defines the order of the items.
        image_size: ``(width, height)`` handed to ``PIL.Image.resize``.
            Defaults to ``(224, 224)``, the size that used to be hard-coded.
    """

    def __init__(self, hdf5_path, metadata, image_size=(224, 224)):
        self.hdf5_path = hdf5_path
        self.isic_ids = metadata["isic_id"].values
        self.image_size = tuple(image_size)
        # Built once instead of once per sample.
        self._to_tensor = transforms.ToTensor()

    def __len__(self):
        """Return the number of images listed in the metadata."""
        return len(self.isic_ids)

    def __getitem__(self, idx):
        """Return the image at position ``idx`` as produced by ``ToTensor``."""
        # The context manager guarantees the HDF5 handle is closed after the
        # read; previously one handle per sample was opened and never closed.
        with h5py.File(self.hdf5_path, mode="r") as hdf5_file:
            # ``[()]`` reads the whole entry into memory, so the decoded image
            # does not depend on the file staying open.
            image_data = hdf5_file[self.isic_ids[idx]][()]

        # NOTE(review): the image mode is not normalised here (no
        # ``.convert("RGB")``); this assumes the stored images already have
        # the channel count the model expects -- confirm.
        image = Image.open(io.BytesIO(image_data)).resize(self.image_size)
        return self._to_tensor(image)

In [6]:
# Test metadata; the order of its rows is the order in which images are
# served and therefore the order of the predictions.
test_metadata = pd.read_csv(TEST_METADATA)

# NOTE: despite the names, `test_loader` is the Dataset and `test_dataset` is
# the DataLoader. The names are kept because the prediction loop iterates
# over `test_dataset`.
test_loader = ISIC_test_image(TEST_HDF5, test_metadata)

loader_kwargs = {
    "batch_size": cfg.TRAIN.BATCH_SIZE,
    "num_workers": cfg.TRAIN.NUM_WORKERS,
    "shuffle": False,  # keep predictions aligned with the metadata rows
}
# DataLoader raises ValueError when prefetch_factor is given without worker
# processes, so only forward it when multiprocessing loading is enabled.
if cfg.TRAIN.NUM_WORKERS > 0:
    loader_kwargs["prefetch_factor"] = cfg.TRAIN.PREFETCH_FACTOR

test_dataset = DataLoader(test_loader, **loader_kwargs)

In [35]:
# One checkpoint per cross-validation fold; the final prediction is the mean
# of the outputs of all listed checkpoints.
checkpoint_dict = {
    "fold1": "./weights_convnext_small/fold_1/ckpt_auc_0.1943-v1.ckpt",
    "fold2": "./weights_convnext_small/fold_2/ckpt_auc_0.1943-v4.ckpt",
    "fold3": "./weights_convnext_small/fold_3/ckpt_auc_0.1937.ckpt",
    "fold4": "./weights_convnext_small/fold_4/ckpt_auc_0.1972-v3.ckpt",
    "fold5": "./weights_convnext_small/fold_5/ckpt_recall_0.9606.ckpt",
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _build_backbone():
    """Return a new ConvNeXt-small backbone configured from ``cfg``."""
    backbone = convnext_small(
        pretrained=cfg.TRAIN.PRETRAIN,
        in_22k=cfg.TRAIN.CONVEXT.IN22K,
        in_chans=cfg.DATA.IN_CHANNEL,
        num_classes=cfg.DATA.NUM_CLASS,
        drop_path_rate=cfg.TRAIN.CONVEXT.DROPOUT,
    )
    # backbone = torch.compile(backbone)
    return backbone


def _load_fold_classifier(checkpoint_path, backbone):
    """Restore one fold's ``Classifier`` around ``backbone``, on ``device``, in eval mode."""
    fold_classifier = Classifier.load_from_checkpoint(
        checkpoint_path=checkpoint_path,
        # Lets a checkpoint saved on another device be loaded on this one.
        map_location=device,
        model=backbone,
        class_weight=cfg.DATA.CLASS_WEIGHT,
        num_classes=cfg.DATA.NUM_CLASS,
        learning_rate=cfg.OPT.LEARNING_RATE,
        factor_lr=cfg.OPT.FACTOR_LR,
        patience_lr=cfg.OPT.PATIENCE_LR,
    )
    return fold_classifier.to(device).eval()


# Load every fold exactly once, before the batch loop. Each fold needs its
# own backbone instance: restoring several checkpoints into one shared module
# would leave all of them holding the weights of the last checkpoint loaded.
# This trades memory (one backbone per fold) for not re-reading every
# checkpoint from disk on every batch.
fold_classifiers = []
for checkpoint_path in checkpoint_dict.values():
    model = _build_backbone()
    classifier = _load_fold_classifier(checkpoint_path, model)
    fold_classifiers.append(classifier)
# `model` / `classifier` now refer to the last fold, as they did before.

num_folds = len(fold_classifiers)
test_predictions = []
with torch.inference_mode():
    for images in tqdm(test_dataset, desc="Prediction Loop"):
        images = images.to(device)
        # Unweighted mean of the raw classifier outputs over all folds.
        preds = sum(fold_classifier(images) for fold_classifier in fold_classifiers)
        preds = preds / num_folds
        # NOTE(review): assumes the output is (batch, num_classes) and that
        # column 1 is the score to submit -- confirm against Classifier.forward.
        test_predictions.append(preds.cpu().numpy()[:, 1])

# np.concatenate rejects an empty list, so handle an empty loader explicitly.
test_predictions = (
    np.concatenate(test_predictions)
    if test_predictions
    else np.empty(0, dtype=np.float32)
)

Prediction Loop: 100%|██████████| 1/1 [00:05<00:00,  5.24s/it]


In [37]:
# Fill the sample-submission template with the ensemble scores and save it.
example = pd.read_csv(TEST_SUBMISSION)

# Scores are assigned by position.
# NOTE(review): this assumes the template rows are in the same order as the
# test metadata the predictions were computed from -- confirm.
if len(example) != len(test_predictions):
    raise ValueError(
        f"Got {len(test_predictions)} predictions for "
        f"{len(example)} submission rows"
    )

# Item assignment always writes a real column; attribute assignment
# (`example.target = ...`) would only set a Python attribute if the template
# had no `target` column, silently producing a CSV without predictions.
example["target"] = test_predictions
example.to_csv("submission.csv", index=False)