In [1]:
from torch.utils.data import DataLoader, Dataset
import h5py
import os
import json
import numpy as np
from tqdm import tqdm
from transformers import ViTForImageClassification, ViTImageProcessor
from accelerate import Accelerator
import os
import torch
from transformers import (ViTForImageClassification, ViTImageProcessor, DeiTForImageClassification, 
                          CvtForImageClassification, AutoFeatureExtractor, CLIPModel, CLIPProcessor, 
                          ViTMAEForPreTraining, AutoImageProcessor, ViTModel)
import timm
import json
import pandas as pd 
from PIL import Image
import io
import numpy as np
import warnings
warnings.filterwarnings("ignore")


In [3]:
# --- Notebook configuration ---------------------------------------------------
# Input locations: the HDF5 image archive and the folder with fine-tuned checkpoints.
h5_file = "/kaggle/input/isic-2024-challenge/train-image.hdf5"
base_dir = "/kaggle/input/2019-finetuned-vits/"

# Models to run, as (model identifier, inference batch size) pairs.
# Candidates that are currently switched off:
#     ("facebook/deit-base-distilled-patch16-224", 64)
#     ("microsoft/cvt-13", 64)
#     ("facebook/dino-vitb16", 64)
#     ("facebook/vit-mae-base", 32)
vit_models = [("google/vit-base-patch16-224", 64)]

# One shared Accelerator; the functions defined below read it as a module-level global.
accelerator = Accelerator()
device = accelerator.device

print(f"The device used: {device}")


The device used: cuda


In [6]:

class H5ImageDataset(Dataset):
    """Serve preprocessed images stored under the top-level keys of an HDF5 file.

    Every top-level key is treated as an image id. ``__getitem__`` returns a
    ``(pixel_values, image_id)`` pair, where ``pixel_values`` is the
    preprocessor output with its leading batch dimension squeezed away.

    Args:
        h5_file: Path of the HDF5 file to read.
        preprocessor: Callable invoked as
            ``preprocessor(images=..., return_tensors="pt")`` whose result
            exposes a ``'pixel_values'`` entry.
        limit: Keep only the first ``limit`` keys; ``None`` keeps all of them.
    """

    def __init__(self, h5_file, preprocessor, limit=100000):
        self.h5_file = h5_file
        self.preprocessor = preprocessor
        # The read handle is opened on first access (see _handle) instead of
        # here, so a freshly built dataset holds no open file and can be
        # handed to DataLoader worker processes safely.
        self.h5_data = None

        # Open the file only long enough to list the image ids.
        with h5py.File(h5_file, 'r') as h5_index:
            image_ids = list(h5_index.keys())
        if limit is not None:
            image_ids = image_ids[:limit]
        self.image_ids = image_ids

    def __len__(self):
        return len(self.image_ids)

    def __getstate__(self):
        # Never pickle an open HDF5 handle; the copy re-opens the file lazily.
        state = self.__dict__.copy()
        state['h5_data'] = None
        return state

    def _handle(self):
        """Return the HDF5 read handle, opening it on first use."""
        if self.h5_data is None:
            self.h5_data = h5py.File(self.h5_file, 'r')
        return self.h5_data

    def close(self):
        """Close the HDF5 handle if it is open; safe to call more than once."""
        if self.h5_data is not None:
            self.h5_data.close()
            self.h5_data = None

    @staticmethod
    def _to_pil(image_data):
        """Turn a raw HDF5 payload into an RGB PIL image.

        Encoded byte strings are decoded and arrays are wrapped directly.
        Any other payload type is returned unchanged, as before.
        """
        # np.bytes_ is a subclass of bytes, so this covers both flavours.
        if isinstance(image_data, bytes):
            image = Image.open(io.BytesIO(image_data))
        elif isinstance(image_data, np.ndarray):
            image = Image.fromarray(image_data)
        else:
            return image_data
        # Normalise grayscale / palette / RGBA inputs to three channels.
        return image.convert("RGB")

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_data = self._to_pil(self._handle()[image_id][()])

        # Now pass the image to the preprocessor
        inputs = self.preprocessor(images=image_data, return_tensors="pt")
        return inputs['pixel_values'].squeeze(0), image_id


# def get_model_and_preprocessor(model_dir, num_labels=2, base_dir="/kaggle/input/2019-finetuned-vits/"):
#     # Modify the model_dir string for local path compatibility
#     model_dir = model_dir.replace("/", "_").replace("-", "_")
    
#     # Define the paths for the model and preprocessor
#     config_path = os.path.join(base_dir, f"{model_dir}_config.json")
#     model_path = os.path.join(base_dir, f"best_model_{model_dir}.pth")
#     preprocessor_config_path = os.path.join(base_dir, f"{model_dir}_preprocessor_config.json")
    
#     # Load the preprocessor config
#     with open(preprocessor_config_path, 'r') as f:
#         preprocessor_config = json.load(f)

#     if "vit" in model_dir:
#         model = ViTForImageClassification.from_pretrained(config_path, num_labels=num_labels)
#         preprocessor = ViTImageProcessor.from_pretrained(preprocessor_config)
#     elif "dino" in model_dir:
#         model = ViTModel.from_pretrained(config_path)
#         preprocessor = ViTImageProcessor.from_pretrained(preprocessor_config)
#     elif "deit" in model_dir:
#         model = DeiTForImageClassification.from_pretrained(config_path, num_labels=num_labels)
#         preprocessor = AutoFeatureExtractor.from_pretrained(preprocessor_config)
#     elif "cvt" in model_dir:
#         model = CvtForImageClassification.from_pretrained(config_path, num_labels=num_labels)
#         preprocessor = AutoFeatureExtractor.from_pretrained(preprocessor_config)
#     elif "timm" in model_dir:
#         model = timm.create_model(model_dir.split('/')[-1], pretrained=True, num_classes=num_labels)
#         data_config = timm.data.resolve_model_data_config(model)
#         preprocessor = timm.data.create_transform(**data_config)
#     elif "clip" in model_dir:
#         model = CLIPModel.from_pretrained(config_path)
#         preprocessor = CLIPProcessor.from_pretrained(preprocessor_config)
#     elif "mae" in model_dir:
#         model = ViTMAEForPreTraining.from_pretrained(config_path)
#         preprocessor = AutoImageProcessor.from_pretrained(preprocessor_config)
#     else:
#         raise ValueError(f"Unsupported model: {model_dir}")
    
#     # Load the model's weights from the .pth file
#     model.load_state_dict(torch.load(model_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
#     model.eval()  # Set the model to evaluation mode

#     return model, preprocessor
def get_model_and_preprocessor(model_dir, num_labels=2, base_dir="/kaggle/input/2019-finetuned-vits/"):
    """Load a model and its matching preprocessor for ``model_dir``.

    The identifier is turned into a folder name by replacing "/" and "-" with
    "_"; that folder is looked up under ``base_dir``. If the folder contains a
    ``best_model.pth`` file, its state dict is loaded into the model. Relies on
    the module-level ``accelerator`` for the checkpoint's ``map_location``.

    Args:
        model_dir: Model identifier, e.g. "google/vit-base-patch16-224".
        num_labels: Number of output classes for the classification models.
        base_dir: Directory that holds one sub-folder per model.

    Returns:
        ``(model, preprocessor)`` with the model switched to eval mode.

    Raises:
        ValueError: If the identifier matches none of the supported families.
    """
    # Keep the caller's identifier: the timm branch needs the real model name,
    # not the folder-safe version built on the next line.
    model_name = model_dir
    model_dir = model_dir.replace("/", "_").replace("-", "_")
    # Define the path to the model folder (no need to access individual files inside it)
    model_folder = os.path.join(base_dir, model_dir + '/')

    # Optional fine-tuned weights stored next to the Hugging Face files
    model_path = os.path.join(model_folder, "best_model.pth")

    # NOTE(review): the branches are tried in order and "vit" is tested first,
    # so names such as "facebook_dino_vitb16", "facebook_vit_mae_base" or a
    # CLIP "..._vit_..." id are handled by the ViT branch and never reach the
    # dino / mae / clip branches. Order left unchanged to keep existing
    # checkpoints loading the same way - confirm the intended mapping.
    if "vit" in model_dir:
        model = ViTForImageClassification.from_pretrained(model_folder, num_labels=num_labels)
        preprocessor = ViTImageProcessor.from_pretrained(model_folder)
    elif "dino" in model_dir:
        model = ViTModel.from_pretrained(model_folder)
        preprocessor = ViTImageProcessor.from_pretrained(model_folder)
    elif "deit" in model_dir:
        model = DeiTForImageClassification.from_pretrained(model_folder, num_labels=num_labels)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_folder)
    elif "cvt" in model_dir:
        model = CvtForImageClassification.from_pretrained(model_folder, num_labels=num_labels)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_folder)
    elif "timm" in model_dir:
        # Split the ORIGINAL identifier: after sanitising there is no "/" left,
        # so splitting model_dir would hand timm a mangled "timm_<name>" string.
        model = timm.create_model(model_name.split('/')[-1], pretrained=True, num_classes=num_labels)
        data_config = timm.data.resolve_model_data_config(model)
        preprocessor = timm.data.create_transform(**data_config)
    elif "clip" in model_dir:
        model = CLIPModel.from_pretrained(model_folder)
        preprocessor = CLIPProcessor.from_pretrained(model_folder)
    elif "mae" in model_dir:
        model = ViTMAEForPreTraining.from_pretrained(model_folder)
        preprocessor = AutoImageProcessor.from_pretrained(model_folder)
    else:
        raise ValueError(f"Unsupported model: {model_dir}")

    # Check if custom weights exist (best_model.pth)
    if os.path.exists(model_path):
        # SECURITY: torch.load unpickles the file - only load trusted checkpoints.
        model.load_state_dict(torch.load(model_path, map_location=accelerator.device))

    model.eval()  # Set the model to evaluation mode

    return model, preprocessor

def generate_predictions(model, preprocessor, h5_file, batch_size=32, limit=100000):
    """Run ``model`` over the images of an HDF5 file and collect class-1 probabilities.

    Relies on the module-level ``accelerator`` for device placement.

    Args:
        model: Model whose forward output exposes ``.logits`` with at least two classes.
        preprocessor: Image preprocessor handed to ``H5ImageDataset``.
        h5_file: Path of the HDF5 image file.
        batch_size: Number of images per forward pass.
        limit: Maximum number of images to score (``None`` scores every image).
            Defaults to 100000, the cap ``H5ImageDataset`` applies on its own.

    Returns:
        dict mapping image id -> softmax probability of class index 1 (Python float).
    """
    dataset = H5ImageDataset(h5_file, preprocessor, limit=limit)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Prepare the model and dataloader using Accelerator
    # NOTE(review): when launched with several processes the prepared
    # dataloader presumably yields only this process's shard, so the returned
    # dict would be partial - confirm before relying on multi-GPU runs.
    model, dataloader = accelerator.prepare(model, dataloader)

    predictions = {}
    model.to(accelerator.device)
    model.eval()  # inference only: make sure dropout etc. are disabled

    try:
        with torch.no_grad():
            for batch, image_ids in tqdm(dataloader):
                batch = batch.to(accelerator.device)
                logits = model(batch).logits

                # Cast to float32 first so half-precision outputs can be
                # converted to Python numbers afterwards.
                probs = torch.softmax(logits.float(), dim=-1)

                # Probability of class 1 (positive class), one float per image
                class_1_probs = probs[:, 1].cpu().tolist()
                predictions.update(zip(image_ids, class_1_probs))
    finally:
        # Release the HDF5 read handle the dataset may be holding.
        handle = getattr(dataset, "h5_data", None)
        if handle is not None:
            handle.close()

    return predictions

def average_predictions(model_preds_list):
    """Average per-image predictions from multiple models.

    Args:
        model_preds_list: List of dicts, one per model, each mapping an image
            id to a prediction (a scalar or an array).

    Returns:
        dict mapping every image id seen in any input dict to the mean of the
        predictions available for it. Scalar predictions come back as Python
        floats, array predictions as float64 arrays. An empty input list
        yields an empty dict.
    """
    totals = {}
    counts = {}

    for model_preds in model_preds_list:
        for image_id, pred in model_preds.items():
            value = np.asarray(pred, dtype=np.float64)
            if image_id in totals:
                # Build a new object each time so caller-owned arrays are never mutated.
                totals[image_id] = totals[image_id] + value
                counts[image_id] += 1
            else:
                totals[image_id] = value
                counts[image_id] = 1

    combined_predictions = {}
    for image_id, total in totals.items():
        # Divide by the number of models that actually scored this image, so
        # an id missing from one model is not silently pulled towards zero.
        mean = total / counts[image_id]
        combined_predictions[image_id] = float(mean) if mean.ndim == 0 else mean

    return combined_predictions



In [7]:

# Score the image file with every configured model, keeping one
# {image_id: probability} dict per model.
all_model_predictions = []
for model_dir, batch_size in vit_models:
    model, preprocessor = get_model_and_preprocessor(model_dir)
    predictions = generate_predictions(
        model,
        preprocessor,
        h5_file,
        batch_size=batch_size,
    )
    all_model_predictions += [predictions]

# A single model needs no averaging; several models are ensembled.
averaged_predictions = (
    average_predictions(all_model_predictions)
    if len(all_model_predictions) > 1
    else all_model_predictions[0]
)

100%|██████████| 1563/1563 [24:45<00:00,  1.05it/s]


In [20]:
# Load the training metadata and keep only the id and ground-truth columns.
df_metadata = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
# .copy() makes df_sub an independent frame, so adding columns to it later is
# not a chained assignment into a slice of df_metadata.
df_sub = df_metadata[['isic_id','target']].copy()

In [21]:
# Preview the first five id / target rows.
df_sub.head(5)

Unnamed: 0,isic_id,target
0,ISIC_0015670,0
1,ISIC_0015845,0
2,ISIC_0015864,0
3,ISIC_0015902,0
4,ISIC_0024200,0


In [22]:
# Attach each image's predicted probability; ids without a prediction become NaN.
# .assign builds a new frame instead of writing a column into what may be a
# slice of df_metadata, and re-running the cell gives the same result.
df_sub = df_sub.assign(preds=df_sub["isic_id"].map(averaged_predictions))


In [23]:
# Preview the first five rows, now including the predictions column.
df_sub.head(5)

Unnamed: 0,isic_id,target,preds
0,ISIC_0015670,0,0.035321
1,ISIC_0015845,0,0.126805
2,ISIC_0015864,0,0.009842
3,ISIC_0015902,0,0.01867
4,ISIC_0024200,0,0.001902


In [29]:
from sklearn.metrics import roc_auc_score

In [39]:
# Score exactly the rows that received a prediction, instead of assuming they
# are the first 100000 rows of the metadata table.
scored_rows = df_sub["preds"].notna()
solution = df_sub.loc[scored_rows, "target"]
preds = df_sub.loc[scored_rows, "preds"]
def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float=0.80):
    """Partial AUC of the ROC curve restricted to TPR >= ``min_tpr``.

    Labels and scores are both flipped (``|y - 1|`` and ``1 - p``), so the
    region "TPR above ``min_tpr``" becomes "FPR below ``1 - min_tpr``", which
    ``roc_auc_score(..., max_fpr=...)`` can evaluate. The standardized value
    it returns is then mapped back to an un-normalized area.

    Args:
        solution: Ground-truth 0/1 labels (anything exposing ``.values``;
            this notebook passes a Series).
        submission: Predicted probabilities, aligned with ``solution``.
        min_tpr: Lower bound of the true-positive-rate range to integrate over.

    Returns:
        The partial AUC as a float.
    """
    flipped_labels = abs(np.asarray(solution.values) - 1)
    flipped_scores = np.array([1.0 - score for score in submission.values])

    max_fpr = abs(1 - min_tpr)
    scaled_auc = roc_auc_score(flipped_labels, flipped_scores, max_fpr=max_fpr)

    # Undo the standardization: 0.5 maps to the chance-level area, 1.0 to max_fpr.
    chance_area = 0.5 * max_fpr**2
    full_area = max_fpr
    return chance_area + (full_area - chance_area) / (1.0 - 0.5) * (scaled_auc - 0.5)

In [40]:
# Partial AUC of the predictions against the ground-truth labels.
comp_score(solution=solution, submission=preds)

0.032208155829447534

In [None]:
# Build the submission from predictions on the TEST images. The predictions in
# `averaged_predictions` were computed on train-image.hdf5, so mapping them onto
# the sample-submission ids would not score the test images at all.
test_h5_file = "/kaggle/input/isic-2024-challenge/test-image.hdf5"

test_model_predictions = []
for model_dir, batch_size in vit_models:
    model, preprocessor = get_model_and_preprocessor(model_dir)
    test_model_predictions.append(
        generate_predictions(model, preprocessor, test_h5_file, batch_size=batch_size)
    )

if len(test_model_predictions) > 1:
    test_predictions = average_predictions(test_model_predictions)
else:
    test_predictions = test_model_predictions[0]

df_sub = pd.read_csv("/kaggle/input/isic-2024-challenge/sample_submission.csv")
df_sub["target"] = df_sub["isic_id"].map(test_predictions)

# NOTE: H5ImageDataset scores at most 100000 images by default. Fail loudly
# rather than write a submission that contains NaN targets.
missing_targets = int(df_sub["target"].isna().sum())
if missing_targets:
    raise ValueError(f"{missing_targets} submission ids have no prediction")

df_sub.to_csv("submission.csv", index=False)
df_sub