In [1]:
%%writefile main.py

import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt

import h5py
from PIL import Image
from io import BytesIO

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
# Short aliases for the colorama codes: blue foreground and "reset all styling".
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# Blanket filter: every warning category is ignored from this point on.
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): this is a debugging switch; it presumably serialises CUDA kernel
# launches and so may slow inference — confirm it is still wanted here.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Writing main.py


In [2]:
%%writefile -a main.py

# Run-wide settings read by the seeding, transform, model and loader code below.
# "device" resolves to the first CUDA device when one is available, else the CPU.
CONFIG = dict(
    seed=42,
    img_size=384,
    model_name="tf_efficientnet_b0_ns",
    valid_batch_size=64,
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
)

Appending to main.py


In [3]:
%%writefile -a main.py

def set_seed(seed=42):
    '''Seed every random number generator this script can draw from.

    Covers Python's ``random`` module, NumPy's legacy global generator, and
    PyTorch on the CPU and on all CUDA devices, then switches cuDNN to its
    deterministic mode. This is for REPRODUCIBILITY.

    Args:
        seed: Integer seed applied to every generator (default 42).
    '''
    # The stdlib generator is imported by this file, so seed it as well.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; this is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed.
    # NOTE(review): this is only read at interpreter start-up, so it affects
    # child processes rather than string hashing in the current one — confirm intent.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

Appending to main.py


In [4]:
%%writefile -a main.py

# Competition inputs: test metadata, the HDF5 image container and the sample
# submission all live directly under ROOT_DIR.
ROOT_DIR = "/kaggle/input/isic-2024-challenge"
TEST_CSV = ROOT_DIR + "/test-metadata.csv"
TEST_HDF = ROOT_DIR + "/test-image.hdf5"
SAMPLE = ROOT_DIR + "/sample_submission.csv"

# Checkpoints that make up the ensemble, one file per entry.
# (File names suggest one checkpoint per cross-validation fold, 0 through 4.)
b0_weight_files = list((
    "/kaggle/input/effnetb0-1-50-ratio-384/pAUC0.1542_Loss0.0066_epoch19_fold0.bin",
    "/kaggle/input/effnetb0-1-50-ratio-384/pAUC0.1466_Loss0.0067_epoch20_fold1.bin",
    "/kaggle/input/effnetb0-1-50-ratio-384/pAUC0.1668_Loss0.0066_epoch21_fold2.bin",
    "/kaggle/input/effnetb0-1-50-ratio-384/pAUC0.1416_Loss0.0075_epoch9_fold3.bin",
    "/kaggle/input/effnetb0-1-50-ratio-384/pAUC0.1614_Loss0.0059_epoch18_fold4.bin",
))

Appending to main.py


In [5]:
%%writefile -a main.py

# Test metadata plus a constant placeholder "target" column: the dataset class
# reads df['target'], but no real labels exist at inference time.
df = pd.read_csv(TEST_CSV).assign(target=0)  # dummy
# Bare expression: rendered when run as a notebook cell, a no-op inside main.py.
df

Appending to main.py


In [6]:
%%writefile -a main.py

# Submission template; its "target" column is overwritten with predictions later on.
df_sub = pd.read_csv(SAMPLE)
# Bare expression: rendered when run as a notebook cell, a no-op inside main.py.
df_sub

Appending to main.py


In [7]:
%%writefile -a main.py

import cv2
import numpy as np
import pandas as pd
import h5py
from torch.utils.data import Dataset

class ISICDataset(Dataset):
    """Map-style dataset that decodes encoded images stored in an HDF5 file.

    Each item is looked up in the HDF5 file by its ``isic_id``, decoded with
    OpenCV, converted from BGR to RGB, optionally passed through ``transforms``
    and returned together with its target.

    The HDF5 handle is opened lazily, once per process, on first access to
    ``fp_hdf``. An h5py handle opened in the parent process should not be
    shared with DataLoader worker processes, and it cannot be pickled when
    workers are spawned; opening on demand avoids both problems.

    Args:
        df: DataFrame with at least the columns ``isic_id`` and ``target``.
        file_hdf: Path of the HDF5 file keyed by ``isic_id``.
        transforms: Optional callable invoked as ``transforms(image=img)`` that
            returns a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, file_hdf, transforms=None):
        self.df = df
        self.file_hdf = file_hdf
        self._fp_hdf = None
        # Open and immediately close the file so a bad path still fails at
        # construction time, without keeping a handle in this process.
        with h5py.File(file_hdf, mode="r"):
            pass
        self.isic_ids = df['isic_id'].values
        self.targets = df['target'].values
        self.transforms = transforms

    @property
    def fp_hdf(self):
        """Read-only HDF5 handle, opened on first use in the current process."""
        if self._fp_hdf is None:
            self._fp_hdf = h5py.File(self.file_hdf, mode="r")
        return self._fp_hdf

    def __getstate__(self):
        # Never ship an open handle to another process; it is reopened lazily there.
        state = self.__dict__.copy()
        state["_fp_hdf"] = None
        return state

    def close(self):
        """Close the HDF5 handle if this process opened one."""
        if self._fp_hdf is not None:
            self._fp_hdf.close()
            self._fp_hdf = None

    def __len__(self):
        return len(self.isic_ids)

    def __getitem__(self, index):
        """Return ``{'image': ..., 'target': ...}`` for the row at ``index``.

        Raises:
            ValueError: If the stored bytes cannot be decoded as an image.
        """
        isic_id = self.isic_ids[index]
        img_bytes = self.fp_hdf[isic_id][()]
        img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals failure by returning None; report it explicitly
            # instead of letting cvtColor fail with an unrelated message.
            raise ValueError(f"Could not decode image for isic_id={isic_id!r}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        target = self.targets[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'target': target,
        }

Appending to main.py


In [8]:
%%writefile -a main.py

# Inference-time preprocessing: resize to a square of CONFIG['img_size'],
# normalise each channel, then convert to a tensor.
# (mean/std are presumably the usual ImageNet channel statistics — confirm.)
_valid_side = CONFIG['img_size']
_valid_steps = [
    A.Resize(_valid_side, _valid_side),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    ),
    ToTensorV2(),
]

data_transforms = {"valid": A.Compose(_valid_steps, p=1.)}

Appending to main.py


In [9]:
%%writefile -a main.py

class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    The input is clamped below at ``eps``, raised to the power ``p``, averaged
    over its full spatial extent and raised to ``1 / p``. ``p`` is stored as a
    one-element learnable parameter.

    Args:
        p: Initial value of the pooling exponent.
        eps: Lower clamp applied to the input before exponentiation.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` with the module's own exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply generalized-mean pooling to ``x`` with explicit ``p`` and ``eps``."""
        # The kernel covers the whole spatial extent, so each channel collapses to 1x1.
        height, width = x.size(-2), x.size(-1)
        powered = x.clamp(min=eps).pow(p)
        averaged = F.avg_pool2d(powered, (height, width))
        return averaged.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

Appending to main.py


In [10]:
%%writefile -a main.py

class ISICModel(nn.Module):
    """timm backbone followed by GeM pooling, a linear head and a sigmoid.

    The backbone's own ``classifier`` and ``global_pool`` are replaced with
    ``nn.Identity`` so that pooling and classification are done by this
    module's ``pooling`` and ``linear`` layers instead.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Output width of the linear head.
        pretrained: Forwarded to ``timm.create_model``.
        checkpoint_path: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            checkpoint_path=checkpoint_path,
        )

        # Read the head's input width before the head itself is swapped out.
        in_features = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()

        # Attribute names are part of the checkpoint's state-dict keys.
        self.model = backbone
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return sigmoid outputs of shape ``(batch, num_classes)`` for ``images``."""
        feature_map = self.model(images)
        descriptor = torch.flatten(self.pooling(feature_map), 1)
        logits = self.linear(descriptor)
        return self.sigmoid(logits)
    

# Build one model per checkpoint file; together they form the inference ensemble.
b0_models = []

for weight_file in b0_weight_files:
    b0_model = ISICModel(CONFIG['model_name'], pretrained=False)
    # Load onto the CPU first so checkpoints saved from a GPU still load when
    # CONFIG['device'] has fallen back to "cpu"; the model is moved afterwards.
    # NOTE: torch.load unpickles the file — only use it on trusted checkpoints.
    b0_model.load_state_dict(torch.load(weight_file, map_location="cpu"))
    b0_model.to(CONFIG['device'])
    # Inference only: put the model in evaluation mode.
    b0_model.eval()
    b0_models.append(b0_model)

# model = ISICModel(CONFIG['model_name'], pretrained=False)
# model.load_state_dict( torch.load(BEST_WEIGHT) )
# model.to(CONFIG['device']);

Appending to main.py


In [11]:
%%writefile -a main.py

# Test-time pipeline: rows are served in file order (no shuffling), so the
# predictions come out in the same order as the rows of `df`.
test_dataset = ISICDataset(
    df,
    TEST_HDF,
    transforms=data_transforms["valid"],
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG['valid_batch_size'],
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

Appending to main.py


In [12]:
%%writefile -a main.py

# Prediction function
def predict(models, data_loader, device=None):
    """Run every model over ``data_loader`` and average their outputs.

    For each batch, the ``'image'`` entry is moved to ``device`` as float,
    passed through every model under ``torch.no_grad()``, and the per-model
    outputs are averaged element-wise. The function does not change the
    models' train/eval mode; callers are expected to have set it.

    Args:
        models: Iterable of callables mapping an image batch to an output tensor.
        data_loader: Sized iterable yielding dicts with an ``'image'`` tensor.
        device: Target device for the images; defaults to ``CONFIG["device"]``.

    Returns:
        1-D NumPy array with the averaged outputs of all batches, flattened
        (empty when ``data_loader`` yields no batches).

    Raises:
        ValueError: If ``models`` is empty.
    """
    if device is None:
        device = CONFIG["device"]

    # Materialise once so a one-shot iterable is not exhausted after the first batch.
    models = list(models)
    if not models:
        raise ValueError("predict() needs at least one model")

    preds = []
    with torch.no_grad():
        for data in tqdm(data_loader, total=len(data_loader)):
            images = data['image'].to(device, dtype=torch.float)

            # Get predictions from all models
            batch_preds = [model(images).detach().cpu().numpy() for model in models]

            # Average predictions
            preds.append(np.mean(batch_preds, axis=0))

    if not preds:
        # np.concatenate rejects an empty list; an empty loader means no predictions.
        return np.empty(0, dtype=np.float32)
    return np.concatenate(preds).flatten()

# Get predictions
# Flat array of outputs averaged over all models in b0_models, in test_loader order.
b0_preds = predict(b0_models, test_loader)

Appending to main.py


In [13]:
%%writefile -a main.py

# Write the ensemble outputs into the submission frame and save it without the index.
# NOTE(review): the assignment is positional — it assumes SAMPLE lists its rows in
# the same order as TEST_CSV (the order predictions were produced in); TODO confirm.
df_sub["target"] = b0_preds
df_sub.to_csv("submission.csv", index=False)

Appending to main.py


In [14]:
%%writefile -a main.py

# Bare expression: rendered when run as a notebook cell, a no-op inside main.py.
df_sub

Appending to main.py
