# Assemble Model using DenseNet, EfficientNet, ResNet50, XGBoost, Light GBM, and CatBoost

## Dependencies

In [1]:
import numpy as np
import pandas as pd
import h5py
import torch
import cv2
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset
from torchvision import transforms, models
from torchvision.models import densenet121, DenseNet121_Weights
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
# from dataset import HDF5Dataset
from sklearn.metrics import roc_auc_score, roc_curve
from isic_metric import score

## Data Loading

In [2]:
images = []
labels = []
metadata = []

### First, load the malignant data from original database

In [3]:
original_train_hdf5_path = 'train-image.hdf5'
original_train_metadata_path = 'train-metadata.csv'
original_train_metadata = pd.read_csv(original_train_metadata_path,low_memory=False)   
original_train_hdf5 = h5py.File(original_train_hdf5_path, 'r')

In [4]:
for i in tqdm(range(len(original_train_metadata))):
    if original_train_metadata.iloc[i]['target'] == 0: # skip non-malignant images
        continue
    image_id = original_train_metadata.iloc[i]['isic_id']
    image = original_train_hdf5[image_id][()]
    image = np.frombuffer(image, dtype=np.uint8)
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (128, 128))
    image = image / 255
    
    images.append(image)
    labels.append(1)
    metadata.append(original_train_metadata.iloc[i])
    
# original_train_hdf5.close()

100%|██████████| 401059/401059 [00:19<00:00, 20853.92it/s]


### Second, load the augmented malignant images

In [5]:
augmented_malignant_hdf5_path = 'augmented_data.hdf5'
augmented_malignant_metadata_path = 'augmented_metadata.csv'
augmented_malignant_metadata = pd.read_csv(augmented_malignant_metadata_path,low_memory=False)
augmented_malignant_hdf5 = h5py.File(augmented_malignant_hdf5_path, 'r')
n_augmentations = 5

In [6]:
for i in tqdm(range(len(augmented_malignant_metadata))):

    for j in range(n_augmentations):
        image_id = f"{augmented_malignant_metadata.iloc[i]['isic_id']}_aug{j}"
        image = augmented_malignant_hdf5[image_id][()]
        image = np.frombuffer(image, dtype=np.uint8)
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        # show image
        # plt.imshow(image)
        # plt.axis('off')
        # plt.show()
        image = cv2.resize(image, (128, 128))
        image = image / 255
    
        images.append(image)
        labels.append(1)
        metadata.append(augmented_malignant_metadata.iloc[i])
    
augmented_malignant_hdf5.close()

100%|██████████| 1965/1965 [00:08<00:00, 229.63it/s]


### Third, load the ISIC full database's malignant examples

In [7]:
isic_metadata_path = 'isic_metadata.csv'
isic_hdf5_path = 'isic_image.hdf5'
isic_metadata = pd.read_csv(isic_metadata_path,low_memory=False)
isic_hdf5 = h5py.File(isic_hdf5_path, 'r')

In [8]:
malignant_count = 0
for i in tqdm(range(len(isic_metadata))):
    if isic_metadata.iloc[i]['benign_malignant'] == 'malignant': # skip non-malignant images
        image_id = isic_metadata.iloc[i]['isic_id']
        image = isic_hdf5[image_id][()]
        image = np.frombuffer(image, dtype=np.uint8)
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        image = cv2.resize(image, (128, 128))
        image = image / 255
        malignant_count += 1
        
        images.append(image)
        labels.append(1)
        metadata.append(isic_metadata.iloc[i])
print(f"malignant count: {malignant_count}")
isic_hdf5.close()

100%|██████████| 81722/81722 [00:25<00:00, 3186.92it/s] 

malignant count: 9239





In [9]:
malignant_count = len(labels)
print(f"malignant count: {malignant_count}")

malignant count: 19457


### Lastly, load the same amount of beign data from original database

In [10]:
idx = 0
benign_loaded = 0

pbar = tqdm(total=malignant_count, desc="Loading benign images")
while benign_loaded < malignant_count:
    if original_train_metadata.iloc[idx]['target'] == 0:
        image_id = original_train_metadata.iloc[idx]['isic_id']
        image = original_train_hdf5[image_id][()]
        image = np.frombuffer(image, dtype=np.uint8)
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        image = cv2.resize(image, (128, 128))
        image = image / 255
        images.append(image)
        labels.append(0)
        metadata.append(original_train_metadata.iloc[idx])
        benign_loaded += 1
        pbar.update(1)
    idx += 1
pbar.close()

original_train_hdf5.close()
print(f"benign count: {benign_loaded}")

Loading benign images: 100%|██████████| 19457/19457 [00:13<00:00, 1433.21it/s]

benign count: 19457





In [11]:
print(len(images))

38914


### Generate Dataset

In [12]:
from dataset import HDF5Dataset
from sklearn.model_selection import train_test_split

# 70% for training
X_train, X_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.3, stratify=labels, random_state=42
)

# 15% validation, 15% testing
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# split metadata
metadata_train, metadata_temp = train_test_split(
    metadata, test_size=0.3, stratify=labels, random_state=42
)
metadata_val, metadata_test = train_test_split(
    metadata_temp, test_size=0.5, stratify=y_temp, random_state=42
)

In [13]:
print(f'Training data: {len(X_train)}')
print(f'Validation data: {len(X_val)}')
print(f'Test data: {len(X_test)}')
print(f'Metadata Training data: {len(metadata_train)}')
print(f'Metadata Validation data: {len(metadata_val)}')
print(f'Metadata Test data: {len(metadata_test)}')

Training data: 27239
Validation data: 5837
Test data: 5838
Metadata Training data: 27239
Metadata Validation data: 5837
Metadata Test data: 5838


### Load ISIC Competition Test data

In [14]:
isic_test_metadata_path = 'test-metadata.csv'
isic_test_metadata = pd.read_csv(isic_test_metadata_path,low_memory=False)
isic_test_hdf5_path = 'test-image.hdf5'
isic_test_hdf5 = h5py.File(isic_test_hdf5_path, 'r')

In [34]:
from PIL import Image

X_test_isic = []
X_test_isic_id = []
metadata_test_isic = []
for i in tqdm(range(len(isic_test_metadata))):
    image_id = isic_test_metadata.iloc[i]['isic_id']
    image = isic_test_hdf5[image_id][()]
    image = np.frombuffer(image, dtype=np.uint8)
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (128, 128))
    image = image / 255
    
    # since it does not have labels, we need to manualy ensure it works with the models
    if isinstance(image, np.ndarray):
        # First ensure it's uint8
        if image.dtype != np.uint8:
            image = (image * 255).astype(np.uint8)
        image = Image.fromarray(image)
    X_test_isic.append(image)
    metadata_test_isic.append(isic_test_metadata.iloc[i])
    X_test_isic_id.append(image_id)

100%|██████████| 3/3 [00:00<00:00, 703.70it/s]


## Load CNN models

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


### DenseNet

In [26]:
from torchvision.models import densenet121, DenseNet121_Weights
from ModelTrainer import Trainer
densenet_weights = DenseNet121_Weights.DEFAULT
densenet_transform = densenet_weights.transforms()
densenet_train_dataset = HDF5Dataset(X_train, y_train, augment=True, transform=densenet_transform)
densenet_val_dataset = HDF5Dataset(X_val, y_val, augment=False, transform=densenet_transform)
densenet_model = densenet121(weights=densenet_weights)
lr = 1e-4
num_epochs = 20

#### NOTE!! Still need to run this block below if loading weights!

In [27]:
dense_net_trainer = Trainer(device, densenet_train_dataset, densenet_val_dataset, "DenseNet121", densenet_weights, densenet_transform, densenet_model, lr, num_epochs)

In [None]:
dense_net_trainer.train()

#### Convert DenseNet's output predictions

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
import torch.nn.functional as F
from PIL import Image

class TorchCNNWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, model, device='cpu', transform=None, threshold=0.5):
        self.model = model.eval().to(device)
        self.device = device
        self.transform = transform
        self.threshold = threshold

    def _prepare_image(self, img):
        if isinstance(img, np.ndarray):
            if img.dtype != np.uint8:
                img = (img * 255).astype(np.uint8)
            img = Image.fromarray(img)
        return self.transform(img).unsqueeze(0).to(self.device)  # Add batch dim

    def predict_proba(self, X):
        self.model.eval()
        probs = []
    
        with torch.no_grad():
            for img in tqdm(X, desc="Predicting with CNN"):
                img_tensor = self._prepare_image(img)
                logits = self.model(img_tensor)
                prob = torch.sigmoid(logits).cpu().item()
                probs.append([1 - prob, prob])
    
        return np.array(probs)

    def predict(self, X):
        probs = self.predict_proba(X)
        return (probs[:, 1] >= self.threshold).astype(int)

### Load model if already trained

In [28]:
densenet_model_path = "DenseNet121_checkpoints/DenseNet121_epoch_20.pth"
densenet_checkpoint = torch.load(densenet_model_path, weights_only=False, map_location=device)
densenet_model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:
densenet_model.eval()
densenet_wrapper = TorchCNNWrapper(
    model=densenet_model,
    device=device,
    transform=densenet_transform
)

In [None]:
densenet_wrapper_probas = densenet_wrapper.predict_proba(X_train)[:, 1]

In [None]:
densenet_wrapper_probas[:10]

### Calculate test pAUC 

In [30]:
# predict X_test_isic using DenseNet
densenet_model.eval()
predictions = []

with torch.no_grad():
    for img in tqdm(X_test_isic):
        # Apply the transformation (PIL → normalized tensor)
        img_tensor = densenet_transform(img).unsqueeze(0).to(device)  # shape: [1, C, H, W]

        # Get prediction
        logits = densenet_model(img_tensor)
        prob = torch.sigmoid(logits).cpu().item()  # probability for class 1 (malignant)
        predictions.append(prob)


100%|██████████| 3/3 [00:00<00:00,  3.82it/s]


In [31]:
print(predictions)

[4.950661241309717e-05, 9.08891342987772e-06, 4.524763880908722e-06]


In [35]:
X_test_isic_id

['ISIC_0015657', 'ISIC_0015729', 'ISIC_0015740']

In [38]:
# Convert both to Series or DataFrames
densenet_submission = pd.concat([
    pd.Series(X_test_isic_id, name="image_id"),
    pd.Series(predictions, name="target")
], axis=1)

densenet_submission.head()

Unnamed: 0,image_id,target
0,ISIC_0015657,5e-05
1,ISIC_0015729,9e-06
2,ISIC_0015740,5e-06


## EfficientNet

In [None]:
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights
efficientnet_weights = EfficientNet_B3_Weights.DEFAULT
efficientnet_transform = efficientnet_weights.transforms()
efficientnet_train_dataset = HDF5Dataset(X_train, y_train, augment=True, transform=efficientnet_transform)
efficientnet_val_dataset = HDF5Dataset(X_val, y_val, augment=False, transform=efficientnet_transform)
efficientnet_model = efficientnet_b3(weights=efficientnet_weights)
lr = 1e-4
num_epochs = 20

In [None]:
efficientnet_trainer = Trainer(device, efficientnet_train_dataset, efficientnet_val_dataset, "EfficientNet-B3", efficientnet_weights, efficientnet_transform, efficientnet_model, lr, num_epochs)
efficientnet_trainer.train()

### Load EfficientNet Model if already trained

In [None]:
efficientnet_model_path = "EfficientNet-B3_checkpoints/EfficientNet-B3_epoch_20.pth"
efficientnet_model.load_state_dict(torch.load(efficientnet_model_path))

## ResNet50

In [42]:
from torchvision.models import resnet50, ResNet50_Weights

resnet_weights = ResNet50_Weights.DEFAULT
resnet_transform = resnet_weights.transforms()

resnet_train_dataset = HDF5Dataset(X_train, y_train, transform=resnet_transform)
resnet_val_dataset = HDF5Dataset(X_val, y_val, transform=resnet_transform)
resnet_model = resnet50(pretrained=True)
resnet_model.fc = nn.Linear(resnet_model.fc.in_features, 1)

lr = 1e-4
num_epochs = 20



In [None]:
resnet_trainer = Trainer(device, resnet_train_dataset, resnet_val_dataset, "ResNet50", resnet_weights, resnet_transform, resnet_model, lr, num_epochs)
resnet_trainer.train()

Epoch 1/20: 100%|██████████| 213/213 [01:11<00:00,  2.97it/s]


Epoch 1: Acc = 0.9808, Precision = 0.9986, Recall = 0.9630, F1 = 0.9805, pAUC = 0.1986


Epoch 2/20: 100%|██████████| 213/213 [01:12<00:00,  2.92it/s]


### Load ResNet50 if already trained

In [None]:
resnet_model_path = "ResNet50_checkpoints/ResNet50_epoch_20.pth"
resnet_model.load_state_dict(torch.load(resnet_model_path))

## Load Tree based models

### XGBoost

In [None]:
import xgboost as xgb
from xgboost.callback import EarlyStopping
print(xgb.__version__)
XGBoost_model = xgb.XGBClassifier(
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='logloss',
    use_label_encoder=False,
    n_jobs=-1,
    random_state=42
)

In [None]:
metadata = np.array(metadata)
X_metadata_train, 

In [None]:
XGBoost_model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    # early_stopping_rounds=50,  # <-- now accepted
    verbose=True
)