In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import backend as K
from tensorflow.keras.applications import DenseNet169, InceptionV3, MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Reshape, Multiply, Conv2D, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Combine train and test directories into one DataFrame
def build_dataframe(data_dirs):
    """Index every image file under the given class-per-subfolder directories.

    Each directory in ``data_dirs`` is expected to contain one sub-directory
    per class; every regular file inside a class sub-directory becomes one row.

    Args:
        data_dirs: Iterable of directory paths laid out as ``<dir>/<class>/<file>``.

    Returns:
        A ``(df, class_names)`` tuple. ``df`` has the columns ``filepath``,
        ``class`` (sub-directory name) and ``class_id`` (integer category code
        of ``class``). ``class_names`` is the sorted list of distinct classes.
        Both are empty when no files are found.

    Raises:
        OSError: If one of ``data_dirs`` cannot be listed.
    """
    records = []
    for data_dir in data_dirs:
        for cls in sorted(os.listdir(data_dir)):
            cls_dir = os.path.join(data_dir, cls)
            # Stray top-level files (READMEs, metadata) are not classes.
            if not os.path.isdir(cls_dir):
                continue
            # os.listdir order is arbitrary; sort so that row order -- and hence
            # any seeded split of the frame -- is reproducible across machines.
            for fname in sorted(os.listdir(cls_dir)):
                fpath = os.path.join(cls_dir, fname)
                if os.path.isfile(fpath):
                    records.append({'filepath': fpath, 'class': cls})
    # Explicit columns keep the schema stable even when no files were found.
    df = pd.DataFrame(records, columns=['filepath', 'class'])
    df['class_id'] = df['class'].astype('category').cat.codes
    return df, sorted(df['class'].unique())

# Locations of the dataset's two split folders. Both are indexed into a single
# frame, from which the class list and class count are derived.
train_dir = '/kaggle/input/skin-cancer-malignant-vs-benign/train'
test_dir  = '/kaggle/input/skin-cancer-malignant-vs-benign/test'
data_dirs = [train_dir, test_dir]
df, class_names = build_dataframe(data_dirs)
num_classes = len(class_names)

In [None]:
# CBAM Attention Block
def cbam_block(input_tensor, reduction_ratio=16):
    """Apply channel attention followed by spatial attention to a feature map.

    This is a simplified CBAM-style block: the channel branch squeezes the
    map with global *average* pooling only and passes it through a two-layer
    bottleneck MLP; the spatial branch is a single 7x7 convolution with one
    sigmoid output channel applied to the channel-refined map.
    NOTE(review): the CBAM paper additionally uses max-pooled descriptors in
    both branches -- confirm the simplification is intentional.

    Args:
        input_tensor: Keras tensor whose last axis is the channel axis; it is
            reshaped against ``(1, 1, channels)``, so a 4-D feature map is
            presumably expected -- TODO confirm.
        reduction_ratio: Divisor applied to the channel count to size the
            bottleneck layer of the channel-attention MLP.

    Returns:
        A tensor with the same shape as ``input_tensor``, scaled first by the
        per-channel weights and then by the per-position weights.

    Raises:
        ValueError: If the channel dimension of ``input_tensor`` is unknown.
    """
    channels = input_tensor.shape[-1]
    if channels is None:
        raise ValueError("cbam_block needs a statically known channel dimension")
    # Clamp so that few channels (or a large ratio) never request Dense(0).
    bottleneck_units = max(1, channels // reduction_ratio)

    # Channel Attention
    channel = GlobalAveragePooling2D()(input_tensor)
    channel = Dense(bottleneck_units, activation='relu')(channel)
    channel = Dense(channels, activation='sigmoid')(channel)
    # Reshape to (1, 1, C) so the weights broadcast over the spatial axes.
    channel = Reshape((1, 1, channels))(channel)
    channel_attention = Multiply()([input_tensor, channel])

    # Spatial Attention
    spatial = Conv2D(1, (7,7), padding='same', activation='sigmoid')(channel_attention)
    spatial_attention = Multiply()([channel_attention, spatial])

    return spatial_attention

In [None]:
# Image size handed to the generators as target_size, and the mini-batch size
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

def create_model(base_model_class):
    """Build and compile a classifier on top of a frozen ImageNet backbone.

    The backbone's feature map is passed through ``cbam_block``, flattened and
    fed to a 1024 -> dropout(0.2) -> 128 -> softmax head with ``num_classes``
    outputs.

    Args:
        base_model_class: A ``keras.applications`` constructor (e.g.
            ``DenseNet169``) accepting ``include_top``, ``weights`` and
            ``input_shape``.

    Returns:
        A compiled ``Model`` (RMSprop with learning rate 2e-5, categorical
        cross-entropy loss, accuracy metric) whose backbone is not trainable.
    """
    # Derive the input shape from IMG_SIZE so the model always matches the
    # target_size used by the data generators instead of a duplicated literal.
    input_shape = tuple(IMG_SIZE) + (3,)
    base = base_model_class(include_top=False, weights='imagenet', input_shape=input_shape)
    base.trainable = False  # only the attention block and the head are trained

    att = cbam_block(base.output)
    x = Flatten()(att)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.2)(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base.input, outputs=out)
    model.compile(optimizer=RMSprop(2e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Ensemble fusion functions (simplified for binary classification; the original fusion functions are in the "skincan_model.ipynb" file)
def generateRank1(score):
    """Exponential rank term ``1 - exp(-(score - 1)^2 / 2)``; a score of 1 maps to 0."""
    gap_sq = (score - 1) ** 2
    return 1 - np.exp(-gap_sq / 2.0)


def generateRank2(score):
    """Tanh rank term ``1 - tanh((score - 1)^2 / 2)``; a score of 1 maps to 1."""
    gap_sq = (score - 1) ** 2
    return 1 - np.tanh(gap_sq / 2)


def doFusion(res_list):
    """Fuse per-model class scores into one predicted class index per sample.

    For every sample, each model's score vector is turned into the product of
    the two rank terms, the products are summed over the models, and the
    class with the smallest summed value is selected.

    Args:
        res_list: Sequence of per-model score collections, all indexed by
            sample; ``res_list[0]`` determines the number of samples.

    Returns:
        ``np.ndarray`` holding the selected class index for each sample.
    """
    def fused_label(sample_idx):
        # Sum of rank products over all models for this one sample.
        combined = sum(
            generateRank1(scores[sample_idx]) * generateRank2(scores[sample_idx])
            for scores in res_list
        )
        return np.argmin(combined)

    sample_count = len(res_list[0])
    return np.array([fused_label(sample_idx) for sample_idx in range(sample_count)])

In [None]:
# Stratified K-fold cross-validation of the three base learners and their fused ensemble
N_SPLITS = 5     # outer folds; each one supplies the held-out test set for reporting
VAL_SPLITS = 10  # 1/VAL_SPLITS of every training fold is held out for checkpoint selection
EPOCHS = 25
SEED = 42
BASE_LEARNERS = [('DenseNet169', DenseNet169), ('InceptionV3', InceptionV3), ('MobileNetV2', MobileNetV2)]


def make_generator(frame, augment=False, shuffle=False):
    """Create a ``flow_from_dataframe`` iterator over the rows of ``frame``.

    Args:
        frame: DataFrame with ``filepath`` and ``class`` columns.
        augment: When true, apply the random training augmentations
            (rotation, shifts, shear, zoom, horizontal flip); otherwise only
            rescale pixel values by 1/255.
        shuffle: Whether the iterator shuffles sample order.

    Returns:
        The Keras iterator yielding ``(images, one-hot labels)`` batches.
    """
    # NOTE(review): every backbone receives the same 1/255 rescale; the
    # keras.applications models each ship their own preprocess_input --
    # confirm that the shared rescale is intended.
    if augment:
        datagen = ImageDataGenerator(
            rescale=1./255, rotation_range=40, width_shift_range=0.2,
            height_shift_range=0.2, shear_range=0.2, zoom_range=0.2,
            horizontal_flip=True, fill_mode='nearest'
        )
    else:
        datagen = ImageDataGenerator(rescale=1./255)
    # Passing `classes` pins the label -> index mapping to class_names for
    # every generator instead of letting each one infer it from its own rows.
    return datagen.flow_from_dataframe(
        frame, x_col='filepath', y_col='class',
        target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode='categorical', classes=class_names, shuffle=shuffle
    )


def report_predictions(y_true, y_pred, title):
    """Print the classification report and show a titled confusion-matrix heatmap."""
    print(classification_report(y_true, y_pred, target_names=class_names, digits=4))
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        annot_kws={"size": 20},
        cbar=True,
        ax=ax
    )
    ax.set_title(title)
    ax.set_xlabel('Predicted', fontsize=18)
    ax.set_ylabel('Actual', fontsize=18)
    ax.tick_params(labelsize=18)
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # many figures are produced in the loop; release each one


kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

for fold, (train_idx, test_idx) in enumerate(kf.split(df['filepath'], df['class_id']), 1):
    print(f"\n**** Fold {fold}/{N_SPLITS} ****")
    train_df = df.iloc[train_idx]
    test_df  = df.iloc[test_idx]

    # Hold out part of the training fold for checkpoint selection. Selecting
    # the best epoch on the test fold itself would leak test information into
    # the reported metrics.
    inner = StratifiedKFold(n_splits=VAL_SPLITS, shuffle=True, random_state=SEED)
    fit_idx, val_idx = next(inner.split(train_df['filepath'], train_df['class_id']))
    fit_df = train_df.iloc[fit_idx]
    val_df = train_df.iloc[val_idx]

    train_gen = make_generator(fit_df, augment=True, shuffle=True)
    val_gen = make_generator(val_df)
    # shuffle=False keeps prediction order aligned with test_gen.classes
    test_gen = make_generator(test_df)

    # Train and evaluate base learners
    preds = {}
    y_true = test_gen.classes
    for name, Base in BASE_LEARNERS:
        print(f"-- {name} --")
        model = create_model(Base)
        ckpt = f"{name}_fold{fold}.h5"

        # Fit the model, keeping only the epoch with the best validation accuracy
        model.fit(train_gen,
                  validation_data=val_gen,
                  epochs=EPOCHS,
                  callbacks=[ModelCheckpoint(ckpt, monitor='val_accuracy', save_best_only=True)],
                  verbose=1)

        # Load the best checkpoint and evaluate it on the untouched test fold
        m = load_model(ckpt, compile=False)
        res = m.predict(test_gen)
        preds[name] = res
        y_pred = np.argmax(res, axis=-1)
        report_predictions(y_true, y_pred, f"{name} Confusion Matrix Fold {fold}")

        # Release Keras global state so memory does not grow across the
        # N_SPLITS * len(BASE_LEARNERS) models built by this loop.
        K.clear_session()

    # Ensemble on fold
    ens_pred = doFusion([preds[name] for name, _ in BASE_LEARNERS])
    print("-- Ensemble --")
    report_predictions(y_true, ens_pred, f"Ensemble Confusion Matrix Fold {fold}")

print("Training and Testing of Skin Cancer Classification Ensemble model is complete.")
