In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import label_binarize


In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration: every value a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Root folder that contains one sub-folder per class.
dataset_path = "/kaggle/input/minor-dataset/Data_Minor"

# Sub-folder names; a class's integer label is its position in this list.
classes = ["HR", "DR", "RVO"]
num_classes = len(classes)

# Target size handed to cv2.resize, which expects (width, height).
img_size = (224, 224)
# Model input is (height, width, channels), hence the swapped indices.
input_shape = (img_size[1], img_size[0], 3)

# Optimisation settings.
batch_size = 4
epochs = 20

# Evaluation settings.
N_SPLITS = 5          # number of repeated stratified shuffle splits
BOOTSTRAPS = 1000     # number of bootstrap resamples for the confidence intervals


In [None]:
def apply_clahe(image):
    """Boost local contrast of an RGB image with CLAHE on its lightness channel.

    The image is converted to LAB, only the L channel is equalised (so the
    colour channels are left untouched), and the result is converted back
    to RGB.

    Args:
        image: RGB image array. Assumed to be 8-bit, which is what the loader
            in this notebook passes in -- TODO confirm for other callers.

    Returns:
        The contrast-enhanced image, again in RGB channel order.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2LAB))

    # Clip limit 2.0 on an 8x8 tile grid.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lightness = equalizer.apply(lightness)

    enhanced_lab = cv2.merge((lightness, chan_a, chan_b))
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)

def preprocess_image(image):
    """Apply CLAHE and rescale pixel values by 1/255.

    Args:
        image: RGB image array accepted by ``apply_clahe``.

    Returns:
        A ``float32`` array holding the contrast-enhanced image divided by 255.
    """
    enhanced = apply_clahe(image)
    as_float = enhanced.astype(np.float32)
    return as_float / 255.0


In [None]:
def load_images():
    """Load, resize and preprocess every image under ``dataset_path``.

    Each entry of ``classes`` names a sub-folder of ``dataset_path``; the
    position of the class in that list is used as the integer label.

    Files that OpenCV cannot decode (non-image files, corrupt images) are
    skipped and reported instead of crashing the whole load.

    Returns:
        tuple: ``(images, labels)`` where ``images`` stacks the preprocessed
        images in load order and ``labels`` holds the matching class indices.
    """
    images, labels = [], []
    unreadable = []

    for label, cls in enumerate(classes):
        folder = os.path.join(dataset_path, cls)
        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices (and therefore the seeded train/test splits) reproducible.
        for fname in sorted(os.listdir(folder)):
            path = os.path.join(folder, fname)
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals a failed read by returning None, not by raising.
                unreadable.append(path)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, img_size)
            img = preprocess_image(img)
            images.append(img)
            labels.append(label)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

    return np.array(images), np.array(labels)

# Load the whole dataset into memory once; every later cell indexes into
# these two arrays. The printed shape is a quick sanity check on the load.
images, labels = load_images()
print("Dataset:", images.shape)


In [None]:
def transformer_block(x, num_heads=4, ff_dim=128):
    """Self-attention followed by a feed-forward layer, each with residual + LayerNorm.

    Normalisation is applied after each residual addition. The size of every
    attention head (``key_dim``) is set to the size of the input's last axis.

    Args:
        x: Keras tensor whose last axis is the feature dimension.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.

    Returns:
        A Keras tensor with the same last-axis size as ``x``.
    """
    width = x.shape[-1]

    # Sub-layer 1: multi-head self-attention (query and value are both x).
    self_attention = layers.MultiHeadAttention(num_heads, key_dim=width)
    attended = self_attention(x, x)
    residual = layers.Add()([x, attended])
    normed = layers.LayerNormalization()(residual)

    # Sub-layer 2: position-wise feed-forward, projected back to the input width.
    hidden = layers.Dense(ff_dim, activation='relu')(normed)
    projected = layers.Dense(width)(hidden)
    residual = layers.Add()([normed, projected])
    return layers.LayerNormalization()(residual)

def build_hybrid(input_shape, num_classes):
    """Build the CNN + transformer classifier.

    Pipeline: two Conv2D/MaxPooling2D stages (32 then 64 filters), flatten the
    spatial grid into a sequence of 64-dimensional tokens, two transformer
    blocks, global average pooling over tokens, a 128-unit dense layer and a
    softmax output.

    Args:
        input_shape: Shape of one input image, passed to ``layers.Input``.
        num_classes: Number of softmax output units.

    Returns:
        An uncompiled ``models.Model`` mapping images to class probabilities.
    """
    inputs = layers.Input(shape=input_shape)

    # Convolutional stem.
    features = inputs
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPooling2D()(features)

    # Every spatial position becomes one token; 64 matches the last conv's filters.
    tokens = layers.Reshape((-1, 64))(features)

    # Two stacked transformer blocks with default head count and FF width.
    tokens = transformer_block(tokens)
    tokens = transformer_block(tokens)

    # Classification head.
    pooled = layers.GlobalAveragePooling1D()(tokens)
    hidden = layers.Dense(128, activation='relu')(pooled)
    outputs = layers.Dense(num_classes, activation='softmax')(hidden)

    return models.Model(inputs, outputs)


In [None]:
# Repeated stratified hold-out evaluation: train a fresh model on each split
# and collect per-split accuracy / macro one-vs-rest AUC plus the raw
# predictions for the later bootstrap confidence intervals.

# One base seed drives both the split generation and the per-split model seeding.
SPLIT_SEED = 42

splitter = StratifiedShuffleSplit(
    n_splits=N_SPLITS,
    test_size=0.2,
    random_state=SPLIT_SEED
)

acc_scores = []
auc_scores = []

all_y_true = []
all_y_prob = []

for split_id, (train_idx, test_idx) in enumerate(splitter.split(images, labels)):
    print(f"\nSplit {split_id+1}/{N_SPLITS}")

    # Drop the previous split's graph before building a new model, then seed
    # Python, NumPy and TensorFlow so weight initialisation and batch
    # shuffling are repeatable for this split. (GPU kernels may still add
    # small run-to-run differences.)
    tf.keras.backend.clear_session()
    tf.keras.utils.set_random_seed(SPLIT_SEED + split_id)

    X_train, X_test = images[train_idx], images[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    model = build_hybrid(input_shape, num_classes)
    model.compile(
        optimizer=Adam(1e-4),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    model.fit(
        X_train,
        to_categorical(y_train, num_classes),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0
    )

    # Use the configured batch size rather than a second hard-coded value.
    y_prob = model.predict(X_test, batch_size=batch_size)
    y_pred = np.argmax(y_prob, axis=1)

    acc = accuracy_score(y_test, y_pred)

    # One indicator column per class so AUC is scored one-vs-rest per class.
    y_test_bin = label_binarize(y_test, classes=list(range(num_classes)))
    auc = roc_auc_score(
        y_test_bin,
        y_prob,
        average="macro",
        multi_class="ovr"
    )

    acc_scores.append(acc)
    auc_scores.append(auc)

    all_y_true.append(y_test)
    all_y_prob.append(y_prob)


In [None]:
# Summarise the per-split scores as mean ± standard deviation.
# np.std uses its default (population, ddof=0) definition here.
split_acc_mean_pct = np.mean(acc_scores) * 100
split_acc_std_pct = np.std(acc_scores) * 100
split_auc_mean = np.mean(auc_scores)
split_auc_std = np.std(auc_scores)

print("\nREPEATED SPLIT RESULTS", "----------------------", sep="\n")
print(f"Accuracy: {split_acc_mean_pct:.2f}% ± {split_acc_std_pct:.2f}")
print(f"AUC: {split_auc_mean:.4f} ± {split_auc_std:.4f}")


In [None]:
def bootstrap_ci(y_true, y_prob, metric_fn, n_bootstrap=1000, seed=42):
    """Estimate a metric and its 95% percentile-bootstrap confidence interval.

    Rows of ``y_true`` and ``y_prob`` are resampled together, with
    replacement, ``n_bootstrap`` times and ``metric_fn`` is evaluated on each
    resample. Resamples on which the metric is undefined (``metric_fn``
    raises ``ValueError`` or returns a non-finite value, e.g. AUC when a
    class is missing from the resample) are skipped and reported with a
    warning instead of aborting the whole estimate.

    Args:
        y_true: Array-like of labels, one entry per sample.
        y_prob: Array-like of per-sample scores; its first axis must have the
            same length as ``y_true``.
        metric_fn: Callable ``metric_fn(y_true, y_prob)`` returning a scalar.
        n_bootstrap: Number of resamples to draw.
        seed: Seed of the private ``RandomState``. Fixed by default so that
            repeated calls on the same data return the same interval.

    Returns:
        tuple: ``(mean, lower, upper)`` -- the mean of the resample scores and
        their 2.5th and 97.5th percentiles.

    Raises:
        ValueError: If the inputs are empty or of different lengths, or if no
            resample produced a finite score.
    """
    import warnings

    # Accept plain lists as well as arrays; fancy indexing below needs ndarrays.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)

    n = len(y_true)
    if n == 0:
        raise ValueError("bootstrap_ci needs at least one sample")
    if len(y_prob) != n:
        raise ValueError(
            f"y_true and y_prob differ in length: {n} vs {len(y_prob)}"
        )

    rng = np.random.RandomState(seed)
    scores = []
    skipped = 0

    for _ in range(n_bootstrap):
        # The index draw happens on every iteration, so the random stream does
        # not depend on whether earlier resamples were skipped.
        idx = rng.choice(n, n, replace=True)
        try:
            score = metric_fn(y_true[idx], y_prob[idx])
        except ValueError:
            skipped += 1
            continue
        if not np.isfinite(score):
            skipped += 1
            continue
        scores.append(score)

    if not scores:
        raise ValueError("no bootstrap resample produced a finite score")
    if skipped:
        warnings.warn(
            f"{skipped} of {n_bootstrap} bootstrap resamples were skipped "
            "because the metric was undefined on them"
        )

    return (
        np.mean(scores),
        np.percentile(scores, 2.5),
        np.percentile(scores, 97.5)
    )


In [None]:
def acc_metric(y_true, y_prob):
    """Accuracy of the arg-max prediction.

    Args:
        y_true: Integer class labels, one per sample.
        y_prob: Per-sample class scores; the predicted class is the index of
            the largest score along axis 1.

    Returns:
        The value returned by ``accuracy_score`` for those predictions.
    """
    predicted = np.argmax(y_prob, axis=1)
    return accuracy_score(y_true, predicted)

# Pool the held-out labels and predicted probabilities of every split.
# NOTE(review): the repeated shuffle splits draw their test sets separately,
# so the same image can appear in more than one of them; the pooled rows may
# therefore not be independent and the interval may be optimistic -- confirm.
y_true_all = np.concatenate(all_y_true)
y_prob_all = np.concatenate(all_y_prob, axis=0)

# Bootstrap estimate of accuracy with its 95% interval.
acc_mean, acc_low, acc_high = bootstrap_ci(
    y_true_all,
    y_prob_all,
    acc_metric,
    n_bootstrap=BOOTSTRAPS,
)


In [None]:
def auc_metric(y_true, y_prob):
    """Macro-averaged one-vs-rest ROC AUC for the three-class problem.

    Args:
        y_true: Integer class labels drawn from ``{0, 1, 2}``.
        y_prob: Per-sample class scores, one column per class.

    Returns:
        The value returned by ``roc_auc_score`` on the binarised labels.
    """
    # One indicator column per class so every class is scored against the rest.
    indicator = label_binarize(y_true, classes=[0,1,2])
    return roc_auc_score(
        indicator,
        y_prob,
        multi_class="ovr",
        average="macro",
    )

# Bootstrap estimate of macro one-vs-rest AUC with its 95% interval, computed
# on the same pooled predictions as the accuracy interval.
auc_mean, auc_low, auc_high = bootstrap_ci(
    y_true_all,
    y_prob_all,
    auc_metric,
    n_bootstrap=BOOTSTRAPS,
)
