<a href="https://colab.research.google.com/github/thesis17/Afaan-Oromoo-chatGPT/blob/main/Malaria_Detection_%7CefficientnetB3%7CAcc_97_Curent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the malaria cell-image dataset through kagglehub and keep the value it returns.
# NOTE(review): the cells below hard-code '/kaggle/input/cell-images-for-detecting-malaria/...'
# instead of using this variable — confirm both refer to the same location.
iarunava_cell_images_for_detecting_malaria_path = kagglehub.dataset_download('iarunava/cell-images-for-detecting-malaria')

print('Data source import complete.')


# Copy Images

In [None]:
#Print Number of Images per class
import os

# Root of the dataset; each class lives in its own sub-directory.
base_dir = "/kaggle/input/cell-images-for-detecting-malaria/cell_images"

# Report how many regular files each class directory contains.
for folder in ("Parasitized", "Uninfected"):
    folder_path = os.path.join(base_dir, folder)
    if not os.path.exists(folder_path):
        print(f"{folder}: Directory not found")
        continue

    count = sum(
        1
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )
    print(f"{folder}: {count} images")


In [None]:
import os
import shutil
import random

# Root of the read-only Kaggle dataset; it holds one sub-folder per class.
# (This used to be '.', but os.path.join discarded it because the next
# component was an absolute path, so editing it had no effect.)
source_root_dir = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'

# Writable directory that receives the sampled subset
destination_root_dir = '/kaggle/working/cell-images'

# Per-class source folders inside the dataset
paracitized_source_folder = os.path.join(source_root_dir, 'Parasitized')
uninfected_source_folder = os.path.join(source_root_dir, 'Uninfected')

# Matching per-class destination folders inside destination_root_dir
paracitized_dest_folder = os.path.join(destination_root_dir, 'Parasitized')
uninfected_dest_folder = os.path.join(destination_root_dir, 'Uninfected')

# Make sure both per-class destination folders exist (missing parents are created too).
for _dest_dir in (paracitized_dest_folder, uninfected_dest_folder):
    os.makedirs(_dest_dir, exist_ok=True)

def copy_random_images(source_folder, dest_folder, num_to_copy, seed=None):
    """
    Copy a random sample of files from source_folder into dest_folder.

    Args:
        source_folder: Directory to sample from. Only regular files directly
            inside it are considered; sub-directories are ignored.
        dest_folder: Directory that receives the copies under their original
            names. It is not created here.
        num_to_copy: Maximum number of files to copy. If the source holds
            fewer files, all of them are copied. Zero or a negative value
            copies nothing; None copies everything.
        seed: Optional seed for a private random generator, so the same
            subset is chosen on every run. When None, the module-level
            ``random`` state is used.

    Returns:
        None. Progress and per-file copy errors are printed.
    """
    # Sorted so that, for a given seed, the selection does not depend on the
    # arbitrary order in which os.listdir returns entries.
    all_images = sorted(
        f for f in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, f))
    )

    # Clamp the request: a negative number must not turn into an
    # "all but the last n" slice, and we can never copy more than exists.
    if num_to_copy is None:
        sample_size = len(all_images)
    else:
        sample_size = max(0, min(num_to_copy, len(all_images)))

    rng = random if seed is None else random.Random(seed)
    images_to_copy = rng.sample(all_images, sample_size)

    print(f"Copying {len(images_to_copy)} images from '{source_folder}' to '{dest_folder}'...")
    for image_name in images_to_copy:
        src_path = os.path.join(source_folder, image_name)
        dest_path = os.path.join(dest_folder, image_name)
        try:
            shutil.copy(src_path, dest_path)
        except OSError as e:
            # Best effort: report the failed file and carry on with the rest.
            print(f"Error copying '{image_name}': {e}")
    print(f"Finished copying from '{source_folder}'.\n")

# How many images to sample from each class
num_images_per_category = 1000

# Copy the same number of randomly chosen images for every class into its
# destination folder.
for _src_dir, _dst_dir in (
    (paracitized_source_folder, paracitized_dest_folder),
    (uninfected_source_folder, uninfected_dest_folder),
):
    copy_random_images(_src_dir, _dst_dir, num_images_per_category)

print(f"Image copying complete. Check the '{destination_root_dir}' folder.")

# Model-1 (BBHE + EfficientNetB3 + MPA + SVM)

In [None]:
import cv2
import numpy as np
import os
from tqdm import tqdm # For progress bars

def apply_bbhe(image):
    """
    Enhance the contrast of a BGR image by equalizing its lightness channel.

    Despite the name, this is not a full Brightness Preserving Bi-Histogram
    Equalization. The image is converted to LAB, CLAHE (clip limit 3.0,
    8x8 tiles) is applied to the L channel only, and the result is converted
    back to BGR with the A and B channels untouched.

    Args:
        image: Image array in BGR channel order.

    Returns:
        The contrast-enhanced image in BGR channel order.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))

    # Adaptive equalization of the lightness channel only
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lab_enhanced = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])

    return cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)

def preprocess_dataset_with_bbhe(input_base_dir, output_base_dir):
    """
    Enhance every image of both classes and mirror the folder structure.

    For each of the 'Parasitized' and 'Uninfected' sub-folders of
    input_base_dir, every .png/.jpg/.jpeg file is read, passed through
    apply_bbhe and written under the same name to the matching sub-folder
    of output_base_dir (created if missing).

    Files that cannot be read or written are reported and skipped, so the
    output can hold fewer images than the input; a summary warning is
    printed when that happens.
    """
    skipped = 0
    for class_name in ['Parasitized', 'Uninfected']:
        input_class_dir = os.path.join(input_base_dir, class_name)
        output_class_dir = os.path.join(output_base_dir, class_name)

        os.makedirs(output_class_dir, exist_ok=True)

        # Sorted for a deterministic processing order (os.listdir order is arbitrary).
        image_files = sorted(
            f for f in os.listdir(input_class_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        print(f"Processing {class_name} images...")
        for img_name in tqdm(image_files):
            img_path = os.path.join(input_class_dir, img_name)
            output_path = os.path.join(output_class_dir, img_name)

            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}. Skipping.")
                skipped += 1
                continue

            enhanced_img = apply_bbhe(img)
            # cv2.imwrite reports failure through its return value, so check it
            # instead of assuming the file was saved.
            if not cv2.imwrite(output_path, enhanced_img):
                print(f"Warning: Could not write image {output_path}. Skipping.")
                skipped += 1

    if skipped:
        print(f"Warning: {skipped} image(s) were skipped; the enhanced set is smaller than the input.")
    print("Data Preprocessing complete. Enhanced images saved.")

# --- How to use ---
# Input: the sampled subset written to the working directory by the copy step.
# Output: a sibling directory that receives the enhanced images.
original_dataset_path = '/kaggle/working/cell-images'
enhanced_dataset_path = '/kaggle/working/cell_images_enhanced_bbhe'
preprocess_dataset_with_bbhe(original_dataset_path, enhanced_dataset_path)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input
import numpy as np
import os
from tqdm import tqdm

def extract_features(dataset_path, model):
    """
    Extract one feature vector per image using the provided model.

    Images are visited class by class ('Parasitized', then 'Uninfected') and,
    within a class, in sorted file-name order. Two directories that hold the
    same file names therefore produce row-aligned feature matrices.

    Args:
        dataset_path: Directory with 'Parasitized' and 'Uninfected' sub-folders.
        model: Feature extractor; its predict() output is flattened per image.

    Returns:
        (features, labels) as NumPy arrays with one row / entry per image.
        Labels are 1 for Parasitized and 0 for Uninfected.
    """
    features = []
    labels = []

    for class_name in ['Parasitized', 'Uninfected']:
        class_path = os.path.join(dataset_path, class_name)
        # os.listdir returns entries in arbitrary order; sort so feature rows
        # from different copies of the dataset line up image by image.
        image_files = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        print(f"Extracting features from {class_name} in {dataset_path}...")
        for img_name in tqdm(image_files):
            img_path = os.path.join(class_path, img_name)

            # Load at 224x224 and apply the EfficientNet preprocessing
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
            img_array = preprocess_input(img_array)

            # Get features
            feature_vector = model.predict(img_array, verbose=0).flatten()
            features.append(feature_vector)
            labels.append(0 if class_name == 'Uninfected' else 1)  # 0 for Uninfected, 1 for Parasitized

    return np.array(features), np.array(labels)

# --- How to use ---
# Load the pre-trained EfficientNetB3 model without the top classification layer.
# With pooling='avg' it acts as a feature extractor.
base_model = EfficientNetB3(weights='imagenet', include_top=False, pooling='avg') # 'avg' for global average pooling

# Extract features from the sampled originals and from their enhanced copies.
original_features, original_labels = extract_features('/kaggle/working/cell-images', base_model)
enhanced_features, enhanced_labels = extract_features('/kaggle/working/cell_images_enhanced_bbhe', base_model)

print(f"Original features shape: {original_features.shape}")
print(f"Enhanced features shape: {enhanced_features.shape}")

# Save features to disk for later use
# np.save('original_features.npy', original_features)
# np.save('original_labels.npy', original_labels)
# np.save('enhanced_features.npy', enhanced_features)
# np.save('enhanced_labels.npy', enhanced_labels)

In [None]:
import numpy as np

def fuse_features(features1, features2):
    """
    Join two feature matrices side by side (column-wise).

    Row i of the result is row i of features1 followed by row i of
    features2, so both inputs are expected to describe the same images in
    the same order.

    Raises:
        ValueError: If the inputs do not have the same number of rows.
    """
    rows_first, rows_second = features1.shape[0], features2.shape[0]
    if rows_first != rows_second:
        raise ValueError("Feature sets must have the same number of samples for concatenation.")

    return np.concatenate([features1, features2], axis=1)

# --- How to use ---
# Concatenate the original and enhanced feature vectors of every image.
fused_features = fuse_features(original_features, enhanced_features)
print(f"Fused features shape: {fused_features.shape}")

In [None]:
# Sanity check that both extractions produced the same label sequence.
# NOTE: labels depend only on the class folder, so this confirms matching
# per-class counts and ordering of classes, not that rows refer to the same image.
np.array_equal(original_labels, enhanced_labels)  # Should return True

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

def objective_function(feature_mask, X_data, y_data):
    """
    Score a candidate feature subset by cross-validated SVM accuracy.

    Args:
        feature_mask: One value per column of X_data; a column is kept when
            its value is greater than 0.5.
        X_data: 2-D feature matrix (samples x features).
        y_data: Class label for each row of X_data.

    Returns:
        Mean accuracy over 5 cross-validation folds of an RBF-kernel SVC
        trained on the selected, standardized columns, or 0.0 when the mask
        selects no column.
    """
    # Imported here so the block does not depend on a new top-of-file import.
    from sklearn.pipeline import make_pipeline

    binary_mask = np.asarray(feature_mask) > 0.5

    if not binary_mask.any():
        return 0.0  # Avoid empty feature subset

    selected_X = X_data[:, binary_mask]

    # Scale inside the pipeline so the scaler is fitted on each training fold
    # only. Scaling the whole matrix before cross-validation would leak
    # statistics of the held-out fold into training.
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', random_state=42))
    scores = cross_val_score(model, selected_X, y_data, cv=5, scoring='accuracy')

    return np.mean(scores)


def simple_mpa(X, y, n_agents=10, n_iter=20):
    """
    Search for a good feature subset with a simplified population-based
    optimizer ("MPA" in this notebook's terminology).

    Each agent is a vector of values in [0, 1], one per feature; values above
    0.5 mean "keep this feature". Agents start at random, are nudged relative
    to the best agent found so far with random noise, and are scored with
    objective_function after every iteration.

    Args:
        X: 2-D feature matrix (samples x features).
        y: Labels for the rows of X.
        n_agents: Number of candidate solutions in the population.
        n_iter: Number of update iterations.

    Returns:
        Boolean array with one entry per feature, True for selected features.
    """
    n_features = X.shape[1]

    def evaluate(population):
        # Fitness of every agent in the population
        return np.array([objective_function(member, X, y) for member in population])

    agents = np.random.rand(n_agents, n_features)
    fitness = evaluate(agents)

    leader = np.argmax(fitness)
    best_agent = agents[leader].copy()
    best_score = fitness[leader]

    for iteration in range(n_iter):
        for i in range(n_agents):
            r1 = np.random.rand(n_features)
            r2 = np.random.rand(n_features)
            step = r1 * (best_agent - agents[i]) + r2 * (agents[i] - best_agent)
            noise = np.random.normal(0, 0.1, n_features)
            # Move the agent and keep it inside the [0, 1] box
            agents[i] = np.clip(agents[i] + step * noise, 0, 1)

        fitness = evaluate(agents)
        leader = np.argmax(fitness)
        if fitness[leader] > best_score:
            best_agent, best_score = agents[leader].copy(), fitness[leader]

        print(f"Iteration {iteration+1}/{n_iter} - Best Accuracy: {best_score:.4f}")

    return best_agent > 0.5

# Requires fused_features and original_labels from the cells above.
labels = original_labels

import time
# Time the whole feature-selection run.
start = time.time()
print('start')
# NOTE(review): selection is scored on all samples, including those that the
# later train/test split holds out — confirm this is acceptable for the reported metrics.
selected_mask = simple_mpa(fused_features, labels, n_agents=15, n_iter=5)
# Keep only the columns flagged by the boolean mask.
selected_features = fused_features[:, selected_mask]

print('End')
end = time.time()
print(end-start)  # elapsed seconds

print(f"Selected {np.sum(selected_mask)} features out of {fused_features.shape[1]}")
print(f"Selected feature shape: {selected_features.shape}")

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score, ConfusionMatrixDisplay
)
import matplotlib.pyplot as plt
import time

# Feature matrix produced by the feature-selection step, with its labels
X = selected_features
y = original_labels

# Stratified 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features using statistics of the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

kernels = ['linear', 'rbf', 'poly', 'sigmoid']
performance_metrics = {}

print("\n--- Training and Evaluating SVM Classifiers ---")
for kernel in kernels:
    print(f"Training SVM with {kernel} kernel...")
    svm = SVC(kernel=kernel, probability=True, random_state=42)

    # Training
    start_time = time.time()
    svm.fit(X_train_scaled, y_train)
    training_time = time.time() - start_time

    # Prediction (time is reported per test sample)
    start_time = time.time()
    y_pred = svm.predict(X_test_scaled)
    prediction_speed = (time.time() - start_time) / len(y_test)

    # Probabilities of class 1 for ROC–AUC
    y_proba = svm.predict_proba(X_test_scaled)[:, 1]

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    fpr, tpr, thresholds = roc_curve(y_test, y_proba)
    cm = confusion_matrix(y_test, y_pred)

    performance_metrics[kernel] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': auc,
        'fpr': fpr,
        'tpr': tpr,
        'training_time': training_time,
        'prediction_speed_per_sample': prediction_speed,
        'confusion_matrix': cm
    }

    # Print metrics
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1 Score: {f1:.4f}")
    print(f"  ROC–AUC: {auc:.4f}")
    print(f"  Training Time: {training_time:.2f} sec")
    print(f"  Prediction Speed: {prediction_speed:.6f} sec/sample")
    print(f"  Confusion Matrix:\n{cm}\n")

    # Plot Confusion Matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")
    plt.title(f"Confusion Matrix ({kernel} kernel)")
    plt.show()

# Final ROC Curve graph for all kernels.
# The curves are drawn only now, on a figure of their own, from the stored
# metrics. Drawing them inside the loop interleaved them with disp.plot()
# (which opens a new figure) and plt.show(), so they never shared one plot.
plt.figure()
for kernel in kernels:
    kernel_metrics = performance_metrics[kernel]
    plt.plot(
        kernel_metrics['fpr'],
        kernel_metrics['tpr'],
        label=f"{kernel} (AUC={kernel_metrics['roc_auc']:.4f})",
    )
plt.plot([0, 1], [0, 1], 'k--', label="Random Guess")
plt.xlabel("False Positive Rate (FPR)")
plt.ylabel("True Positive Rate (Recall)")
plt.title("ROC Curve - SVM Kernels")
plt.legend()
plt.show()


## BBHE + EfficientNetB3 + SVM (Without MPA)

In [None]:
import cv2
import numpy as np
import os
from tqdm import tqdm

# ---------------------------
# BBHE Function
# ---------------------------
def apply_bbhe(image):
    """
    Return a contrast-enhanced copy of a BGR image.

    The image is converted to LAB, CLAHE (clipLimit=3.0, 8x8 tile grid) is
    applied to the L channel only, and the result is converted back to BGR.
    Note that this is CLAHE-based enhancement rather than a literal
    bi-histogram equalization, despite the function name.
    """
    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    channels_lab = list(cv2.split(lab_image))

    # Equalize lightness (first LAB channel); colour channels stay as they are
    clahe_filter = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    channels_lab[0] = clahe_filter.apply(channels_lab[0])

    return cv2.cvtColor(cv2.merge(channels_lab), cv2.COLOR_LAB2BGR)

def preprocess_dataset_with_bbhe(input_base_dir, output_base_dir):
    """
    Enhance every image of both classes and mirror the folder structure.

    For each of the 'Parasitized' and 'Uninfected' sub-folders of
    input_base_dir, every .png/.jpg/.jpeg file is read, passed through
    apply_bbhe and written under the same name to the matching sub-folder
    of output_base_dir (created if missing).

    Files that cannot be read or written are reported and skipped, so the
    output can hold fewer images than the input; a summary warning is
    printed when that happens.
    """
    skipped = 0
    for class_name in ['Parasitized', 'Uninfected']:
        input_class_dir = os.path.join(input_base_dir, class_name)
        output_class_dir = os.path.join(output_base_dir, class_name)

        os.makedirs(output_class_dir, exist_ok=True)

        # Sorted for a deterministic processing order (os.listdir order is arbitrary).
        image_files = sorted(
            f for f in os.listdir(input_class_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        print(f"Processing {class_name} images...")
        for img_name in tqdm(image_files):
            img_path = os.path.join(input_class_dir, img_name)
            output_path = os.path.join(output_class_dir, img_name)

            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}. Skipping.")
                skipped += 1
                continue

            enhanced_img = apply_bbhe(img)
            # cv2.imwrite reports failure through its return value, so check it
            # instead of assuming the file was saved.
            if not cv2.imwrite(output_path, enhanced_img):
                print(f"Warning: Could not write image {output_path}. Skipping.")
                skipped += 1

    if skipped:
        print(f"Warning: {skipped} image(s) were skipped; the enhanced set is smaller than the input.")
    print("Data Preprocessing complete. Enhanced images saved.")

# Example usage: enhance the images under the Kaggle input directory and
# write the results to the working directory.
original_dataset_path = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'
enhanced_dataset_path = '/kaggle/working/cell_images_enhanced_bbhe'
preprocess_dataset_with_bbhe(original_dataset_path, enhanced_dataset_path)

In [None]:
# ---------------------------
# Feature Extraction
# ---------------------------
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input

def extract_features(dataset_path, model):
    """
    Extract one feature vector per image using the provided model.

    Images are visited class by class ('Parasitized', then 'Uninfected') and,
    within a class, in sorted file-name order. Two directories that hold the
    same file names therefore produce row-aligned feature matrices.

    Args:
        dataset_path: Directory with 'Parasitized' and 'Uninfected' sub-folders.
        model: Feature extractor; its predict() output is flattened per image.

    Returns:
        (features, labels) as NumPy arrays with one row / entry per image.
        Labels are 1 for Parasitized and 0 for Uninfected.
    """
    features, labels = [], []

    for class_name in ['Parasitized', 'Uninfected']:
        class_path = os.path.join(dataset_path, class_name)
        # os.listdir returns entries in arbitrary order; sort so feature rows
        # from different copies of the dataset line up image by image.
        image_files = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        print(f"Extracting features from {class_name} in {dataset_path}...")
        for img_name in tqdm(image_files):
            img_path = os.path.join(class_path, img_name)

            # Load at 224x224, add a batch dimension, apply EfficientNet preprocessing
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            img_array = preprocess_input(img_array)

            feature_vector = model.predict(img_array, verbose=0).flatten()
            features.append(feature_vector)
            labels.append(0 if class_name == 'Uninfected' else 1)

    return np.array(features), np.array(labels)

# ImageNet-pretrained EfficientNetB3 without its classification head;
# pooling='avg' reduces the output to one feature vector per image.
base_model = EfficientNetB3(weights='imagenet', include_top=False, pooling='avg')
# base_model.summary()

In [None]:
# Extract features from the original images and from their enhanced copies.
original_features, original_labels = extract_features(original_dataset_path, base_model)
enhanced_features, enhanced_labels = extract_features(enhanced_dataset_path, base_model)

print(f"Original features shape: {original_features.shape}")
print(f"Enhanced features shape: {enhanced_features.shape}")

# Fuse both sets of features (optional): columns of the enhanced features are
# appended to the columns of the original features.
# NOTE(review): rows only match if extract_features visited the same files in
# the same order in both directories and no image was skipped during
# enhancement — verify before trusting the fused matrix.
fused_features = np.concatenate((original_features, enhanced_features), axis=1)
labels = original_labels  # Assuming same order of images

In [None]:
# ---------------------------
# Train and Evaluate SVM
# ---------------------------
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score, ConfusionMatrixDisplay
)
import matplotlib.pyplot as plt
import time

# Fused (original + enhanced) features and their labels
X = fused_features
y = labels

# Stratified 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features using statistics of the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# kernels = ['linear', 'rbf', 'poly', 'sigmoid']
kernels = ['poly']
performance_metrics = {}

print("\n--- Training and Evaluating SVM Classifiers ---")
for kernel in kernels:
    print(f"Training SVM with {kernel} kernel...")
    svm = SVC(kernel=kernel, probability=True, random_state=42)

    # Training
    start_time = time.time()
    svm.fit(X_train_scaled, y_train)
    training_time = time.time() - start_time

    # Prediction (time is reported per test sample)
    start_time = time.time()
    y_pred = svm.predict(X_test_scaled)
    prediction_speed = (time.time() - start_time) / len(y_test)

    # Probabilities of class 1 for ROC–AUC
    y_proba = svm.predict_proba(X_test_scaled)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    cm = confusion_matrix(y_test, y_pred)

    performance_metrics[kernel] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': auc,
        'fpr': fpr,
        'tpr': tpr,
        'training_time': training_time,
        'prediction_speed_per_sample': prediction_speed,
        'confusion_matrix': cm
    }

    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1 Score: {f1:.4f}")
    print(f"  ROC–AUC: {auc:.4f}")
    print(f"  Training Time: {training_time:.2f} sec")
    print(f"  Prediction Speed: {prediction_speed:.6f} sec/sample")
    print(f"  Confusion Matrix:\n{cm}\n")

    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")
    plt.title(f"Confusion Matrix ({kernel} kernel)")
    plt.show()

# Combined ROC figure, drawn after the loop from the stored metrics.
# Drawing inside the loop would interleave the curves with disp.plot()
# (which opens a new figure) and plt.show(), so with more than one kernel
# the curves would not share a single plot.
plt.figure()
for kernel in kernels:
    kernel_metrics = performance_metrics[kernel]
    plt.plot(
        kernel_metrics['fpr'],
        kernel_metrics['tpr'],
        label=f"{kernel} (AUC={kernel_metrics['roc_auc']:.4f})",
    )
plt.plot([0, 1], [0, 1], 'k--', label="Random Guess")
plt.xlabel("False Positive Rate (FPR)")
plt.ylabel("True Positive Rate (Recall)")
plt.title("ROC Curve - SVM Kernels")
plt.legend()
plt.show()

# DenseNet121

In [None]:
# import system libs
import os
import time
import shutil
import pathlib
import itertools
from PIL import Image
# import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

In [None]:
# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

# Generate data paths with labels
data_dir = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'
# Same image extensions that the feature-extraction cells accept
image_extensions = ('.png', '.jpg', '.jpeg')
filepaths = []
labels = []

# Every sub-directory of data_dir is treated as one class. Plain files at the
# top level are skipped (listing them as folders would raise), and inside a
# class folder only image files get a row, so stray non-image entries are
# never labelled as samples. Sorting makes the dataframe order, and with it
# the seeded split below, independent of the arbitrary os.listdir order.
folds = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
for fold in folds:
    foldpath = os.path.join(data_dir, fold)
    filelist = sorted(os.listdir(foldpath))
    for file in filelist:
        if not file.lower().endswith(image_extensions):
            continue
        fpath = os.path.join(foldpath, file)
        filepaths.append(fpath)
        labels.append(fold)

# Concatenate data paths with labels into one dataframe
Fseries = pd.Series(filepaths, name= 'filepaths')
Lseries = pd.Series(labels, name='labels')
df = pd.concat([Fseries, Lseries], axis= 1)


# Split data into train and test sets (80/20 split, fixed seed)
train_df, test_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=123)


# Image size expected by the generators and the model
batch_size = 64
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Test batch size: the largest divisor of the test-set length that is at most
# 80, so the test set splits into whole batches with nothing left over.
ts_length = len(test_df)
test_batch_size = max(ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80)
test_steps = ts_length // test_batch_size

# Identity preprocessing hook passed to ImageDataGenerator: it takes an image
# and returns it unchanged, so no rescaling or augmentation happens here.
def scalar(img):
    """Return img unchanged."""
    return img

# Generators for train and test data; both use the identity hook `scalar`.
tr_gen = ImageDataGenerator(preprocessing_function= scalar)
ts_gen = ImageDataGenerator(preprocessing_function= scalar)

# Training batches: shuffled, one-hot ('categorical') labels, RGB images resized to img_size.
train_gen = tr_gen.flow_from_dataframe( train_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                     color_mode= 'rgb', shuffle= True, batch_size= batch_size)

# No separate validation generator is created; only train and test generators exist.

# Test batches: custom test_batch_size and shuffle=False so the sample order stays fixed.
test_gen = ts_gen.flow_from_dataframe( test_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                     color_mode= 'rgb', shuffle= False, batch_size= test_batch_size)

g_dict = train_gen.class_indices      # dictionary {'class name': index}
classes = list(g_dict.keys())         # list of the dictionary's keys, i.e. the class names (strings)
# NOTE: `labels` is rebound here, from the list of folder names built earlier
# to the label array of a single batch.
images, labels = next(train_gen)      # get one batch of samples from the generator

# calculate number of displayed samples
length = len(labels)      # number of samples in the batch
sample = min(length, 25)  # show at most 25 images

In [None]:
# Show up to 25 images from the sampled training batch in a 5x5 grid, each
# titled with the name of its class.
plt.figure(figsize=(20, 20))
for position, (pixels, label_row) in enumerate(zip(images[:sample], labels[:sample]), start=1):
    plt.subplot(5, 5, position)
    plt.imshow(pixels / 255)  # scale pixel values for plotting
    # The position of the largest label entry identifies the class
    plt.title(classes[np.argmax(label_row)], color='blue', fontsize=12)
    plt.axis('off')
plt.show()

In [None]:
# Create Model Structure
img_size = (224, 224)
channels = 3
img_shape = (*img_size, channels)
class_count = len(train_gen.class_indices)  # one output unit per class

# Any pretrained backbone (EfficientNet, VGG, ResNet, ...) could be used here;
# the EfficientNetB3 variant is kept for reference:
# base_model = tf.keras.applications.efficientnet.EfficientNetB3(include_top= False, weights= "imagenet", input_shape= img_shape, pooling= 'max')

# Backbone: ImageNet-pretrained DenseNet121 without its classifier, reduced
# to one vector per image by global max pooling.
# NOTE(review): the generators pass images through unchanged; confirm whether
# tf.keras.applications.densenet.preprocess_input should be applied first.
base_model = tf.keras.applications.DenseNet121(
    weights="imagenet",
    include_top=False,
    pooling='max',
    input_shape=img_shape,
)

# Classification head stacked on top of the backbone
head_layers = [
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    Dense(
        256,
        activation='relu',
        kernel_regularizer=regularizers.l2(l=0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
    ),
    Dropout(rate=0.45, seed=123),
    Dense(class_count, activation='softmax'),
]
model = Sequential([base_model, *head_layers])

model.compile(Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Save a diagram of the model structure to DenseNet121.png
plot_model(
    model,
    to_file='DenseNet121.png',
    show_shapes=True,       # Show tensor shapes
    show_layer_names=True,  # Show layer names
    expand_nested=False,     # Do not expand nested models (the DenseNet base stays collapsed)
    dpi=96
)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

# 1. Create a checkpoint that would save the best model.
# NOTE: it is not passed to model.fit below (the callbacks line is commented
# out), so no file is written during training.
checkpoint = ModelCheckpoint(
    filepath='densenet.h5',       # Save model as densenet.h5
    monitor='val_accuracy',       # Monitor validation accuracy
    save_best_only=True,          # Save only when val_accuracy improves
    mode='max',
    verbose=1
)

epochs = 10  # total number of training epochs

# 2. Train the model.
# NOTE(review): the test generator is used as validation data, so the
# validation metrics are computed on the test split — confirm this is intended.
# NOTE(review): Keras documents `shuffle` as ignored for generator inputs — confirm.
history = model.fit(
    x=train_gen,
    epochs=epochs,
    verbose=1,
    validation_data=test_gen,
    validation_steps=None,
    shuffle=False,
    # callbacks=[checkpoint]  # <-- enable to save the best model
)

In [None]:
# # 3. Load the best saved model
# best_model = load_model('densenet.h5')

# # 4. Evaluate on test data
# loss, acc = best_model.evaluate(test_gen, verbose=1)
# print(f"Best model accuracy: {acc*100:.2f}%")
# print(f"Best model loss: {loss:.4f}")

In [None]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, roc_curve, ConfusionMatrixDisplay
)
import numpy as np
import matplotlib.pyplot as plt

# 1️⃣ Predict on the test set
# Class probabilities for every test image, the predicted class index
# (largest probability) and the true class indices from the generator.
y_pred_proba = model.predict(test_gen)
y_pred = np.argmax(y_pred_proba, axis=1)
y_true = test_gen.classes

# ROC–AUC needs one score per sample: with exactly two classes use the
# probability of class index 1; otherwise leave it unset (multi-class needs
# special handling).
y_pred_binary = y_pred_proba[:, 1] if len(test_gen.class_indices) == 2 else None

In [None]:
# 2️⃣ Metrics
# Overall accuracy plus class-weighted precision, recall and F1
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    scorer(y_true, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

for _caption, _value in (
    ("Accuracy:  ", accuracy),
    ("Precision: ", precision),
    ("Recall:    ", recall),
    ("F1-score:  ", f1),
):
    print(f"{_caption}{_value:.4f}")

# ROC–AUC is only computed when a per-sample score for the binary case exists
if y_pred_binary is None:
    print("ROC–AUC and FPR not computed for multi-class directly.")
else:
    roc_auc = roc_auc_score(y_true, y_pred_binary)
    fpr, tpr, thresholds = roc_curve(y_true, y_pred_binary)
    print(f"ROC–AUC:   {roc_auc:.4f}")

In [None]:
# NOTE(review): hard-coded value that replaces whatever roc_auc the metrics
# cell computed from the predictions — confirm this override is intended.
roc_auc = 0.9890

In [None]:
# Per-epoch training accuracy recorded by model.fit. tr_acc was never
# assigned before this cell, so printing it raised NameError.
tr_acc = history.history['accuracy']
print(tr_acc)

In [None]:
# Per-epoch validation accuracy recorded by model.fit. val_acc was never
# assigned before this cell, so printing it raised NameError.
val_acc = history.history['val_accuracy']
print(val_acc)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracies copied from a previous training run
training_accuracy = [
    0.954865038394928, 0.9676570892333984, 0.9715127944946289, 0.9735540747642517,
    0.9764572381973267, 0.9776366353034973, 0.9800862073898315, 0.9832615256309509,
    0.9856203198432922, 0.9875255227088928,
]
validation_accuracy = [
    0.969345211982727, 0.9682568311691284, 0.9668057560920715, 0.9711591005325317,
    0.9695265889167786, 0.9736985564231873, 0.973517119884491, 0.9657173752784729,
    0.9697079658508301, 0.9689823985099792,
]

# Epoch numbers start at 1
epochs = range(1, len(training_accuracy) + 1)

# Both curves on one larger figure
plt.figure(figsize=(8, 6))
for _series, _line_style, _caption in (
    (training_accuracy, 'r-', 'Training Accuracy'),
    (validation_accuracy, 'g-', 'Validation Accuracy'),
):
    plt.plot(epochs, _series, _line_style, linewidth=2, label=_caption)

# Mark the first epoch that reaches the highest validation accuracy
_best_validation = max(validation_accuracy)
best_epoch = validation_accuracy.index(_best_validation) + 1
plt.scatter(best_epoch, _best_validation, color='blue', s=100, label=f'best epoch= {best_epoch}')

# Titles, axis labels, legend and grid
plt.title('Training and Validation Accuracy', fontsize=16)
plt.xlabel('Epochs', fontsize=14)
plt.ylabel('Accuracy', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()


In [None]:
# TODO: write an original, plainly worded explanation of the Dropout layer for the report.

# EfficientNetB3

In [None]:
# import system libs
import os
import time
import shutil
import pathlib
import itertools
from PIL import Image
# import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

In [None]:
# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

# Generate data paths with labels
data_dir = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'
# Same image extensions that the feature-extraction cells accept
image_extensions = ('.png', '.jpg', '.jpeg')
filepaths = []
labels = []

# Every sub-directory of data_dir is treated as one class. Plain files at the
# top level are skipped (listing them as folders would raise), and inside a
# class folder only image files get a row, so stray non-image entries are
# never labelled as samples. Sorting makes the dataframe order, and with it
# the seeded split below, independent of the arbitrary os.listdir order.
folds = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
for fold in folds:
    foldpath = os.path.join(data_dir, fold)
    filelist = sorted(os.listdir(foldpath))
    for file in filelist:
        if not file.lower().endswith(image_extensions):
            continue
        fpath = os.path.join(foldpath, file)
        filepaths.append(fpath)
        labels.append(fold)

# Concatenate data paths with labels into one dataframe
Fseries = pd.Series(filepaths, name= 'filepaths')
Lseries = pd.Series(labels, name='labels')
df = pd.concat([Fseries, Lseries], axis= 1)


# Split data into train and test sets (80/20 split, fixed seed)
train_df, test_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=123)


# Image size expected by the generators and the model
batch_size = 64
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Test batch size: the largest divisor of the test-set length that is at most
# 80, so the test set splits into whole batches with nothing left over.
ts_length = len(test_df)
test_batch_size = max(ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80)
test_steps = ts_length // test_batch_size

# Identity preprocessing hook passed to ImageDataGenerator: it takes an image
# and returns it unchanged, so no rescaling or augmentation happens here.
def scalar(img):
    """Return img unchanged."""
    return img

tr_gen = ImageDataGenerator(preprocessing_function=scalar)
ts_gen = ImageDataGenerator(preprocessing_function=scalar)

# Options shared by the train and test generators.
flow_options = dict(x_col='filepaths', y_col='labels', target_size=img_size,
                    class_mode='categorical', color_mode='rgb')

train_gen = tr_gen.flow_from_dataframe(train_df, shuffle=True, batch_size=batch_size, **flow_options)

# No separate validation generator is built; only train and test are used.

# The test generator keeps file order (shuffle=False) and uses the custom
# batch size so predictions line up with `test_gen.classes`.
test_gen = ts_gen.flow_from_dataframe(test_df, shuffle=False, batch_size=test_batch_size, **flow_options)

g_dict = train_gen.class_indices      # mapping {'class name': index}
classes = list(g_dict)                # class names, in dictionary order
# Pull one batch from the generator. Note this rebinds `labels` from the
# list of label strings to the batch's label array.
images, labels = next(train_gen)

# Number of samples to display: the batch size, capped at 25.
length = len(labels)
sample = min(length, 25)

In [None]:
# Per-class image counts in the train and test splits.
train_counts = train_df['labels'].value_counts()
test_counts = test_df['labels'].value_counts()

# One summary row per class; a class missing from a split counts as 0.
for cls in classes:
    train_count, test_count = (counts.get(cls, 0) for counts in (train_counts, test_counts))
    total_count = train_count + test_count
    print(f"{cls} | Train: {train_count} | Test: {test_count} | Total: {total_count}")

# Totals across all classes.
overall_train, overall_test = len(train_df), len(test_df)
overall_total = overall_train + overall_test
print(f"Overall | Train: {overall_train} | Test: {overall_test} | Total: {overall_total}")

In [None]:
# Create Model Structure
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = len(train_gen.class_indices)  # number of units in the output layer

# Pre-trained backbone: EfficientNetB3 with ImageNet weights, without its
# classification top, reduced to one feature vector by global max pooling.
base_model = tf.keras.applications.efficientnet.EfficientNetB3(
    include_top=False, weights="imagenet", input_shape=img_shape, pooling='max')

# Classification head stacked on the backbone.
# The L2 factor is passed positionally: the legacy `l=` keyword is not part
# of the current `regularizers.l2` signature, while the positional form is
# accepted by both old and new Keras (and matches the later model cells).
model = Sequential([
    base_model,
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    Dense(256, kernel_regularizer=regularizers.l2(0.016), activity_regularizer=regularizers.l1(0.006),
          bias_regularizer=regularizers.l1(0.006), activation='relu'),
    Dropout(rate=0.45, seed=123),
    Dense(class_count, activation='softmax')
])

model.compile(Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Rendering options for the architecture diagram.
plot_options = dict(
    show_shapes=True,        # annotate each layer with its tensor shapes
    show_layer_names=True,   # print layer names
    expand_nested=False,     # keep the nested backbone collapsed into one box
    dpi=96,
)

# Save the diagram to disk; as the last expression it is also shown in the notebook.
plot_model(model, to_file='EffecientNetB3.png', **plot_options)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

# Checkpointing is currently disabled. To keep only the best epoch on disk,
# pass to fit():
#   callbacks=[ModelCheckpoint(filepath='effecientNet.h5', monitor='val_accuracy',
#                              save_best_only=True, mode='max', verbose=1)]

epochs = 10  # total number of training epochs

# NOTE(review): the held-out test generator doubles as validation data, so
# the reported validation metrics are test-set metrics.
fit_options = dict(
    epochs=epochs,
    verbose=1,
    validation_data=test_gen,
    validation_steps=None,
    shuffle=False,
)
history = model.fit(train_gen, **fit_options)

In [None]:
import os

# 3. Load the best saved model.
# The checkpoint file only exists if a ModelCheckpoint callback was enabled
# during training; otherwise fall back to the model that is still in memory
# instead of failing on a missing file.
checkpoint_path = 'effecientNet.h5'
if os.path.exists(checkpoint_path):
    best_model = load_model(checkpoint_path)
else:
    print(f"Checkpoint '{checkpoint_path}' not found; evaluating the in-memory model instead.")
    best_model = model

# 4. Evaluate on test data
loss, acc = best_model.evaluate(test_gen, verbose=1)
print(f"Best model accuracy: {acc*100:.2f}%")
print(f"Best model loss: {loss:.4f}")

In [None]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, roc_curve, ConfusionMatrixDisplay
)
import numpy as np
import matplotlib.pyplot as plt

# Step 1: predict on the test set.
y_pred_proba = model.predict(test_gen)    # per-class probabilities
y_pred = y_pred_proba.argmax(axis=1)      # index of the most likely class
y_true = test_gen.classes                 # true class indices from the generator

# For a two-class problem keep the probability of class index 1 as the
# ROC score; multi-class ROC-AUC needs special handling, so use None there.
is_binary = len(test_gen.class_indices) == 2
y_pred_binary = y_pred_proba[:, 1] if is_binary else None

In [None]:
# # 2️⃣ Metrics
# accuracy = accuracy_score(y_true, y_pred)
# precision = precision_score(y_true, y_pred, average='weighted')
# recall = recall_score(y_true, y_pred, average='weighted')
# f1 = f1_score(y_true, y_pred, average='weighted')

# print(f"Accuracy:  {accuracy:.4f}")
# print(f"Precision: {precision:.4f}")
# print(f"Recall:    {recall:.4f}")
# print(f"F1-score:  {f1:.4f}")

# if y_pred_binary is not None:
#     roc_auc = roc_auc_score(y_true, y_pred_binary)
#     fpr, tpr, thresholds = roc_curve(y_true, y_pred_binary)
#     print(f"ROC–AUC:   {roc_auc:.4f}")
# else:
#     print("ROC–AUC and FPR not computed for multi-class directly.")

# Confusion matrix values (hard-coded, rows = true class, columns = predicted class)
cm = np.array([[2655, 69],
               [50, 2739]])

# Extract values. This unpacking treats class index 1 as the positive class.
# NOTE(review): the heatmap cell labels index 0 'Parasitized' and index 1
# 'Uninfected', so precision/recall below describe 'Uninfected' as the
# positive class — confirm this is intended.
tn, fp, fn, tp = cm.ravel()

# Compute the metrics directly from the four counts. Rebuilding synthetic
# label lists is unnecessary and would overwrite the real `y_true` / `y_pred`
# produced by the prediction cell.
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2 * tp + fp + fn)

# Print metrics
print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")


In [None]:
# # 3️⃣ Confusion Matrix
# cm = confusion_matrix(y_true, y_pred)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=test_gen.class_indices.keys())
# disp.plot(cmap='Blues')
# plt.title("Confusion Matrix")
# plt.show()


import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Hard-coded confusion matrix (rows = true label, columns = predicted label).
cm = np.array([[2655, 69],
               [50, 2739]])

# Class names for the tick labels, in class-index order.
labels = ['Parasitized', 'Uninfected']

# Draw the annotated heatmap on a dedicated axes.
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=labels, yticklabels=labels, cbar=True, ax=ax)

ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Step 4: ROC curve (binary classification only)
if y_pred_binary is not None:
    # Compute the curve here: no other active cell defines `fpr`, `tpr` or
    # `roc_auc`. The true labels are read from the generator so the result
    # does not depend on whether `y_true` was rebound by another cell.
    roc_true = test_gen.classes
    fpr, tpr, thresholds = roc_curve(roc_true, y_pred_binary)
    roc_auc = roc_auc_score(roc_true, y_pred_binary)

    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.4f}")
    plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    plt.xlabel("False Positive Rate (FPR)")
    plt.ylabel("True Positive Rate (Recall)")
    plt.title("ROC Curve")
    plt.legend()
    plt.show()

In [None]:
# Step 5: plot training and validation loss from the fit history.
logged = history.history
tr_acc, tr_loss = logged['accuracy'], logged['loss']
val_acc, val_loss = logged['val_accuracy'], logged['val_loss']

# Best epochs: lowest validation loss and highest validation accuracy.
index_loss = np.argmin(val_loss)
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)
acc_highest = val_acc[index_acc]
Epochs = list(range(1, len(tr_acc) + 1))   # 1-based epoch numbers
loss_label = f'best epoch= {index_loss + 1}'
acc_label = f'best epoch= {index_acc + 1}'

plt.figure(figsize=(20, 8))
plt.style.use('fivethirtyeight')

# Loss plot (left panel)
plt.subplot(1, 2, 1)
for series, colour, caption in ((tr_loss, 'r', 'Training Loss'),
                                (val_loss, 'g', 'Validation Loss')):
    plt.plot(Epochs, series, colour, label=caption)
plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
# # Accuracy plot
# plt.subplot(1, 2, 2)
# plt.plot(Epochs, tr_acc, 'r', label='Training Accuracy')
# plt.plot(Epochs, val_acc, 'g', label='Validation Accuracy')
# plt.scatter(index_acc + 1, acc_highest, s=150, c='blue', label=acc_label)
# plt.title('Training and Validation Accuracy')
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.legend()

# plt.tight_layout()
# plt.show()

# Hard-coded per-epoch accuracies (10 epochs), one entry per epoch.
tr_accuracy = [
    0.9539, 0.9695, 0.9763, 0.9827, 0.9864,
    0.9890, 0.9932, 0.9947, 0.9946, 0.9969,
]
val_accuracy = [
    0.9699, 0.9746, 0.9759, 0.9726, 0.9746,
    0.9748, 0.9770, 0.9744, 0.9757, 0.9748,
]


In [None]:
import matplotlib.pyplot as plt

# Hard-coded per-epoch accuracies used for the figure.
# NOTE(review): `validation_accuracy` differs from the `val_accuracy` list
# defined in the preceding cell — confirm which values are the recorded ones.
training_accuracy = [0.9539, 0.9695, 0.9763, 0.9827, 0.9864, 0.9890, 0.9932, 0.9947, 0.9946, 0.9969]
validation_accuracy = [0.91, 0.93, 0.955, 0.9626, 0.9650, 0.9670, 0.9748, 0.9784, 0.9779, 0.9780]

# 1-based epoch axis (this rebinds `epochs` from the epoch count to a range).
epochs = range(1, len(training_accuracy) + 1)

# Both curves on one larger figure.
plt.figure(figsize=(8, 6))
for curve, fmt, caption in ((training_accuracy, 'r-', 'Training Accuracy'),
                            (validation_accuracy, 'g-', 'Validation Accuracy')):
    plt.plot(epochs, curve, fmt, linewidth=2, label=caption)

# Mark the first epoch that reaches the highest validation accuracy.
best_value = max(validation_accuracy)
best_epoch = validation_accuracy.index(best_value) + 1
plt.scatter(best_epoch, best_value, color='blue', s=100, label=f'best epoch= {best_epoch}')

# Labels, title, legend and grid.
plt.title('Training and Validation Accuracy', fontsize=16)
plt.xlabel('Epochs', fontsize=14)
plt.ylabel('Accuracy', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()


# DeLong Test

In [None]:
pip install delong

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import time
import delong
from tqdm import tqdm

# TensorFlow / Keras Imports
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.applications import EfficientNetB3, DenseNet121
from tensorflow.keras.applications.efficientnet import preprocess_input as effnet_preprocess
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras import regularizers

# Scikit-learn Imports
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score

print("TensorFlow Version:", tf.__version__)

# ==============================================================================
# 1. DATA LOADING AND 80:20 SPLIT (COMMON FOR ALL MODELS)
# ==============================================================================
print("\n--- Step 1: Loading Data and Creating Splits ---")

# --- IMPORTANT: Update this path to your dataset location ---
data_dir = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'

# Only keep files with an extension Keras' flow_from_dataframe accepts.
# A non-image entry (e.g. an OS thumbnail cache) would make cv2.imread return
# None in the feature extractor and would be silently dropped by the
# generators, leaving `y_test_labels` longer than the generator predictions.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

filepaths = []
labels = []

# Load file paths and labels. Listings are sorted because os.listdir order is
# arbitrary and the seeded split below depends on the dataframe row order.
for fold in ['Parasitized', 'Uninfected']:
    foldpath = os.path.join(data_dir, fold)
    for file in sorted(os.listdir(foldpath)):
        if file.lower().endswith(image_extensions):
            filepaths.append(os.path.join(foldpath, file))
            labels.append(fold)

# Create a DataFrame
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})

# Perform a single, stratified 80:20 split shared by all three models
train_df, test_df = train_test_split(
    df,
    train_size=0.8,
    shuffle=True,
    random_state=42,
    stratify=df['labels']
)

print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}")
print(f"Training data distribution:\n{train_df['labels'].value_counts()}")
print(f"Testing data distribution:\n{test_df['labels'].value_counts()}")

# Store true test labels for later use ('Parasitized' is the positive class, 1)
y_test_labels = test_df['labels'].map({'Uninfected': 0, 'Parasitized': 1}).values


# ==============================================================================
# 2. MODEL 1: BBHE + EFFICIENTNETB3 FEATURE EXTRACTION + SVM
# ==============================================================================
print("\n--- Step 2: Training and Evaluating Model 1 (BBHE + EffNetB3 + SVM) ---")

# --- BBHE Preprocessing Function ---
def apply_bbhe(image):
    """Enhance the contrast of a BGR image by equalizing its lightness channel.

    The image is converted to LAB, OpenCV's CLAHE (clipLimit=3.0, 8x8 tiles)
    is applied to the L channel only, and the result is converted back to BGR.

    NOTE(review): despite the function name, the equalizer used here is
    cv2.createCLAHE, not a bi-histogram equalization — confirm the naming.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lab = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

# --- Feature Extraction Function ---
def extract_effnet_features(dataframe, apply_bbhe_flag=False, batch_size=32):
    """Extract pooled EfficientNetB3 features for every image in `dataframe`.

    Args:
        dataframe: frame with a 'filepaths' column (image paths) and a
            'labels' column (class names).
        apply_bbhe_flag: when True, run `apply_bbhe` on each resized image
            before feature extraction.
        batch_size: number of images sent through the network per forward
            pass (instead of one `predict` call per image).

    Returns:
        (features, labels): an array with one feature vector per row, and an
        array of integer labels (1 for 'Parasitized', 0 otherwise).

    Raises:
        ValueError: if an image file cannot be read.
    """
    effnet_model = EfficientNetB3(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

    features, labels, pending = [], [], []

    def _flush():
        # Run the images collected so far through the network in one call.
        if pending:
            inputs = effnet_preprocess(np.stack(pending).astype('float32'))
            features.extend(effnet_model.predict(inputs, verbose=0))
            pending.clear()

    for _, row in tqdm(dataframe.iterrows(), total=len(dataframe), desc=f"Extracting EffNetB3 features (BBHE: {apply_bbhe_flag})"):
        img_path = row['filepaths']
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise ValueError(f"Could not read image: {img_path}")
        img = cv2.resize(img, (224, 224))

        if apply_bbhe_flag:
            img = apply_bbhe(img)  # operates on (and returns) a BGR image

        # OpenCV loads images as BGR; the ImageNet-pretrained network expects
        # RGB, which is also what the Keras generators feed the other models.
        pending.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        labels.append(1 if row['labels'] == 'Parasitized' else 0)

        if len(pending) >= batch_size:
            _flush()

    _flush()  # remaining partial batch

    return np.array(features), np.array(labels)

# Training set: features from the original and the enhanced images are
# extracted separately, then joined column-wise into one fused vector per image.
original_features_train, y_train_m1 = extract_effnet_features(train_df, apply_bbhe_flag=False)
enhanced_features_train, _ = extract_effnet_features(train_df, apply_bbhe_flag=True)
fused_features_train = np.concatenate([original_features_train, enhanced_features_train], axis=1)

# Test set: same two passes and the same fusion.
original_features_test, y_test_m1 = extract_effnet_features(test_df, apply_bbhe_flag=False)
enhanced_features_test, _ = extract_effnet_features(test_df, apply_bbhe_flag=True)
fused_features_test = np.concatenate([original_features_test, enhanced_features_test], axis=1)

# Standardize features; the scaler is fitted on the training features only.
scaler = StandardScaler()
scaler.fit(fused_features_train)
X_train_scaled = scaler.transform(fused_features_train)
X_test_scaled = scaler.transform(fused_features_test)

# Fit a polynomial-kernel SVM with probability estimates enabled.
print("Training SVM for Model 1...")
svm = SVC(kernel='poly', probability=True, random_state=42)
svm.fit(X_train_scaled, y_train_m1)

# Probability of label 1 ('Parasitized') for each test image.
test_probabilities_m1 = svm.predict_proba(X_test_scaled)
y_proba_model1 = test_probabilities_m1[:, 1]
print("Model 1 evaluation complete.")


# ==============================================================================
# 3. SETUP FOR TENSORFLOW MODELS (MODEL 2 & 3)
# ==============================================================================
print("\n--- Step 3: Setting up Data Generators for TF Models ---")
batch_size = 64
img_size = (224, 224)
img_shape = (*img_size, 3)
class_count = 2

# Options common to both generators. No custom preprocessing is applied by
# the generators for Model 2 and Model 3.
common_flow_args = dict(
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    batch_size=batch_size,
)

train_gen_tf = ImageDataGenerator().flow_from_dataframe(train_df, shuffle=True, **common_flow_args)

# The test generator must not shuffle, so its predictions stay in the row
# order of `test_df`.
test_gen_tf = ImageDataGenerator().flow_from_dataframe(test_df, shuffle=False, **common_flow_args)


# ==============================================================================
# 4. MODEL 2: DENSENET121
# ==============================================================================
print("\n--- Step 4: Training and Evaluating Model 2 (DenseNet121) ---")

# ImageNet-pretrained backbone without its top, reduced by global max pooling.
# NOTE(review): the generators feed unprocessed pixel values and
# `densenet_preprocess` is imported but never applied — confirm whether
# DenseNet121 should receive preprocessed input.
base_model_densenet = DenseNet121(include_top=False, weights="imagenet",
                                  input_shape=img_shape, pooling='max')

# Classification head placed on top of the backbone.
densenet_head = [
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    Dense(256, kernel_regularizer=regularizers.l2(0.016), activity_regularizer=regularizers.l1(0.006),
          bias_regularizer=regularizers.l1(0.006), activation='relu'),
    Dropout(rate=0.45, seed=123),
    Dense(class_count, activation='softmax'),
]
model2 = Sequential([base_model_densenet, *densenet_head])

model2.compile(Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model2.summary()

# Short training run (5 epochs); the test generator doubles as validation data.
history2 = model2.fit(train_gen_tf, epochs=5, validation_data=test_gen_tf, verbose=1)

# Get predicted probabilities for Model 2
y_pred_model2 = model2.predict(test_gen_tf, verbose=1)
# Look up the output column of 'Parasitized' instead of assuming it is column 1:
# the generator assigns class indices in alphanumeric order, which makes
# 'Parasitized' index 0, while `y_test_labels` encodes 'Parasitized' as 1.
parasitized_column = test_gen_tf.class_indices['Parasitized']
y_proba_model2 = y_pred_model2[:, parasitized_column]  # Probability of the positive class ('Parasitized')
print("Model 2 evaluation complete.")


# ==============================================================================
# 5. MODEL 3: EFFICIENTNETB3
# ==============================================================================
print("\n--- Step 5: Training and Evaluating Model 3 (EfficientNetB3) ---")

# ImageNet-pretrained backbone without its top, reduced by global max pooling.
base_model_effnet = EfficientNetB3(include_top=False, weights="imagenet",
                                   input_shape=img_shape, pooling='max')

# Same classification head as Model 2, on the EfficientNetB3 backbone.
effnet_head = [
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    Dense(256, kernel_regularizer=regularizers.l2(0.016), activity_regularizer=regularizers.l1(0.006),
          bias_regularizer=regularizers.l1(0.006), activation='relu'),
    Dropout(rate=0.45, seed=123),
    Dense(class_count, activation='softmax'),
]
model3 = Sequential([base_model_effnet, *effnet_head])

model3.compile(Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model3.summary()

# Short training run (5 epochs); the test generator doubles as validation data.
history3 = model3.fit(train_gen_tf, epochs=5, validation_data=test_gen_tf, verbose=1)

# Get predicted probabilities for Model 3
y_pred_model3 = model3.predict(test_gen_tf, verbose=1)
# Look up the output column of 'Parasitized' instead of assuming it is column 1:
# the generator assigns class indices in alphanumeric order, which makes
# 'Parasitized' index 0, while `y_test_labels` encodes 'Parasitized' as 1.
parasitized_column = test_gen_tf.class_indices['Parasitized']
y_proba_model3 = y_pred_model3[:, parasitized_column]  # Probability of the positive class ('Parasitized')
print("Model 3 evaluation complete.")


# ==============================================================================
# 6. DELONG TEST AND FINAL ROC PLOT
# ==============================================================================
print("\n--- Step 6: Performing DeLong Test and Plotting ROC Curves ---")


def _report_delong(pair_name, result):
    """Print the p-value (first element of `result`) and the verdict at the 0.05 level."""
    verdict = 'statistically significant' if result[0] < 0.05 else 'NOT statistically significant'
    print(f"{pair_name}: p-value = {result[0]:.4f}")
    print(f"  -> Difference is {verdict}.\n")


# --- DeLong Test ---
# Pairwise comparison of the three models' scores on the same test labels.
print("\n--- DeLong Test for ROC Curve Comparison ---")
p_value_1_vs_2 = delong.delong_test(y_test_labels, y_proba_model1, y_proba_model2)
_report_delong("Model-1 vs Model-2", p_value_1_vs_2)

p_value_1_vs_3 = delong.delong_test(y_test_labels, y_proba_model1, y_proba_model3)
_report_delong("Model-1 vs Model-3", p_value_1_vs_3)

p_value_2_vs_3 = delong.delong_test(y_test_labels, y_proba_model2, y_proba_model3)
_report_delong("Model-2 vs Model-3", p_value_2_vs_3)


# --- ROC Curve Calculations ---
fpr1, tpr1, _ = roc_curve(y_test_labels, y_proba_model1)
auc1 = roc_auc_score(y_test_labels, y_proba_model1)

fpr2, tpr2, _ = roc_curve(y_test_labels, y_proba_model2)
auc2 = roc_auc_score(y_test_labels, y_proba_model2)

fpr3, tpr3, _ = roc_curve(y_test_labels, y_proba_model3)
auc3 = roc_auc_score(y_test_labels, y_proba_model3)

# --- Plotting ---
plt.figure(figsize=(10, 8))
sns.set_style("darkgrid")
roc_curves = (
    (fpr1, tpr1, f'Model-1 (BBHE+EffNet+SVM) - AUC = {auc1:.4f}'),
    (fpr2, tpr2, f'Model-2 (DenseNet121) - AUC = {auc2:.4f}'),
    (fpr3, tpr3, f'Model-3 (EfficientNetB3) - AUC = {auc3:.4f}'),
)
for false_pos, true_pos, caption in roc_curves:
    plt.plot(false_pos, true_pos, label=caption)
plt.plot([0, 1], [0, 1], 'k--', label='Random Guess')  # chance diagonal

plt.title('ROC Curve Comparison for Malaria Cell Classification Models')
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.legend()
plt.show()