In [30]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder, label_binarize
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, Callback
from itertools import cycle
import tensorflow as tf

In [31]:
# Paths
# The dataset root defaults to the original local location but can be redirected
# by setting the HAM10000_DIR environment variable, so the notebook is runnable
# on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "HAM10000_DIR",
    r"C:\Users\guita\Desktop\Senior project\dataset\HAM10000",
)
train_dir = os.path.join(DATA_DIR, "train")
val_dir = os.path.join(DATA_DIR, "val")
test_dir = os.path.join(DATA_DIR, "test")

In [32]:
# Constants
SIZE = 256          # images are resized to SIZE x SIZE before entering the network
BATCH_SIZE = 128    # batch size used by every generator.flow() call
EPOCHS = 100        # upper bound on epochs per fold; early stopping may end sooner
# One class per sub-directory of train_dir. Only directories are counted so that
# stray files in the folder (e.g. Thumbs.db, .DS_Store) do not inflate the
# width of the softmax layer.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join(train_dir, entry))
    for entry in os.listdir(train_dir)
)

In [33]:
# Load images and labels
def load_images_from_directory(directory):
    """Load every ``*.jpg`` under ``directory/<class_name>/`` into memory.

    Each immediate sub-directory of ``directory`` is treated as one class and
    its name is used as the label for the images it contains.

    Args:
        directory: Path of a folder whose sub-directories are the classes.

    Returns:
        A tuple ``(images, labels, class_names)`` where ``images`` is an array
        of the resized images, ``labels`` is an array holding the class name of
        each image (same order as ``images``), and ``class_names`` is the sorted
        list of class sub-directory names.
    """
    # Keep only real directories: plain files sitting next to the class folders
    # (Thumbs.db, .DS_Store, ...) are not classes.
    class_names = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    labels = []
    images = []
    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        # glob() returns files in filesystem order; sorting makes the sample
        # order (and therefore any seeded split made on it) reproducible.
        for img_path in sorted(glob(os.path.join(class_dir, '*.jpg'))):  # or another image format
            img = plt.imread(img_path)
            img_resized = tf.image.resize(img, (SIZE, SIZE)).numpy()
            images.append(img_resized)
            labels.append(class_name)
    return np.array(images), np.array(labels), class_names

In [None]:
# 5-Fold Cross-Validation
# NOTE(review): X_train and y_train_encoded are not defined in any cell visible
# above this one - confirm the cell that loads and label-encodes the training
# set exists and runs before this loop on a fresh kernel.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_no = 1

# Create ImageDataGenerator instances.
# They are built once, outside the loop, because they do not depend on the fold;
# every fold calls .flow() on them with its own arrays.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data is only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

for train_index, val_index in kf.split(X_train):
    # Slice images and labels with the same indices so they stay row-aligned.
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train_encoded[train_index], y_train_encoded[val_index]

In [None]:
# Model definition
    # A fresh CNN is built for every fold: three Conv/MaxPool/Dropout stages
    # followed by a small dense head with a softmax over NUM_CLASSES.
    model = Sequential([
        Conv2D(256, (3, 3), activation="relu", input_shape=(SIZE, SIZE, 3)),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.3),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.3),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.3),
        Flatten(),
        # NOTE(review): this layer has no activation, so it is a purely linear
        # projection in front of the softmax layer - confirm that is intended.
        Dense(32),
        Dense(NUM_CLASSES, activation='softmax')
    ])
    # The fold labels are integer class ids (they are sliced out of
    # y_train_encoded and later handed to confusion_matrix unchanged), so the
    # sparse loss is required; 'categorical_crossentropy' expects one-hot rows.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['acc'])
    model.summary()

In [None]:
# Metrics callback for precision, recall, F1 score
    class MetricsCallback(Callback):
        """Print weighted precision, recall and F1 on held-out data after each epoch.

        Args:
            val_data: Input passed to ``self.model.predict``. It must yield the
                samples in the same order as ``val_labels``; a shuffling
                iterator would pair predictions with the wrong labels.
            val_labels: Integer class id of every validation sample.
        """

        def __init__(self, val_data, val_labels):
            super().__init__()
            self.val_data = val_data
            self.val_labels = val_labels

        def on_epoch_end(self, epoch, logs=None):
            """Predict on ``val_data`` and print the epoch's validation metrics."""
            # Keras may call this with logs=None; treat that as "nothing logged".
            logs = logs or {}
            missing = float('nan')

            y_true = self.val_labels
            # verbose=0 keeps predict() from printing its own progress output
            # on top of the summary line below.
            y_pred = self.model.predict(self.val_data, verbose=0)
            y_pred_classes = np.argmax(y_pred, axis=1)

            # zero_division=0 scores a never-predicted class as 0 without
            # emitting a warning for it every epoch.
            precision, recall, f1_score, _ = precision_recall_fscore_support(
                y_true, y_pred_classes, average='weighted', zero_division=0
            )

            val_loss = logs.get('val_loss', missing)
            # The accuracy key depends on how the metric was named at compile time.
            val_accuracy = logs.get('val_accuracy', logs.get('val_acc', missing))

            print(f'Epoch {epoch + 1} - '
                  f'Validation Loss: {val_loss:.4f}, '
                  f'Validation Accuracy: {val_accuracy:.4f}, '
                  f'Validation Precision: {precision:.4f}, '
                  f'Validation Recall: {recall:.4f}, '
                  f'Validation F1 Score: {f1_score:.4f}')

    # Early stopping: stop after 10 epochs without val_loss improvement and
    # roll the model back to the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Metrics callback.
    # flow() shuffles by default, which would hand the callback predictions in
    # a different order from y_val_fold and make precision/recall/F1
    # meaningless. shuffle=False keeps predictions row-aligned with the labels.
    metrics_callback = MetricsCallback(
        val_data=val_datagen.flow(X_val_fold, y_val_fold, batch_size=BATCH_SIZE, shuffle=False),
        val_labels=y_val_fold,
    )

In [None]:
# Train the model
    # Build this fold's batch iterators up front so the fit() call reads cleanly:
    # augmented batches for training, rescale-only batches for validation.
    train_batches = train_datagen.flow(X_train_fold, y_train_fold, batch_size=BATCH_SIZE)
    val_batches = val_datagen.flow(X_val_fold, y_val_fold, batch_size=BATCH_SIZE)
    fit_callbacks = [early_stopping, metrics_callback]

    # verbose=2 prints one summary line per epoch instead of a progress bar.
    history = model.fit(
        train_batches,
        validation_data=val_batches,
        epochs=EPOCHS,
        callbacks=fit_callbacks,
        verbose=2,
    )

In [None]:
# Evaluate the model on the validation fold
    # evaluate() returns [loss, accuracy]; batch order does not matter here.
    val_score = model.evaluate(val_datagen.flow(X_val_fold, y_val_fold, batch_size=BATCH_SIZE))
    print(f'Fold {fold_no} - Validation accuracy:', val_score[1])

    # Confusion matrix for validation fold.
    # flow() shuffles by default; predictions taken from a shuffled iterator are
    # not in the order of y_val_fold, so every per-sample comparison would be
    # wrong. shuffle=False keeps row i of y_val_pred paired with y_val_fold[i].
    y_val_pred = model.predict(
        val_datagen.flow(X_val_fold, y_val_fold, batch_size=BATCH_SIZE, shuffle=False)
    )
    y_val_pred_classes = np.argmax(y_val_pred, axis=1)
    # Fix the label set so the matrix is always NUM_CLASSES x NUM_CLASSES, even
    # when a rare class is missing from this fold.
    # Assumes class ids are 0..NUM_CLASSES-1 - TODO confirm against the encoder.
    cm_val = confusion_matrix(y_val_fold, y_val_pred_classes, labels=np.arange(NUM_CLASSES))
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm_val, annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.title(f'Confusion Matrix - Validation Fold {fold_no}')
    plt.show()

In [None]:
# AUC-ROC curve for validation fold
    # One-vs-rest indicator matrix with one column per class id. Built against
    # the full 0..NUM_CLASSES-1 range so column i always means class i, even if
    # a class is missing from this fold.
    # Assumes class ids are 0..NUM_CLASSES-1 - TODO confirm against the encoder.
    y_val_true_binary = (
        np.asarray(y_val_fold)[:, None] == np.arange(NUM_CLASSES)
    ).astype(int)

    fpr_val = dict()
    tpr_val = dict()
    roc_auc_val = dict()

    for i in range(NUM_CLASSES):
        y_true_i = y_val_true_binary[:, i]
        # ROC/AUC is undefined when the fold has no positives (or no negatives)
        # for this class; skip it instead of letting roc_auc_score raise.
        if not y_true_i.any() or y_true_i.all():
            continue
        # Score with the predicted probability of class i. Hard 0/1 predictions
        # would reduce each curve to a single operating point.
        # y_val_pred must be row-aligned with y_val_fold for this to be valid.
        y_score_i = y_val_pred[:, i]
        fpr_val[i], tpr_val[i], _ = roc_curve(y_true_i, y_score_i)
        roc_auc_val[i] = roc_auc_score(y_true_i, y_score_i)

    plt.figure(figsize=(8, 8))

    # Take one distinct colour per class from a qualitative palette so that
    # curves do not share a colour.
    for i in roc_auc_val:
        plt.plot(fpr_val[i], tpr_val[i], color=plt.cm.tab10(i % 10), lw=2,
                 label=f'Class {i} (AUC = {roc_auc_val[i]:.2f})')

    # Chance-level diagonal for reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve - Validation Fold {fold_no}')
    plt.legend(loc='lower right')
    plt.show()

    fold_no += 1

In [None]:
# Evaluate the model on the test set (after cross-validation)
# NOTE(review): `model` at this point is whatever the last loop iteration left
# behind, i.e. the model trained on the final fold only - confirm that this is
# the model intended for the test-set report.
test_images, test_labels, _ = load_images_from_directory(test_dir)

# Encode test labels with the class list of the TRAINING folder rather than a
# fresh encoder fitted on the test labels: if the test set lacks a class, a
# fresh encoder would silently assign different ids than training used.
# Presumably y_train_encoded was produced from these same class-folder names -
# verify against the cell that builds it.
train_class_names = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
test_label_encoder = LabelEncoder().fit(train_class_names)
# transform() raises ValueError for a test class that training never saw.
test_labels_encoded = test_label_encoder.transform(test_labels)

# Test images get the same rescale-only preprocessing as validation images.
test_datagen = ImageDataGenerator(rescale=1./255)
test_score = model.evaluate(test_datagen.flow(test_images, test_labels_encoded, batch_size=BATCH_SIZE))
print('Test accuracy:', test_score[1])
