# Increasing representation of minority classes by adding duplicate minority class images with augmentation such that each minority class contains ~1000 images

# Loading Dependencies

In [1]:
# Dependencies to Visualize the model
import tensorflow as tf
%matplotlib inline
from IPython.display import Image, SVG
import matplotlib.pyplot as plt
import numpy as np
# Setting seed for reproducibility
np.random.seed(0)

# Filepaths, pandas, numpy, Tensorflow, and scikit-image
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import skimage as sk

# Stratify Images in Main Image Repository
Only run if images are still conglomerated 

In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the CSV file
csv_file = "Resources/HAM10000_metadata.csv"
metadata = pd.read_csv(csv_file)

# Define the source directory where all the images are located
source_dir = "Resources/Skin Cancer"

# Define the target directories for train and val splits
train_dir = "Resources/Skin Cancer/train"
val_dir = "Resources/Skin Cancer/val"

# Define the split ratio (e.g., 0.8 for 80% train, 0.2 for 20% val)
split_ratio = 0.8

# Create the target directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Get the unique class labels
class_labels = metadata["dx"].unique()

# Create subdirectories for each class in train and val directories
for label in class_labels:
    os.makedirs(os.path.join(train_dir, label), exist_ok=True)
    os.makedirs(os.path.join(val_dir, label), exist_ok=True)

# Split the metadata into train and validation sets using stratified splitting
# Stratify was required here to ensure that an 80-20 split occured for all classes in the dataset, not just an 80-20 split of the entire dataset.
train_metadata, val_metadata = train_test_split(
    metadata, stratify=metadata["dx"], test_size=1 - split_ratio, random_state=42
)

# Move or copy the images to the respective train and validation directories
for _, row in train_metadata.iterrows():
    image_id = row["image_id"]
    image_path = os.path.join(source_dir, f"{image_id}.jpg")
    class_label = row["dx"]
    target_dir = os.path.join(train_dir, class_label)
    shutil.copy(image_path, target_dir)

for _, row in val_metadata.iterrows():
    image_id = row["image_id"]
    image_path = os.path.join(source_dir, f"{image_id}.jpg")
    class_label = row["dx"]
    target_dir = os.path.join(val_dir, class_label)
    shutil.copy(image_path, target_dir)

print("Stratified splitting and image organization completed.")

# Preprocessing of Images

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the path to your image directory
image_directory = "resources_augmented/Skin Cancer"

# Defining original image size
image_size = (600, 450)

# Define the batch size
batch_size = 32

# EDIT THIS OUT 
# Create an ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,  # Normalize pixel values
    # rotation_range=20,  # Randomly rotate images by 20 degrees
    # width_shift_range=0.2,  # Randomly shift images horizontally by 20%
    # height_shift_range=0.2,  # Randomly shift images vertically by 20%
    # horizontal_flip=True,  # Randomly flip images horizontally
    # zoom_range=0.2,  # Randomly zoom images by 20%
)

# Load and preprocess the train dataset with data augmentation
train_dataset = train_datagen.flow_from_directory(
    directory=os.path.join(image_directory, "train"),  # Use the 'train' directory
    target_size=image_size,
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
    seed=42,
)

# Create an ImageDataGenerator for validation data (no augmentation)
val_datagen = ImageDataGenerator(rescale=1.0 / 255)  # Normalize pixel values

# Load and preprocess the validation dataset without data augmentation
val_dataset = val_datagen.flow_from_directory(
    directory=os.path.join(image_directory, "val"),  # Use the 'val' directory
    target_size=image_size,
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False,
    seed=42,
)

def get_class_counts(dataset):
    class_labels = list(dataset.class_indices.keys())
    class_counts = dict(zip(class_labels, [0] * len(class_labels)))
    for _, labels in dataset:
        for label in labels:
            class_counts[class_labels[int(label.argmax())]] += 1
        if sum(class_counts.values()) >= dataset.samples:
            break
    return class_counts

train_counts = get_class_counts(train_dataset)
val_counts = get_class_counts(val_dataset)

# Combine the counts from training and validation
combined_counts = {class_name: train_counts.get(class_name, 0) + val_counts.get(class_name, 0) for class_name in set(train_counts) | set(val_counts)}
total_samples = sum(combined_counts.values())

print("\nOverall Dataset Class Distribution:")
print(f"Total samples across train and validation datasets: {total_samples}")
for class_name, count in combined_counts.items():
    percentage = (count / total_samples) * 100
    print(f"{class_name}: {count} ({percentage:.2f}%)")

# Print class distribution for training and validation datasets individually
print("\nTrain Dataset Class Distribution:")
for class_name, count in train_counts.items():
    percentage = (count / train_dataset.samples) * 100
    print(f"{class_name}: {count} ({percentage:.2f}%)")

print("\nValidation Dataset Class Distribution:")
for class_name, count in val_counts.items():
    percentage = (count / val_dataset.samples) * 100
    print(f"{class_name}: {count} ({percentage:.2f}%)")

# Print the class names and dataset shapes
print("\nSummary:")
print("Class Names:", list(train_dataset.class_indices.keys()))
print("Train Dataset Shape:", train_dataset.image_shape)
print("Validation Dataset Shape:", val_dataset.image_shape)

Found 9121 images belonging to 7 classes.
Found 2003 images belonging to 7 classes.

Overall Dataset Class Distribution:
Total samples across train and validation datasets: 11124
akiec: 562 (5.05%)
df: 519 (4.67%)
bkl: 1099 (9.88%)
mel: 1113 (10.01%)
bcc: 603 (5.42%)
nv: 6705 (60.28%)
vasc: 523 (4.70%)

Train Dataset Class Distribution:
akiec: 497 (5.45%)
bcc: 500 (5.48%)
bkl: 879 (9.64%)
df: 496 (5.44%)
mel: 890 (9.76%)
nv: 5364 (58.81%)
vasc: 495 (5.43%)

Validation Dataset Class Distribution:
akiec: 65 (3.25%)
bcc: 103 (5.14%)
bkl: 220 (10.98%)
df: 23 (1.15%)
mel: 223 (11.13%)
nv: 1341 (66.95%)
vasc: 28 (1.40%)

Summary:
Class Names: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
Train Dataset Shape: (600, 450, 3)
Validation Dataset Shape: (600, 450, 3)


# Artificial Image Generation 
This snippet is meant to add augmented versions of photos from minority classes back into the minority class training data to help offset the inequality between the classes.

In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Define the path to your image directory
image_directory = "resources_augmented/Skin Cancer/train"

# Define the class names
class_names = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

# Define the number of additional samples to generate for each minority class
num_additional_samples = 1000

# Create an instance of ImageDataGenerator for data augmentation
data_gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Iterate over the minority classes
for class_name in class_names:
    # Define the path to the class directory
    class_directory = os.path.join(image_directory, class_name)
    
    # Get the list of image files in the class directory
    image_files = os.listdir(class_directory)
    
    # Check if the class is a minority class
    if len(image_files) < num_additional_samples:
        # Calculate the number of additional samples needed
        num_samples_to_generate = num_additional_samples - len(image_files)
        
        # Randomly select a subset of image files to augment
        selected_files = np.random.choice(image_files, size=num_samples_to_generate, replace=True)
        
        # Generate additional samples using data augmentation
        for image_file in selected_files:
            # Load the image
            image_path = os.path.join(class_directory, image_file)
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(600, 450))
            image = tf.keras.preprocessing.image.img_to_array(image)
            image = tf.expand_dims(image, axis=0)
            
            # Generate augmented samples
            augmented_images = data_gen.flow(image, batch_size=1, save_to_dir=class_directory,
                                             save_prefix='augmented', save_format='jpg')
            
            # Generate one augmented sample
            next(augmented_images)
        
        print(f"Generated {num_samples_to_generate} additional samples for class {class_name}")
    else:
        print(f"Class {class_name} has sufficient samples. No augmentation needed.")

print("Data augmentation completed.")

# Get class distribution and report after data augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255  # Normalize pixel values
)

train_dataset = train_datagen.flow_from_directory(
    directory=image_directory,  # Use the 'train' directory
    target_size=(600, 450),
    batch_size=32,
    class_mode="categorical",
    shuffle=True,
    seed=42
)

def get_class_counts(dataset):
    class_labels = list(dataset.class_indices.keys())
    class_counts = dict(zip(class_labels, [0] * len(class_labels)))
    for _, labels in dataset:
        for label in labels:
            class_counts[class_labels[int(label.argmax())]] += 1
        if sum(class_counts.values()) >= dataset.samples:
            break
    return class_counts

train_counts = get_class_counts(train_dataset)

print("\nTrain Dataset Class Distribution after Augmentation:")
for class_name, count in train_counts.items():
    percentage = (count / train_dataset.samples) * 100
    print(f"{class_name}: {count} ({percentage:.2f}%)")

Generated 32 additional samples for class akiec
Generated 13 additional samples for class bcc
Generated 1 additional samples for class bkl
Generated 31 additional samples for class df
Generated 2 additional samples for class mel
Class nv has sufficient samples. No augmentation needed.
Generated 37 additional samples for class vasc
Data augmentation completed.
Found 11356 images belonging to 7 classes.

Train Dataset Class Distribution after Augmentation:
akiec: 998 (8.79%)
bcc: 999 (8.80%)
bkl: 1000 (8.81%)
df: 998 (8.79%)
mel: 1000 (8.81%)
nv: 5364 (47.23%)
vasc: 997 (8.78%)


# Continuous Testing 
This section contains a conglomerated model run. It was created so that the models could be tested overnight. I'm going to bed now.

In [5]:
# Importing Dependencies
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import CSVLogger

import csv
import os

from sklearn.utils import class_weight

# ///////////////////////////////////////////////////////////////////////////////////////////
# CHECK RUN NUMBER BEFORE RUNNING
# Run Number - use to create new directory or add to existing directory
run_number = 12
run_dir = f"run{run_number}"
os.makedirs(run_dir, exist_ok=True)

# ///////////////////////////////////////////////////////////////////////////////////////////


# Creating a CSV file to store the model results
csv_file = "model_results.csv"
fieldnames = [
    "architecture",
    "optimizer",
    "loss",
    "accuracy",
    "precision",
    "recall",
    "auc",
]

# Write the header to the CSV file. Data is written to the file after model_result
with open(csv_file, mode="w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()

batch_size = 32
architectures = ["InceptionV3"]  # Options: "VGG16", "ResNet50", "InceptionV3"
optimizers = ["Adam"]  # Options: "Adam", "RMSprop", "SGD"

# Create directory to store models
models_dir = f"{run_dir}/models"
os.makedirs(models_dir, exist_ok=True)

# Initialize model_results to store the evaluation results
model_results = []

for architecture in architectures:
    # Set the input shape and preprocessing function based on the selected architecture
    if architecture == "VGG16" :
        input_shape = (224, 224, 3)
        preprocessing_function = tf.keras.applications.vgg16.preprocess_input
    elif architecture == "ResNet50":
        input_shape = (224, 224, 3)
        preprocessing_function = tf.keras.applications.resnet50.preprocess_input
    elif architecture == "InceptionV3":
        input_shape = (299, 299, 3)
        preprocessing_function = tf.keras.applications.inception_v3.preprocess_input

    # Load and preprocess data using tf.keras.preprocessing
    # This adds data augmentation to the training dataset
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocessing_function,
        # rotation_range=20,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        # horizontal_flip=True,
    )

    train_dataset = train_datagen.flow_from_directory(
        "resources_augmented/Skin Cancer/train",
        target_size=input_shape[:2],
        batch_size=batch_size,
        class_mode="categorical",
    )

    
    val_datagen = ImageDataGenerator(preprocessing_function=preprocessing_function)

    # Define the validation dataset without data augmentation
    val_dataset = val_datagen.flow_from_directory(
        "resources_augmented/Skin Cancer/val",
        target_size=input_shape[:2],
        batch_size=batch_size,
        class_mode="categorical",
    )

    # Define the pre-trained model architecture. Will select proper model based on current 'architecture' in for loop
    if architecture == "VGG16":
        base_model = VGG16(
            weights="imagenet", include_top=False, input_shape=input_shape
        )
    elif architecture == "ResNet50":
        base_model = ResNet50(
            weights="imagenet", include_top=False, input_shape=input_shape
        )
    elif architecture == "InceptionV3":
        base_model = InceptionV3(
            weights="imagenet", include_top=False, input_shape=input_shape
        )

    # Freeze the layers of the pre-trained model
    for layer in base_model.layers:
        layer.trainable = False

    # Get the number of unique classes from the train_dataset
    num_classes = len(train_dataset.class_indices)

   # Add custom layers on top of the pre-trained model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(2048, activation="relu")(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation="softmax")(x)

    # Create the final model
    model = Model(inputs=base_model.input, outputs=predictions)

    # Step through and adjust for each optimizer
    for optimizer_name in optimizers:
        # Define the optimizer
        if optimizer_name == "Adam":
            optimizer = Adam(learning_rate=0.001)
        elif optimizer_name == "RMSprop":
            optimizer = RMSprop(learning_rate=0.001)
        elif optimizer_name == "SGD":
            optimizer = SGD(learning_rate=0.001, momentum=0.9)

        # Create learning rate scheduler that monitors validation loss
        lr_scheduler = ReduceLROnPlateau(
            monitor="val_loss", factor=0.1, patience=5, verbose=1
        )

       
       # Compile the model
        model.compile(
            optimizer=optimizer,
            loss="categorical_crossentropy",
            metrics=["accuracy", Precision(), Recall(), AUC()],
        )
        # Create a TensorBoard callback with a separate log directory for each model and optimizer
        tensorboard_callback = TensorBoard(
            log_dir=f"./{run_dir}/logs/{architecture}_{optimizer_name}",
            histogram_freq=1,
        )

        # Create an EarlyStopping callback to prevent overfitting
        early_stopping = EarlyStopping(
            monitor="val_loss", patience=5, restore_best_weights=True
        )

        # Create a ModelCheckpoint callback to save the best model weights
        checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(
                models_dir, f"model_{architecture}_{optimizer_name}.weights.h5"
            ),
            save_weights_only=True,
            save_best_only=True,
            monitor="val_loss",
            verbose=1,
        )

        # Create a CSVLogger callback with keras
        csv_logger = CSVLogger(
            os.path.join(
                models_dir, f"model_{architecture}_{optimizer_name}_training.csv"
            )
        )

        # Calculate class weights using sklearn's class_weight
        class_weights = class_weight.compute_class_weight(
            'balanced',
            classes=np.unique(train_dataset.classes),
            y=train_dataset.classes
        )
        class_weight_dict = dict(enumerate(class_weights))
      
        # Train the model with the checkpoint callback and weights
        epochs = 10
        history = model.fit(
            train_dataset,
            steps_per_epoch=train_dataset.samples // batch_size,
            validation_data=val_dataset,
            validation_steps=val_dataset.samples // batch_size,
            epochs=epochs,
            class_weight=class_weight_dict,
            callbacks=[
                tensorboard_callback,
                lr_scheduler,
                checkpoint_callback,
                csv_logger,
            ],
        )

        # Save the optimizer state to avoid retraining the model
        model.save(
            os.path.join(models_dir, f"model_{architecture}_{optimizer_name}.h5")
        )

         # Evaluate the model on the validation set and print the results
        loss, accuracy, precision, recall, auc = model.evaluate(val_dataset)
        print(f"Model: {architecture}, Optimizer: {optimizer_name}")
        print(f"Validation Loss: {loss:.4f}")
        print(f"Validation Accuracy: {accuracy:.4f}")
        print(f"Validation Precision: {precision:.4f}")
        print(f"Validation Recall: {recall:.4f}")
        print(f"Validation AUC-ROC: {auc:.4f}")

        # Append the model results to the list
        model_result = {
            "architecture": architecture,
            "optimizer": optimizer_name,
            "loss": loss,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "auc": auc,
        }
        model_results.append(model_result)

        # Write the current model result to the CSV file
        with open(csv_file, mode="a", newline="") as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writerow(model_result)

        # Save the model
        model.save(
            os.path.join(models_dir, f"model_{architecture}_{optimizer_name}.h5")
        )

Found 11356 images belonging to 7 classes.
Found 2003 images belonging to 7 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 820ms/step - accuracy: 0.4800 - auc: 0.8177 - loss: 1.7632 - precision: 0.5620 - recall: 0.3554
Epoch 1: val_loss improved from inf to 0.87756, saving model to run12/models\model_InceptionV3_Adam.weights.h5
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 977ms/step - accuracy: 0.4802 - auc: 0.8179 - loss: 1.7620 - precision: 0.5623 - recall: 0.3556 - val_accuracy: 0.6910 - val_auc: 0.9394 - val_loss: 0.8776 - val_precision: 0.8660 - val_recall: 0.5181 - learning_rate: 0.0010
Epoch 2/10
[1m  1/354[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:25[0m 922ms/step - accuracy: 0.5625 - auc: 0.8911 - loss: 1.4376 - precision: 0.7500 - recall: 0.4688

  self.gen.throw(typ, value, traceback)



Epoch 2: val_loss improved from 0.87756 to 0.61393, saving model to run12/models\model_InceptionV3_Adam.weights.h5
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5625 - auc: 0.8911 - loss: 0.7208 - precision: 0.7500 - recall: 0.4688 - val_accuracy: 0.6316 - val_auc: 0.8758 - val_loss: 0.6139 - val_precision: 0.8182 - val_recall: 0.4737 - learning_rate: 0.0010
Epoch 3/10
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 848ms/step - accuracy: 0.6399 - auc: 0.9202 - loss: 1.0440 - precision: 0.7745 - recall: 0.4977
Epoch 3: val_loss did not improve from 0.61393
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 996ms/step - accuracy: 0.6399 - auc: 0.9202 - loss: 1.0440 - precision: 0.7745 - recall: 0.4977 - val_accuracy: 0.6638 - val_auc: 0.9364 - val_loss: 0.8869 - val_precision: 0.8213 - val_recall: 0.5166 - learning_rate: 0.0010
Epoch 4/10
[1m  1/354[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:01[0m 855ms/step 



[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 874ms/step - accuracy: 0.6519 - auc: 0.9316 - loss: 0.9015 - precision: 0.7993 - recall: 0.5405




Model: InceptionV3, Optimizer: Adam
Validation Loss: 0.9016
Validation Accuracy: 0.6570
Validation Precision: 0.9313
Validation Recall: 0.7994
Validation AUC-ROC: 0.5412


# Evaluating and Visualizing the Data

In [6]:
# Importing Dependencies
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import csv
import os

from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt
import numpy as np
import os

model_results = "model_results.csv"

# Run Number - use to create new directory or add to existing directory
run_number = 12
run_dir = f"run{run_number}"
os.makedirs(run_dir, exist_ok=True)


architectures = ["InceptionV3"] # Options: "VGG16", "ResNet50", "InceptionV3"
optimizers = ["Adam"] # Options: "Adam", "RMSprop", "SGD"

batch_size = 32

# Define the preprocessing function based on the architectures you want to visualize
architectures_to_visualize = ["InceptionV3"]  # Specify the architectures you want to visualize
preprocessing_functions = {
    "VGG16": tf.keras.applications.vgg16.preprocess_input,
    "ResNet50": tf.keras.applications.resnet50.preprocess_input,
    "InceptionV3": tf.keras.applications.inception_v3.preprocess_input
}


# Create a directory to store the visualization results
visualizations_dir = f"{run_dir}/visualizations"
os.makedirs(visualizations_dir, exist_ok=True)

#

# Iterate over each model architecture and optimizer
for architecture in architectures:
    # Set the preprocessing function based on the architecture
    preprocessing_function = preprocessing_functions[architecture]

    # Set the input shape and preprocessing function based on the selected architecture
    if architecture == "VGG16":
        input_shape = (224, 224, 3)
        preprocessing_function = tf.keras.applications.vgg16.preprocess_input
    if architecture == "ResNet50":
        input_shape = (224, 224, 3)
        preprocessing_function = tf.keras.applications.resnet50.preprocess_input
    elif architecture == "InceptionV3":
        input_shape = (299, 299, 3)
        preprocessing_function = tf.keras.applications.inception_v3.preprocess_input

    # Redefine the validation dataset with the corresponding preprocessing function
    val_datagen = ImageDataGenerator(preprocessing_function=preprocessing_function)
    val_dataset = val_datagen.flow_from_directory(
        "Resources/Skin Cancer/val",
        target_size=input_shape[:2],
        batch_size=batch_size,
        class_mode="categorical",
    )

    # Get the class names from the redefined val_dataset
    class_names = list(val_dataset.class_indices.keys())


    for optimizer_name in optimizers:
        # Load the trained model
        model = load_model(os.path.join(f"{run_dir}/models/", f"model_{architecture}_{optimizer_name}.h5"))

        # Make predictions on the validation dataset using the redefined val_dataset
        y_pred = model.predict(val_dataset)
        y_pred_classes = np.argmax(y_pred, axis=1)

        # Get the true labels of the validation dataset
        y_true = val_dataset.classes

        # Compute the confusion matrix
        cm = confusion_matrix(y_true, y_pred_classes)
        print(f"Confusion Matrix for {architecture}_{optimizer_name}:")
        print(cm)

        # Save the confusion matrix as a CSV file
        cm_filename = f"confusion_matrix_{architecture}_{optimizer_name}.csv"
        np.savetxt(os.path.join(visualizations_dir, cm_filename), cm, delimiter=",")

        # Compute the classification report
        cr = classification_report(y_true, y_pred_classes, target_names=class_names)
        print(f"Classification Report for {architecture}_{optimizer_name}:")
        print(cr)

        # Save the classification report as a text file
        cr_filename = f"classification_report_{architecture}_{optimizer_name}.txt"
        with open(os.path.join(visualizations_dir, cr_filename), "w") as file:
            file.write(cr)

        # Compute the ROC curve and AUC for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(len(class_names)):
            fpr[i], tpr[i], _ = roc_curve(y_true == i, y_pred[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Plot the ROC curve for each class
        plt.figure(figsize=(8, 6))
        for i in range(len(class_names)):
            plt.plot(
                fpr[i],
                tpr[i],
                label=f"ROC curve of class {class_names[i]} (AUC = {roc_auc[i]:.2f})",
            )
        plt.plot([0, 1], [0, 1], "k--")
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title(f"ROC Curve for {architecture}_{optimizer_name}")
        plt.legend(loc="lower right")
        plt.tight_layout()
        plt.savefig(os.path.join(visualizations_dir, f"roc_curve_{architecture}_{optimizer_name}.png"))
        plt.close()

        # Visualize the model's predictions on a subset of the validation data
        subset_size = 10
        subset_indices = np.random.choice(len(val_dataset), subset_size, replace=False)
        subset_images = []
        subset_labels = []
        val_dataset.reset()  # Reset the validation dataset iterator
        for i in range(len(val_dataset)):
            if i in subset_indices:
                image_batch, label_batch = next(val_dataset)
                for image, label in zip(image_batch, label_batch):
                    subset_images.append(image)
                    subset_labels.append(label)

        subset_images = np.array(subset_images)
        subset_labels = np.array(subset_labels)

        subset_preds = model.predict(subset_images)
        subset_pred_classes = np.argmax(subset_preds, axis=1)

        # Generate the visualization plot
        plt.figure(figsize=(15, 10))
        for i in range(subset_size):
            plt.subplot(2, 5, i + 1)
            plt.imshow(subset_images[i])
            plt.title(
                f"True: {class_names[np.argmax(subset_labels[i])]}\\nPred: {class_names[subset_pred_classes[i]]}"
            )
            plt.axis("off")
        plt.tight_layout()
        plt.savefig(os.path.join(visualizations_dir, f"predictions_{architecture}_{optimizer_name}.png"))
        plt.close()


# Read the CSV file and store the model results
model_results = []
with open("model_results.csv", "r") as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        model_results.append(row)

# Create a bar plot for validation precision
plt.figure(figsize=(10, 6))
plt.bar(range(len(model_results)), [float(result["precision"]) for result in model_results])
plt.xticks(
    range(len(model_results)),
    [f"{result['architecture']}_{result['optimizer']}" for result in model_results],
    rotation=45,
)
plt.xlabel("Model")
plt.ylabel("Validation Precision")
plt.title("Validation Precision for Different Models")
plt.tight_layout()
plt.savefig(os.path.join(visualizations_dir, "validation_precision_comparison.png"))
plt.close()

# Create a bar plot for validation recall
plt.figure(figsize=(10, 6))
plt.bar(range(len(model_results)), [float(result["recall"]) for result in model_results])
plt.xticks(
    range(len(model_results)),
    [f"{result['architecture']}_{result['optimizer']}" for result in model_results],
    rotation=45,
)
plt.xlabel("Model")
plt.ylabel("Validation Recall")
plt.title("Validation Recall for Different Models")
plt.tight_layout()
plt.savefig(os.path.join(visualizations_dir, "validation_recall_comparison.png"))
plt.close()

# Create a bar plot for validation AUC-ROC
plt.figure(figsize=(10, 6))
plt.bar(range(len(model_results)), [float(result["auc"]) for result in model_results])
plt.xticks(
    range(len(model_results)),
    [f"{result['architecture']}_{result['optimizer']}" for result in model_results],
    rotation=45,
)
plt.xlabel("Model")
plt.ylabel("Validation AUC-ROC")
plt.title("Validation AUC-ROC for Different Models")
plt.tight_layout()
plt.savefig(os.path.join(visualizations_dir, "validation_auc_comparison.png"))
plt.close()

Found 2003 images belonging to 7 classes.




[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 896ms/step
Confusion Matrix for InceptionV3_Adam:
[[  4  10   6   1  12  31   1]
 [  3  13  11   1  24  51   0]
 [ 12  26  33   2  41 102   4]
 [  2   3   3   0   5   9   1]
 [ 11  14  21   0  48 123   6]
 [ 91 103 165  17 267 676  22]
 [  2   2   4   0   4  16   0]]
Classification Report for InceptionV3_Adam:
              precision    recall  f1-score   support

       akiec       0.03      0.06      0.04        65
         bcc       0.08      0.13      0.09       103
         bkl       0.14      0.15      0.14       220
          df       0.00      0.00      0.00        23
         mel       0.12      0.22      0.15       223
          nv       0.67      0.50      0.58      1341
        vasc       0.00      0.00      0.00        28

    accuracy                           0.39      2003
   macro avg       0.15      0.15      0.14      2003
weighted avg       0.48      0.39      0.42      2003

[1m10/10[0m [32m━━━━━━━

