In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder, label_binarize
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, Callback
from itertools import cycle
import tensorflow as tf


In [2]:
# Paths
train_dir = r"C:\Users\User\Documents\GUITAR\dataset\HAM10000\train"0
val_dir = r"C:\Users\User\Documents\GUITAR\dataset\HAM10000\val"
test_dir = r"C:\Users\User\Documents\GUITAR\dataset\HAM10000\test"

In [3]:
# Constants
SIZE = 150
BATCH_SIZE = 128
EPOCHS = 100
NUM_CLASSES = len(os.listdir(train_dir))

In [4]:
# Create ImageDataGenerator instances
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [5]:
# Generate data from directories
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(SIZE, SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(SIZE, SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(SIZE, SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 8018 images belonging to 7 classes.
Found 2005 images belonging to 7 classes.
Found 1515 images belonging to 7 classes.


In [6]:
# Model definition
model = Sequential([
    Conv2D(256, (3, 3), activation="relu", input_shape=(SIZE, SIZE, 3)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),
    Dense(32),
    Dense(NUM_CLASSES, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['acc'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 256)     7168      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 256)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 74, 74, 256)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 128)       295040    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 128)      0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 36, 36, 128)       0

In [7]:
# Metrics callback for precision, recall, F1 score
class MetricsCallback(Callback):
    def __init__(self, val_generator):
        super(MetricsCallback, self).__init__()
        self.val_generator = val_generator

    def on_epoch_end(self, epoch, logs=None):
        y_true = self.val_generator.classes
        y_pred = self.model.predict(self.val_generator)
        y_pred_classes = np.argmax(y_pred, axis=1)

        precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred_classes, average='weighted')

        val_loss = logs['val_loss']
        val_accuracy = logs.get('val_accuracy', logs.get('val_acc'))  # Handle different versions

        print(f'Epoch {epoch + 1} - '
              f'Validation Loss: {val_loss:.4f}, '
              f'Validation Accuracy: {val_accuracy:.4f}, '
              f'Validation Precision: {precision:.4f}, '
              f'Validation Recall: {recall:.4f}, '
              f'Validation F1 Score: {f1_score:.4f}')

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Metrics callback
metrics_callback = MetricsCallback(val_generator)

In [None]:
# Train the model
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=[early_stopping, metrics_callback],
    verbose=2
)

Epoch 1/100
Epoch 1 - Validation Loss: 1.1335, Validation Accuracy: 0.6688, Validation Precision: 0.4473, Validation Recall: 0.6688, Validation F1 Score: 0.5361
63/63 - 71s - loss: 1.1811 - acc: 0.6598 - val_loss: 1.1335 - val_acc: 0.6688 - 71s/epoch - 1s/step
Epoch 2/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2 - Validation Loss: 1.1688, Validation Accuracy: 0.6693, Validation Precision: 0.5424, Validation Recall: 0.6693, Validation F1 Score: 0.5589
63/63 - 64s - loss: 1.0096 - acc: 0.6697 - val_loss: 1.1688 - val_acc: 0.6693 - 64s/epoch - 1s/step
Epoch 3/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3 - Validation Loss: 1.0107, Validation Accuracy: 0.6663, Validation Precision: 0.5529, Validation Recall: 0.6663, Validation F1 Score: 0.5837
63/63 - 64s - loss: 0.9693 - acc: 0.6675 - val_loss: 1.0107 - val_acc: 0.6663 - 64s/epoch - 1s/step
Epoch 4/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4 - Validation Loss: 0.9204, Validation Accuracy: 0.6733, Validation Precision: 0.5541, Validation Recall: 0.6733, Validation F1 Score: 0.5738
63/63 - 64s - loss: 0.9278 - acc: 0.6716 - val_loss: 0.9204 - val_acc: 0.6733 - 64s/epoch - 1s/step
Epoch 5/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5 - Validation Loss: 0.8712, Validation Accuracy: 0.6843, Validation Precision: 0.5703, Validation Recall: 0.6843, Validation F1 Score: 0.6019
63/63 - 64s - loss: 0.9032 - acc: 0.6676 - val_loss: 0.8712 - val_acc: 0.6843 - 64s/epoch - 1s/step
Epoch 6/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6 - Validation Loss: 0.8774, Validation Accuracy: 0.6728, Validation Precision: 0.5838, Validation Recall: 0.6728, Validation F1 Score: 0.5615
63/63 - 64s - loss: 0.9093 - acc: 0.6707 - val_loss: 0.8774 - val_acc: 0.6728 - 64s/epoch - 1s/step
Epoch 7/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7 - Validation Loss: 0.8956, Validation Accuracy: 0.6768, Validation Precision: 0.6112, Validation Recall: 0.6768, Validation F1 Score: 0.6234
63/63 - 65s - loss: 0.8655 - acc: 0.6800 - val_loss: 0.8956 - val_acc: 0.6768 - 65s/epoch - 1s/step
Epoch 8/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8 - Validation Loss: 0.8537, Validation Accuracy: 0.6843, Validation Precision: 0.6124, Validation Recall: 0.6843, Validation F1 Score: 0.6104
63/63 - 66s - loss: 0.8644 - acc: 0.6802 - val_loss: 0.8537 - val_acc: 0.6843 - 66s/epoch - 1s/step
Epoch 9/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9 - Validation Loss: 0.8658, Validation Accuracy: 0.6878, Validation Precision: 0.6494, Validation Recall: 0.6878, Validation F1 Score: 0.6464
63/63 - 66s - loss: 0.8531 - acc: 0.6833 - val_loss: 0.8658 - val_acc: 0.6878 - 66s/epoch - 1s/step
Epoch 10/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10 - Validation Loss: 0.8366, Validation Accuracy: 0.6888, Validation Precision: 0.6214, Validation Recall: 0.6888, Validation F1 Score: 0.6304
63/63 - 87s - loss: 0.8464 - acc: 0.6877 - val_loss: 0.8366 - val_acc: 0.6888 - 87s/epoch - 1s/step
Epoch 11/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 11 - Validation Loss: 0.8280, Validation Accuracy: 0.6928, Validation Precision: 0.6495, Validation Recall: 0.6928, Validation F1 Score: 0.6597
63/63 - 395s - loss: 0.8363 - acc: 0.6883 - val_loss: 0.8280 - val_acc: 0.6928 - 395s/epoch - 6s/step
Epoch 12/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 12 - Validation Loss: 0.8251, Validation Accuracy: 0.6898, Validation Precision: 0.6312, Validation Recall: 0.6898, Validation F1 Score: 0.6208
63/63 - 468s - loss: 0.8423 - acc: 0.6949 - val_loss: 0.8251 - val_acc: 0.6898 - 468s/epoch - 7s/step
Epoch 13/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 13 - Validation Loss: 0.8044, Validation Accuracy: 0.7022, Validation Precision: 0.6452, Validation Recall: 0.7022, Validation F1 Score: 0.6306
63/63 - 484s - loss: 0.8216 - acc: 0.6906 - val_loss: 0.8044 - val_acc: 0.7022 - 484s/epoch - 8s/step
Epoch 14/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 14 - Validation Loss: 0.8099, Validation Accuracy: 0.6918, Validation Precision: 0.6435, Validation Recall: 0.6918, Validation F1 Score: 0.6417
63/63 - 469s - loss: 0.8042 - acc: 0.6958 - val_loss: 0.8099 - val_acc: 0.6918 - 469s/epoch - 7s/step
Epoch 15/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 15 - Validation Loss: 0.8131, Validation Accuracy: 0.7047, Validation Precision: 0.6581, Validation Recall: 0.7047, Validation F1 Score: 0.6668
63/63 - 482s - loss: 0.8024 - acc: 0.6957 - val_loss: 0.8131 - val_acc: 0.7047 - 482s/epoch - 8s/step
Epoch 16/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16 - Validation Loss: 0.8080, Validation Accuracy: 0.7252, Validation Precision: 0.6828, Validation Recall: 0.7252, Validation F1 Score: 0.6896
63/63 - 476s - loss: 0.8269 - acc: 0.6947 - val_loss: 0.8080 - val_acc: 0.7252 - 476s/epoch - 8s/step
Epoch 17/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 17 - Validation Loss: 0.7845, Validation Accuracy: 0.7137, Validation Precision: 0.6637, Validation Recall: 0.7137, Validation F1 Score: 0.6612
63/63 - 475s - loss: 0.8045 - acc: 0.7017 - val_loss: 0.7845 - val_acc: 0.7137 - 475s/epoch - 8s/step
Epoch 18/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 18 - Validation Loss: 0.8100, Validation Accuracy: 0.7057, Validation Precision: 0.6634, Validation Recall: 0.7057, Validation F1 Score: 0.6678
63/63 - 478s - loss: 0.7864 - acc: 0.7077 - val_loss: 0.8100 - val_acc: 0.7057 - 478s/epoch - 8s/step
Epoch 19/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 19 - Validation Loss: 0.8135, Validation Accuracy: 0.7082, Validation Precision: 0.6659, Validation Recall: 0.7082, Validation F1 Score: 0.6782
63/63 - 395s - loss: 0.7790 - acc: 0.7110 - val_loss: 0.8135 - val_acc: 0.7082 - 395s/epoch - 6s/step
Epoch 20/100


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 20 - Validation Loss: 0.7787, Validation Accuracy: 0.7122, Validation Precision: 0.6664, Validation Recall: 0.7122, Validation F1 Score: 0.6604
63/63 - 369s - loss: 0.7704 - acc: 0.7139 - val_loss: 0.7787 - val_acc: 0.7122 - 369s/epoch - 6s/step
Epoch 21/100


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Evaluate the model on the test set
test_score = model.evaluate(test_generator)
print('Test accuracy:', test_score[1])

In [None]:
# Plot the training and validation accuracy and loss at each epoch
fig, axs = plt.subplots(1, 2, figsize=(12, 5))

# Plot Training & Validation Loss
axs[0].plot(history.history['loss'], label='Train Loss')
axs[0].plot(history.history['val_loss'], label='Validation Loss')
axs[0].legend()
axs[0].set_title('Training & Validation Loss')

# Plot Training & Validation Accuracy
axs[1].plot(history.history['acc'], label='Train Accuracy')
axs[1].plot(history.history['val_acc'], label='Validation Accuracy')
axs[1].legend()
axs[1].set_title('Training & Validation Accuracy')

plt.tight_layout()
plt.show()

In [None]:
# Confusion matrix for validation data
y_val_true = val_generator.classes
y_val_pred = model.predict(val_generator)
y_val_pred_classes = np.argmax(y_val_pred, axis=1)
cm_val = confusion_matrix(y_val_true, y_val_pred_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(cm_val, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - Validation Data')
plt.show()

In [None]:
# AUC-ROC curve for validation data
y_val_true_binary = label_binarize(y_val_true, classes=np.unique(y_val_true))
y_val_pred_binary = label_binarize(y_val_pred_classes, classes=np.unique(y_val_true))

fpr_val = dict()
tpr_val = dict()
roc_auc_val = dict()

for i in range(NUM_CLASSES):
    fpr_val[i], tpr_val[i], _ = roc_curve(y_val_true_binary[:, i], y_val_pred_binary[:, i])
    roc_auc_val[i] = roc_auc_score(y_val_true_binary[:, i], y_val_pred_binary[:, i])

plt.figure(figsize=(8, 8))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])

for i, color in zip(range(NUM_CLASSES), colors):
    plt.plot(fpr_val[i], tpr_val[i], color=color, lw=2, label=f'Class {i} (AUC = {roc_auc_val[i]:.2f})')

plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Validation Data')
plt.legend(loc='lower right')
plt.show()