In [1]:
import numpy as np
import pandas as pd
import os
import cv2 as cv
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
path_1 = 'dataset/HAM10000_images_part_1'
path_2 = 'dataset/HAM10000_images_part_2'
meta_path = 'dataset/HAM10000_metadata.csv'
classes = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}
encoder = {
    'nv': 0,
    'mel': 1,
    'bkl': 2,
    'bcc': 3,
    'akiec': 4,
    'vasc': 5,
    'df': 6
}
no_classes = len(classes)
meta = pd.read_csv(meta_path)
meta.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [3]:
import tensorflow as tf
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Define data augmentation layers
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.3),
    tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.RandomContrast(0.8),
])

# image = cv2.imread(image_path)
# image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image_resized = cv2.resize(image_rgb, (224, 224))
# plt.figure(figsize=(15, 10))
# for i in range(50):
#     augmented_image = data_augmentation(np.expand_dims(image_resized, 0))
#     augmented_image_np = np.array(augmented_image[0], dtype=np.uint8)
#     plt.subplot(5, 10, i + 1)
#     plt.imshow(augmented_image[0]/255)
#     plt.axis('off')

In [4]:

images_data = []
labels = []
missed = []

for i in range(len(meta)):
    row = meta.iloc[i]
    image_name = row['image_id']
    image_class = row['dx']
    
    image_path = os.path.join(path_1, image_name + '.jpg') if os.path.exists(os.path.join(path_1, image_name + '.jpg')) else os.path.join(path_2, image_name + '.jpg')
    
    try:
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image) / 255.0
    except Exception as e:
        missed.append((image_name, str(e)))
        continue
    
    images_data.append(image)
    labels.append(encoder[image_class])

    # Data augmentation
    if image_class != 'nv':
        for _ in range(3):
            augmented_image = data_augmentation(np.expand_dims(image, 0))
            images_data.append(augmented_image[0])
            labels.append(encoder[image_class])

# Convert lists to NumPy arrays
images_data = np.array(images_data)
labels = np.array(labels)

print(f"Processed {len(images_data)} images with {len(missed)} misses.")

# Optionally, split into training and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images_data, labels, test_size=0.2, random_state=42)

# Create tf.data.Dataset objects
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32).prefetch(tf.data.AUTOTUNE)

Processed 19945 images with 0 misses.


In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(no_classes, activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)      

In [6]:
y_train = tf.keras.utils.to_categorical(y_train, no_classes)
y_test = tf.keras.utils.to_categorical(y_test, no_classes)

: 

In [7]:
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

In [None]:
model.save(f'diff_weights/custom_model2_{test_accuracy:.3f}_.h5')

In [None]:

# Plot training & validation accuracy values
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

# Plot training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred_classes)

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=classes.keys(), yticklabels=classes.keys())
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Print the classification report
# class_report = classification_report(y_true, y_pred_classes, target_names=classes.keys())
# print(class_report)


In [None]:
plt.figure(figsize=(15,15))
for i in range(no_classes):
    plt.subplot(4,2,i+1)
    true_positives = conf_matrix[i,i]
    true_negatives = np.sum(conf_matrix) - np.sum(conf_matrix[i,:]) - np.sum(conf_matrix[:,i]) + true_positives
    false_positives = conf_matrix[i,:].sum() - true_positives
    false_negatives = conf_matrix[:,i].sum() - true_positives
    mat = [[true_positives,true_negatives],[false_positives,false_negatives]]
    sns.set_style('darkgrid') 
    plt.gca().set_frame_on(True)
    sns.heatmap(mat, annot=True, fmt='d', cmap='Blues', xticklabels=['Positive','Negative'], yticklabels=['True','False'])
plt.tight_layout()
plt.show()

In [None]:
confusion_matrix = confusion_matrix[1:,1:]
plt.figure(figsize=(15,15))
for i in range(no_classes):
    plt.subplot(4,2,i+1)
    true_positives = conf_matrix[i,i]
    true_negatives = np.sum(conf_matrix) - np.sum(conf_matrix[i,:]) - np.sum(conf_matrix[:,i]) + true_positives
    false_positives = conf_matrix[i,:].sum() - true_positives
    false_negatives = conf_matrix[:,i].sum() - true_positives
    mat = [[true_positives,true_negatives],[false_positives,false_negatives]]
    sns.set_style('darkgrid') 
    plt.gca().set_frame_on(True)
    sns.heatmap(mat, annot=True, fmt='d', cmap='Blues', xticklabels=['Positive','Negative'], yticklabels=['True','False'])
plt.tight_layout()
plt.show()