In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2
from keras import backend as K
from keras.layers import Layer,InputSpec
import keras.layers as kl
from glob import glob
from sklearn.metrics import roc_curve, auc
from keras.preprocessing import image
from tensorflow.keras.models import Sequential
from sklearn.metrics import roc_auc_score
from tensorflow.keras import callbacks 
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from  matplotlib import pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.layers import concatenate,Dense, Conv2D, MaxPooling2D, Flatten,Input,Activation,add,AveragePooling2D,BatchNormalization,Dropout
%matplotlib inline
import shutil
from sklearn.metrics import  precision_score, recall_score, accuracy_score,classification_report ,confusion_matrix
from tensorflow.python.platform import build_info as tf_build_info
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# Read the HAM10000 metadata table (one row per image) and preview the first rows.
data_pd = pd.read_csv(filepath_or_buffer='../HAM10000_metadata')
data_pd.head()

In [None]:
# Names of the working folders (relative to the notebook) that will hold the
# per-class image copies for training and testing.
train_dir, test_dir = (os.path.join(folder) for folder in ('train_dir', 'test_dir'))

In [None]:
# Count the non-null entries of every column per lesion_id; after this,
# each column (including 'dx') holds the number of rows recorded for that lesion.
df_count = data_pd.groupby(by='lesion_id').count()
df_count.head()

In [None]:
# Keep only the lesions whose 'dx' count is exactly 1 (a single row in the
# metadata) and turn the lesion_id index back into a regular column.
df_count = df_count.loc[df_count['dx'] == 1].reset_index()

In [None]:
def duplicates(x):
    """Label a lesion id by whether it appears in the global `df_count` table.

    Parameters
    ----------
    x : a single lesion id.

    Returns
    -------
    str
        'no' when `x` is one of `df_count['lesion_id']`, otherwise 'duplicates'.

    Note: the id set is rebuilt from `df_count` on every call, so the result
    reflects whatever `df_count` is bound to at call time.
    """
    single_image_lesions = set(df_count['lesion_id'])
    return 'duplicates' if x not in single_image_lesions else 'no'

In [None]:
# Flag every metadata row: 'no' when its lesion_id is listed in df_count
# (the single-image lesions), 'duplicates' otherwise.
# A vectorised membership test is used instead of a row-wise .apply(), which
# rebuilt the whole lesion-id set once per row.
has_single_image = data_pd['lesion_id'].isin(set(df_count['lesion_id']))
data_pd['is_duplicate'] = np.where(has_single_image, 'no', 'duplicates')
data_pd.head()

In [None]:
# Rows of data_pd flagged 'no' in 'is_duplicate'.
# NOTE(review): this rebinds df_count, which previously held the per-lesion
# count table, to a row subset of data_pd.
df_count = data_pd.loc[data_pd['is_duplicate'] == 'no']

In [None]:
# Hold out 15% of the non-duplicated rows as the test set, stratified on the
# diagnosis column so every class keeps its proportion.
# random_state is fixed so that re-running the notebook reproduces the same
# split; without it the test ids change on every run while the image folders
# written by later cells stay on disk.
# `train` is not used by the cells visible here; train_df is rebuilt later from
# every row that is not in test_df.
train, test_df = train_test_split(
    df_count,
    test_size=0.15,
    stratify=df_count['dx'],
    random_state=42,
)

In [None]:
def identify_trainOrtest(x):
    """Tell whether an image id belongs to the held-out test table.

    Parameters
    ----------
    x : an image id; it is converted with str() before the lookup.

    Returns
    -------
    str
        'test' when str(x) is one of the global `test_df['image_id']` values,
        otherwise 'train'.
    """
    held_out_ids = set(test_df['image_id'])
    return 'test' if str(x) in held_out_ids else 'train'

# Creating train_df: every metadata row whose image_id is NOT in test_df
# (this includes the rows flagged as 'duplicates', which never enter test_df).
# The membership test is vectorised; the previous row-wise .apply() rebuilt the
# set of test ids once per row. astype(str) mirrors the str(x) conversion done
# by identify_trainOrtest.
is_test_image = data_pd['image_id'].astype(str).isin(set(test_df['image_id']))
data_pd['train_test_split'] = np.where(is_test_image, 'test', 'train')
train_df = data_pd[data_pd['train_test_split'] == 'train']
train_df.head()

In [None]:
# Preview the held-out test rows.
test_df.head()

In [None]:
# Image ids of the train and test rows, as plain Python lists
train_list = train_df['image_id'].tolist()
test_list = test_df['image_id'].tolist()

In [None]:
# Number of images assigned to the test set.
len(test_list)

In [None]:
# Number of images assigned to the training set (before augmentation).
len(train_list)

In [None]:
# Set the image_id as the index in data_pd so that the copy loops below can
# look up a row's 'dx' label with data_pd.loc[image_id, 'dx'].
# NOTE(review): this mutates data_pd in place; the 'image_id' column no longer
# exists afterwards, so this cell (and earlier cells that read
# data_pd['image_id']) presumably cannot be re-run without reloading data_pd.
data_pd.set_index('image_id', inplace=True)

In [30]:
# Create the two base folders. exist_ok=True keeps the cell re-runnable:
# os.mkdir raised FileExistsError as soon as the folders were already there.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [31]:
# Diagnosis codes used as class names; each one becomes a sub-folder of
# train_dir and test_dir.
targetnames = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

In [32]:
# One sub-folder per class under both base folders.
# os.makedirs(..., exist_ok=True) makes the cell safe to re-run (os.mkdir
# failed when a folder already existed), and os.path.join replaces the
# hand-built '/' concatenation.
for class_name in targetnames:
    for base_dir in (train_dir, test_dir):
        os.makedirs(os.path.join(base_dir, class_name), exist_ok=True)

In [33]:
# Copy every training image from ../images into train_dir/<dx label>/.
for image in train_list:
    # data_pd is indexed by image_id, so this reads the diagnosis for the image
    label = data_pd.loc[image, 'dx']
    file_name = image + '.jpg'

    shutil.copyfile(
        os.path.join('../images', file_name),         # source image
        os.path.join(train_dir, label, file_name),    # destination inside the class folder
    )

In [34]:
# Copy every test image from ../images into test_dir/<dx label>/.
images_root = '../images'
for image in test_list:
    file_name = image + '.jpg'
    # Class folder is chosen from the 'dx' value of the row indexed by this image id
    label = data_pd.loc[image, 'dx']

    source = os.path.join(images_root, file_name)
    target = os.path.join(test_dir, label, file_name)
    shutil.copyfile(source, target)

In [35]:
targetnames = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

# Target number of images per class folder once augmentation has finished.
aug_images = 8000
batch_size = 50

# Temporary base directory; flow_from_directory needs the images to live in a
# sub-folder of the directory it is pointed at.
aug_dir = 'aug_dir'
img_dir = os.path.join(aug_dir, 'img_dir')

# The augmentation settings are the same for every class, so the generator is
# created once instead of once per class.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=180,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# Augmenting images class by class; the augmented files are written straight
# into the class's training folder via save_to_dir.
for img_class in targetnames:
    class_dir = os.path.join('train_dir', img_class)

    # Start from a clean temporary directory: a leftover from an interrupted
    # run would otherwise make directory creation fail.
    shutil.rmtree(aug_dir, ignore_errors=True)
    os.makedirs(img_dir)

    try:
        # Copy the class's current images into the temporary folder
        img_list = os.listdir(class_dir)
        for file_name in img_list:
            shutil.copyfile(os.path.join(class_dir, file_name),
                            os.path.join(img_dir, file_name))

        num_files = len(img_list)
        num_missing = aug_images - num_files
        if num_files == 0 or num_missing <= 0:
            # Nothing to augment from, or the class already has enough images.
            continue

        aug_datagen = datagen.flow_from_directory(aug_dir,
                                                  save_to_dir=class_dir,
                                                  save_format='jpg',
                                                  target_size=(299, 299),
                                                  batch_size=batch_size)

        # Count the images actually produced instead of assuming every batch is
        # full: the iterator yields a short final batch on each pass over the
        # source images when their count is not a multiple of batch_size, so a
        # fixed number of batches falls short of the target.
        num_generated = 0
        while num_generated < num_missing:
            images, _ = next(aug_datagen)
            if len(images) == 0:
                break
            num_generated += len(images)
    finally:
        # Always delete the temporary directory, even if augmentation failed
        shutil.rmtree(aug_dir, ignore_errors=True)


Found 304 images belonging to 1 classes.
Found 488 images belonging to 1 classes.
Found 1033 images belonging to 1 classes.
Found 109 images belonging to 1 classes.
Found 1079 images belonging to 1 classes.
Found 6042 images belonging to 1 classes.
Found 132 images belonging to 1 classes.


In [None]:
# Folders read by the Keras directory iterators, and the mini-batch size used
# for training, validation and prediction.
train_path, test_path = 'train_dir', 'test_dir'
batch_size = 16

In [None]:
# Generator that applies only the InceptionResNetV2 input preprocessing to each
# image (no augmentation is configured here).
datagen=ImageDataGenerator(preprocessing_function=tf.keras.applications.inception_resnet_v2.preprocess_input)

In [None]:
image_size = 299

# Options shared by both iterators: square 299x299 inputs and the global batch size
flow_options = dict(target_size=(image_size, image_size), batch_size=batch_size)

print("\nTrain Batches: ")
# Training batches are shuffled
train_batches = datagen.flow_from_directory(directory=train_path, shuffle=True, **flow_options)

print("\nTest Batches: ")
# Test batches keep directory order (shuffle=False) so predictions line up
# with test_batches.classes
test_batches = datagen.flow_from_directory(test_path, shuffle=False, **flow_options)

In [None]:

# Full InceptionResNetV2 (including its ImageNet classification top) with
# ImageNet weights; every option is spelled out explicitly.
irv2_options = {
    "include_top": True,
    "weights": "imagenet",
    "input_tensor": None,
    "input_shape": None,
    "pooling": None,
    "classifier_activation": "softmax",
}
irv2 = tf.keras.applications.InceptionResNetV2(**irv2_options)

# Tap the output of the 28th layer counted from the end; the 27 layers that
# follow it are left out of the model built below.
conv = irv2.layers[-28].output


In [None]:


# Append a ReLU followed by 50% dropout to the tapped backbone output.
# NOTE: this cell rebinds `conv` from its own previous value, so running it
# twice stacks the two layers twice.
relu_layer = Activation('relu')
dropout_layer = Dropout(0.5)
conv = dropout_layer(relu_layer(conv))


In [None]:

# Classification head: flatten the feature map and map it to 7 softmax outputs,
# then wire the backbone input to that head.
flattened = Flatten()(conv)
output = Dense(units=7, activation='softmax')(flattened)
model = Model(inputs=irv2.input, outputs=output)

In [42]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 149, 149, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 149, 149, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [None]:
# Adam with learning rate 0.01 and epsilon 0.1.
opt1 = tf.keras.optimizers.Adam(learning_rate=0.01, epsilon=0.1)

# Categorical cross-entropy over the softmax outputs, tracking accuracy.
compile_options = {
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(optimizer=opt1, **compile_options)

In [None]:
# Loss weight per class index: every class weighs 1.0 except index 4, which
# weighs 4.0.
# Indices are annotated in the order akiec, bcc, bkl, df, mel, nv, vasc
# (so index 4 is meant to be 'mel') -- presumably this matches
# train_batches.class_indices; TODO confirm.
class_weights = dict.fromkeys(range(7), 1.0)
class_weights[4] = 4.0  # mel

# Write the weights to saved_model.hdf5 only when val_accuracy improves.
checkpoint = ModelCheckpoint(
    filepath='saved_model.hdf5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)




In [None]:
# Stop when val_loss has not improved by at least 0.001 for 30 epochs.
Earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=30, min_delta=0.001)

# Step counts are passed as integers; the previous float values
# (len(train_df)/10 and len(test_df)/batch_size) relied on Keras tolerating
# non-integer step counts.
# - steps_per_epoch: floor division keeps the 918 steps per epoch shown in the
#   recorded training log.
#   NOTE(review): the divisor 10 is not the batch size, and train_df is the
#   pre-augmentation table, so one "epoch" is presumably not a full pass over
#   train_dir -- confirm this is intended.
# - validation_steps: len(test_batches) is the number of batches needed to
#   cover every test image once.
history = model.fit(
    train_batches,
    steps_per_epoch=len(train_df) // 10,
    epochs=150,
    verbose=1,
    validation_data=test_batches,
    validation_steps=len(test_batches),
    callbacks=[checkpoint, Earlystop],
    class_weight=class_weights,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150

In [None]:
# NOTE(review): `models` is imported here but not used in this cell.
from tensorflow.keras import models
# Restore the weights stored in saved_model.hdf5, the file the ModelCheckpoint
# callback writes whenever val_accuracy improves.
model.load_weights("saved_model.hdf5")

In [25]:
# Second training run, continuing from the weights currently in `model`.
# Stop when val_loss has not improved by at least 0.001 for 30 epochs.
Earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=30, min_delta=0.001)

# Step counts are passed as integers; the previous float values
# (len(train_df)/10 and len(test_df)/batch_size) relied on Keras tolerating
# non-integer step counts.
# - steps_per_epoch: floor division keeps the 918 steps per epoch shown in the
#   recorded training log.
# - validation_steps: len(test_batches) is the number of batches needed to
#   cover every test image once.
history = model.fit(
    train_batches,
    steps_per_epoch=len(train_df) // 10,
    epochs=139,
    verbose=1,
    validation_data=test_batches,
    validation_steps=len(test_batches),
    callbacks=[checkpoint, Earlystop],
    class_weight=class_weights,
)

Epoch 1/139
Epoch 2/139
Epoch 3/139
Epoch 4/139
Epoch 5/139
Epoch 6/139
Epoch 7/139
Epoch 8/139
Epoch 9/139
Epoch 10/139
Epoch 11/139
Epoch 12/139
Epoch 13/139
Epoch 14/139
Epoch 15/139
Epoch 16/139

KeyboardInterrupt: 

In [27]:
# Class probabilities for every test image, in test_batches order
# (the iterator was created with shuffle=False).
# steps is an explicit integer covering the whole iterator; the previous
# len(test_df)/batch_size was a float.
# NOTE: these predictions come from the weights currently held by `model`,
# which are not necessarily the best checkpoint in saved_model.hdf5.
predictions = model.predict(test_batches, steps=len(test_batches), verbose=0)

In [None]:
import re


# Keras progress output pasted from the two training runs (first the
# 150-epoch run, then the 139-epoch continuation). It is parsed below because
# the `history` objects only cover a single, interrupted fit() call.
data = '''Epoch 1/150
918/918 [==============================] - 4823s 5s/step - loss: 1.9951 - accuracy: 0.4538 - val_loss: 0.7977 - val_accuracy: 0.6944
Epoch 2/150
918/918 [==============================] - 4773s 5s/step - loss: 1.2939 - accuracy: 0.6133 - val_loss: 0.6049 - val_accuracy: 0.7754
Epoch 3/150
918/918 [==============================] - 4792s 5s/step - loss: 1.0839 - accuracy: 0.6766 - val_loss: 0.8387 - val_accuracy: 0.7089
Epoch 4/150
918/918 [==============================] - 4779s 5s/step - loss: 0.9119 - accuracy: 0.7335 - val_loss: 0.4945 - val_accuracy: 0.8333
Epoch 5/150
918/918 [==============================] - 4784s 5s/step - loss: 0.8265 - accuracy: 0.7559 - val_loss: 0.4475 - val_accuracy: 0.8539
Epoch 6/150
918/918 [==============================] - 4788s 5s/step - loss: 0.7020 - accuracy: 0.7979 - val_loss: 0.4252 - val_accuracy: 0.8853
Epoch 7/150
918/918 [==============================] - 4755s 5s/step - loss: 0.9091 - accuracy: 0.7350 - val_loss: 0.3458 - val_accuracy: 0.8986
Epoch 8/150
918/918 [==============================] - 4822s 5s/step - loss: 0.6620 - accuracy: 0.8052 - val_loss: 0.3991 - val_accuracy: 0.8635
Epoch 9/150
918/918 [==============================] - 4811s 5s/step - loss: 0.5817 - accuracy: 0.8332 - val_loss: 0.4153 - val_accuracy: 0.8587
Epoch 10/150
918/918 [==============================] - 4825s 5s/step - loss: 0.5246 - accuracy: 0.8507 - val_loss: 0.3328 - val_accuracy: 0.8792
Epoch 1/139
918/918 [==============================] - 4859s 5s/step - loss: 0.2547 - accuracy: 0.9289 - val_loss: 0.3392 - val_accuracy: 0.9010
Epoch 2/139
918/918 [==============================] - 4774s 5s/step - loss: 0.2688 - accuracy: 0.9270 - val_loss: 0.3706 - val_accuracy: 0.8792
Epoch 3/139
918/918 [==============================] - 4734s 5s/step - loss: 0.2835 - accuracy: 0.9197 - val_loss: 0.3113 - val_accuracy: 0.8949
Epoch 4/139
918/918 [==============================] - 4754s 5s/step - loss: 0.2794 - accuracy: 0.9231 - val_loss: 0.3796 - val_accuracy: 0.8732
Epoch 5/139
918/918 [==============================] - 4749s 5s/step - loss: 0.2700 - accuracy: 0.9244 - val_loss: 0.2915 - val_accuracy: 0.9263
Epoch 6/139
918/918 [==============================] - 4770s 5s/step - loss: 0.2451 - accuracy: 0.9329 - val_loss: 0.3401 - val_accuracy: 0.8973
Epoch 7/139
918/918 [==============================] - 4772s 5s/step - loss: 0.2449 - accuracy: 0.9331 - val_loss: 0.4078 - val_accuracy: 0.8925
Epoch 8/139
918/918 [==============================] - 4803s 5s/step - loss: 0.2284 - accuracy: 0.9348 - val_loss: 0.4153 - val_accuracy: 0.8853
Epoch 9/139
918/918 [==============================] - 4871s 5s/step - loss: 0.2106 - accuracy: 0.9446 - val_loss: 0.3723 - val_accuracy: 0.9179
Epoch 10/139
918/918 [==============================] - 4744s 5s/step - loss: 0.2095 - accuracy: 0.9422 - val_loss: 0.3562 - val_accuracy: 0.8986
Epoch 11/139
918/918 [==============================] - 4710s 5s/step - loss: 0.1711 - accuracy: 0.9542 - val_loss: 0.4637 - val_accuracy: 0.8768
Epoch 12/139
918/918 [==============================] - 4747s 5s/step - loss: 0.1678 - accuracy: 0.9528 - val_loss: 0.4481 - val_accuracy: 0.8986
Epoch 13/139
918/918 [==============================] - 4766s 5s/step - loss: 0.1698 - accuracy: 0.9538 - val_loss: 0.3744 - val_accuracy: 0.9094
Epoch 14/139
918/918 [==============================] - 4690s 5s/step - loss: 0.1494 - accuracy: 0.9595 - val_loss: 0.3809 - val_accuracy: 0.9106
Epoch 15/139
918/918 [==============================] - 4764s 5s/step - loss: 0.1656 - accuracy: 0.9555 - val_loss: 0.4324 - val_accuracy: 0.8949
'''

# Matches every "<metric name>: <number>" pair on a line. Because \w+ consumes
# the whole identifier, "val_loss" is never mistaken for "loss".
_METRIC_PAIR = re.compile(r"(\w+): (\d+\.?\d*)")

# Metric name in the log -> list collecting its value for each epoch.
_history_by_metric = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}

for line in data.splitlines():
    metrics = dict(_METRIC_PAIR.findall(line))
    # Lines without all four metrics (the "Epoch i/N" headers) are skipped.
    # Selecting by content rather than by the "918" prefix keeps the parser
    # working when the number of steps per epoch changes.
    if not _history_by_metric.keys() <= metrics.keys():
        continue
    for metric_name, values in _history_by_metric.items():
        values.append(float(metrics[metric_name]))

train_loss = _history_by_metric['loss']
train_accuracy = _history_by_metric['accuracy']
val_loss = _history_by_metric['val_loss']
val_accuracy = _history_by_metric['val_accuracy']

print(train_loss)
print(train_accuracy)
print(val_loss)
print(val_accuracy)


In [None]:
# Plot the parsed per-epoch training and validation accuracy on one set of
# axes and write the figure to accuracy_history.png.
fig, ax = plt.subplots()
ax.plot(train_accuracy, label='Training Accuracy')
ax.plot(val_accuracy, label='Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Validation Accuracy History')
ax.legend()
fig.savefig('accuracy_history.png')


In [None]:
from tensorflow.keras.utils import to_categorical

targetnames = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
num_classes = len(targetnames)

# Predicted class probabilities per image, and the most probable class index
y_prob = predictions
y_pred = np.argmax(y_prob, axis=1)

# True class index per image, as recorded by the test iterator
y_true = test_batches.classes

# One-hot encoding of the true labels. num_classes is passed explicitly so the
# matrix always has one column per class, even if the highest class index
# never occurs in y_true.
y_test = to_categorical(y_true, num_classes=num_classes)

# Creating the classification report. `labels` pins the row order to
# targetnames and keeps the call valid when a class is absent from both
# y_true and y_pred.
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(num_classes)),
    target_names=targetnames,
)

print("\nClassification Report:")
print(report)

In [None]:
# Support-weighted averages of the per-class scores.
for metric_label, metric_fn in (("Precision", precision_score), ("Recall", recall_score)):
    print("%s: %s" % (metric_label, metric_fn(y_true, y_pred, average='weighted')))
print("Accuracy: %s" % accuracy_score(y_true, y_pred))
weighted_auc = roc_auc_score(y_true, y_prob, multi_class='ovr', average='weighted')
print("weighted Roc score: %s" % weighted_auc)

In [None]:

# Unweighted (macro) averages of the per-class scores.
macro_precision = precision_score(y_true, y_pred, average='macro')
macro_recall = recall_score(y_true, y_pred, average='macro')
overall_accuracy = accuracy_score(y_true, y_pred)
macro_auc = roc_auc_score(y_true, y_prob, multi_class='ovr', average='macro')

print("Precision: " + str(macro_precision))
print("Recall: " + str(macro_recall))
print("Accuracy: " + str(overall_accuracy))
print("Macro Roc score: " + str(macro_auc))

In [None]:
# Micro-averaged scores (computed over all individual decisions).
print("Precision: %s" % precision_score(y_true, y_pred, average='micro'))
print("Recall: %s" % recall_score(y_true, y_pred, average='micro'))
print("Accuracy: %s" % accuracy_score(y_true, y_pred))

# Micro-averaged ROC: flatten the one-hot labels and the probability matrix
# into one long binary problem.
tpr, fpr, roc_auc = {}, {}, {}
micro_curve = roc_curve(y_test.ravel(), y_prob.ravel())
fpr["micro"], tpr["micro"], _ = micro_curve
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
print("Micro Roc score: %s" % roc_auc["micro"])

In [None]:
# Reset the ROC containers, then print the one-vs-rest ROC AUC of each of the
# 7 classes (column i of the one-hot labels against column i of the probabilities).
fpr, tpr, roc_auc = {}, {}, {}
for i in range(7):
    r = roc_auc_score(y_test[:, i], y_prob[:, i])
    print("The ROC AUC score of %s is: %s" % (targetnames[i], r))

In [None]:
# ROC curve (every threshold kept) and area under it for each of the 7
# classes, stored under the class index.
fpr, tpr, roc_auc = {}, {}, dict()
for i in range(7):
    class_curve = roc_curve(y_test[:, i], y_prob[:, i], drop_intermediate=False)
    fpr[i], tpr[i], _ = class_curve
    roc_auc[i] = auc(fpr[i], tpr[i])

In [None]:

# Draw the ROC curve of every class on one set of axes.
# One matplotlib format string per class, in targetnames order.
roc_line_formats = ['v-', 'c', 'b', 'g', 'y', 'o-', 'r']
for class_idx, (class_name, line_format) in enumerate(zip(targetnames, roc_line_formats)):
    plt.plot(fpr[class_idx], tpr[class_idx], line_format,
             label='%s: ROC curve of (area = %0.2f)' % (class_name, roc_auc[class_idx]))

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([-0.1, 1.1])
plt.ylim([-0.1, 1.1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# The title used to interpolate targetnames[i] with an index leaked from an
# earlier loop, which labelled this all-class figure with the last class only.
plt.title('Receiver operating characteristic per class')
plt.legend(loc="lower right")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, auc
from sklearn.metrics import precision_score, recall_score, accuracy_score
from tensorflow.keras import models
# Imported here so the cell does not depend on an import made in another cell
from tensorflow.keras.utils import to_categorical

# Load the weights saved by the checkpoint callback into the model
model.load_weights("saved_model.hdf5")

# Evaluate the model on the test set to get accuracy
_, test_accuracy = model.evaluate(test_batches)
print("Test accuracy: " + str(test_accuracy))

# Recompute the predictions with the weights just loaded; reusing the
# `predictions` array from before load_weights() would report on a different
# set of weights than the one evaluated above.
predictions = model.predict(test_batches, steps=len(test_batches), verbose=0)

targetnames = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
num_classes = len(targetnames)
class_indices = list(range(num_classes))

# Predicted probabilities and predicted class index per image
y_prob = predictions
y_pred = np.argmax(y_prob, axis=1)

# True class index per image, and its one-hot encoding. num_classes is passed
# explicitly so there is always one column per class.
y_true = test_batches.classes
y_test = to_categorical(y_true, num_classes=num_classes)

# Creating classification report (labels pins the row order to targetnames)
report = classification_report(y_true, y_pred, labels=class_indices, target_names=targetnames)
print("\nClassification Report:")
print(report)

# Save the loss history plot
plt.figure()
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss History')
plt.legend()
plt.savefig('loss_history.png')

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_indices)

# Save the confusion matrix as an image. plt.matshow opens its own figure, so
# no plt.figure() call precedes it (that left an empty figure behind).
plt.matshow(conf_matrix, cmap='coolwarm')
plt.colorbar()
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')

# ROC curve and AUC for each class (one-vs-rest)
fpr, tpr, roc_auc = {}, {}, {}
for i in class_indices:
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_prob[:, i], drop_intermediate=False)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Save ROC curves for all classes as a single image
plt.figure()
for i in class_indices:
    plt.plot(fpr[i], tpr[i], label=f'{targetnames[i]} (AUC: {roc_auc[i]:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# The figure shows every class, so the title no longer interpolates
# targetnames[i] (which named only the last class of the loop).
plt.title('Receiver Operating Characteristic per class')
plt.legend(loc="lower right")
plt.savefig('roc_curves.png')
