In [1]:
# Mount Google Drive inside the Colab VM; every data path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2
from keras import backend as K
from keras.layers import Layer,InputSpec
import keras.layers as kl
from glob import glob
from sklearn.metrics import roc_curve, auc
from keras.preprocessing import image
from tensorflow.keras.models import Sequential
from sklearn.metrics import roc_auc_score
from tensorflow.keras import callbacks 
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from  matplotlib import pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.layers import concatenate,Dense, Conv2D, MaxPooling2D, Flatten,Input,Activation,add,AveragePooling2D,BatchNormalization,Dropout
%matplotlib inline
import shutil
from sklearn.metrics import  precision_score, recall_score, accuracy_score,classification_report ,confusion_matrix
from tensorflow.python.platform import build_info as tf_build_info
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

from PIL import ImageFile
# Tell PIL to load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [3]:
# Load the HAM10000 metadata table (lesion_id, image_id, dx, ...) from Drive and preview it.
data_pd = pd.read_csv(
    '/content/drive/MyDrive/ColabData/HAM10000/csv/HAM10000_metadata.csv'
)
data_pd.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern


In [4]:
# from zipfile import ZipFile
# filename="/content/drive/MyDrive/ColabData/HAM10000/images/HAM10000_Images_All.zip"
# with ZipFile(filename,'r') as zip:
#   zip.extractall("/content/drive/MyDrive/ColabData/HAM10000/images/")
#   print("done")

In [5]:
# Root folder on Drive holding the HAM10000 images; train_dir/test_dir are joined onto it below.
image_dir        = "/content/drive/MyDrive/ColabData/HAM10000/images"
# Path of the metadata CSV (the same file that was read into data_pd above).
metadata_csv     = "/content/drive/MyDrive/ColabData/HAM10000/csv/HAM10000_metadata.csv"
# NOTE(review): path_splitter and images_type are not referenced in any of the visible cells.
path_splitter    = "/"
images_type      = ".jpg" 

In [6]:
# Folders for the train/test image copies.  The notebook runs on Colab (POSIX
# paths under /content), so the trailing separator must be '/'; a Windows-style
# '\\' would become a literal backslash in the directory name.  This matches the
# values assigned again in a later cell.
train_dir = os.path.join(image_dir, 'train_dir/')
test_dir = os.path.join(image_dir, 'test_dir/')

# Count the rows recorded for each lesion_id, then keep only the lesions whose
# 'dx' count is exactly 1, i.e. lesions that have a single image.
df_count = data_pd.groupby('lesion_id').count()
df_count = df_count[df_count['dx'] == 1]

In [7]:
# Turn lesion_id back into a regular column so it can be collected into a set.
df_count.reset_index(inplace=True)

# lesion_ids that have exactly one image
unique = set(df_count['lesion_id'])


def duplicates(x):
    """Return 'no' when lesion id ``x`` has a single image, otherwise 'duplicates'."""
    return 'no' if x in unique else 'duplicates'


# Tag every metadata row with whether its lesion has more than one image.
data_pd['is_duplicate'] = data_pd['lesion_id'].map(duplicates)
data_pd.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,is_duplicate
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,duplicates


In [8]:
# Keep only the metadata rows whose lesion has a single image.
# Note: this rebinds df_count from the per-lesion count table to a filtered subset of data_pd.
df_count = data_pd[data_pd['is_duplicate'] == 'no']

In [9]:
# Fixed seed so the stratified hold-out split is identical on every run.
# NOTE(review): the images under train_dir/test_dir were copied by code that is
# not shown here; if they came from an unseeded split they will not match this
# one.  The cells below only use len(train_df) and len(test_df), which do not
# depend on the seed.
SPLIT_SEED = 42

# Hold out 15% of the single-image lesions, stratified by diagnosis ('dx').
train, test_df = train_test_split(
    df_count,
    test_size=0.15,
    stratify=df_count['dx'],
    random_state=SPLIT_SEED,
)

# image_ids that belong to the hold-out set
test_data = set(test_df['image_id'])


def identify_trainOrtest(x):
    """Return 'test' if image id ``x`` is in the hold-out set, otherwise 'train'."""
    return 'test' if str(x) in test_data else 'train'


# Every image that is not held out is training data, including all images of
# lesions that have more than one image.
data_pd['train_test_split'] = data_pd['image_id'].apply(identify_trainOrtest)
train_df = data_pd[data_pd['train_test_split'] == 'train']

# Image ids of the train and test images
train_list = list(train_df['image_id'])
test_list = list(test_df['image_id'])

# Set the image_id as the index in data_pd.
# NOTE: after this line 'image_id' is no longer a column, so this cell cannot be
# re-run without first reloading data_pd.
data_pd.set_index('image_id', inplace=True)

In [10]:
# Folders read by flow_from_directory below; these assignments replace the
# train_dir/test_dir values set in an earlier cell.
train_dir = os.path.join(image_dir, 'train_dir/')
test_dir = os.path.join(image_dir, 'test_dir/')

In [11]:
# Mini-batch size shared by the train and test image generators.
batch_size = 16

In [12]:
# Aliases of train_dir/test_dir that are passed to the image generators.
train_path  = train_dir
test_path=test_dir

In [13]:
# Generator that runs InceptionResNetV2's preprocess_input on every image;
# no augmentation options are configured here.
datagen=ImageDataGenerator(preprocessing_function=tf.keras.applications.inception_resnet_v2.preprocess_input)


In [14]:
# Side length (pixels) that every image is resized to.
image_size = 299

# Training images: one sub-folder per class, reshuffled between epochs.
print("\nTrain Batches: ")
train_batches = datagen.flow_from_directory(
    directory=train_path,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
)

# Test images: kept in directory order so predictions line up with .classes.
print("\nTest Batches: ")
test_batches = datagen.flow_from_directory(
    directory=test_path,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False,
)


Train Batches: 
Found 52411 images belonging to 7 classes.

Test Batches: 
Found 828 images belonging to 7 classes.


In [15]:
#Soft Attention

from keras import backend as K
from keras.layers import Layer,InputSpec
import keras.layers as kl
import tensorflow as tf



class SoftAttention(Layer):
    """Soft-attention layer that re-weights a feature map with learned spatial maps.

    The reshape/permute patterns in ``call`` assume a 4-D channels-last input
    ``(batch, height, width, channels)``.  A 3-D convolution produces one map
    per head, each map is soft-maxed over all spatial positions, and the maps
    are used to scale the input features.

    Args:
        ch: Stored as ``int(ch)`` and used as the leading extent of the 3-D
            convolution kernel (callers in this notebook pass the input's
            channel count).
        m: Number of attention heads (output channels of the 3-D convolution).
        concat_with_x: If True, the unscaled input is concatenated to the
            scaled features along the last axis.
        aggregate: If True, the per-head maps are summed into a single map
            before scaling the input.  If False, each head scales the input
            separately and the results are stacked along the channel axis.
        **kwargs: Forwarded to ``Layer.__init__`` (e.g. ``name``).

    ``call`` returns ``[features, attention_maps]`` where ``attention_maps``
    has shape ``(batch, m, height, width)``.
    """

    def __init__(self,ch,m,concat_with_x=False,aggregate=False,**kwargs):
        self.channels=int(ch)
        self.multiheads = m
        self.aggregate_channels = aggregate
        self.concat_input_with_scaled = concat_with_x

        super(SoftAttention,self).__init__(**kwargs)

    def build(self,input_shape):
        """Create the 3-D convolution weights and record the output shapes."""
        self.i_shape = input_shape

        # Kernel layout for K.conv3d: three spatial extents, in-channels, out-channels.
        # NOTE(review): call() appends the singleton axis *after* the channel axis,
        # so the conv's three "spatial" axes are (height, width, channels) and the
        # leading kernel extent self.channels spans the height axis, not the
        # channel axis.  Left unchanged so existing trained weights stay loadable;
        # confirm this is intended.
        kernel_shape_conv3d = (self.channels, 3, 3) + (1, self.multiheads) # DHWC

        # (batch, heads, height, width)
        self.out_attention_maps_shape = input_shape[0:1]+(self.multiheads,)+input_shape[1:-1]

        # Channel count of the scaled features `u` produced in call():
        # one copy of the input channels per head unless the heads are aggregated.
        in_channels = input_shape[-1]
        if self.aggregate_channels==False:
            scaled_channels = in_channels*self.multiheads
        else:
            scaled_channels = in_channels

        # The input's own channels are appended only when concatenation is enabled.
        if self.concat_input_with_scaled:
            out_channels = scaled_channels+in_channels
        else:
            out_channels = scaled_channels
        self.out_features_shape = input_shape[:-1]+(out_channels,)

        self.kernel_conv3d = self.add_weight(shape=kernel_shape_conv3d,
                                        initializer='he_uniform',
                                        name='kernel_conv3d')
        self.bias_conv3d = self.add_weight(shape=(self.multiheads,),
                                      initializer='zeros',
                                      name='bias_conv3d')

        super(SoftAttention, self).build(input_shape)

    def call(self, x):
        """Return ``[scaled_features, attention_maps]`` for input ``x``."""
        # (B, H, W, C) -> (B, H, W, C, 1): add a singleton channel axis for conv3d.
        exp_x = K.expand_dims(x,axis=-1)

        # Stride equal to the channel count with 'same' padding collapses the
        # third conv axis to length 1 -> (B, H, W, 1, heads).
        c3d = K.conv3d(exp_x,
                     kernel=self.kernel_conv3d,
                     strides=(1,1,self.i_shape[-1]), padding='same', data_format='channels_last')
        conv3d = K.bias_add(c3d,
                        self.bias_conv3d)
        conv3d = kl.Activation('relu')(conv3d)

        # (B, H, W, 1, heads) -> (B, heads, H, W, 1)
        conv3d = K.permute_dimensions(conv3d,pattern=(0,4,1,2,3))

        # Drop the collapsed axis and flatten the spatial grid so the softmax
        # normalises each head over all H*W positions.
        conv3d = K.squeeze(conv3d, axis=-1)
        conv3d = K.reshape(conv3d,shape=(-1, self.multiheads ,self.i_shape[1]*self.i_shape[2]))

        softmax_alpha = K.softmax(conv3d, axis=-1)
        # Back to (B, heads, H, W)
        softmax_alpha = kl.Reshape(target_shape=(self.multiheads, self.i_shape[1],self.i_shape[2]))(softmax_alpha)

        if self.aggregate_channels==False:
            # (B, heads, H, W) -> (B, H, W, heads, 1)
            exp_softmax_alpha = K.expand_dims(softmax_alpha, axis=-1)
            exp_softmax_alpha = K.permute_dimensions(exp_softmax_alpha,pattern=(0,2,3,1,4))

            # (B, H, W, C) -> (B, H, W, 1, C) so the product broadcasts to (B, H, W, heads, C)
            x_exp = K.expand_dims(x,axis=-2)

            u = kl.Multiply()([exp_softmax_alpha, x_exp])

            # Merge the head and channel axes -> (B, H, W, heads*C)
            u = kl.Reshape(target_shape=(self.i_shape[1],self.i_shape[2],u.shape[-1]*u.shape[-2]))(u)

        else:
            # (B, heads, H, W) -> (B, H, W, heads), then sum the heads into one map.
            exp_softmax_alpha = K.permute_dimensions(softmax_alpha,pattern=(0,2,3,1))

            exp_softmax_alpha = K.sum(exp_softmax_alpha,axis=-1)

            # (B, H, W) -> (B, H, W, 1) so the map broadcasts across channels.
            exp_softmax_alpha = K.expand_dims(exp_softmax_alpha, axis=-1)

            u = kl.Multiply()([exp_softmax_alpha, x])

        if self.concat_input_with_scaled:
            o = kl.Concatenate(axis=-1)([u,x])
        else:
            o = u

        return [o, softmax_alpha]

    def compute_output_shape(self, input_shape):
        """Return the shapes recorded in ``build`` for the two outputs."""
        return [self.out_features_shape, self.out_attention_maps_shape]

    def get_config(self):
        """Return the layer config including the constructor arguments.

        The keys mirror the ``__init__`` parameter names so the default
        ``from_config`` (``cls(**config)``) can rebuild the layer when a model
        is cloned or loaded.
        """
        config = super(SoftAttention,self).get_config()
        config.update({
            'ch': self.channels,
            'm': self.multiheads,
            'concat_with_x': self.concat_input_with_scaled,
            'aggregate': self.aggregate_channels,
        })
        return config

In [16]:
# InceptionResNetV2 with its ImageNet weights and original classification top.
irv2 = tf.keras.applications.InceptionResNetV2(
    weights="imagenet",
    include_top=True,
    classifier_activation="softmax",
    input_tensor=None,
    input_shape=None,
    pooling=None,
)

# Tap the network at the 28th layer from the end; the 27 layers that follow it
# are left out of the new model.
conv = irv2.layers[-28].output

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5


In [17]:
# Soft-attention branch on top of the tapped backbone features: 16 heads summed
# into a single map, without concatenating the input inside the layer.
attention_layer, map2 = SoftAttention(
    ch=int(conv.shape[-1]),
    m=16,
    concat_with_x=False,
    aggregate=True,
    name='soft_attention',
)(conv)

# Down-sample both branches the same way so they can be concatenated.
attention_layer = MaxPooling2D(pool_size=(2, 2), padding="same")(attention_layer)
conv = MaxPooling2D(pool_size=(2, 2), padding="same")(conv)

# Join the backbone features with the attended features, then ReLU and dropout.
conv = concatenate([conv, attention_layer])
conv = Activation('relu')(conv)
conv = Dropout(0.5)(conv)

In [18]:
# Classification head: flatten the feature map and map it to 7 softmax outputs
# (7 matches the class count reported by flow_from_directory above).
output = Flatten()(conv)
output = Dense(7, activation='softmax')(output)
# The full model reuses the pretrained network's input tensor.
model = Model(inputs=irv2.input, outputs=output)

In [19]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 149, 149, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 149, 149, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [20]:
# Adam with learning rate 0.01 and epsilon 0.1.
# NOTE(review): both values are much larger than the Keras defaults — confirm they are intentional.
opt1=tf.keras.optimizers.Adam(learning_rate=0.01,epsilon=0.1)
# Categorical cross-entropy pairs with the softmax output; accuracy is tracked as a metric.
model.compile(optimizer=opt1,
             loss='categorical_crossentropy',
             metrics=['accuracy'])

In [21]:
# Per-class loss weights keyed by class index
# (0 akiec, 1 bcc, 2 bkl, 3 df, 4 mel, 5 nv, 6 vasc).
# Every class weighs 1.0 except melanoma, which is up-weighted to 5.0.
class_weights = {class_index: 1.0 for class_index in range(7)}
class_weights[4] = 5.0  # mel

# Save only the weights, and only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath='IRV2+SA.hdf5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)

In [22]:
# Stop when val_loss has not improved by at least 0.001 for 30 consecutive epochs.
# NOTE(review): early stopping watches val_loss while the checkpoint watches
# val_accuracy, and the validation data is the test generator, so the reported
# test metrics come from the same data used for model selection.
Earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=30, min_delta=0.001)

history = model.fit(
    train_batches,
    # Step counts must be whole numbers of batches.  Floor division gives the
    # 918 steps per epoch shown in the training log instead of a fractional float.
    steps_per_epoch=len(train_df) // 10,
    epochs=150,
    verbose=2,
    validation_data=test_batches,
    # len() of the generator is its number of batches, so one validation pass
    # covers every test image exactly once.
    validation_steps=len(test_batches),
    callbacks=[checkpoint, Earlystop],
    class_weight=class_weights,
)

Epoch 1/150
918/918 - 5438s - loss: 1.9804 - accuracy: 0.4524 - val_loss: 0.7546 - val_accuracy: 0.7838 - 5438s/epoch - 6s/step
Epoch 2/150
918/918 - 3550s - loss: 1.3840 - accuracy: 0.5932 - val_loss: 0.5785 - val_accuracy: 0.7874 - 3550s/epoch - 4s/step
Epoch 3/150
918/918 - 2534s - loss: 1.1820 - accuracy: 0.6526 - val_loss: 0.5337 - val_accuracy: 0.8092 - 2534s/epoch - 3s/step
Epoch 4/150
918/918 - 1879s - loss: 1.0804 - accuracy: 0.6864 - val_loss: 0.9156 - val_accuracy: 0.6667 - 1879s/epoch - 2s/step
Epoch 5/150
918/918 - 1343s - loss: 0.9397 - accuracy: 0.7261 - val_loss: 0.5793 - val_accuracy: 0.8152 - 1343s/epoch - 1s/step
Epoch 6/150
918/918 - 1004s - loss: 0.8364 - accuracy: 0.7622 - val_loss: 0.3624 - val_accuracy: 0.8659 - 1004s/epoch - 1s/step
Epoch 7/150
918/918 - 746s - loss: 0.7303 - accuracy: 0.7930 - val_loss: 0.3753 - val_accuracy: 0.8732 - 746s/epoch - 812ms/step
Epoch 8/150
918/918 - 546s - loss: 0.6824 - accuracy: 0.8040 - val_loss: 0.2978 - val_accuracy: 0.9118 

In [23]:
from tensorflow.keras import models
# Restore the weights written by the ModelCheckpoint callback (best val_accuracy) before evaluating.
model.load_weights("IRV2+SA.hdf5")

In [24]:
# Predict over every batch of the unshuffled test generator so the rows of
# `predictions` line up one-to-one with test_batches.classes.  The step count
# must be an integer; len(test_batches) is the generator's number of batches,
# whereas len(test_df)/batch_size is a fractional float.
predictions = model.predict(test_batches, steps=len(test_batches), verbose=0)

In [25]:
# Class names in class-index order, used as row labels in the report.
targetnames = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

# Per-class probabilities for each test image, and the arg-max class index.
y_prob = predictions
y_pred = np.argmax(y_prob, axis=1)

# True class index per image, in the generator's (unshuffled) order.
y_true = test_batches.classes

# One-hot encoding of the true labels.
from tensorflow.keras.utils import to_categorical
y_test = to_categorical(y_true)

# Per-class precision / recall / F1 on the test set.
report = classification_report(y_true, y_pred, target_names=targetnames)

print("\nClassification Report:")
print(report)


Classification Report:
              precision    recall  f1-score   support

       akiec       0.95      0.87      0.91        23
         bcc       0.95      0.73      0.83        26
         bkl       0.90      0.95      0.93        66
          df       0.83      0.83      0.83         6
         mel       0.94      0.88      0.91        34
          nv       0.99      1.00      0.99       663
        vasc       1.00      1.00      1.00        10

    accuracy                           0.98       828
   macro avg       0.94      0.90      0.91       828
weighted avg       0.98      0.98      0.98       828



In [26]:
# Support-weighted summary metrics over the whole test set.
weighted_precision = precision_score(y_true, y_pred, average='weighted')
weighted_recall = recall_score(y_true, y_pred, average='weighted')
overall_accuracy = accuracy_score(y_true, y_pred)
# One-vs-rest ROC AUC computed from the predicted class probabilities.
weighted_roc_auc = roc_auc_score(y_true, y_prob, multi_class='ovr', average='weighted')

print("Precision: " + str(weighted_precision))
print("Recall: " + str(weighted_recall))
print("Accuracy: " + str(overall_accuracy))
print("weighted Roc score: " + str(weighted_roc_auc))

Precision: 0.9769837465867393
Recall: 0.9770531400966184
Accuracy: 0.9770531400966184
weighted Roc score: 0.9950639096517542
