In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import efficientnet.tfkeras as efn 

import sklearn

In [None]:
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

from ImageDataAugmentor.image_data_augmentor import *
from albumentations import (Compose, GaussNoise, Blur, 
                            Affine, Perspective,Sharpen, Superpixels,
                            CenterCrop, ChannelDropout, ChannelShuffle, 
                            CLAHE, CoarseDropout, Downscale, 
                            ElasticTransform, Equalize, FancyPCA, 
                            Flip, GaussianBlur, GlassBlur, 
                            GridDistortion, GridDropout, Posterize, 
                            RandomBrightness, RandomContrast, RandomFog, #RandomBrightnessContrast,
                            RandomGamma, 
                            RandomGridShuffle, #RandomRain, RandomShow, Transpose
                            RandomShadow, RandomRotate90, #RandomResizedCrop
                            
                            VerticalFlip, HorizontalFlip, RandomBrightness, RandomContrast, 
                            OpticalDistortion, HueSaturationValue, ShiftScaleRotate, Cutout, OneOf,
                            ColorJitter
                           )

In [None]:
## Query the visible GPUs once, report how many there are, and switch each one
## to on-demand memory allocation.
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

In [None]:
''' 
Read in the CSV files that provide information on the images. 
Set path that will be used to locate the files.  
'''

## NOTE: every path below is absolute and machine-specific; edit them to match
## the local data layout before running the notebook.

## ISIC 2020 training images and the CSV describing them
## (the cells below read its 'image_name' and 'target' columns).
train_dir = '/home/mikylab/cycleGan/melanomaImages/train/'
train_csv = pd.read_csv('/home/mikylab/test2train/csv_files/train.csv')


## ISIC 2016 images used as an external test set, with their ground-truth CSV.
## NOTE(review): the directory is named '2016_test_data', so this is presumably the
## 2016 *test* split rather than the training split -- confirm.
## NOTE(review): a later cell addresses this CSV's columns as 'ISIC_0000003' and '0.0',
## which look like a data row that read_csv consumed as the header -- confirm whether
## the file is header-less (one image would then be silently dropped).
test_dir_16 = '/home/mikylab/cycleGan/2016_test_data/'
test_csv_16 = pd.read_csv('/home/mikylab/test2train/csv_files/ISBI2016_GroundTruth.csv')

## ISIC 2017 images used as an external test set, with their ground-truth CSV
## (the directory and CSV names both say "Test"; later cells read the
## 'image_id' and 'melanoma' columns).
test_dir_17 = '/home/mikylab/cycleGan/ISIC-2017_TestData/'
test_csv_17 = pd.read_csv('/home/mikylab/test2train/csv_files/ISIC-2017_Test_GroundTruth.csv')

In [None]:
## Class balance of the original training CSV: how many rows have target 0 (benign)
## and target 1 (malignant), and what share of the total is malignant.
class_counts = np.bincount(train_csv['target'])
neg, pos = class_counts  # unpacking requires exactly two classes (0 and 1)
total = neg + pos
positive_pct = 100 * pos / total
print('Melanoma Classification:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, positive_pct))

In [None]:
## Mitigate the class imbalance: keep every malignant row, but only a fixed-seed
## random sample of 2000 benign rows.
target_column = train_csv['target']
mal_train = train_csv.loc[target_column == 1]
ben_train = train_csv.loc[target_column == 0].sample(n=2000, random_state=316)

In [None]:
## Build the training table in one pass:
##   1. stack the malignant and benign subsets (fresh 0..n-1 index),
##   2. append '.jpg' so 'image_name' matches the files on disk,
##   3. drop the metadata columns that are not needed for training.
train_data = (
    pd.concat([mal_train, ben_train], ignore_index=True, sort=False)
    .assign(image_name=lambda frame: frame['image_name'] + '.jpg')
    .drop(columns=['patient_id', 'sex', 'age_approx', 'anatom_site_general_challenge',
                   'diagnosis', 'benign_malignant'])
)


In [None]:
## Reshape both test CSVs into the two-column layout used by the generators:
## 'image_dir' (file name including the .jpg extension) and 'target' (label).

## NOTE(review): the 2016 columns are addressed as 'ISIC_0000003' and '0.0', which
## look like a first data row that was read as the header -- confirm against the CSV.
test_data_16 = pd.DataFrame({
    'image_dir': test_csv_16['ISIC_0000003'] + '.jpg',
    'target': test_csv_16['0.0'],
})

test_data_17 = pd.DataFrame({
    'image_dir': test_csv_17['image_id'] + '.jpg',
    'target': test_csv_17['melanoma'],
})

In [None]:
## Class balance of the adjusted (sub-sampled) training set.
## neg / pos / total are overwritten here, so the class-weight cell further down
## works from these adjusted counts.
adjusted_counts = np.bincount(train_data['target'])
neg, pos = adjusted_counts
total = neg + pos
positive_pct = 100 * pos / total
print('Melanoma Classification:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, positive_pct))

In [None]:
## Divide the training set into training (80%) and validation (20%) sets.
## The split is shuffled with a fixed seed so it is reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    train_data['image_name'],
    train_data['target'],
    test_size=0.20,
    shuffle=True,
    random_state=316,
)
train_gen = pd.DataFrame({'image_dir': x_train, 'target': y_train})
val_gen = pd.DataFrame({'image_dir': x_val, 'target': y_val})

# Series.astype returns a new Series; the result has to be assigned back,
# otherwise the cast is silently discarded and the labels keep their dtype.
train_gen['target'] = train_gen['target'].astype(dtype='int16')
val_gen['target'] = val_gen['target'].astype(dtype='int16')

In [None]:
## Count how many benign and malignant images are in each set.
## (Previously this cell was a single string literal, so nothing was counted and
## the notebook merely echoed the text of the disabled code.)
for split_name, split_frame in (('train', train_gen), ('val', val_gen)):
    # minlength=2 keeps the unpacking valid even if one class is absent from a split
    benign_count, malignant_count = np.bincount(split_frame['target'], minlength=2)
    print('{}: benign={} malignant={}'.format(split_name, benign_count, malignant_count))

## Optional: persist the split to disk. Left disabled because it writes files into
## the current working directory on every run.
# train_gen.to_csv('CNN_trainset.csv', index = False)
# val_gen.to_csv('CNN_valset.csv', index = False)

In [None]:
## Calculate a weight for each class to mitigate the class imbalance.
## Each weight is inversely proportional to the class count; scaling by total/2
## helps keep the loss to a similar magnitude.
## neg / pos / total come from the most recent class-count cell that was run.
_half_total = total / 2.0

weight_for_0 = (1 / neg) * _half_total
weight_for_1 = (1 / pos) * _half_total
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(class_weight[0]))
print('Weight for class 1: {:.2f}'.format(class_weight[1]))

In [None]:
## Training-time augmentation pipeline built from the https://albumentations.ai library.
## The transforms are listed in the order they are passed to Compose.

# Side length of the single Cutout hole: 37.5% of the 256-pixel image side.
_cutout_size = int(256*.375)

AUGMENTATIONS = Compose([
    # --- flips ---
    VerticalFlip(p=.5),
    HorizontalFlip(p=.5),

    # --- brightness / contrast ---
    RandomBrightness(p=.5),
    RandomContrast(p=.5),

    # --- a OneOf group of three distortion transforms ---
    OneOf([
        OpticalDistortion(distort_limit=1.0),
        GridDistortion(num_steps=5, distort_limit=1.),
        ElasticTransform(alpha=3),
    ]),

    # --- local contrast and colour ---
    CLAHE(clip_limit=4.0, p=0.7),
    HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=.5),

    # --- shift / scale / rotate, then a single cut-out hole ---
    ShiftScaleRotate(shift_limit=0.2, scale_limit=0.1, rotate_limit=15, border_mode=0, p=.85),
    Cutout(max_h_size=_cutout_size, max_w_size=_cutout_size, num_holes=1, p=0.7),

    # --- second affine pass (note: full-circle rotation range) and colour jitter ---
    Affine(scale=[0.7, 1.3], translate_percent=.25, rotate=[-360, 360], shear=[0, 20]),
    ColorJitter(brightness=[0.9, 1.1], contrast=[0.9, 1.1], saturation=[0.9, 1.1],
                hue=[0, .2], always_apply=False, p=0.5),
])



In [None]:
## Two image generators, both rescaling pixel values by 1/255:
##  * test_image_gen  - plain Keras ImageDataGenerator, no augmentation
##                      (a later cell also uses it for the validation split);
##  * train_image_gen - ImageDataAugmentor (https://github.com/mjkvaak/ImageDataAugmentor),
##                      used to allow the Albumentations pipeline in AUGMENTATIONS.
test_image_gen = ImageDataGenerator(rescale=1./255)
train_image_gen = ImageDataAugmentor(
    augment=AUGMENTATIONS,
    rescale=1./255,
)

In [None]:
## Creation of the image generator datasets, images resized to (256, 256) and batch set to 1.
## Both iterators read files from train_dir and share every setting except the
## dataframe and the generator (augmenting for training, plain for validation).
_train_val_flow_kwargs = dict(
    directory=train_dir,
    x_col='image_dir',
    y_col='target',
    class_mode='raw',
    target_size=(256, 256),
    color_mode='rgb',
    batch_size=1,
    seed=316,
    shuffle=True,
)

Train_Data = train_image_gen.flow_from_dataframe(dataframe=train_gen, **_train_val_flow_kwargs)

# NOTE: Val_Data is shuffled as well, so batches drawn from it are NOT in the row
# order of val_gen; do not pair its predictions with val_gen['target'] row-by-row.
Val_Data = test_image_gen.flow_from_dataframe(dataframe=val_gen, **_train_val_flow_kwargs)

In [None]:
## Iterators over the two external test sets. Same image settings as the training
## iterators, but shuffle is off so batches follow the dataframe row order.
_test_flow_kwargs = dict(
    x_col='image_dir',
    y_col='target',
    class_mode='raw',
    target_size=(256, 256),
    color_mode='rgb',
    batch_size=1,
    seed=316,
    shuffle=False,
)

Test_Data_16 = test_image_gen.flow_from_dataframe(
    dataframe=test_data_16, directory=test_dir_16, **_test_flow_kwargs)

Test_Data_17 = test_image_gen.flow_from_dataframe(
    dataframe=test_data_17, directory=test_dir_17, **_test_flow_kwargs)

In [None]:
## Create the CNN model from the tf.keras EfficientNetB3 that was originally trained on the imagenet dataset
def create_model(input_shape=(256, 256, 3), weights="imagenet"):
    """Build the binary melanoma classifier.

    The network is a tf.keras EfficientNetB3 backbone without its classification
    head and with global average pooling, followed by a single sigmoid unit.

    Args:
        input_shape: (height, width, channels) of the input images. Defaults to
            (256, 256, 3), the size the notebook's generators produce.
        weights: value forwarded to EfficientNetB3's ``weights`` argument.
            Defaults to "imagenet"; pass None for a randomly initialised
            backbone (e.g. when the pretrained weights cannot be downloaded).

    Returns:
        An uncompiled tf.keras.Model mapping an image batch to one sigmoid
        output per image.

    NOTE(review): the tf.keras EfficientNet applications are documented as
    including their own input rescaling and expecting pixel values in [0, 255],
    whereas the generators in this notebook are built with rescale=1./255 --
    confirm the intended input range.
    """
    backbone = tf.keras.applications.EfficientNetB3(
        weights=weights,
        include_top=False,
        input_shape=input_shape,
        pooling='avg',
    )
    # Flatten is kept so the layer stack stays identical to previously saved models.
    features = tf.keras.layers.Flatten()(backbone.output)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(features)
    return tf.keras.Model(backbone.input, output)

In [None]:
## Instantiate the classifier returned by create_model() and print its layer summary.
model = create_model()
model.summary()


In [None]:
## Set the initial learning rate and the learning rate scheduler
## (exponential decay: rate 0.96, decay_steps 20, staircase mode).
## NOTE(review): none of the cells visible in this notebook pass `lr_schedule` or
## `initial_learning_rate` to an optimizer -- the compile cell creates Adam with a
## fixed learning rate. Confirm whether the schedule is meant to be used.
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=20, decay_rate=0.96, staircase=True
)

In [None]:
## Compile the model using AUC as the metric.
## Adam runs with a fixed learning rate of 1e-5.
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    # `metrics` is documented as a list; newer Keras releases reject a bare metric object.
    metrics=[tf.keras.metrics.AUC(name="auc")],
)

## Stop when val_loss has not improved for 100 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=100, verbose=1, restore_best_weights=True)
# `callbacks` is documented as a list of callbacks, so hand fit() a list.
cb = [early_stopping]

In [None]:
## Train the model. Use of class weight is optional (uncomment the line below).
## With the batch-size-1 iterators, each epoch sees 300 training images and
## validates on 100 images; training runs for at most 400 epochs.
history = model.fit(
    Train_Data,
    validation_data=Val_Data,
    epochs=400,
    steps_per_epoch=300,
    validation_steps=100,
    callbacks=cb,
    #class_weight = class_weight,
)

In [None]:
## Plot the model metrics from training: one figure for AUC and one for loss,
## each showing the training curve against the validation curve.
print(history.history.keys())

for train_key, val_key, figure_title, y_label in (
    ('auc', 'val_auc', 'model auc', 'auc'),
    ('loss', 'val_loss', 'model loss', 'loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
## Save the trained model to the .h5 file named below (relative to the current working directory).
model.save('cnn_melanoma_weights_full_7.1.h5')

In [None]:
## Evaluate or predict the model's performance on Val_Data, Test_Data_17 or Test_Data_16 datasets
## As written, only the ISIC 2017 test iterator is evaluated; pass Val_Data or
## Test_Data_16 instead to score the other sets.
model.evaluate(Test_Data_17)

In [None]:
## Sample a single image and use the CNN to make a classification prediction
def image_pred(data):
    """Show the next image from ``data`` with its true label and the model's prediction.

    Args:
        data: an image iterator whose ``next()`` returns an (images, labels)
            batch. Only the first image of the batch is displayed and scored.

    Uses the notebook-level ``model`` for the prediction and draws on the current
    matplotlib figure; returns nothing.
    """
    image_batch, label_batch = data.next()
    first_image = image_batch[0]
    plt.imshow(first_image)
    # The model expects a batch axis, so wrap the single image in a batch of one.
    prediction = model.predict(tf.expand_dims(first_image, axis=0))[0]
    plt.title(' Truth: ' + str(label_batch) + " Pred: " + str(prediction))

image_pred(Test_Data_16)

In [None]:
## Load a pretrained model
## NOTE(review): this file name differs from the one written by model.save() earlier
## in the notebook -- confirm which checkpoint is intended.
load_model = tf.keras.models.load_model('cnn_melanoma_new_aug_7.1.h5')

In [None]:
## Compile the loaded model with additional metrics.
## Confusion-matrix counts, accuracy, precision/recall, ROC-AUC, PR-AUC, and
## specificity at a sensitivity of 0.82.
METRICS = [
    tf.keras.metrics.TruePositives(name='tp'),
    tf.keras.metrics.FalsePositives(name='fp'),
    tf.keras.metrics.TrueNegatives(name='tn'),
    tf.keras.metrics.FalseNegatives(name='fn'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    tf.keras.metrics.SpecificityAtSensitivity(.82)
]

# Give the loaded model its own optimizer instead of reusing `opt`, which is already
# attached to `model`. This also lets the cell run without the training-compile cell
# having been executed. Same optimizer type and learning rate as used for training.
load_model.compile(
    loss='binary_crossentropy',
    metrics=METRICS,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
)


In [None]:
## Plot the ROC curve
def plot_roc(name, labels, predictions, **kwargs):
    """Draw one ROC curve on the current matplotlib axes.

    Args:
        name: legend label for the curve.
        labels: true binary labels, one per sample.
        predictions: predicted scores, aligned element-by-element with ``labels``.
        **kwargs: forwarded to ``plt.plot`` (e.g. ``color``, ``linestyle``).

    Both axes are expressed in percent and drawn with an equal aspect ratio.
    Nothing is returned; call ``plt.legend()`` afterwards to show the labels.
    """
    # The notebook only does `import sklearn`, which does not by itself load the
    # `metrics` submodule; import the function explicitly instead of relying on
    # another import having pulled it in as a side effect.
    from sklearn.metrics import roc_curve

    fp, tp, _ = roc_curve(labels, predictions)
    plt.plot(100*fp, 100*tp, label=name, linewidth=2, **kwargs)
    plt.xlabel('False positives [%]')
    plt.ylabel('True positives [%]')
    plt.grid(True)
    ax = plt.gca()
    ax.set_aspect('equal')
    

In [None]:
## Predict the classes on the ISIC 2016 test images.
## Test_Data_16 is not shuffled, so predictions follow the row order of test_data_16.
predicted_labels_test = model.predict(Test_Data_16).reshape(-1)
# The length is taken from the dataframe rather than hard-coded, so the cell keeps
# working if the test set changes size.
true_labels_test = test_data_16['target'].to_numpy().reshape(-1)
if true_labels_test.shape != predicted_labels_test.shape:
    raise ValueError(
        'ISIC 2016: {} labels but {} predictions - were some image files not found?'.format(
            true_labels_test.shape[0], predicted_labels_test.shape[0]))

In [None]:
## Predict the classes on the ISIC 2017 test images.
## Test_Data_17 is not shuffled, so predictions follow the row order of test_data_17.
predicted_labels_test_17 = model.predict(Test_Data_17).reshape(-1)
# The length is taken from the dataframe rather than hard-coded, so the cell keeps
# working if the test set changes size.
true_labels_test_17 = test_data_17['target'].to_numpy().reshape(-1)
if true_labels_test_17.shape != predicted_labels_test_17.shape:
    raise ValueError(
        'ISIC 2017: {} labels but {} predictions - were some image files not found?'.format(
            true_labels_test_17.shape[0], predicted_labels_test_17.shape[0]))

In [None]:
## Predict the classes on the validation images.
## Val_Data is created with shuffle=True, so its batches are not in the row order of
## val_gen; pairing its predictions with val_gen['target'] would compare each score
## with some other image's label. Predict through a dedicated, non-shuffled iterator
## over the same dataframe instead (same directory, size and rescaling as Val_Data).
val_eval_data = test_image_gen.flow_from_dataframe(
    dataframe=val_gen,
    directory=train_dir,
    x_col='image_dir',
    y_col='target',
    class_mode='raw',
    target_size=(256, 256),
    color_mode='rgb',
    batch_size=1,
    shuffle=False,
)

predicted_labels_val = model.predict(val_eval_data).reshape(-1)
# The length is taken from the dataframe rather than hard-coded, so the cell keeps
# working if the split changes size.
true_labels_val = val_gen['target'].to_numpy().reshape(-1)
if true_labels_val.shape != predicted_labels_val.shape:
    raise ValueError(
        'Validation: {} labels but {} predictions - were some image files not found?'.format(
            true_labels_val.shape[0], predicted_labels_val.shape[0]))

In [None]:
## Overlay the ROC curves of the validation split and both external test sets.
## Colours come from matplotlib's default cycle; the validation curve is dashed.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

roc_series = (
    ("Val Baseline", true_labels_val, predicted_labels_val,
     {'color': colors[0], 'linestyle': '--'}),
    ("Test_2016 Baseline", true_labels_test, predicted_labels_test,
     {'color': colors[1]}),
    ("Test_2017 Baseline", true_labels_test_17, predicted_labels_test_17,
     {'color': colors[2]}),
)
for curve_name, curve_labels, curve_scores, curve_style in roc_series:
    plot_roc(curve_name, curve_labels, curve_scores, **curve_style)


plt.legend(loc='lower right')