In [1]:
import cv2
import os
from operator import itemgetter
from numpy import array
import csv
import time
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
import pandas as pd

In [2]:
# imports all the images from a specified folder, with a specific extension
# and resizes to a specific imgHeight, imgWidth

def import_dataset(path, mode, fileExtension='.jpg', imgWidth=224, imgHeight=224):
    datasetFilenamesImages = []
    dataset = []
    print("Start importing " + mode + " images...")
    for filename in os.listdir(path):
        if filename.endswith(fileExtension): 
            completePath = os.path.join(path, filename)
            image = cv2.imread(completePath, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, dsize=(imgHeight, imgWidth), interpolation=cv2.INTER_AREA)
            filenameImage = [filename, image]
            datasetFilenamesImages.append(filenameImage)
        else:
            continue
    
    datasetFilenamesImages = sorted(datasetFilenamesImages, key=itemgetter(0))
    for x in datasetFilenamesImages:
        dataset.append(x[1])
    
    return array(dataset)


def assign_labels(path_groundtruth):
    target = []
    counter = {'MEL': 0, 'NV': 0, 'BCC': 0, 'AKIEC': 0, 'BKL': 0, 'DF': 0, 'VASC': 0}
    i=0
    with open(path_groundtruth, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            if i == 0:
                i += 1
                continue
            if row[1] == '1.0': # MEL
                counter['MEL'] += 1
                target.append(0)
            elif row[2] == '1.0': # NV
                counter['NV'] += 1
                target.append(1)
            elif row[3] == '1.0': # BCC
                counter['BCC'] += 1
                target.append(2)
            elif row[4] == '1.0': # AKIEC
                counter['AKIEC'] += 1
                target.append(3)
            elif row[5] == '1.0': # BKL
                counter['BKL'] += 1
                target.append(4)
            elif row[6] == '1.0': # DF
                counter['DF'] += 1
                target.append(5)
            elif row[7] == '1.0':   # VASC
                counter['VASC'] += 1
                target.append(6) # BCC
            else:
                continue
    print(counter)
    file.close()
    return counter, target


def create_model(model = 'densenet', noClasses=2, imgWidth=224, imgHeight=224):
    if model == 'densenet':
        densenet = DenseNet121(include_top=False, weights=None, input_shape=(imgHeight, imgWidth, 3))
        model = tf.keras.Sequential(densenet)
        model.add(GlobalAveragePooling2D())
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(Dense(units=noClasses, activation="softmax"))
    elif model == 'resnet':
        from tensorflow.keras.applications.resnet import ResNet101
        resnet = ResNet101(include_top=False, weights=None, input_shape=(imgHeight, imgWidth, 3))
        model = tf.keras.Sequential(densenet)
        model.add(GlobalAveragePooling2D())
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(Dense(units=noClasses, activation="softmax"))
    else:
        print("That model is not available.")
        exit(0)
        
    return model

In [3]:
# Create 5 classifiers and load the weights

model1 = create_model('densenet', 2) # MEL vs NMEL
model2 = create_model('densenet', 2) # NV vs MELA
model3 = create_model('densenet', 2) # BEN vs MAL
model4 = create_model('densenet', 3) # BKL vs DF vs VASC
model5 = create_model('densenet', 2) # AKIEC vs BCC

ckpt1_path = "/home/ruben/Desktop/isic_2018/model_weights/hier/densenet/a/cp.ckpt"
ckpt2_path = "/home/ruben/Desktop/isic_2018/model_weights/hier/densenet/b/cp.ckpt"
ckpt3_path = "/home/ruben/Desktop/isic_2018/model_weights/hier/densenet/c/cp.ckpt"
ckpt4_path = "/home/ruben/Desktop/isic_2018/model_weights/hier/densenet/d/cp.ckpt"
ckpt5_path = "/home/ruben/Desktop/isic_2018/model_weights/hier/densenet/e/cp.ckpt"

model1.load_weights(ckpt1_path)
model2.load_weights(ckpt2_path)
model3.load_weights(ckpt3_path)
model4.load_weights(ckpt4_path)
model5.load_weights(ckpt5_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f24104453a0>

In [5]:
# Create flat model

flat_path = '/home/ruben/Desktop/isic_2018/model_weights/flat/densenet/cp.ckpt'
x_flat = []
y_flat = []

flat = create_model('densenet', 7)
flat.load_weights(flat_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f24a51353a0>

In [6]:
# Define ETA thresholds

eta_a = 0.6
eta_b = 0.6
eta_c = 0.6
eta_d = 0.6
eta_e = 0.6

In [7]:
p_val = '/home/ruben/Desktop/isic_2018/val_2018'
t_val = '/home/ruben/Desktop/isic_2018/val_2018/labels.csv'

counter, y_a = assign_labels(t_val)
x_a = import_dataset(p_val, 'validation')
print("len(x_a): ", len(x_a))
print("len(y_a): ", len(y_a))

{'MEL': 1, 'NV': 1, 'BCC': 1, 'AKIEC': 1, 'BKL': 1, 'DF': 1, 'VASC': 1}
Start importing validation images...
len(x_a):  7
len(y_a):  7


In [8]:
x_b = []
x_c = []
y_b = []
y_c = []
indexes_a_nmel = []
indexes_a_mel = []

if len(x_a) >= 1:
    y_hat_a = model1.predict_classes(x_a)
    y_hat_a_probabilities = model1.predict(x_a)
    
    x_a_aux = []
    y_a_aux = []
    y_hat_a_aux = []
    
    for i in range(0, len(y_hat_a_probabilities)):
        probabilitiesList = y_hat_a_probabilities[i]
        probabilitiesList.sort()
        highestProb = probabilitiesList[1]
        secondHighestProb = probabilitiesList[0]
        if (highestProb - secondHighestProb) < eta_a:
            x_flat.append(x_a[i])
            y_flat.append(y_a[i])
        else:
            x_a_aux.append(x_a[i])
            y_a_aux.append(y_a[i])
            y_hat_a_aux.append(y_hat_a[i])
    
    x_a = array(x_a_aux)
    y_a = y_a_aux
    y_hat_a = y_hat_a_aux
     
if len(x_a) >= 1:
    for i in range(0, len(y_hat_a)):
        if y_hat_a[i] == 1: # NMEL
            # indexes: what indexes of y_hat_a are nmel lesions
            indexes_a_nmel.append(i)
            x_c.append(x_a[i])
            y_c.append(y_a[i])

        elif y_hat_a[i] == 0: # MEL
            indexes_a_mel.append(i)
            x_b.append(x_a[i])
            y_b.append(y_a[i])

    x_b = array(x_b)
    x_c = array(x_c)

    print("len(y_hat_a): ", len(y_hat_a))
    print("len(x_b): ", len(x_b))
    print("len(x_c): ", len(x_c))  
else:
    x_b = []
    x_c = []
    x_b = array(x_b)
    x_c = array(x_b)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [10]:
x_b_aux = []
y_b_aux = []
y_hat_b_aux = []
    
if(len(x_b) != 0):
    y_hat_b = model2.predict_classes(x_b)
    y_hat_b_probabilities = model2.predict(x_b)
    
    for i in range(0, len(y_hat_b_probabilities)):
        probabilitiesList = y_hat_b_probabilities[i]
        probabilitiesList.sort()
        highestProb = probabilitiesList[1]
        secondHighestProb = probabilitiesList[0]
        if (highestProb - secondHighestProb) < eta_b:
            x_flat.append(x_b[i])
            y_flat.append(y_b[i])
        else:
            x_b_aux.append(x_b[i])
            y_b_aux.append(y_b[i])
            y_hat_b_aux.append(y_hat_b[i])
        
else:
    y_hat_b = []
    
x_b = array(x_b_aux)
y_b = y_b_aux
y_hat_b = y_hat_b_aux
zippedList = list(zip(y_hat_b, y_b))
df_b = pd.DataFrame(zippedList, columns = ['MEL_PRED', 'MEL_TRUTH'])
df_b.to_csv('/home/ruben/Desktop/classifier_b.csv', index=False)

In [11]:
indexes_c_mal = []
indexes_c_ben = []

if(len(x_c) != 0):
    y_hat_c = model3.predict_classes(x_c)
    y_hat_c_probabilities = model3.predict(x_c)
    
    x_c_aux = []
    y_c_aux = []
    y_hat_c_aux = []

    for i in range(0, len(y_hat_c_probabilities)):
        
        probabilitiesList = y_hat_c_probabilities[i]
        probabilitiesList.sort()
        highestProb = probabilitiesList[1]
        secondHighestProb = probabilitiesList[0]
        
        if (highestProb - secondHighestProb) < eta_c:
            x_flat.append(x_c[i])
            y_flat.append(y_c[i])
        else:
            x_c_aux.append(x_c[i])
            y_c_aux.append(y_c[i])
            y_hat_c_aux.append(y_hat_c[i])
    
    x_c = array(x_c_aux)
    y_c = y_c_aux
    y_hat_c = y_hat_c_aux

    print("len(y_hat_c): ", len(y_hat_c))
    print("len(x_d): ", len(x_d))
    print("len(x_e): ", len(x_e))      

In [12]:
x_d = []
x_e = []
y_d = []
y_e = []

if len(x_c) >= 1:
    
    for i in range(0, len(y_hat_c)):

        if y_hat_c[i] == 1: # MAL
            indexes_c_mal.append(i)
            x_e.append(x_c[i])
            y_e.append(y_c[i])


        elif y_nmel_pred[i] == 0: # BEN
            indexes_2_ben.append(i)
            x_d.append(x_c[i])
            y_d.append(y_c[i])


    x_d = array(x_d)
    x_e = array(x_e)
    
else:
    x_d = []
    x_e = []
    x_d = array(x_d)
    x_e = array(x_e)

In [13]:
if(len(x_d) != 0):
    y_hat_d = model4.predict_classes(x_d)
    y_hat_d_probabilities = model4.predict(x_d)
    
    x_d_aux = []
    y_d_aux = []
    y_hat_d_aux = []

    for i in range(0, len(y_hat_d_probabilities)):
        
        probabilitiesList = y_hat_d_probabilities[i]
        probabilitiesList.sort()
        highestProb = probabilitiesList[1]
        secondHighestProb = probabilitiesList[0]
        
        if (highestProb - secondHighestProb) < eta_d:
            x_flat.append(x_d[i])
            y_flat.append(y_d[i])
        else:
            x_d_aux.append(x_d[i])
            y_d_aux.append(y_d[i])
            y_hat_d_aux.append(y_hat_d[i])
else:
    y_hat_d = []

zippedList = list(zip(y_hat_d, y_d))
df_d = pd.DataFrame(zippedList, columns = ['BEN_PRED', 'BEN_TRUTH'])
df_d.to_csv('/home/ruben/Desktop/classifier_d.csv', index=False)

In [16]:
x_e_aux = []
y_e_aux = []
y_hat_e_aux = []
    
if(len(x_e) != 0):
    y_hat_e = model5.predict_classes(x_e)
    y_hat_e_probabilities = model5.predict(x_e)

    for i in range(0, len(y_hat_e_probabilities)):
        
        probabilitiesList = y_hat_e_probabilities[i]
        probabilitiesList.sort()
        highestProb = probabilitiesList[1]
        secondHighestProb = probabilitiesList[0]
        
        if (highestProb - secondHighestProb) < eta_e:
            x_flat.append(x_e[i])
            y_flat.append(y_e[i])
        else:
            x_e_aux.append(x_e[i])
            y_e_aux.append(y_e[i])
            y_hat_e_aux.append(y_hat_e[i])
    
else:
    y_hat_e = []
    
zippedList = list(zip(y_hat_e, y_e))
df_e = pd.DataFrame(zippedList, columns = ['MAL_PRED', 'MAL_TRUTH'])
df_e.to_csv('/home/ruben/Desktop/classifier_e.csv', index=False)

In [20]:
if len(x_flat) >= 1:
    x_flat = array(x_flat)
    y_hat_flat = flat.predict_classes(x_flat)
    
else:
    y_hat_flat = []
   

zippedList = list(zip(y_hat_flat, y_flat))
df_flat = pd.DataFrame(zippedList, columns = ['FLAT_PRED', 'FLAT_TRUTH'])
df_flat.to_csv('/home/ruben/Desktop/flat_classifier.csv', index=False)