In [310]:
import matplotlib.pyplot as plt
#from PIL import Image
import matplotlib.image as mpimg
import numpy as np
#from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras import layers, models
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow.metrics
import pandas as pd
from sklearn.model_selection import train_test_split
import os, shutil, random
from sklearn.metrics import confusion_matrix
import itertools
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MinMaxScaler

### Define preprocessing function for demographic data

In [32]:
def preprocess_dem(train,test):
    """Impute, scale and encode the demographic columns age and sex.

    Arguments:
    - train = dataframe with an "age" and a "sex" column; every statistic
      (median age, age range, most frequent sex) is taken from this frame
    - test = dataframe with the same two columns; transformed with the
      statistics of train

    Both frames are modified in place and returned as the tuple (train, test).
    Afterwards "age" is min-max scaled with the training range and "sex" is
    1 for "female" and 0 for everything else.

    Call this once on freshly loaded frames: running it again on already
    encoded frames would turn every "sex" value into 0.
    """
    # replace missing age values by the median of the training data
    age_median = train['age'].median()
    train['age'] = train['age'].fillna(age_median)
    test['age'] = test['age'].fillna(age_median)

    # perform min-max scaling of the continuous feature column to the
    # range [0, 1]; the scaler is fitted on the training data only
    cs = MinMaxScaler()
    train["age"] = cs.fit_transform(train["age"].values.reshape(-1,1)).ravel()
    test["age"] = cs.transform(test["age"].values.reshape(-1,1)).ravel()

    # The most frequent *known* sex has to be determined before the column is
    # encoded to 0/1 - afterwards the string labels are gone and "unknown"
    # entries of the test frame could no longer be mapped correctly.
    sex_counts = train.loc[train['sex'] != "unknown", 'sex'].value_counts()
    if len(sex_counts) > 0:
        sex_mode = sex_counts.index[0]
        # plain assignment instead of a chained inplace replace, which is not
        # guaranteed to write back into the frame
        train['sex'] = train['sex'].replace("unknown", sex_mode)
        test['sex'] = test['sex'].replace("unknown", sex_mode)

    train["sex"] = np.where(train['sex']=="female",1,0)
    test["sex"] = np.where(test['sex']=="female",1,0)

    # return the preprocessed training and testing data
    return (train, test)

### Load demographic data for all images (incl. augmented images)

In [6]:
# Diagnosis classes; each name is also a sub-folder of train_dir / validation_dir
diagnose = ['bkl', 'nv', 'df', 'mel', 'vasc', 'bcc', 'akiec']

# Root folder of the image data. Set the ISIC_DATA_ROOT environment variable
# to run the notebook on another machine; the default is the original location.
data_root = os.environ.get(
    "ISIC_DATA_ROOT",
    "/Users/leona/PythonWork/Github_Projects/Final_Pro/data/ISIC2018_Task3_Training_Input",
).rstrip("/")

# Both directories keep their trailing "/" because file paths are built further
# down by plain string concatenation (directory + class + "/" + file name).
train_dir = data_root + "/train/"
validation_dir = data_root + "/validation/"

In [14]:
def get_imagelist(classes,path):
    """Collect the file names found in the class sub-folders of a directory.

    Arguments:
    - classes = iterable of sub-folder names (one per diagnosis class)
    - path = directory that contains those sub-folders; a trailing path
      separator is optional

    Returns one flat list of bare file names (without directory part), grouped
    in the order of classes and sorted by name within each class. Every
    directory entry is listed, not only image files.
    """
    img_list = []
    for c in classes:
        # os.path.join works with and without a trailing separator on path;
        # os.listdir returns entries in arbitrary order, so sort them to make
        # the result reproducible between runs and machines.
        img_list.extend(sorted(os.listdir(os.path.join(path, c))))
    return img_list

In [15]:
# File names of every training / validation image, across all diagnosis classes
train_list, val_list = [
    get_imagelist(diagnose, folder) for folder in (train_dir, validation_dir)
]

### Load main, train and test dataframes

In [230]:
# Main dataframe, read from a pickle next to the notebook.
# NOTE(review): unpickling can execute arbitrary code - only load files you created yourself.
meta = pd.read_pickle("./meta.pkl")

In [319]:
# Training dataframe (file name suggests it includes the augmented images).
# NOTE(review): unpickling can execute arbitrary code - only load files you created yourself.
train = pd.read_pickle("./aug_train.pkl")

In [320]:
# Validation dataframe; note that it is read from the "aug_test" pickle.
# NOTE(review): unpickling can execute arbitrary code - only load files you created yourself.
validation = pd.read_pickle("./aug_test.pkl")

In [321]:
# Impute / scale age and encode sex. preprocess_dem assigns into the frames it
# receives, so `train` and `validation` are changed by this call as well;
# reload the pickles before running this cell a second time.
train_pr, validation_pr = preprocess_dem(train,validation)

### Define custom image generator

In [362]:
# Label binarizer fitted on the training diagnoses; multi_input_generator
# reads `lb` as a global to turn the dx labels into the network targets.
lb = LabelBinarizer().fit(train_pr["dx"])

def get_input(path):
    """Load one image file as a batch of size one.

    Arguments:
    - path = location of the image file

    The image is resized to 224 x 224 while loading. Returns the pixel array
    with an added leading batch axis, i.e. shape (1, 224, 224, channels).
    """
    # load_img / img_to_array are imported by name in the import cell. The
    # former `image.` prefix referred to a module that is never imported and
    # raised a NameError on a fresh kernel.
    img = load_img(path, target_size=(224, 224))
    img_array = img_to_array(img)
    return np.expand_dims(img_array, axis=0)

def preprocess_input(img):
    """Scale pixel values from the range [0, 255] to [-1, 1].

    Same preprocessing function as mobilenets preprocess input.

    Arguments:
    - img = array-like of pixel values; integer input is converted to float32,
      floating point input keeps its dtype

    Returns a new array. The input is left untouched, so calling the function
    twice on the same image can no longer normalise it twice by accident.
    """
    pixels = np.asarray(img)
    if not np.issubdtype(pixels.dtype, np.floating):
        # in-place true division is not defined for integer arrays
        pixels = pixels.astype("float32")
    # same operation order as before: /255, -0.5, *2
    return (pixels / 255. - 0.5) * 2.

def multi_input_generator(df, batch_size, source_dir):
    """Endlessly yield random batches of images, demographic data and labels.

    Arguments:
    - df = dataframe with the columns dx, aug_id, age and sex
    - batch_size = number of rows drawn (without replacement) for each batch
    - source_dir = either train or validation directory; has to end with a
      path separator and contain one sub-folder per diagnosis

    Yields tuples ([images, demographics], labels):
    - images = preprocessed images of the batch stacked along the first axis
    - demographics = float32 array with the columns age and sex
    - labels = dx transformed by the global label binarizer `lb`, as float32

    Every batch is an independent random sample of df, so a fixed number of
    steps is not guaranteed to visit each row exactly once.

    Raises ValueError (on the first batch) if batch_size is smaller than 1 or
    larger than the number of rows in df.
    """
    if not 1 <= batch_size <= len(df):
        raise ValueError(
            "batch_size must be between 1 and len(df) = %d, got %r" % (len(df), batch_size)
        )

    while True:
        batch = df.sample(n=batch_size, replace=False)

        batch_input1 = []
        batch_input2 = []
        batch_output = []

        # Read in each input, perform preprocessing and get labels.
        # Rows are iterated as plain tuples instead of batch.loc[index]: a
        # label lookup returns several rows when the index holds duplicates,
        # and it would be repeated for every single field.
        rows = batch[["dx", "aug_id", "age", "sex"]].itertuples(index=False, name=None)
        for dx, aug_id, age, sex in rows:
            full_path = source_dir + str(dx) + "/" + str(aug_id)
            batch_input1.append(preprocess_input(get_input(full_path)))
            batch_input2.append([age, sex])
            batch_output.append(dx)

        # every image carries a leading batch axis of length one, so joining
        # them along that axis gives one array for the whole batch
        batch_x1 = np.concatenate(batch_input1, axis=0)
        batch_x2 = np.array(batch_input2, dtype="float32")
        batch_y = lb.transform(np.array(batch_output)).astype("float32")

        # Return a tuple of ([input,input],output) to feed the network
        yield [batch_x1, batch_x2], batch_y

In [375]:
# Show the classes the binarizer learned from train_pr.dx
lb.classes_

array(['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc'], dtype='<U5')

In [363]:
# Endless generator of random training batches with 10 rows each, images read from train_dir
train_batches = multi_input_generator(train_pr,10,train_dir)

In [365]:
# Endless generator of random validation batches with 10 rows each, images read from validation_dir
valid_batches = multi_input_generator(validation_pr,10,validation_dir)

In [369]:
# Draw one batch to check the generator; the cell output is the raw
# ([images, demographics], labels) tuple with the full arrays.
next(train_batches)

([array([[[[ 0.9843137 ,  0.8352941 ,  0.99215686],
           [ 0.99215686,  0.84313726,  1.        ],
           [ 1.        ,  0.8509804 ,  1.        ],
           ...,
           [ 0.64705884,  0.4039216 ,  0.5294118 ],
           [ 0.6313726 ,  0.38823533,  0.5137255 ],
           [ 0.6313726 ,  0.38823533,  0.5137255 ]],
  
          [[ 0.9843137 ,  0.8352941 ,  0.99215686],
           [ 0.99215686,  0.84313726,  1.        ],
           [ 1.        ,  0.8509804 ,  1.        ],
           ...,
           [ 0.6627451 ,  0.41960788,  0.54509807],
           [ 0.654902  ,  0.41176474,  0.5372549 ],
           [ 0.654902  ,  0.41176474,  0.5372549 ]],
  
          [[ 0.99215686,  0.84313726,  1.        ],
           [ 0.99215686,  0.84313726,  1.        ],
           [ 1.        ,  0.8509804 ,  1.        ],
           ...,
           [ 0.6862745 ,  0.4431373 ,  0.5686275 ],
           [ 0.6784314 ,  0.43529415,  0.56078434],
           [ 0.6784314 ,  0.43529415,  0.56078434]],
  
    

## Combine models

### Setup model input

In [376]:
# Dataset sizes and batch configuration.
# The batch sizes should match the value passed to multi_input_generator.
num_train_samples = len(train_pr)
num_val_samples = len(validation_pr)
train_batch_size = 10
val_batch_size = 10
image_size = 224

# Number of batches per epoch, rounded up. np.ceil returns a float, but Keras
# expects integer step counts, hence the explicit int().
train_steps = int(np.ceil(num_train_samples / train_batch_size))
val_steps = int(np.ceil(num_val_samples / val_batch_size))

In [392]:
# Load the saved image model without compiling it; only its layers are reused below
comb = models.load_model('model.h5', compile=False)

In [393]:
# Take the output of the third layer from the end of the loaded model as the
# image feature vector.
# NOTE(review): the index -3 depends on the architecture stored in model.h5 - confirm it
# still points at the pooling layer when that file changes.
cnn_output = comb.layers[-3].output # global average pooling layer

In [386]:
# Second model input: the two demographic features (age, sex) of each sample
input_dem = Input(shape=(2,))

In [395]:
# Join the demographic input and the CNN image features into one tensor
merge_layer = layers.concatenate([input_dem, cnn_output])

In [396]:
# Classification head on top of the merged image + demographic features
x = Dense(128, activation="relu")(merge_layer)
x = Dropout(0.25)(x)
x = Dense(32, activation="relu")(x)
# One output unit per class known to the label binarizer (7 diagnoses for this
# data) instead of a hard-coded 7, so the softmax width always matches the
# label arrays produced with lb.transform.
prediction_layer = Dense(len(lb.classes_), activation='softmax')(x)