In [290]:
import matplotlib.pyplot as plt
#from PIL import Image
import matplotlib.image as mpimg
import numpy as np
#from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras import layers, models
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow.metrics
import pandas as pd
from sklearn.model_selection import train_test_split
import os, shutil, random
from sklearn.metrics import confusion_matrix
import itertools
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MinMaxScaler
pd.options.mode.chained_assignment = None  # default='warn'

## Load Metadata

In [282]:
# Load the pickled metadata table from the notebook's working directory.
# NOTE: unpickling can execute arbitrary code - only load a meta.pkl from a trusted source.
meta = pd.read_pickle("./meta.pkl")

In [18]:
# Root folder of the image data.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
source_dir = "/Users/leona/PythonWork/Github_Projects/Final_Pro/data/ISIC2018_Task3_Training_Input/"

# Split folders, defined here because the data-generator cell reads them before
# the later cell that assigns the same values. Trailing slash kept on purpose:
# other cells build sub-paths by string concatenation.
train_dir = source_dir + "train/"
validation_dir = source_dir + "validation/"

### Setup model input

In [25]:
# Sample counts and batch geometry used by the data generators and training.
num_train_samples = len(train_data)
num_val_samples = len(val_data)
train_batch_size = 10
val_batch_size = 10
image_size = 224  # images are loaded as image_size x image_size

# Number of batches needed to cover every sample once; the last batch may be
# partial, hence the ceil. np.ceil returns a float, so cast to int because
# step counts must be integers.
train_steps = int(np.ceil(num_train_samples / train_batch_size))
val_steps = int(np.ceil(num_val_samples / val_batch_size))

## Transfer learning with pre-trained models

### Setup MobileNet

In [213]:
# A single generator serves every split; its only transformation is MobileNet's
# own input preprocessing.
datagen = ImageDataGenerator(
    preprocessing_function=tensorflow.keras.applications.mobilenet.preprocess_input
)

train_batches = datagen.flow_from_directory(
    directory=train_dir,
    target_size=(image_size, image_size),
    batch_size=train_batch_size,
)

valid_batches = datagen.flow_from_directory(
    directory=validation_dir,
    target_size=(image_size, image_size),
    batch_size=val_batch_size,
)

# Evaluation iterator: one image per batch and shuffle=False, so images are
# yielded in a fixed order.
# NOTE(review): this reads validation_dir again, i.e. the same images as valid_batches.
test_batches = datagen.flow_from_directory(
    directory=validation_dir,
    target_size=(image_size, image_size),
    batch_size=1,
    shuffle=False,
)

Found 38890 images belonging to 7 classes.
Found 1103 images belonging to 7 classes.
Found 1103 images belonging to 7 classes.


In [156]:
# Decide how much of the network is fine-tuned.

# Every layer except the last 23 is frozen (trainable = False);
# only the last 23 layers of the model remain trainable.
# NOTE(review): confirm the model is compiled after this cell so the flags take effect.

for layer in model.layers[:-23]:
    layer.trainable = False

# Merge CNN output with demographic data 

### Load demographic features for all images (augmented images included)

In [58]:
# Distinct diagnosis labels in the metadata; they are passed to get_imagelist
# as the class sub-folder names.
diagnose = list(meta['dx'].unique())

# Derive both split folders from source_dir instead of repeating the absolute
# path. The resulting strings (including the trailing slash) are unchanged.
train_dir = source_dir + "train/"
validation_dir = source_dir + "validation/"

In [285]:
def get_imagelist(classes,path):
    """Collect the file names found in each class sub-folder of ``path``.

    Parameters
    ----------
    classes : iterable of str
        Sub-folder names (one per class) located directly under ``path``.
    path : str
        Directory containing the class sub-folders; a trailing separator
        is optional.

    Returns
    -------
    list of str
        Bare file names (no directory part) in one flat list, grouped in the
        order of ``classes`` and sorted alphabetically within each class.

    Raises
    ------
    FileNotFoundError
        If a class sub-folder does not exist.
    """
    # os.path.join rather than ``path + c``: plain concatenation silently builds
    # a wrong path when ``path`` has no trailing separator. sorted() makes the
    # result reproducible because os.listdir returns entries in arbitrary order.
    return [
        name
        for c in classes
        for name in sorted(os.listdir(os.path.join(path, c)))
    ]

In [286]:
# Flat lists of the image file names under the train and validation folders,
# gathered across all diagnosis sub-folders.
train_list = get_imagelist(diagnose,train_dir)
val_list = get_imagelist(diagnose,validation_dir)

In [29]:
# Keep the raw ids as a plain list, and store each id without its first five
# characters in img_nr (the column later used as the merge key).
img_id = meta['image_id'].tolist()
meta['img_nr'] = meta['image_id'].str[5:]

In [31]:
# Create helper function to extract pure image_id's
def extract_id(x):
    """Return the image number (up to seven characters) cut out of a file name.

    A name containing "ISIC" yields characters 5-11, i.e. whatever follows a
    five-character prefix; any other name yields its first seven characters.
    """
    return x[5:12] if "ISIC" in x else x[:7]

In [32]:
# One row per file name in aug_list, plus the extracted image number.
# NOTE(review): aug_list is not defined in the cells visible here - confirm where it comes from.
aug_img = pd.DataFrame(aug_list, columns=['aug_id']).assign(
    img_nr=lambda frame: frame['aug_id'].apply(extract_id)
)

In [64]:
# One row per validation file name, plus the extracted image number.
val_img = pd.DataFrame(val_list, columns=['aug_id']).assign(
    img_nr=lambda frame: frame['aug_id'].apply(extract_id)
)

In [45]:
# Attach the metadata to every file listed in aug_img via the shared img_nr key.
# Inner join: rows without a match on either side are dropped; sort=True orders
# the result by img_nr. All remaining merge options stay at their defaults.
complete_train = aug_img.merge(meta, how='inner', on='img_nr', sort=True)

In [294]:
# Attach the metadata to every file listed in val_img via the shared img_nr key.
# Inner join: rows without a match on either side are dropped; sort=True orders
# the result by img_nr. All remaining merge options stay at their defaults.
complete_test = val_img.merge(meta, how='inner', on='img_nr', sort=True)

#### Pickle both dataframes

In [295]:
#complete_train.to_pickle("./aug_train.pkl")
#complete_test.to_pickle("./aug_test.pkl")

### Missing values, min-max scaling and binary encoding

In [288]:
def preprocess_dem(train,test):
    """Prepare the demographic columns ``age`` and ``sex`` of both frames.

    Every statistic (age median, age min/max, most frequent sex label) is
    taken from ``train`` only and then applied to both frames.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames with an ``age`` column (may contain NaN) and a ``sex`` column
        of raw string labels such as "female" or "unknown".
        Both frames are modified in place.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The same ``train`` and ``test`` objects. ``age`` has missing values
        filled with the training median and is min-max scaled with a scaler
        fitted on ``train``; ``sex`` is 1 for "female" and 0 otherwise, with
        "unknown" first replaced by the most frequent training label.

    Notes
    -----
    Not idempotent: calling it again on already-encoded frames finds no
    "female" labels and sets every ``sex`` value to 0.
    """
    # Replace missing ages with the training median.
    age_median = train['age'].median()
    train['age'] = train['age'].fillna(age_median)
    test['age'] = test['age'].fillna(age_median)

    # Min-max scale age. The scaler is fitted on train (which therefore lands
    # in [0, 1]) and only applied to test. np.ravel turns the (n, 1) result
    # back into a plain column.
    cs = MinMaxScaler()
    train["age"] = np.ravel(cs.fit_transform(train["age"].values.reshape(-1,1)))
    test["age"] = np.ravel(cs.transform(test["age"].values.reshape(-1,1)))

    # Most frequent raw label in train. It must be read BEFORE ``sex`` is
    # encoded: afterwards the column holds 0/1, the "mode" would be an integer,
    # and every "unknown" in test would fall through to 0.
    sex_mode = train['sex'].value_counts().index[0]

    # Assign the result back instead of calling replace(inplace=True) on a
    # column selection, which is not guaranteed to write through to the frame.
    train["sex"] = np.where(train['sex'].replace("unknown", sex_mode)=="female",1,0)
    test["sex"] = np.where(test['sex'].replace("unknown", sex_mode)=="female",1,0)

    # return the processed training and testing frames
    return (train, test)