# PPMI uncertainty quantification

_(c) Fraunhofer MEVIS/UKE 2019_

This experiment classifies PPMI data with respect to the given PD/HC class (PD: Parkinson's disease, HC: healthy control). In addition, the data has been visually graded into three classes: certain PD, certain HC, and uncertain.

In addition, two SBR (specific binding ratio) values are given for each case (columns `putSBR_HV` and `putSBR_AAL`), calculated with standard methods.


## Set up the data

In [1]:
import tensorflow as tf
import pandas as pd
import seaborn as sns
import os
import numpy as np
from pathlib import Path

from tensorflow.keras import layers
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm


# Record the library versions and GPU availability this notebook was run with.
for _label, _version in (('TF:', tf.__version__), ('Keras:', tf.keras.__version__)):
    print(_label, _version)
print('Have GPU:', tf.test.is_gpu_available())

TF: 2.0.0-rc0
Keras: 2.2.4-tf
Have GPU: True


In [2]:
# Locations of the image data and of the project directory that holds the CSV
# tables. The defaults are the original workstation paths; set the environment
# variables PPMI_DATA_ROOT / PPMI_PROJECT_ROOT to run the notebook on another
# machine without editing this cell.
DATA_ROOT = os.environ.get('PPMI_DATA_ROOT', 'Z:/Data/Parkinson_DATScans UKE/full_ppmi_data/png/all_2d')
PROJECT_ROOT = os.environ.get('PPMI_PROJECT_ROOT', "C:/Users/mharz/Documents/Projects/DeepLearning/2019_ParkinsonPPMI-UKE/")
# Table with class, visual score and SBR values per PPMI ID; read relative to
# PROJECT_ROOT.
VISUAL_SCORE_CSV = 'PPMI_visual_score_03Oct2019.csv'
# NOTE(review): the two names below are not used by the cells shown here;
# presumably they are also relative to PROJECT_ROOT - confirm.
SBR_CUTOFF_DIST_CSV = 'PPMI_SBR_distance_to_cutoff.csv'
SBR_CUTOFF_DIST_MIXED_CSV = ''  # no file name assigned yet

In [3]:
# Resolve the configured locations and load the per-patient score table.
project_root_path = Path(PROJECT_ROOT)
data_root_path = Path(DATA_ROOT)
ppmi_df = pd.read_csv(project_root_path / VISUAL_SCORE_CSV)

# Collect every PNG below the data root (recursively). The paths are sorted
# because the order returned by glob() depends on the file system, and the
# column is created by name so an empty result cannot surface later as an
# obscure KeyError on 'fname'.
png_paths = sorted(data_root_path.glob('**/*.png'))
if not png_paths:
    raise FileNotFoundError('No PNG files found below {}'.format(data_root_path))
files_df = pd.DataFrame({'fname': [str(path) for path in png_paths]})


In [4]:
def _lookup_ppmi_value(ID, column):
    """Return the value of ``column`` in the first ``ppmi_df`` row matching ``ID``.

    Args:
        ID: PPMI ID as an int or as a string that ``int()`` can convert.
        column: name of the ``ppmi_df`` column to read.

    Returns:
        The entry of ``column`` for the first row whose 'PPMI-ID' equals
        ``int(ID)``.

    Raises:
        ValueError: if ``ID`` cannot be converted by ``int()``.
        KeyError: if ``column`` is not a column of ``ppmi_df``.
        IndexError: if no row has this ID. The exception type is the one the
            plain ``.values[0]`` lookup raises; the message names the ID.
    """
    # Single .loc call with row mask and column label (no chained indexing).
    matches = ppmi_df.loc[ppmi_df['PPMI-ID'] == int(ID), column]
    if matches.empty:
        raise IndexError('PPMI-ID {} not found in ppmi_df'.format(ID))
    return matches.values[0]


def get_class_for(ID):
    """Return the class label 'HC' or 'PD' for the given PPMI ID.

    The 'group(HC=0,PD=1)' column is read; 0 maps to 'HC', any other value
    maps to 'PD'.
    """
    group = _lookup_ppmi_value(ID, 'group(HC=0,PD=1)')
    return 'HC' if group == 0 else 'PD'


def get_visual_result_for(ID):
    """Return the visual grading for the given PPMI ID.

    The value comes from the column whose header encodes it as
    reduced=-1, normal=1, uncertain=0.
    """
    # The header spelling ('uncertian') is the one used in the CSV.
    return _lookup_ppmi_value(ID, 'visually(reduced=-1,normal=1,uncertian=0)')


def get_SBR_HV(ID):
    """Return the 'putSBR_HV' value for the given PPMI ID."""
    return _lookup_ppmi_value(ID, 'putSBR_HV')


def get_SBR_AAL(ID):
    """Return the 'putSBR_AAL' value for the given PPMI ID."""
    return _lookup_ppmi_value(ID, 'putSBR_AAL')


In [5]:
# The PPMI ID is the second-to-last '_'-separated token of the file path.
files_df['PPMI-ID'] = files_df['fname'].apply(lambda fname: fname.split('_')[-2])
# Attach the class label, the visual inspection result and both SBR values
# from ppmi_df; every lookup function takes the PPMI ID as its only argument.
for _column, _lookup in (
    ('class', get_class_for),
    ('visual_result', get_visual_result_for),
    ('SBR_HV', get_SBR_HV),
    ('SBR_AAL', get_SBR_AAL),
):
    files_df[_column] = files_df['PPMI-ID'].apply(_lookup)


In [6]:
# Sample from the groups.
# Every patient has a smoothed and an unsmoothed variant; the two must not be
# separated across train/validation/test, so the data is split patient-wise
# (one group per PPMI ID) instead of file-wise.
SPLIT_SEED = 2019  # fixed seed: the same patients land in the same subset on every run
groups = [df for _, df in files_df.groupby('PPMI-ID')]  # one dataframe per patient

groups_idx = np.arange(len(groups))
# A local generator makes the split reproducible without touching NumPy's
# global random state. One permutation of the group indices is cut into
# 80 % train, 10 % validation and the remainder as test.
split_rng = np.random.RandomState(SPLIT_SEED)
shuffled_idx = split_rng.permutation(groups_idx)
n_train = int(0.8 * len(groups))
n_val = int(0.1 * len(groups))
train_idx = shuffled_idx[:n_train]
val_idx = shuffled_idx[n_train:n_train + n_val]
test_idx = shuffled_idx[n_train + n_val:]
# The three subsets must be disjoint and together cover every patient.
assert len(set(train_idx) | set(val_idx) | set(test_idx)) == len(groups)

In [7]:
# Pick the per-patient dataframes belonging to each subset ...
train_groups, val_groups, test_groups = (
    [groups[position] for position in subset_idx.tolist()]
    for subset_idx in (train_idx, val_idx, test_idx)
)
# ... and stack them into one dataframe per subset with a fresh 0..n-1 index.
train_df, val_df, test_df = (
    pd.concat(frames).reset_index(drop=True)
    for frames in (train_groups, val_groups, test_groups)
)

## Set up the data source

In [8]:
# Number of images per batch; passed to every data flow below.
BATCH_SIZE = 32

def create_datagen():
    """Return an ImageDataGenerator with horizontal flipping and 1/255 rescaling."""
    generator_options = dict(horizontal_flip=True, rescale=1./255.)
    return ImageDataGenerator(**generator_options)

def create_test_gen():
    """Return the flow over ``test_df``: rescaled only and in fixed order.

    A fresh generator without flipping is used, and shuffling is disabled so
    that the batches follow the row order of ``test_df``.
    """
    return create_flow(ImageDataGenerator(rescale=1./255.), test_df, shuffle=False)


def create_flow(datagen, dataframe, shuffle=True):
    """Create a batch iterator over the images listed in ``dataframe``.

    Args:
        datagen: ImageDataGenerator that loads and transforms the images.
        dataframe: frame with the file path in 'fname' and the label in 'class'.
        shuffle: passed through to ``flow_from_dataframe``. The default True
            is also the Keras default, so existing two-argument calls behave
            as before.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``, configured
        with categorical labels, target size (109, 91) and ``BATCH_SIZE``
        images per batch.
    """
    return datagen.flow_from_dataframe(
        dataframe,
        x_col='fname',
        y_col='class',  # add other columns for more involved models.
        class_mode='categorical',
        target_size=(109, 91),
        batch_size=BATCH_SIZE,
        shuffle=shuffle
    )

# Training images are augmented (random horizontal flips) by the shared generator.
data_generator = create_datagen()
train_gen = create_flow(data_generator, train_df)
# Validation uses a rescale-only generator: random flips would change the
# validation images from epoch to epoch and make val_loss, which selects the
# saved checkpoint, depend on the augmentation draw.
val_gen = create_flow(ImageDataGenerator(rescale=1./255.), val_df)
test_gen = create_test_gen()

Found 1032 validated image filenames belonging to 2 classes.
Found 128 validated image filenames belonging to 2 classes.
Found 130 validated image filenames belonging to 2 classes.


## Set up the model

In [None]:
# NOTE(review): in the cells shown here, efn is only referenced by the
# commented-out EfficientNetB0 line below, yet the import still requires the
# efficientnet package to be installed.
import efficientnet.tfkeras as efn
# Alternative backbone, currently disabled:
#base_model = efn.EfficientNetB0(weights='imagenet', include_top=False, input_shape=(109,91,3))
# Active backbone: MobileNetV2 with ImageNet weights, without its
# classification top, for inputs of shape (109, 91, 3).
base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=(109,91,3))

# The functional API is used so that the layers of the base model stay inspectable.
def build_model():
    """Attach a two-class softmax head to the module-level ``base_model``.

    The backbone output is reduced by global average pooling and fed into a
    dense layer with two softmax units.

    Returns:
        A ``Model`` mapping ``base_model.input`` to the two class probabilities.
    """
    pooled = layers.GlobalAveragePooling2D()(base_model.output)
    # A hidden head was left disabled by the author:
    #    Dense(512, selu) -> Dropout(0.3) -> Dense(256, selu) -> Dropout(0.3)
    class_probs = layers.Dense(2, activation='softmax')(pooled)
    return Model(inputs=base_model.input, outputs=class_probs)

# Instantiate the classifier defined by build_model() in this cell.
model = build_model()

In [None]:

# Small stand-alone CNN that does not use the pretrained base model.
# NOTE: this cell re-uses the name build_model, so running it replaces the
# definition from the previous cell.
def build_model(input_shape=(109, 91, 3), kernel_size=3):
    """Build a minimal convolutional classifier with two softmax outputs.

    Architecture: Conv2D(64, relu, same padding) -> global average pooling ->
    Dense(256, selu) -> Dropout(0.3) -> Dense(2, softmax).

    Args:
        input_shape: shape of one input image. The default has three channels
            because the data flows in this notebook are created without a
            ``color_mode`` argument and therefore yield RGB batches; pass
            ``(109, 91, 1)`` only together with grayscale flows.
        kernel_size: size of the convolution window. Conv2D requires it; the
            original call omitted the argument, so 3 is a chosen default.

    Returns:
        A ``Model`` from the new input layer to the class probabilities.
    """
    # Keep a handle on the input tensor: the model must be built from it, not
    # from base_model.input, which is not connected to these layers.
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(filters=64, kernel_size=kernel_size, activation='relu', padding='same')(inputs)
    x = layers.GlobalAveragePooling2D()(x)
    # Disabled by the author: an extra Dense(512, selu) + Dropout(0.3) stage.
    x = layers.Dense(256, activation='selu')(x)
    x = layers.Dropout(rate=0.3)(x)
    preds = layers.Dense(2, activation='softmax')(x)

    return Model(inputs=inputs, outputs=preds)

# Rebind `model` to the network built by the build_model() defined in this cell.
model = build_model()

In [None]:
# The data flows deliver one-hot labels (class_mode='categorical') and the
# models end in a two-unit softmax, so categorical cross-entropy is the
# matching loss. `learning_rate` is the current name of the Adam step-size
# argument; `lr` is only a deprecated alias.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy']
)

In [None]:
# Write the complete model (not only its weights) to disk, keeping just the
# best epoch as judged by the monitored validation loss.
checkpoint = ModelCheckpoint(
    filepath='model_mobilenetv2.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    verbose=0
)
# TensorBoard logging. The callback does not work on Windows currently (path
# issues, cf. https://github.com/tensorflow/tensorboard/issues/2279 ).
# Options that were tried and left disabled: histogram_freq=1, write_grads=True.
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=Path('tb_logs/model_mobilenetv2/')
)

In [None]:
# Train for 10 epochs. len() of a flow is its number of batches per pass
# (an integer, rounded up), which replaces the fractional n / BATCH_SIZE.
history = model.fit_generator(
    train_gen,
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    callbacks=[checkpoint],  # the TensorBoard callback is deliberately not passed
    epochs=10,
    max_queue_size=4,
    workers=4,
    use_multiprocessing=False
)
# Accumulate the metrics over repeated runs of this cell. Only the
# "history_df does not exist yet" case (first run) is handled; any other
# error is allowed to surface instead of silently resetting the history.
# pd.concat replaces DataFrame.append, which newer pandas versions no longer
# provide; ignore_index keeps the epoch index running across runs.
epoch_history = pd.DataFrame(history.history)
try:
    history_df = pd.concat([history_df, epoch_history], ignore_index=True)
except NameError:
    history_df = epoch_history


In [None]:
# Learning curves, one figure each: training vs. validation loss, then
# training vs. validation accuracy.
history_df.loc[:, ['loss', 'val_loss']].plot()
history_df.loc[:, ['accuracy', 'val_accuracy']].plot()
