[<img src="https://colab.research.google.com/assets/colab-badge.svg">](https://colab.research.google.com/github/mtwenzel/parkinson-classification/blob/master/PPMI-InceptionV3.ipynb)

# Classifying PPMI DAT scans into Parkinson's Disease and Healthy Controls

Licensed under [this](LICENSE) license.

This notebook shows how we performed the experiment to fine-tune the Inception V3 classifier to distinguish patients with and without signs of Parkinson's disease.

The notebook is optimized to work with Google Colab.

It is part of the publication 
> Publication reference and [link](Link) to be inserted after publishing.

The data are a derivative of the DAT scans available from the [PPMI repository](https://www.ppmi-info.org/access-data-specimens/download-data/). Roughly, the central 5 slices of the putamen were averaged into a single slice per scan. The data were then split randomly into a training and a validation set. As we tested the performance on an independent test set drawn from clinical routine, which cannot be published, this notebook does not contain testing of the trained classifier.

The data as used in the publication can be downloaded here:

If you want to run the notebook from Google Colab, put the data into your Google Drive, and adapt the path below.

In [None]:
#@title Connect Google Drive if running in Colab. {display-mode:'form'}
#@markdown You will be asked to navigate to a Google site to log in and allow access to your drive.

# Detect Colab by probing for its helper package. Only ImportError is caught,
# so unrelated failures (and KeyboardInterrupt) are no longer silently
# misreported as "not running in Colab".
try:
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# Outside Colab nothing is mounted.
if IN_COLAB:
    drive.mount('/content/drive')

In [None]:
#@title Check GPU availability {display-mode: 'form'}
import tensorflow as tf

# Training is only attempted when TensorFlow reports this exact device name;
# any other answer is treated as "no GPU" and aborts the cell.
EXPECTED_GPU_NAME = '/device:GPU:0'

device_name = tf.test.gpu_device_name()
if device_name == EXPECTED_GPU_NAME:
    print('Found GPU at:', device_name)
    # Blank line, heading and underline emitted in a single call.
    print('', 'Detailed information:', '---------------------', sep='\n')
    # Imported only once a GPU has been confirmed; lists every local device.
    from tensorflow.python.client import device_lib
    print(device_lib.list_local_devices())
else:
    raise SystemError('GPU device not found')

In [None]:
#@title Imports. {display-mode:'form'}
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Input,Dense,GlobalAveragePooling2D,Flatten,concatenate,BatchNormalization, Dropout
from tensorflow.keras.applications import InceptionV3,DenseNet121
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

# Visualize the Train/Val loss
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
#@title Set the data generators. {display-mode:'form', run: "auto"}
#@markdown Data augmentation choices.
shear_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
zoom_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
width_shift_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
height_shift_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
rotation_range = 10 #@param {type:"slider", min:0, max:90, step:5}
horizontal_flip = True #@param {type:"boolean"}
vertical_flip = False #@param {type:"boolean"}
#@markdown Data source (find your Google Drive path on the left in Colab!)
data_directory = 'z:/Data/Parkinson_DATScans UKE/full_ppmi_data/png/' #@param ['z:/Data/Parkinson_DATScans UKE/full_ppmi_data/png/', '/content/drive/My Drive/MEVIS/Data/PPMI-classification/'] {allow-input: true}

# Augmentation settings chosen in the form above; applied to training data only.
augmentation_settings = {
    'shear_range': shear_range,
    'zoom_range': zoom_range,
    'width_shift_range': width_shift_range,
    'height_shift_range': height_shift_range,
    'rotation_range': rotation_range,
    'horizontal_flip': horizontal_flip,
    'vertical_flip': vertical_flip,
}

# Options shared by the training and the validation directory iterators.
# target_size matches the spatial part of the input_shape the pretrained
# networks are built with in the model cell.
flow_settings = {
    'target_size': (109, 91),
    'color_mode': 'rgb',
    'batch_size': 64,
    'class_mode': 'categorical',
    'shuffle': True,
}

# Training data: pixel values rescaled to [0, 1] plus the augmentations above.
# Images are read from the 'all_2d_train' sub-folder of data_directory.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_settings)
train_generator = train_datagen.flow_from_directory(
    os.path.join(data_directory, 'all_2d_train'),
    **flow_settings)

# Validation data: rescaling only, no data augmentation!
# Images are read from the 'all_2d_val' sub-folder of data_directory.
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_directory(
    os.path.join(data_directory, 'all_2d_val'),
    **flow_settings)

In [None]:
#@title Set up the pretrained model, and add dense layers. {display-mode:'form', run: "auto"}
#@markdown Set up the trainable dense layers. Further options include BatchNorm (provides regularization), DropOut (also, for normalization, but should not be used together with BatchNorm), and GlobalAveragePooling as an alternative to simple flattening. Did not work well in our experiments.
first_dense_layer_neurons  = 1024 #@param {type:"integer"}
second_dense_layer_neurons = 256 #@param {type:"integer"}
use_global_average_pooling = False #@param {type:"boolean"}
use_batch_norm             = True #@param {type:"boolean"}
use_drop_out               = False  #@param {type:"boolean"}
pretrained_model           = 'Inception V3' #@param ["Inception V3", "DenseNet 121"]
optimizer                  = 'adam' #@param ['adam', 'adagrad', 'adadelta', 'sgd'] {allow-input: true}

# Choose the ImageNet-pretrained backbone; anything other than 'Inception V3'
# selects DenseNet121. The original classification top is left out.
backbone_class = InceptionV3 if pretrained_model == 'Inception V3' else DenseNet121
base_model = backbone_class(weights='imagenet', include_top=False, input_shape=(109,91,3))

# Reduce the backbone output with either global average pooling or plain flattening.
reduction_layer = GlobalAveragePooling2D() if use_global_average_pooling else Flatten()
x = reduction_layer(base_model.output)


def _regularize(tensor):
    """Apply the optional BatchNormalization and Dropout(0.5) stages, in that order."""
    if use_batch_norm:
        tensor = BatchNormalization()(tensor)
    if use_drop_out:
        tensor = Dropout(rate=0.5)(tensor)
    return tensor


# Two hidden ReLU layers, each preceded by the optional regularization stages.
for units in (first_dense_layer_neurons, second_dense_layer_neurons):
    x = _regularize(x)
    x = Dense(units, activation='relu')(x)

# Output stage: optional regularization, then a 2-unit softmax layer.
x = _regularize(x)
preds = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=preds)

In [None]:
#@title First pass: train added dense layers {display-mode:'form'}
#@markdown Set up the trainable parameters
#@markdown First train only the top layers (which were randomly initialized), i.e. freeze all convolutional InceptionV3 layers

# Freeze every layer of the pretrained backbone; only the added head is trained.
for layer in base_model.layers:
    layer.trainable = False

# One of the four listed optimizer names is passed to Keras as a string;
# any other value falls back to SGD with a small learning rate and momentum.
if optimizer in ['adam', 'adagrad', 'adadelta', 'sgd']: # standard settings
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics = ['accuracy']) # categorical crossentropy would also do...
else:
    from tensorflow.keras.optimizers import SGD
    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='binary_crossentropy', metrics = ['accuracy'])

# Count each weight once. De-duplication is done on object identity rather than
# with set(): TensorFlow 2 variables are unhashable under eager execution, so
# set(model.trainable_weights) raises TypeError there.
unique_trainable = {id(w): w for w in model.trainable_weights}.values()
unique_non_trainable = {id(w): w for w in model.non_trainable_weights}.values()
trainable_count = int(np.sum([K.count_params(w) for w in unique_trainable]))
non_trainable_count = int(np.sum([K.count_params(w) for w in unique_non_trainable]))

print('Total params: {:,}'.format(trainable_count + non_trainable_count))
print('Trainable params: {:,}'.format(trainable_count))
print('Non-trainable params: {:,}'.format(non_trainable_count))

# Only full batches are used: the step counts are floor(n / batch_size).
history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=train_generator.n//train_generator.batch_size,
                              epochs=100, # Originally, 500 epochs!
                              validation_data=val_generator,
                              validation_steps=val_generator.n//val_generator.batch_size)

In [None]:
#@title Plot train and validation loss/accuracy {display-mode:'form'}

# Loss curves of the first training pass.
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('First pass: loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

# Depending on the version of TF/Keras, the metric is stored as either 'acc' or
# 'accuracy'; use whichever key this run actually recorded instead of failing
# with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key], label='train acc')
plt.plot(history.history['val_' + acc_key], label='val acc')
plt.title('First pass: accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
#@title Fine-tune last convolutional layers {display-mode:'form'}
#@markdown The top layers should have converged, and it would be good practice to fine-tune the top convolutional layers. Print the layers with indices. We chose to fine-tune the top 2 inception blocks, i.e. we will freeze the first 249 layers and unfreeze the rest. Afterwards, the model needs to be recompiled. Note that this will not change the trained parameters.

# List the backbone layers with their indices so the cut-off below can be checked.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Keep the first 249 layers frozen and train everything after them.
# NOTE(review): per the text above, 249 was chosen for Inception V3; it
# presumably needs re-choosing when DenseNet 121 is selected - confirm.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# Recompile so the changed trainable flags take effect; SGD with a small
# learning rate and momentum is used for this pass.
from tensorflow.keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])

# Count each weight once. De-duplication is done on object identity rather than
# with set(): TensorFlow 2 variables are unhashable under eager execution, so
# set(model.trainable_weights) raises TypeError there.
unique_trainable = {id(w): w for w in model.trainable_weights}.values()
unique_non_trainable = {id(w): w for w in model.non_trainable_weights}.values()
trainable_count = int(np.sum([K.count_params(w) for w in unique_trainable]))
non_trainable_count = int(np.sum([K.count_params(w) for w in unique_non_trainable]))

print('Total params: {:,}'.format(trainable_count + non_trainable_count))
print('Trainable params: {:,}'.format(trainable_count))
print('Non-trainable params: {:,}'.format(non_trainable_count))

# Only full batches are used: the step counts are floor(n / batch_size).
history_finetune = model.fit_generator(generator=train_generator,
                                       steps_per_epoch=train_generator.n//train_generator.batch_size,
                                       epochs=100,
                                       validation_data=val_generator,
                                       validation_steps=val_generator.n//val_generator.batch_size)

In [None]:
#@title Plot train and validation loss/accuracy {display-mode:'form'}
# Loss curves of the fine-tuning pass.
plt.plot(history_finetune.history['loss'], label='train loss')
plt.plot(history_finetune.history['val_loss'], label='val loss')
plt.title('Fine-tuning: loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

# The accuracy metric is stored under 'acc' or 'accuracy' depending on the
# TF/Keras version; use whichever key this run actually recorded instead of
# failing with a KeyError.
finetune_acc_key = 'acc' if 'acc' in history_finetune.history else 'accuracy'
plt.plot(history_finetune.history[finetune_acc_key], label='train acc')
plt.plot(history_finetune.history['val_' + finetune_acc_key], label='val acc')
plt.title('Fine-tuning: accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()