# Model 5 - NN model

## Prediction of radiotherapy plan violation from spatial arrangement of target and organ at risk structures using deep learning

_By Phillip Hungerford,  University of New South Wales_

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
cd drive

[Errno 2] No such file or directory: 'drive'
/content/drive/My Drive/Dissertation/5_Code/1_code


In [17]:
cd My Drive

[Errno 2] No such file or directory: 'My Drive'
/content/drive/My Drive/Dissertation/5_Code/1_code


In [18]:
cd Dissertation/5_Code/1_code

[Errno 2] No such file or directory: 'Dissertation/5_Code/1_code'
/content/drive/My Drive/Dissertation/5_Code/1_code


In [19]:
ls

 3D_medical_visualisation_script.py   Model5_ANN.ipynb
 best_NN.hdf5                         original-voxel-model.ipynb
 DataPreparation.ipynb                plot.png
 data_prep.py                         PointNetBasic.ipynb
'EDA&PreProcessing.ipynb'             [0m[01;34m__pycache__[0m/
 keras-test.ipynb                     tf_util.py
'Model1_PointNetFull (1).ipynb'       train.py
 Model2_PointNetBasic.ipynb           voxel-model.ipynb
 Model3_PointNetBasic_l.ipynb         voxels.ipynb


In [0]:
# Import dependencies
import os
import time
import random
import numpy as np
import tensorflow as tf
from numpy.random import seed
import matplotlib.pyplot as plt
from keras.optimizers import SGD
from keras.models import Sequential
from tensorflow import set_random_seed
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
from keras.layers import Dense, MaxPooling1D, Convolution1D, Dropout, Flatten, BatchNormalization
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
# fix random seed for reproducibility
seeds=42
random.seed(seeds)
seed(seeds)
set_random_seed(seeds)

In [21]:
# Load 3140 data that excludes the body
X = np.load('../2_pipeline/3140-xyz.npy')
y = np.load('../2_pipeline/labels.npy')

#split data into 1: train+validation set and 2: test set 
X_train_val, X_test, y_train_val, y_test = \
train_test_split(X, y, random_state=0, test_size=0.2)

# split train+validation set into 1a) training and 1b) validation sets
X_train, X_val, y_train, y_val = \
train_test_split(X_train_val, y_train_val, random_state=1, test_size=0.2)

#from keras.utils import to_categorical
#y_test = to_categorical(y_test)
#y_train = to_categorical(y_train)

print('Training shape is: ', X_train.shape)
print('Validation shape is: ', X_val.shape)
print('Test shape is: ', X_test.shape)

Training shape is:  (182, 3140, 3)
Validation shape is:  (46, 3140, 3)
Test shape is:  (58, 3140, 3)


In [0]:
num_points=3140
max_epochs=25
batch_size=32
dropout_rate = 0.7
opt = 'adam'
#opt = SGD(lr=0.0001, momentum=0.9)

# Class weights
class_weight = {0: 0.2, 1: 0.8}

In [25]:
# Import `Sequential` from `keras.models`
from keras.models import Sequential

# Import `Dense` from `keras.layers`
from keras.layers import Dense

# Initialize the constructor
model = Sequential()

# Add an input layer 
model.add(Dense(1024, activation='relu', input_shape=(num_points,3)))
model.add(Dropout(rate=0.5))
model.add(BatchNormalization())

# Add one hidden layer 
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(BatchNormalization())

#flatten
model.add(Flatten())

# Add an output layer
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 3140, 1024)        4096      
_________________________________________________________________
dropout_7 (Dropout)          (None, 3140, 1024)        0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 3140, 1024)        4096      
_________________________________________________________________
dense_11 (Dense)             (None, 3140, 512)         524800    
_________________________________________________________________
dropout_8 (Dropout)          (None, 3140, 512)         0         
_________________________________________________________________
batch_normalization_8 (Batch (None, 3140, 512)         2048      
_________________________________________________________________
flatten_4 (Flatten)          (None, 1607680)           0         
__________

In [26]:
checkpoint = ModelCheckpoint('best_NN.hdf5', monitor='val_loss', 
                             verbose=1, save_best_only=True)
earlystopping = EarlyStopping(verbose=1, patience=3)

# Fit the model
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=max_epochs,\
                    shuffle=True, verbose=1, validation_data=(X_val, y_val),\
                    class_weight=class_weight, \
                    callbacks=[checkpoint, earlystopping])

Train on 182 samples, validate on 46 samples
Epoch 1/25


ResourceExhaustedError: ignored

In [0]:

################################################################################
# Classification Report

# make predictions on the test set
y_pred = model.predict(X_test)

################################################################################
from sklearn.metrics import accuracy_score, confusion_matrix
print("\n###################### Model Performance ############################")
# evaluate the model
_, train_acc = model.evaluate(X_train, y_train, verbose=0)
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
print('\nTrain: %.3f, Test: %.3f' % (train_acc, test_acc))
print("\n#####################################################################")
################################################################################

import matplotlib.pyplot as plt
# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

################################################################################

# plot accuracy during training
plt.subplot(212)
plt.title('Accuracy')
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='test')
plt.legend()
plt.show()

################################################################################
print("\n#####################################################################")
# Create the confusion matrix
ann_cm = confusion_matrix(y_true = y_test, y_pred = y_pred.round())
print("\nOur test confusion matrix yields: ")
print(ann_cm)
print("\n#####################################################################")

#Classification report
ann_report = classification_report(y_test, y_pred.round())
print("\nClassfication Report for test:\n", ann_report)
print("\n#####################################################################")

#Calculate AUC score
ann_auc = roc_auc_score(y_test, y_pred)
print("\nOur testing AUC for ann is: ", ann_auc)

# calculate the fpr and tpr for all thresholds of the classification
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
roc_auc = metrics.auc(fpr, tpr)

import matplotlib.pyplot as plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print("\n#####################################################################")