In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv3D, MaxPooling3D
from tensorflow.keras import backend as K

Using TensorFlow backend.


In [2]:
# Read the two preprocessed NumPy files from disk: first the array of volumes,
# then the per-sample class labels. Their shapes are printed in the next cell.
large_array, class_array = (
    np.load(npy_path)
    for npy_path in (
        "/Users/katyadunets/Desktop/processed_3D_array.npy",
        "/Users/katyadunets/Desktop/classes.npy",
    )
)

In [3]:
# Sanity check: show the dimensions of the loaded volumes and of the labels.
for loaded in (large_array, class_array):
    print(loaded.shape)

(5166, 40, 40, 40)
(5166,)


In [4]:
# Hold out 20% of the samples as the test set.
# - random_state pins the shuffle so that "Restart & Run All" reproduces the
#   same partition, making the reported loss/accuracy/AUC comparable run to run.
# - stratify keeps the class proportions of `class_array` the same in the
#   train and test partitions, which matters when the classes are imbalanced.
x_train, x_test, y_train, y_test = train_test_split(
    large_array,
    class_array,
    test_size=0.20,
    random_state=42,
    stratify=class_array,
)

In [5]:
# Per-sample shape handed to the first Conv3D layer: a 40x40x40 volume with a
# single trailing channel.
input_shape = (40, 40, 40, 1)

# Reshape each partition to (n_samples, 40, 40, 40, 1) so every sample matches
# `input_shape`; the number of samples is left unchanged.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

In [6]:
# Cast both partitions to 32-bit floats before they are fed to the network.
x_train, x_test = (split.astype('float32') for split in (x_train, x_test))

In [8]:
# 3D CNN for binary classification: three Conv3D -> MaxPooling3D -> Dropout
# stages followed by a dense head ending in a single sigmoid unit.
model = Sequential([
    # Stage 1: 64 filters, 3x3x3 kernels; pooling by 3 along each spatial axis.
    Conv3D(64, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape),
    MaxPooling3D(pool_size=(3, 3, 3)),
    Dropout(0.30),

    # Stage 2: 64 filters; pooling by 2.
    Conv3D(64, (3, 3, 3), activation='relu'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.30),

    # Stage 3: 128 filters; pooling by 2.
    Conv3D(128, (3, 3, 3), activation='relu'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.30),

    # Classifier head: flatten, one hidden dense layer with heavier dropout,
    # then one sigmoid output giving a value in (0, 1) per sample.
    Flatten(),
    Dense(300, activation='relu'),
    Dropout(0.50),
    Dense(1, activation='sigmoid'),
])

In [9]:
# Focal loss was attempted but did not improve accuracy, so training uses
# binary cross-entropy; the implementation is kept here for reference.
def focal_loss(gamma=2., alpha=.25):
    """Build a binary focal-loss function.

    Args:
        gamma: Exponent applied to the modulating factor; larger values
            down-weight well-classified samples more strongly.
        alpha: Weight applied to the positive-class term; the negative-class
            term is weighted by ``1 - alpha``.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` that returns the focal
        loss summed over all elements of the batch.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep probabilities strictly inside (0, 1): a saturated sigmoid can
        # emit exactly 0 or 1, and log(0) would make the loss inf/NaN.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)

        # pt_1 holds the prediction where the label is 1 and 1 elsewhere, so
        # the positive term is zero for negative samples (log(1) == 0).
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # pt_0 holds the prediction where the label is 0 and 0 elsewhere, so
        # the negative term is zero for positive samples.
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        positive_term = K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term - negative_term

    return focal_loss_fixed

# Configure training: Adam with its default settings, binary cross-entropy as
# the loss, and accuracy reported as the metric.
adam_optimizer = keras.optimizers.Adam()
model.compile(
    optimizer=adam_optimizer,
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [10]:
# Train for 10 epochs in mini-batches of 32, holding out a quarter of the
# training partition for validation. The call is the last expression in the
# cell, so the returned History object is displayed.
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.25,
    verbose=1,
)

Train on 3099 samples, validate on 1033 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x10e543978>

In [11]:
# Evaluate on the held-out test partition. `score` holds the loss followed by
# the metric the model was compiled with (accuracy).
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.399673682693356
Test accuracy: 0.8346228238692367


In [33]:
# Print the layer-by-layer architecture: layer type, output shape and
# parameter count for each layer of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d (Conv3D)              (None, 38, 38, 38, 64)    1792      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 12, 12, 12, 64)    0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 12, 64)    0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 10, 10, 10, 64)    110656    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 5, 5, 5, 64)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 5, 64)       0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 3, 3, 3, 128)      221312    
__________

In [None]:
from sklearn.metrics import confusion_matrix

# Model outputs for the test partition; rounding turns them into hard 0/1
# labels for the confusion matrix.
predictions = model.predict(x_test)
c = confusion_matrix(y_test, predictions.round())

print('Confusion matrix:\n', c)

# Rows of `c` are the true classes and columns the predicted classes, so with
# label 0 treated as the negative class the four cells are:
true_neg, false_pos = c[0, 0], c[0, 1]
false_neg, true_pos = c[1, 0], c[1, 1]

# Specificity = TN / (FP + TN); sensitivity = TP / (TP + FN).
print('specificity', true_neg / (false_pos + true_neg))
print('sensitivity', true_pos / (true_pos + false_neg))

In [13]:
from sklearn.metrics import roc_auc_score, roc_curve

# Area under the ROC curve, computed from the raw (unrounded) model outputs.
auc = roc_auc_score(y_true=y_test, y_score=predictions)

# Points of the ROC curve itself, kept for saving and plotting further down.
fpr_keras, tpr_keras, thresholds = roc_curve(y_true=y_test, y_score=predictions)

# Display the AUC as the cell output.
auc

0.8715848322800194

In [34]:
# There was an issue importing matplotlib into the TensorFlow environment, so
# the arrays needed for plotting are saved here and were plotted separately in
# Spyder. The plotting code itself is still included in the cell below.
arrays_to_save = (
    ("/Users/katyadunets/Desktop/fpr.npy", fpr_keras),
    ("/Users/katyadunets/Desktop/tpr.npy", tpr_keras),
    ("/Users/katyadunets/Desktop/auc.npy", auc),
    ("/Users/katyadunets/Desktop/y_test.npy", y_test),
    ("/Users/katyadunets/Desktop/predictions.npy", predictions),
)
for npy_path, values in arrays_to_save:
    np.save(npy_path, values)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn

# --- ROC curve ---------------------------------------------------------------
# Uses the arrays computed by roc_curve earlier in this notebook
# (`fpr_keras` / `tpr_keras`); the bare names `fpr` / `tpr` are never defined
# here and would raise a NameError.
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
plt.plot(fpr_keras, tpr_keras,
         label='ROC Curve (area = {:.3f})'.format(auc), color='red')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()

# --- Confusion matrix heatmap ------------------------------------------------
c = confusion_matrix(y_true=y_test, y_pred=predictions.round())

# Draw on its own figure so the heatmap never lands on the ROC axes.
plt.figure(2)
# fmt='d' prints the integer counts as-is; the default format would render
# larger counts in scientific notation (e.g. 7.1e+02).
sn.heatmap(c, annot=True, fmt='d')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()