In [None]:
# Mount Google Drive at /content/drive so that later cells can read crops.zip
# (the archive of 256x128 cropped images) and the label CSV files from it.
# The google.colab package is provided by the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Prepare the environment before training the model
!unzip -q "/content/drive/My Drive/Colab Notebooks/crops.zip"
!cp "/content/drive/My Drive/Colab Notebooks/truePos.csv" truePos.csv
!cp "/content/drive/My Drive/Colab Notebooks/trueNeg.csv" trueNeg.csv
!cp "/content/drive/My Drive/Colab Notebooks/ett.csv" ett.csv
!cp "/content/drive/My Drive/Colab Notebooks/db-mimic-frontal-only.csv" db-mimic-frontal-only.csv

In [None]:
# Import libraries
import tensorflow as tf
from tensorflow import losses, optimizers
from tensorflow.keras import Input, Model, models, layers, backend
import numpy as np, pandas as pd

In [None]:
# Create the images and labels for training and testing
import glob #file operations
import pickle as pkl #save/load arrays into/from files
import numpy as np #matrix operations
import pdb #python debugging

def create_train_test_data(crossValFold):
  """Build flattened image arrays and 0/1 labels for one cross-validation fold.

  Every ``.npy`` file found recursively under ``./crops`` is a candidate. A
  candidate is kept only when its image ID (file name without ``.npy``) occurs
  in ``./db-mimic-frontal-only.csv`` and its squeezed array is exactly 256x128.

  Label precedence for a kept image:
    1. image ID occurs in ``./truePos.csv``                     -> 1
    2. image ID occurs in ``./trueNeg.csv``                     -> 0
    3. ``"<patientID>,<studyID>"`` occurs in ``./ett.csv``      -> 1, else 0

  All lookups are plain substring searches over the raw text of each file.

  Args:
    crossValFold: 1-based fold number. Images whose path contains
      ``/p<10 + 2*(crossValFold-1)>/`` or ``/p<11 + 2*(crossValFold-1)>/``
      go to the test set; every other kept image goes to the training set.

  Returns:
    Tuple ``(x_train, y_train, x_test, y_test)`` of NumPy arrays. Each row of
    an ``x`` array is one flattened image; each ``y`` entry is its 0/1 label.
    Files are processed in sorted path order, so row order is reproducible.
  """
  def read_text(path):
    # Return the whole file as one string for substring lookups
    with open(path, 'r') as handle:
      return handle.read()

  # Read the lookup files once
  truePosContent = read_text("./truePos.csv")
  trueNegContent = read_text("./trueNeg.csv")
  ettContent = read_text("./ett.csv")
  frontalContent = read_text("./db-mimic-frontal-only.csv")

  # For each cross validation fold, 2 folders are used for test; the rest for train
  testFolder1 = "/p" + str(10+2*(crossValFold-1)) + "/"
  testFolder2 = "/p" + str(11+2*(crossValFold-1)) + "/"
  print("Test Folders: " + testFolder1.replace("/", "") + "-" + testFolder2.replace("/", "") + "\n")

  images = {'train': [], 'test': []}
  labels = {'train': [], 'test': []}

  # Sorted so that the sample order does not depend on filesystem enumeration order
  for imageFile in sorted(glob.glob('./crops/**/*.npy', recursive=True)):
    # Image ID is the file name without the .npy extension
    imgID = imageFile[imageFile.rfind("/") + 1:imageFile.rfind(".npy")]

    # Skip non-frontal images before paying for the array load
    if imgID not in frontalContent:
      continue

    # Load the image and drop single-dimensional axes
    img = np.squeeze(np.load(imageFile))

    # Keep only exact (256, 128) arrays. Comparing the whole shape also rejects
    # arrays of other ranks, which would otherwise raise IndexError (rank < 2)
    # or produce rows of a different length (rank > 2).
    if img.shape != (256, 128):
      continue

    # Patient and study IDs come from the last "/p..." and "/s..." path
    # components. NOTE(review): assumes a .../p<patient>/s<study>/<image>.npy
    # layout and image names that do not start with "p" or "s".
    patientID = imageFile[imageFile.rfind("/p") + 2:imageFile.rfind("/s")]
    studyID = imageFile[imageFile.rfind("/s") + 2:imageFile.rfind("/")]

    # Manually reviewed lists take precedence over the study-level ETT list
    if imgID in truePosContent:
      label = 1
    elif imgID in trueNegContent:
      label = 0
    else:
      label = 1 if (patientID + "," + studyID) in ettContent else 0

    # Route to the test or train split based on the image path
    split = 'test' if (testFolder1 in imageFile or testFolder2 in imageFile) else 'train'

    # Store the image flattened so each sample occupies one row
    images[split].append(img.flatten())
    labels[split].append(label)

  # Convert lists to np arrays
  x_train = np.array(images['train'])
  y_train = np.array(labels['train'])
  x_test = np.array(images['test'])
  y_test = np.array(labels['test'])

  # Print training image count
  print("Train Total: " + str(len(y_train)))
  print("Train Positive: " + str(np.count_nonzero(y_train)))
  print("Train Negative: " + str(np.count_nonzero(y_train==0)) + "\n")

  # Print test image count
  print("Test Total: " + str(len(y_test)))
  print("Test Positive: " + str(np.count_nonzero(y_test)))
  print("Test Negative: " + str(np.count_nonzero(y_test==0)) + "\n")

  return x_train, y_train, x_test, y_test

In [None]:
# Create the images and labels for training only to build the full model
import glob #file operations
import pickle as pkl #save/load arrays into/from files
import numpy as np #matrix operations
import pdb #python debugging

def create_train_data():
  """Build flattened image arrays and 0/1 labels from the whole data set.

  Every ``.npy`` file found recursively under ``./crops`` is a candidate. A
  candidate is kept only when its image ID (file name without ``.npy``) occurs
  in ``./db-mimic-frontal-only.csv`` and its squeezed array is exactly 256x128.
  No images are held out: everything kept is returned as training data.

  Label precedence for a kept image:
    1. image ID occurs in ``./truePos.csv``                     -> 1
    2. image ID occurs in ``./trueNeg.csv``                     -> 0
    3. ``"<patientID>,<studyID>"`` occurs in ``./ett.csv``      -> 1, else 0

  All lookups are plain substring searches over the raw text of each file.

  Returns:
    Tuple ``(x_train, y_train)`` of NumPy arrays. Each row of ``x_train`` is
    one flattened image; each ``y_train`` entry is its 0/1 label. Files are
    processed in sorted path order, so row order is reproducible.
  """
  def read_text(path):
    # Return the whole file as one string for substring lookups
    with open(path, 'r') as handle:
      return handle.read()

  # Read the lookup files once
  truePosContent = read_text("./truePos.csv")
  trueNegContent = read_text("./trueNeg.csv")
  ettContent = read_text("./ett.csv")
  frontalContent = read_text("./db-mimic-frontal-only.csv")

  # This function takes no fold argument, so there are no test folders to
  # compute; the previous reference to an undefined `crossValFold` here raised
  # NameError on every call.

  images_train = []
  labels_train = []

  # Sorted so that the sample order does not depend on filesystem enumeration order
  for imageFile in sorted(glob.glob('./crops/**/*.npy', recursive=True)):
    # Image ID is the file name without the .npy extension
    imgID = imageFile[imageFile.rfind("/") + 1:imageFile.rfind(".npy")]

    # Skip non-frontal images before paying for the array load
    if imgID not in frontalContent:
      continue

    # Load the image and drop single-dimensional axes
    img = np.squeeze(np.load(imageFile))

    # Keep only exact (256, 128) arrays. Comparing the whole shape also rejects
    # arrays of other ranks, which would otherwise raise IndexError (rank < 2)
    # or produce rows of a different length (rank > 2).
    if img.shape != (256, 128):
      continue

    # Patient and study IDs come from the last "/p..." and "/s..." path
    # components. NOTE(review): assumes a .../p<patient>/s<study>/<image>.npy
    # layout and image names that do not start with "p" or "s".
    patientID = imageFile[imageFile.rfind("/p") + 2:imageFile.rfind("/s")]
    studyID = imageFile[imageFile.rfind("/s") + 2:imageFile.rfind("/")]

    # Manually reviewed lists take precedence over the study-level ETT list
    if imgID in truePosContent:
      label = 1
    elif imgID in trueNegContent:
      label = 0
    else:
      label = 1 if (patientID + "," + studyID) in ettContent else 0

    # Store the image flattened so each sample occupies one row
    images_train.append(img.flatten())
    labels_train.append(label)

  # Convert lists to np arrays
  x_train = np.array(images_train)
  y_train = np.array(labels_train)

  # Print training image count
  print("Train Total: " + str(len(y_train)))
  print("Train Positive: " + str(np.count_nonzero(y_train)))
  print("Train Negative: " + str(np.count_nonzero(y_train==0)) + "\n")

  return x_train, y_train

In [None]:
# Report the positive/negative image counts for fold 1 and preview random training images
import numpy as np
import pylab as py

# Build the fold-1 split; the call itself prints the per-class image counts
x_train, y_train, x_test, y_test = create_train_test_data(1)

# Draw 16 randomly chosen training images on a 4x4 grid and print each label
fig = py.figure()
for i in range(16):
  imIndex = np.random.randint(x_train.shape[0])
  # Rows are stored flattened, so restore the 256x128 image before drawing
  im = x_train[imIndex].reshape(256, 128)
  ax = fig.add_subplot(4, 4, i + 1)
  ax.imshow(im)
  ax.axis('off')
  print(y_train[imIndex])

In [None]:
# --- Prepare
def prepare_model(inputs):
  """Assemble the convolutional classifier that emits two 'ett' logits.

  The network is a stack of seven stages. Each stage applies a number of
  stride-1 blocks followed by one stride-(2, 2) block, where a block is
  Conv2D (3x3 kernel, 'same' padding) -> BatchNormalization -> LeakyReLU.
  The final feature map is flattened and fed to a 2-unit Dense layer.

  Args:
    inputs: Dict of Keras input tensors; the image tensor is read from
      ``inputs['dat']``.

  Returns:
    A Keras ``Model`` whose inputs are ``inputs`` and whose outputs are a dict
    with the single key ``'ett'`` holding the Dense(2) output.
  """
  def conv_block(tensor, filters, strides):
    # One Conv2D -> BatchNormalization -> LeakyReLU unit
    tensor = layers.Conv2D(filters=filters, strides=strides, kernel_size=(3, 3), padding='same')(tensor)
    tensor = layers.BatchNormalization()(tensor)
    return layers.LeakyReLU()(tensor)

  # (filters, number of stride-1 blocks) for each contracting stage
  stages = ((48, 5), (56, 4), (64, 4), (80, 3), (96, 3), (112, 2), (128, 2))

  # --- Define contracting layers
  features = inputs['dat']
  for filters, depth in stages:
    for _ in range(depth):
      features = conv_block(features, filters, strides=1)
    # Every stage ends with a single stride-2 block
    features = conv_block(features, filters, strides=(2, 2))

  # --- Flatten
  flat = layers.Flatten()(features)

  # --- Create logits
  logits = {'ett': layers.Dense(2, name='ett')(flat)}

  # --- Create model
  return Model(inputs=inputs, outputs=logits)

In [None]:
# --- Validate
import math
import datetime
from sklearn.metrics import roc_auc_score

def test_model(fold, x_test, y_test):
  """Evaluate the notebook-level ``model`` on a test set.

  Runs ``model.predict`` on the test images, prints timing, ROC AUC and the
  confusion counts, and returns them.

  Args:
    fold: Cross-validation fold index. Accepted for call-site compatibility;
      it is not used by the evaluation.
    x_test: NumPy array of flattened test images; reshaped here to
      ``(n, 256, 128, 1)`` before prediction.
    y_test: 0/1 ground-truth labels aligned with the rows of ``x_test``.

  Returns:
    Tuple ``(tpCnt, tnCnt, fpCnt, fnCnt, auc)``: the confusion counts as
    Python ints and the ROC AUC as returned by ``roc_auc_score``.
  """
  y_true = np.asarray(y_test)
  totalCnt = len(y_true)

  # Set start time
  startTime = datetime.datetime.now().time()

  # Predict with the model bound to the global name `model`
  logits = model.predict(x=x_test.reshape(x_test.shape[0], 256, 128, 1))

  # The model may return a dict keyed by output name or the bare array
  if isinstance(logits, dict):
    logits = logits['ett']
  predictions = np.argmax(logits, axis=1)

  # Set end time
  endTime = datetime.datetime.now().time()

  # Print start and end times
  print("\nStart Time: " + str(startTime))
  print("End Time: " + str(endTime))
  print("Item Count: " + str(totalCnt))

  # Rank by the logit margin (class 1 minus class 0). The softmax probability
  # of class 1 is sigmoid(margin), so the margin gives the same ranking as the
  # probability and agrees with the argmax decision above; the raw class-1
  # logit on its own does not.
  scores = logits[:, 1] - logits[:, 0]

  # Calculate ROC AUC score
  auc = roc_auc_score(y_true, scores)
  print('\nROC AUC=%.3f' % (auc))

  # Confusion counts: a prediction is "true" when it equals the label and
  # "positive" when the predicted class is 1
  correct = predictions == y_true
  predictedPositive = predictions == 1
  tpCnt = int(np.count_nonzero(correct & predictedPositive))
  tnCnt = int(np.count_nonzero(correct & ~predictedPositive))
  fpCnt = int(np.count_nonzero(~correct & predictedPositive))
  fnCnt = int(np.count_nonzero(~correct & ~predictedPositive))

  # Print TP, TN, FP, and FN
  print("\nTP: " + str(tpCnt) + " TN: " + str(tnCnt) + " FP: " + str(fpCnt) + " FN: " + str(fnCnt) + "\n")

  return tpCnt, tnCnt, fpCnt, fnCnt, auc

In [None]:
#--------------------------------------------
# Train and test with 5-fold cross validation
#--------------------------------------------

# Number of cross-validation folds; also the divisor for the average AUC
N_FOLDS = 5

# Learning-rate schedule as (learning rate, number of single-epoch fits).
# The model is re-compiled, with a new Adam optimizer, each time the rate changes.
LR_SCHEDULE = [(0.0005, 3), (0.00005, 1), (0.000005, 1)]

# Initialize total counts
tpTotal = 0
tnTotal = 0
fpTotal = 0
fnTotal = 0
aucTotal = 0

# Loop over the cross validation folds
for fold in range(N_FOLDS):
  print('fold: ' + str(fold))

  # Create model inputs
  inputs = {}
  inputs['dat'] = Input(shape=(256, 128, 1))

  # Prepare the model (test_model reads it through the global name `model`)
  model = prepare_model(inputs)

  # Create train and test data for the cross validation fold (folds are 1-based)
  x_train, y_train, x_test, y_test = create_train_test_data(fold + 1)

  # Reshape the flattened rows into image tensors once per fold
  train_images = x_train.reshape(x_train.shape[0], 256, 128, 1)

  # Train through the learning-rate schedule, one epoch per fit call
  epoch = 1
  for lr, fit_count in LR_SCHEDULE:
    # Compile the model for the current learning rate
    model.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        loss={'ett': losses.SparseCategoricalCrossentropy(from_logits=True)},
        metrics={'ett': 'sparse_categorical_accuracy'})

    for _ in range(fit_count):
      # Print learning-rate and epoch
      print('learning-rate: ' + str(lr))
      print('epoch: ' + str(epoch))

      # Train the model
      model.fit(
          x=train_images,
          y=y_train,
          batch_size=12,
          steps_per_epoch=None,
          epochs=1)

      # Increment epoch
      epoch += 1

  # Test the model
  tpCnt, tnCnt, fpCnt, fnCnt, auc = test_model(fold, x_test, y_test)

  # Add to totals
  tpTotal += tpCnt
  tnTotal += tnCnt
  fpTotal += fpCnt
  fnTotal += fnCnt
  aucTotal += auc

  # Release the model and the data at the end of each fold
  del model
  del train_images
  del x_train
  del y_train
  del x_test
  del y_test

  # Drop the Keras global state left behind by this fold's model so that
  # memory use does not grow from fold to fold
  backend.clear_session()

# Print Total TP, TN, FP, and FN
print("Total TP: " + str(tpTotal) + " Total TN: " + str(tnTotal) + " Total FP: " + str(fpTotal) + " Total FN: " + str(fnTotal) + "\n")
print("Calculate accuracy, sensitivity, and specificity by using these values at https://www.medcalc.org/calc/diagnostic_test.php\n")
print("Avg AUC: " + str(aucTotal/N_FOLDS) + "\n")

In [None]:
#--------------------------------------------------
# Generate a model file from the whole training set
#--------------------------------------------------

# Learning-rate schedule as (learning rate, number of single-epoch fits).
# The model is re-compiled, with a new Adam optimizer, each time the rate changes.
LR_SCHEDULE = [(0.0005, 3), (0.00005, 1), (0.000005, 1)]

# Create model inputs
inputs = {}
inputs['dat'] = Input(shape=(256, 128, 1))

# Prepare the model
model = prepare_model(inputs)

# Create train data to build a model
x_train, y_train = create_train_data()

# Reshape the flattened rows into image tensors once
train_images = x_train.reshape(x_train.shape[0], 256, 128, 1)

# Train through the learning-rate schedule, one epoch per fit call
epoch = 1
for lr, fit_count in LR_SCHEDULE:
  # Compile the model for the current learning rate
  model.compile(
      optimizer=optimizers.Adam(learning_rate=lr),
      loss={'ett': losses.SparseCategoricalCrossentropy(from_logits=True)},
      metrics={'ett': 'sparse_categorical_accuracy'})

  for _ in range(fit_count):
    # Print learning-rate and epoch
    print('learning-rate: ' + str(lr))
    print('epoch: ' + str(epoch))

    # Train the model
    model.fit(
        x=train_images,
        y=y_train,
        batch_size=12,
        steps_per_epoch=None,
        epochs=1)

    # Increment epoch
    epoch += 1

In [None]:
# --- Save the model to a file
# Writes the model held in the global `model` to the working directory.
# NOTE(review): the .hdf5 extension implies the HDF5 format; whether the
# installed Keras version accepts this extension should be confirmed.
model.save('./cnn_2_ett_classification.hdf5')

# Copy the model to google drive so it outlives the Colab runtime's local disk
!cp ./cnn_2_ett_classification.hdf5 '/content/drive/My Drive/Colab Notebooks/cnn_2_ett_classification.hdf5'

In [None]:
# Copy the model file from google drive into the working directory
!cp '/content/drive/My Drive/Colab Notebooks/cnn_2_ett_classification.hdf5' ./cnn_2_ett_classification.hdf5

# Load the model from the file and rebind the global `model` to it.
# compile=False skips compiling the loaded model, so it comes back without an
# optimizer, loss or metrics attached; call model.compile(...) before training it further.
from tensorflow.keras import models as tfModels
model = tfModels.load_model('./cnn_2_ett_classification.hdf5', compile=False)