<a href="https://colab.research.google.com/github/markustoivonen/AIHealthTech2020/blob/master/project/models/dense169.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
import tensorflow as tf
import os
import glob
from subprocess import getoutput
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import (ModelCheckpoint, TensorBoard)
from keras.applications.densenet import preprocess_input
from keras.models import load_model
import warnings
from keras.layers import Dense, Flatten
import keras
warnings.filterwarnings('ignore')  # Ignore python warnings

import numpy as np
from sklearn.utils import class_weight


# Mount Google Drive (Colab only); the dataset and model paths used later in
# this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Define the Dense169 model

In [3]:
from keras.applications.densenet import DenseNet169
from keras.layers import Dense
from keras import optimizers
from keras.models import Model


def get_dense169(input_shape, learning_rate):
    """Build and compile a DenseNet169-based binary classifier.

    Args:
        input_shape: Side length in pixels of the square RGB input; the base
            network is built for inputs of shape (input_shape, input_shape, 3).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model made of an ImageNet-initialised
        DenseNet169 (without its classification top), a ``Flatten`` layer and
        a single sigmoid unit. It is compiled with binary cross-entropy loss
        and reports accuracy.
    """
    # create the base pre-trained model
    dense_169_model = DenseNet169(include_top=False, weights='imagenet', input_shape=(input_shape, input_shape, 3))
    model = keras.Sequential([dense_169_model, Flatten(), Dense(1, activation='sigmoid')])

    # Fine-tune the whole backbone rather than only the new output layer.
    for layer in dense_169_model.layers:
        layer.trainable = True

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    adam = optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

### Define hyperparams & create architecture

In [4]:
# Side length (pixels) of the square model input; also used as target_size
# by the data generators.
input_shape = 320
# Number of images per batch produced by the data generators.
batch_size = 8
# Number of epochs passed to fit().
# NOTE(review): the recorded training output further down shows "Epoch 1/2",
# so it was produced with a different value than the one set here.
epochs = 10
# Learning rate handed to get_dense169.
learning_rate = 0.0001

# Build and compile the network with the settings above.
dense169_mura_single = get_dense169(input_shape, learning_rate)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5


### Load data

#### Training data

In [19]:
# Training images are augmented with random rotations and horizontal flips.
# The generator below reads subset='training', i.e. everything that is NOT
# held out by validation_split. With validation_split=0 all images are used;
# set it to 0.95 or greater to train on only a small fraction of the data.
train_datagen = ImageDataGenerator(
    validation_split=0,
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=30)

train_generator = train_datagen.flow_from_directory(
    directory='/content/drive/MyDrive/MURA-v1.2/train_data/',
    subset='training',
    class_mode='binary',
    batch_size=batch_size,
    target_size=(input_shape, input_shape))

training_data_size = len(train_generator.filenames)
print("Number of Training examples: ", training_data_size)

Found 37 images belonging to 2 classes.
Number of Training examples:  37


#### Validation data and test data

In [20]:
# Shared seed for both generators (intended to keep the validation and test
# subsets from overlapping).
seed = 1025

# Validation and test images are both drawn from the 'valid_data' folder.
# validation_split is the fraction that goes to subset="validation", which is
# used as the TEST set here; the remainder (subset="training") is used as the
# validation set. According to the original note, 0.3 gives roughly 2k / 1k
# (valid/test) images; the current value of 0.99 leaves only a handful of
# images for validation.
valid_and_test_datagen = ImageDataGenerator(
    validation_split=0.99,
    preprocessing_function=preprocess_input)

# Options common to both generators; only `subset` differs between them.
valid_test_flow_options = dict(
    directory='/content/drive/MyDrive/MURA-v1.2/valid_data',
    target_size=(input_shape, input_shape),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
    seed=seed)

valid_generator = valid_and_test_datagen.flow_from_directory(
    subset="training", **valid_test_flow_options)

test_generator = valid_and_test_datagen.flow_from_directory(
    subset="validation", **valid_test_flow_options)

validation_data_size = len(valid_generator.filenames)
test_data_size = len(test_generator.filenames)
print("Number of Validation examples: ", validation_data_size)
print("Number of Test examples: ", test_data_size)

Found 33 images belonging to 2 classes.
Found 3164 images belonging to 2 classes.
Number of Validation examples:  33
Number of Test examples:  3164


### Class weights

In [14]:
# 'balanced' class weights computed from the training labels; they are passed
# to fit() as class_weight. Keyword arguments are used because newer
# scikit-learn versions no longer accept these parameters positionally.
weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes)

# If the training set is only of 1 class, the weights will be wrong
# This could happen if we use a too small dataset for tinkering purposes
if len(weights) == 1:
  weights = [1, 1]

[1.23825503 0.83863636]


### Saving models

In [26]:
# Folder on Google Drive where checkpoints are stored. It is created here if
# it does not exist yet, so no manual setup of 'models/dense169' is needed.
base_filepath = "/content/drive/MyDrive/MURA-v1.2/models/dense169"
os.makedirs(base_filepath, exist_ok=True)

# The checkpoint file name records the epoch number and validation accuracy.
filepath= base_filepath+"/dense169-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
# Save a checkpoint only when the validation accuracy improves.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

### Train the model

Note: SKIP THIS CELL IF YOU HAVE A TRAINED MODEL ALREADY!

In [29]:
# Train with class weighting. `callbacks` is documented as a list of Callback
# instances, so the single checkpoint callback is wrapped in a list.
# The floor division for the step counts means a trailing partial batch is
# not used in an epoch.
dense169_mura_single.fit(train_generator,
                        validation_data=valid_generator,
                        steps_per_epoch=training_data_size // batch_size,
                        class_weight={0:weights[0], 1:weights[1]},
                        callbacks=[checkpoint],
                        validation_steps=validation_data_size // batch_size,
                        epochs=epochs)

Epoch 1/2
Epoch 00001: val_accuracy improved from -inf to 0.62500, saving model to /content/drive/MyDrive/MURA-v1.2/models/dense169/dense169-improvement-01-0.62.hdf5
Epoch 2/2
Epoch 00002: val_accuracy did not improve from 0.62500


<tensorflow.python.keras.callbacks.History at 0x7f790e488da0>

### Evaluate model

#### Load the trained model

In [38]:
def get_latest_model(path, extensions=(".hdf5", ".h5")):
    """Return the path of the most recently created model file in ``path``.

    This assumes that the latest model is the best one. Only files whose name
    ends with one of ``extensions`` are considered, so other files kept in the
    same folder (for example the ``*_results.txt`` reports this notebook
    writes next to the model) are never mistaken for a model.

    Args:
        path: Directory that contains the saved model files.
        extensions: File-name suffixes that identify model files.

    Returns:
        The full path of the matching file with the greatest creation/change
        time (``os.path.getctime``).

    Raises:
        ValueError: If ``path`` contains no file with a matching extension.
    """
    suffixes = tuple(extensions)
    candidates = [
        os.path.join(path, basename)
        for basename in os.listdir(path)
        if basename.endswith(suffixes)
    ]
    if not candidates:
        raise ValueError(
            "No model files ending with %s found in %r" % (suffixes, path))
    return max(candidates, key=os.path.getctime)
# Pick the newest file in the checkpoint folder and load it as the model used
# for evaluation; model_fp is reused later to name the results file.
print("Loading model")
model_fp = get_latest_model(base_filepath)
model = load_model(model_fp)
print("Model loaded")

Loading model
Model loaded


#### Generate predictions

In [43]:
# Run the loaded model over the whole test generator and report the
# wall-clock time the prediction took.
import time
start = time.time()
predictions = model.predict(test_generator)
end = time.time()
print("It took: ", end - start, "seconds to create predictions for the test set.")

It took:  1020.4360239505768


In [62]:
# Study-type markers that are searched for in each test image's file path.
ELBOW_STUDY = "ELBOW"
FINGER_STUDY = "FINGER"
FOREARM_STUDY = "FOREARM"
HAND_STUDY = "HAND"
HUMERUS_STUDY = "HUMERUS"
SHOULDER_STUDY = "SHOULDER"
WRIST_STUDY = "WRIST"

# One pair of (predicted, true) label lists per study type.
elbow_pred, elbow_true = [], []
finger_pred, finger_true = [], []
forearm_pred, forearm_true = [], []
hand_pred, hand_true = [], []
humerus_pred, humerus_true = [], []
shoulder_pred, shoulder_true = [], []
wrist_pred, wrist_true = [], []

# Turn the model outputs into hard labels and fetch the matching ground truth.
predictions = np.round(predictions.flatten())
y_true = test_generator.classes
filenames = test_generator.filenames

# Checked in this order; the first marker found in the file name wins.
study_buckets = (
    (ELBOW_STUDY, elbow_pred, elbow_true),
    (FINGER_STUDY, finger_pred, finger_true),
    (FOREARM_STUDY, forearm_pred, forearm_true),
    (HAND_STUDY, hand_pred, hand_true),
    (HUMERUS_STUDY, humerus_pred, humerus_true),
    (SHOULDER_STUDY, shoulder_pred, shoulder_true),
    (WRIST_STUDY, wrist_pred, wrist_true),
)

for index, filename in enumerate(filenames):
    for study_marker, pred_bucket, true_bucket in study_buckets:
        if study_marker in filename:
            pred_bucket.append(predictions[index])
            true_bucket.append(y_true[index])
            break

In [76]:
from sklearn.metrics import roc_auc_score, confusion_matrix, cohen_kappa_score, f1_score


def write_all_metrics(y_true, y_pred):
    """Format binary-classification metrics for one group of samples as text.

    Args:
        y_true: Sequence of ground-truth labels (0 or 1).
        y_pred: Sequence of predicted labels, same length as ``y_true``.
            NOTE(review): the callers in this notebook pass rounded 0/1
            predictions, so the ROC AUC is computed from hard labels rather
            than from predicted probabilities.

    Returns:
        A multi-line string with the ROC AUC, sensitivity, specificity,
        Cohen's kappa and F1 score. A ratio whose denominator is zero, and
        the ROC AUC when ``y_true`` holds a single class, are reported as
        ``nan``. If there are no samples at all, a short notice is returned
        instead of the metrics.
    """
    def safe_ratio(numerator, denominator):
        """Return numerator / denominator, or nan for a zero denominator."""
        return numerator / denominator if denominator else float("nan")

    if len(y_true) == 0:
        return "No samples available."

    # roc_auc_score raises ValueError when only one class is present in
    # y_true, which can happen for small subsets of the data.
    if len(set(y_true)) > 1:
        auc = roc_auc_score(y_true, y_pred)
    else:
        auc = float("nan")

    # Fixing the label set guarantees a 2x2 matrix (four values to unpack)
    # even when one of the classes is missing from this group.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    results = ""
    results += "roc_auc_score: "+str(auc)
    results += "\nSensitivity: "+ str(safe_ratio(tp, tp + fn))
    results += "\nSpecificity: " + str(safe_ratio(tn, tn + fp))
    results += "\nCohen-Cappa-Score: " + str(cohen_kappa_score(y_true, y_pred))
    results += "\nF1 Score: " +str (f1_score(y_true, y_pred))
    return results




In [80]:
# Name the report after the model file: strip only the file extension.
# Splitting on every "." breaks here because the directory name "MURA-v1.2"
# contains a dot, which cut the accuracy out of the file name (e.g.
# "...-01-0_results.txt" instead of "...-01-0.62_results.txt").
results_filename = os.path.splitext(model_fp)[0] + "_results.txt"

# (section title, true labels, predicted labels) for every study type, in the
# order the sections are written.
study_results = (
    ("ELBOW", elbow_true, elbow_pred),
    ("FINGER", finger_true, finger_pred),
    ("FOREARM", forearm_true, forearm_pred),
    ("HAND", hand_true, hand_pred),
    ("HUMERUS", humerus_true, humerus_pred),
    ("SHOULDER", shoulder_true, shoulder_pred),
    ("WRIST", wrist_true, wrist_pred),
)

with open(results_filename, 'w') as f:
  for study_name, study_true, study_pred in study_results:
    f.write("\n===== " + study_name + " ======\n")
    f.write(write_all_metrics(study_true, study_pred))