In [1]:
import os
import pandas as pd
import math
import numpy as np
import tensorflow as tf

from datetime import datetime
from keras.models import Sequential
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras import backend as K
from keras import regularizers
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from matplotlib.pyplot import imshow
from PIL import Image
from sklearn.metrics import roc_auc_score

# Sanity check before training: ask the backend which GPU devices it can see
# (the saved cell output lists one GPU device).
# NOTE(review): `_get_available_gpus` is a private helper of the TensorFlow
# backend module, so it may not exist in other Keras versions — confirm before upgrading.
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

In [13]:
# Image pipeline used when flowing the label dataframe: pixel values are scaled
# by 1/255, random horizontal flips are enabled, and 20% of the rows are held
# out as the 'validation' subset.
datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    validation_split=0.2,
)

# Blank cells in the training CSV stand for 0, so fill them in. The image paths
# are then prefixed with '../' before being stored back in the "Path" column.
train_labels = (
    pd.read_csv("../CheXpert-v1.0-small/train.csv")
    .fillna(0)
    .assign(Path=lambda labels: '../' + labels["Path"])
)

In [3]:
# Two models are planned (one for lateral views, one for frontal views); this
# notebook keeps only the frontal images.
# Rows whose "Pleural Effusion" label is uncertain (-1.0) are dropped as well.
# There are no uncertain labels in the validation dataset.
frontal_train_labels = (
    train_labels
    .loc[lambda labels: labels['Frontal/Lateral'] == 'Frontal']
    .loc[lambda labels: labels["Pleural Effusion"] != -1.0]
)

In [4]:
# Preview the first rows of the filtered (frontal, no uncertain "Pleural Effusion") training labels.
frontal_train_labels.head()

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,../CheXpert-v1.0-small/train/patient00001/stud...,Female,68,Frontal,AP,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,../CheXpert-v1.0-small/train/patient00002/stud...,Female,83,Frontal,AP,0.0,0.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,../CheXpert-v1.0-small/train/patient00003/stud...,Male,41,Frontal,AP,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,../CheXpert-v1.0-small/train/patient00004/stud...,Female,20,Frontal,PA,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,../CheXpert-v1.0-small/train/patient00005/stud...,Male,33,Frontal,PA,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [5]:
# NOTE(review): no earlier cell in this notebook defines
# `frontal_validation_labels`, so on a fresh kernel this cell raised NameError.
# It is rebuilt here the same way the training labels are prepared: prefix the
# image paths with '../' and keep frontal views only.
# The CSV location is inferred from the `valid/` paths visible in this cell's
# saved output — confirm it matches the local CheXpert layout.
frontal_validation_labels = (
    pd.read_csv("../CheXpert-v1.0-small/valid.csv")
    .assign(Path=lambda labels: '../' + labels["Path"])
    .loc[lambda labels: labels['Frontal/Lateral'] == 'Frontal']
)

# Preview the first rows of the frontal validation labels.
frontal_validation_labels.head()

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,../CheXpert-v1.0-small/valid/patient64541/stud...,Male,73,Frontal,AP,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,../CheXpert-v1.0-small/valid/patient64542/stud...,Male,70,Frontal,PA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,../CheXpert-v1.0-small/valid/patient64543/stud...,Male,85,Frontal,AP,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,../CheXpert-v1.0-small/valid/patient64544/stud...,Female,42,Frontal,AP,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,../CheXpert-v1.0-small/valid/patient64545/stud...,Female,55,Frontal,AP,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0


In [15]:
# Settings shared by the training and validation iterators: both read image
# paths from the "Path" column of the frontal training labels, use the raw
# "Pleural Effusion" value as the target, and load 100x100 grayscale images in
# batches of 32.
flow_kwargs = dict(dataframe=frontal_train_labels,
                   directory=".",
                   x_col="Path",
                   y_col=['Pleural Effusion'],
                   class_mode="raw",
                   color_mode='grayscale',
                   target_size=(100, 100),
                   batch_size=32)

# Training batches come from `datagen`, i.e. with random horizontal flips.
train_datagen = datagen.flow_from_dataframe(subset='training', **flow_kwargs)

# Validation batches must not be augmented: flowing them through `datagen`
# would apply the same random horizontal flips to the held-out images and add
# noise to the monitored validation metrics. Use a generator that only rescales.
# Its `rescale` and `validation_split` values must stay equal to the ones given
# to `datagen` so that preprocessing and the train/validation partition match.
eval_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
validation_datagen = eval_datagen.flow_from_dataframe(subset='validation', **flow_kwargs)

Found 145160 validated image filenames.
Found 36289 validated image filenames.


In [9]:
# metrics functions

def recall_m(y_true, y_pred):
    """Recall of the predictions in ``y_pred`` against the labels in ``y_true``.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    clipped to [0, 1] and rounded before being summed, so the result is
    (rounded hits) / (rounded positives + epsilon). The backend epsilon keeps
    the division defined when there are no positive labels.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    positive_mask = K.round(K.clip(y_true, 0, 1))
    positive_count = K.sum(positive_mask)
    return hit_count / (positive_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of the predictions in ``y_pred`` against the labels in ``y_true``.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    clipped to [0, 1] and rounded before being summed, so the result is
    (rounded hits) / (rounded predicted positives + epsilon). The backend
    epsilon keeps the division defined when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    flagged_count = K.sum(flagged_mask)
    return hit_count / (flagged_count + K.epsilon())
    
def f1_m(y_true, y_pred):
    """F1 score built from ``precision_m`` and ``recall_m``.

    Returns 2 * (p * r) / (p + r + epsilon), where the backend epsilon keeps
    the division defined when both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

In [None]:
# --- Run configuration --------------------------------------------------------
# Directory that holds the per-epoch weight checkpoints of this trial.
CHECKPOINT_DIR = 'trial_1'
# Number of epochs already completed by earlier runs. Training resumes from the
# checkpoint file carrying this number; set to 0 to train from scratch.
RESUME_EPOCH = 68
# Epoch number at which training stops.
TOTAL_EPOCHS = 120


def _add_conv_block(model, filters, kernel_size, **conv_kwargs):
    """Append Conv2D -> BatchNormalization -> ReLU -> Dropout(0.2) to ``model``.

    Args:
        model: the Sequential model being assembled (modified in place).
        filters: number of convolution filters.
        kernel_size: convolution kernel size, e.g. ``(3, 3)``.
        **conv_kwargs: forwarded unchanged to ``Conv2D``
            (e.g. ``input_shape``, ``use_bias``).
    """
    model.add(Conv2D(filters, kernel_size, **conv_kwargs))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(.2))


classifier = Sequential()

# The trailing comments give each layer's OUTPUT shape (height x width x channels)
# for the 100 x 100 x 1 grayscale input; the convolutions use no padding, so a
# k x k kernel shrinks each side by k - 1.
#
# A larger 5x5 kernel is used in the first layer to shrink the feature maps a
# bit faster (and hopefully speed up training).
# Only this first convolution disables its bias; that is kept exactly as it was
# so the layer layout still matches the weight files already saved in trial_1.
_add_conv_block(classifier, 32, (5, 5), input_shape=(100, 100, 1), use_bias=False)  # 96 x 96 x 32
_add_conv_block(classifier, 64, (3, 3))                                             # 94 x 94 x 64
classifier.add(MaxPooling2D(pool_size=(2,2)))                                       # 47 x 47 x 64
_add_conv_block(classifier, 64, (3, 3))                                             # 45 x 45 x 64
_add_conv_block(classifier, 64, (3, 3))                                             # 43 x 43 x 64
classifier.add(MaxPooling2D(pool_size=(2,2)))                                       # 21 x 21 x 64
_add_conv_block(classifier, 128, (3, 3))                                            # 19 x 19 x 128
_add_conv_block(classifier, 128, (3, 3))                                            # 17 x 17 x 128
classifier.add(MaxPooling2D(pool_size=(2,2)))                                       # 8 x 8 x 128
classifier.add(Flatten())                                                           # 8192

# Fully connected head: 8192 -> 1024 -> 1024 -> 1 (sigmoid output for the
# binary "Pleural Effusion" target).
classifier.add(Dense(activation="relu", units=1024))
classifier.add(Dense(activation="relu", units=1024))
classifier.add(Dense(activation="sigmoid", units=1))

classifier.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=[precision_m, recall_m, f1_m])

# Make sure the checkpoint directory exists (no-op when it already does).
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Save weights only for epochs that improve the validation F1 score.
mc = ModelCheckpoint(CHECKPOINT_DIR + '/weights{epoch:04d}.h5', save_weights_only=True, save_best_only=True, monitor='val_f1_m', mode='max')

# Resume from the checkpoint numbered RESUME_EPOCH. The weights file and
# `initial_epoch` below are derived from the same constant so they cannot drift
# apart (the saved output shows initial_epoch=68 continuing at "Epoch 69/120").
if RESUME_EPOCH:
    classifier.load_weights('{}/weights{:04d}.h5'.format(CHECKPOINT_DIR, RESUME_EPOCH))

# too many epochs mean overfitting, not enough epochs mean underfitting
classifier.fit_generator(
    train_datagen,
    steps_per_epoch=5280,
    epochs=TOTAL_EPOCHS,
    initial_epoch=RESUME_EPOCH,
    validation_data=validation_datagen,
    validation_steps=800,
    workers=4,
    verbose=2,
    callbacks=[mc])

Epoch 69/120


```
Epoch 1/120
 - 1824s - loss: 0.6324 - precision_m: 0.6168 - recall_m: 0.6097 - f1_m: 0.5961 - val_loss: 0.6089 - val_precision_m: 0.4197 - val_recall_m: 0.9118 - val_f1_m: 0.5645
Epoch 2/120
 - 1823s - loss: 0.5522 - precision_m: 0.6712 - recall_m: 0.6673 - f1_m: 0.6580 - val_loss: 0.7262 - val_precision_m: 0.4355 - val_recall_m: 0.9531 - val_f1_m: 0.5894
Epoch 3/120
 - 1822s - loss: 0.5300 - precision_m: 0.6879 - recall_m: 0.7028 - f1_m: 0.6855 - val_loss: 0.6404 - val_precision_m: 0.4670 - val_recall_m: 0.9524 - val_f1_m: 0.6174
Epoch 4/120
 - 1828s - loss: 0.5139 - precision_m: 0.6990 - recall_m: 0.7182 - f1_m: 0.6990 - val_loss: 0.6052 - val_precision_m: 0.4798 - val_recall_m: 0.9068 - val_f1_m: 0.6179
Epoch 5/120
 - 1830s - loss: 0.5070 - precision_m: 0.7055 - recall_m: 0.7209 - f1_m: 0.7037 - val_loss: 0.5689 - val_precision_m: 0.4781 - val_recall_m: 0.9096 - val_f1_m: 0.6170
Epoch 6/120
 - 1871s - loss: 0.4981 - precision_m: 0.7109 - recall_m: 0.7266 - f1_m: 0.7093 - val_loss: 0.5635 - val_precision_m: 0.5431 - val_recall_m: 0.8370 - val_f1_m: 0.6474
Epoch 7/120
 - 1871s - loss: 0.4901 - precision_m: 0.7148 - recall_m: 0.7372 - f1_m: 0.7170 - val_loss: 0.4575 - val_precision_m: 0.5015 - val_recall_m: 0.8966 - val_f1_m: 0.6332
Epoch 8/120
 - 1872s - loss: 0.4879 - precision_m: 0.7197 - recall_m: 0.7378 - f1_m: 0.7199 - val_loss: 0.4185 - val_precision_m: 0.6175 - val_recall_m: 0.7452 - val_f1_m: 0.6623
Epoch 9/120
 - 1861s - loss: 0.4811 - precision_m: 0.7215 - recall_m: 0.7429 - f1_m: 0.7236 - val_loss: 0.5401 - val_precision_m: 0.5342 - val_recall_m: 0.8853 - val_f1_m: 0.6568
Epoch 10/120
 - 1860s - loss: 0.4786 - precision_m: 0.7228 - recall_m: 0.7487 - f1_m: 0.7272 - val_loss: 0.4813 - val_precision_m: 0.6527 - val_recall_m: 0.8266 - val_f1_m: 0.7178
Epoch 11/120
 - 1860s - loss: 0.4740 - precision_m: 0.7277 - recall_m: 0.7530 - f1_m: 0.7317 - val_loss: 0.4646 - val_precision_m: 0.6559 - val_recall_m: 0.7863 - val_f1_m: 0.7040
Epoch 12/120
 - 1862s - loss: 0.4741 - precision_m: 0.7257 - recall_m: 0.7532 - f1_m: 0.7308 - val_loss: 0.6654 - val_precision_m: 0.4968 - val_recall_m: 0.8966 - val_f1_m: 0.6299
Epoch 13/120
 - 1863s - loss: 0.4700 - precision_m: 0.7283 - recall_m: 0.7568 - f1_m: 0.7340 - val_loss: 0.5501 - val_precision_m: 0.6129 - val_recall_m: 0.8279 - val_f1_m: 0.6951
Epoch 14/120
 - 1860s - loss: 0.4685 - precision_m: 0.7312 - recall_m: 0.7575 - f1_m: 0.7361 - val_loss: 0.6378 - val_precision_m: 0.4713 - val_recall_m: 0.9202 - val_f1_m: 0.6144
Epoch 15/120
 - 1862s - loss: 0.4670 - precision_m: 0.7334 - recall_m: 0.7545 - f1_m: 0.7357 - val_loss: 0.5095 - val_precision_m: 0.6344 - val_recall_m: 0.7750 - val_f1_m: 0.6858
Epoch 16/120
 - 1862s - loss: 0.4626 - precision_m: 0.7333 - recall_m: 0.7590 - f1_m: 0.7378 - val_loss: 0.4987 - val_precision_m: 0.5146 - val_recall_m: 0.9168 - val_f1_m: 0.6501
Epoch 17/120
 - 1861s - loss: 0.4638 - precision_m: 0.7335 - recall_m: 0.7550 - f1_m: 0.7360 - val_loss: 0.5701 - val_precision_m: 0.5346 - val_recall_m: 0.8417 - val_f1_m: 0.6438
Epoch 18/120
 - 1861s - loss: 0.4609 - precision_m: 0.7344 - recall_m: 0.7627 - f1_m: 0.7404 - val_loss: 0.4864 - val_precision_m: 0.6166 - val_recall_m: 0.8580 - val_f1_m: 0.7067
Epoch 19/120
 - 1855s - loss: 0.4603 - precision_m: 0.7354 - recall_m: 0.7654 - f1_m: 0.7423 - val_loss: 0.4151 - val_precision_m: 0.6374 - val_recall_m: 0.7549 - val_f1_m: 0.6786
Epoch 20/120
 - 1854s - loss: 0.4589 - precision_m: 0.7354 - recall_m: 0.7648 - f1_m: 0.7418 - val_loss: 0.5631 - val_precision_m: 0.6355 - val_recall_m: 0.8437 - val_f1_m: 0.7136
Epoch 21/120
 - 1850s - loss: 0.4571 - precision_m: 0.7364 - recall_m: 0.7632 - f1_m: 0.7416 - val_loss: 0.4758 - val_precision_m: 0.6422 - val_recall_m: 0.8205 - val_f1_m: 0.7093
Epoch 22/120
 - 1857s - loss: 0.4591 - precision_m: 0.7344 - recall_m: 0.7668 - f1_m: 0.7425 - val_loss: 0.3895 - val_precision_m: 0.6311 - val_recall_m: 0.8078 - val_f1_m: 0.6979
Epoch 23/120
 - 1844s - loss: 0.4534 - precision_m: 0.7399 - recall_m: 0.7685 - f1_m: 0.7461 - val_loss: 0.5250 - val_precision_m: 0.5338 - val_recall_m: 0.9052 - val_f1_m: 0.6625
Epoch 24/120
 - 1812s - loss: 0.4535 - precision_m: 0.7405 - recall_m: 0.7672 - f1_m: 0.7456 - val_loss: 0.5801 - val_precision_m: 0.5677 - val_recall_m: 0.8753 - val_f1_m: 0.6784
Epoch 25/120
 - 1811s - loss: 0.4519 - precision_m: 0.7400 - recall_m: 0.7702 - f1_m: 0.7472 - val_loss: 0.5036 - val_precision_m: 0.6241 - val_recall_m: 0.8490 - val_f1_m: 0.7092
Epoch 26/120
 - 1811s - loss: 0.4531 - precision_m: 0.7398 - recall_m: 0.7661 - f1_m: 0.7449 - val_loss: 0.4986 - val_precision_m: 0.6001 - val_recall_m: 0.8371 - val_f1_m: 0.6873
Epoch 27/120
 - 1809s - loss: 0.4505 - precision_m: 0.7403 - recall_m: 0.7734 - f1_m: 0.7487 - val_loss: 0.4194 - val_precision_m: 0.6618 - val_recall_m: 0.8356 - val_f1_m: 0.7285
Epoch 28/120
 - 1811s - loss: 0.4498 - precision_m: 0.7428 - recall_m: 0.7721 - f1_m: 0.7495 - val_loss: 0.4492 - val_precision_m: 0.6025 - val_recall_m: 0.8761 - val_f1_m: 0.7045
Epoch 29/120
 - 1810s - loss: 0.4492 - precision_m: 0.7441 - recall_m: 0.7686 - f1_m: 0.7482 - val_loss: 0.4916 - val_precision_m: 0.6022 - val_recall_m: 0.8465 - val_f1_m: 0.6926
Epoch 30/120
 - 1812s - loss: 0.4457 - precision_m: 0.7427 - recall_m: 0.7760 - f1_m: 0.7514 - val_loss: 0.4618 - val_precision_m: 0.5964 - val_recall_m: 0.8762 - val_f1_m: 0.6994
Epoch 31/120
 - 1811s - loss: 0.4465 - precision_m: 0.7444 - recall_m: 0.7769 - f1_m: 0.7528 - val_loss: 0.5896 - val_precision_m: 0.5203 - val_recall_m: 0.9328 - val_f1_m: 0.6589
Epoch 32/120
 - 1812s - loss: 0.4448 - precision_m: 0.7461 - recall_m: 0.7748 - f1_m: 0.7525 - val_loss: 0.4945 - val_precision_m: 0.6038 - val_recall_m: 0.8867 - val_f1_m: 0.7083
Epoch 33/120
 - 1811s - loss: 0.4429 - precision_m: 0.7469 - recall_m: 0.7754 - f1_m: 0.7533 - val_loss: 0.4700 - val_precision_m: 0.5837 - val_recall_m: 0.8858 - val_f1_m: 0.6925
Epoch 34/120
 - 1811s - loss: 0.4425 - precision_m: 0.7471 - recall_m: 0.7746 - f1_m: 0.7529 - val_loss: 0.3913 - val_precision_m: 0.6071 - val_recall_m: 0.8631 - val_f1_m: 0.7015
Epoch 35/120
 - 1812s - loss: 0.4436 - precision_m: 0.7467 - recall_m: 0.7756 - f1_m: 0.7531 - val_loss: 0.4105 - val_precision_m: 0.5948 - val_recall_m: 0.8493 - val_f1_m: 0.6885
Epoch 36/120
 - 1810s - loss: 0.4434 - precision_m: 0.7470 - recall_m: 0.7775 - f1_m: 0.7542 - val_loss: 0.3986 - val_precision_m: 0.6314 - val_recall_m: 0.8027 - val_f1_m: 0.6953
Epoch 37/120
 - 1807s - loss: 0.4430 - precision_m: 0.7478 - recall_m: 0.7750 - f1_m: 0.7534 - val_loss: 0.4265 - val_precision_m: 0.5780 - val_recall_m: 0.8463 - val_f1_m: 0.6758
Epoch 38/120
 - 1810s - loss: 0.4412 - precision_m: 0.7494 - recall_m: 0.7760 - f1_m: 0.7549 - val_loss: 0.4656 - val_precision_m: 0.6274 - val_recall_m: 0.8659 - val_f1_m: 0.7171
Epoch 39/120
 - 1810s - loss: 0.4409 - precision_m: 0.7466 - recall_m: 0.7771 - f1_m: 0.7538 - val_loss: 0.4149 - val_precision_m: 0.5874 - val_recall_m: 0.8157 - val_f1_m: 0.6715
Epoch 40/120
 - 1810s - loss: 0.4402 - precision_m: 0.7489 - recall_m: 0.7778 - f1_m: 0.7554 - val_loss: 0.3805 - val_precision_m: 0.6682 - val_recall_m: 0.7676 - val_f1_m: 0.7030
Epoch 41/120
 - 1814s - loss: 0.4398 - precision_m: 0.7496 - recall_m: 0.7797 - f1_m: 0.7566 - val_loss: 0.4013 - val_precision_m: 0.6070 - val_recall_m: 0.8351 - val_f1_m: 0.6921
Epoch 42/120
 - 1807s - loss: 0.4406 - precision_m: 0.7486 - recall_m: 0.7813 - f1_m: 0.7567 - val_loss: 0.4614 - val_precision_m: 0.5446 - val_recall_m: 0.9017 - val_f1_m: 0.6697
Epoch 43/120
 - 1811s - loss: 0.4357 - precision_m: 0.7513 - recall_m: 0.7820 - f1_m: 0.7588 - val_loss: 0.4586 - val_precision_m: 0.6068 - val_recall_m: 0.8165 - val_f1_m: 0.6849
Epoch 44/120
 - 1810s - loss: 0.4375 - precision_m: 0.7502 - recall_m: 0.7794 - f1_m: 0.7568 - val_loss: 0.3010 - val_precision_m: 0.6546 - val_recall_m: 0.8166 - val_f1_m: 0.7156
Epoch 45/120
 - 1820s - loss: 0.4339 - precision_m: 0.7540 - recall_m: 0.7827 - f1_m: 0.7607 - val_loss: 0.4039 - val_precision_m: 0.6398 - val_recall_m: 0.8291 - val_f1_m: 0.7118
Epoch 46/120
 - 1820s - loss: 0.4358 - precision_m: 0.7500 - recall_m: 0.7817 - f1_m: 0.7580 - val_loss: 0.5414 - val_precision_m: 0.6096 - val_recall_m: 0.8372 - val_f1_m: 0.6948
Epoch 47/120
 - 1817s - loss: 0.4359 - precision_m: 0.7529 - recall_m: 0.7813 - f1_m: 0.7594 - val_loss: 0.3016 - val_precision_m: 0.6008 - val_recall_m: 0.8006 - val_f1_m: 0.6750
Epoch 48/120
 - 1815s - loss: 0.4334 - precision_m: 0.7521 - recall_m: 0.7811 - f1_m: 0.7587 - val_loss: 0.4154 - val_precision_m: 0.7738 - val_recall_m: 0.7153 - val_f1_m: 0.7307
Epoch 49/120
 - 1819s - loss: 0.4333 - precision_m: 0.7535 - recall_m: 0.7827 - f1_m: 0.7602 - val_loss: 0.4344 - val_precision_m: 0.5989 - val_recall_m: 0.8588 - val_f1_m: 0.6955
Epoch 50/120
 - 1809s - loss: 0.4324 - precision_m: 0.7538 - recall_m: 0.7835 - f1_m: 0.7611 - val_loss: 0.5684 - val_precision_m: 0.5447 - val_recall_m: 0.8714 - val_f1_m: 0.6597
Epoch 51/120
 - 1819s - loss: 0.4305 - precision_m: 0.7566 - recall_m: 0.7811 - f1_m: 0.7610 - val_loss: 0.4887 - val_precision_m: 0.5845 - val_recall_m: 0.8654 - val_f1_m: 0.6883
Epoch 52/120
 - 1813s - loss: 0.4345 - precision_m: 0.7517 - recall_m: 0.7823 - f1_m: 0.7593 - val_loss: 0.4395 - val_precision_m: 0.5381 - val_recall_m: 0.9087 - val_f1_m: 0.6662
Epoch 53/120
 - 1815s - loss: 0.4295 - precision_m: 0.7556 - recall_m: 0.7866 - f1_m: 0.7634 - val_loss: 0.5556 - val_precision_m: 0.5887 - val_recall_m: 0.8778 - val_f1_m: 0.6958
Epoch 54/120
 - 1807s - loss: 0.4278 - precision_m: 0.7557 - recall_m: 0.7855 - f1_m: 0.7629 - val_loss: 0.2362 - val_precision_m: 0.6362 - val_recall_m: 0.8229 - val_f1_m: 0.7069
```