In [1]:
import numpy as np
import pandas as pd
import os
import h5py
from keras.utils.io_utils import HDF5Matrix

Using TensorFlow backend.


In [2]:
h5_path = '../chest_xray.h5'
disease_vec_labels = ['Atelectasis','Cardiomegaly','Consolidation','Edema','Effusion',
                      'Emphysema','Fibrosis','Hernia','Infiltration','Mass','Nodule',
                      'Pleural_Thickening','Pneumonia','Pneumothorax']
disease_vec = []
with h5py.File(h5_path, 'r') as h5_data:
    all_fields = list(h5_data.keys())
    for c_key in all_fields:
        print(c_key, h5_data[c_key].shape, h5_data[c_key].dtype)
    for c_key in disease_vec_labels:
        disease_vec += [h5_data[c_key][:]]
disease_vec = np.stack(disease_vec,1)
print('Disease Vec:', disease_vec.shape)

Atelectasis (112120,) int64
Cardiomegaly (112120,) int64
Consolidation (112120,) int64
Edema (112120,) int64
Effusion (112120,) int64
Emphysema (112120,) int64
Fibrosis (112120,) int64
Finding Labels (112120,) |S100
Follow-up # (112120,) int64
Height] (112120,) int64
Hernia (112120,) int64
Image Index (112120,) |S16
Infiltration (112120,) int64
Mass (112120,) int64
No Finding (112120,) int64
Nodule (112120,) int64
OriginalImagePixelSpacing[x (112120,) float64
OriginalImage[Width (112120,) int64
Patient Age (112120,) int64
Patient Gender (112120,) |S1
Patient ID (112120,) int64
Pleural_Thickening (112120,) int64
Pneumonia (112120,) int64
Pneumothorax (112120,) int64
Unnamed: 11 (112120,) float64
View Position (112120,) |S2
images (112120, 256, 256, 1) uint8
path (112120,) |S26
y] (112120,) float64
Disease Vec: (112120, 14)


In [3]:
# generate binary label
binary_vec = [[1,0] if label.sum() == 0 else [0,1] for label in disease_vec]
binary_vec = np.asarray(binary_vec)
print(binary_vec.shape)

(112120, 2)


In [4]:
img_ds = HDF5Matrix(h5_path, 'images')
split_idx = int(img_ds.shape[0] * 0.9)
X_train = HDF5Matrix(h5_path, 'images', end=split_idx)
X_test = HDF5Matrix(h5_path, 'images', start=split_idx)
y_train = binary_vec[:split_idx]
y_test = binary_vec[split_idx:]
print('Train Shape', X_train.shape, 'test shape', X_test.shape)

Train Shape (100908, 256, 256, 1) test shape (11212, 256, 256, 1)


In [5]:
from keras.applications.densenet import DenseNet121
from keras.layers import GlobalAveragePooling2D, AveragePooling2D, GlobalMaxPool2D, Dense, Dropout, Flatten, Conv2D
from keras.models import Sequential
from keras.utils import multi_gpu_model

In [6]:
nb_records, nb_classes = binary_vec.shape
img_shape = img_ds.shape[1:]
nb_gpus = 2

base_model = DenseNet121(input_shape=img_shape, include_top=False, weights='imagenet')
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Dropout(0.2))
model.add(Dense(nb_classes, activation = 'sigmoid'))
model = multi_gpu_model(model, gpus=nb_gpus)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inception_resnet_v2_input (Inpu (None, 256, 256, 1)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 256, 256, 1)  0           inception_resnet_v2_input[0][0]  
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 256, 256, 1)  0           inception_resnet_v2_input[0][0]  
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 2)            55124130    lambda_1[0][0]                   
                                                                 lambda_2[0][0]                   
__________

In [7]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

weight_path="weights_{epoch:02d}_{val_acc:.4f}.hdf5"
checkpoint = ModelCheckpoint(weight_path, monitor='loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only=True)
early = EarlyStopping(monitor="loss", mode="min", patience=3)

In [None]:
batch_size = 64
nb_epoch = 50

model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epoch,
          verbose=1,
          validation_split=0.2,
          callbacks=[checkpoint, early]
          )

Train on 80726 samples, validate on 20182 samples
Epoch 1/50

Epoch 00001: loss improved from inf to 0.64614, saving model to weights_01_0.5287.hdf5
Epoch 2/50

Epoch 00002: loss improved from 0.64614 to 0.60793, saving model to weights_02_0.6281.hdf5
Epoch 3/50

Epoch 00003: loss improved from 0.60793 to 0.59761, saving model to weights_03_0.5749.hdf5
Epoch 4/50

Epoch 00004: loss improved from 0.59761 to 0.59098, saving model to weights_04_0.6873.hdf5
Epoch 5/50

Epoch 00005: loss improved from 0.59098 to 0.58597, saving model to weights_05_0.6905.hdf5
Epoch 6/50

Epoch 00006: loss improved from 0.58597 to 0.58202, saving model to weights_06_0.6962.hdf5
Epoch 7/50

Epoch 00007: loss improved from 0.58202 to 0.57544, saving model to weights_07_0.6809.hdf5
Epoch 8/50

Epoch 00008: loss improved from 0.57544 to 0.57111, saving model to weights_08_0.6670.hdf5
Epoch 9/50

Epoch 00009: loss improved from 0.57111 to 0.56596, saving model to weights_09_0.6458.hdf5
Epoch 10/50

Epoch 00010: l

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

print("Predicting")
y_pred = model.predict(X_test)
print(y_test.shape, y_pred.shape)

In [None]:
# y_test = np.argmax(y_test, axis=1)
y_pred = np.argmax(y_pred, axis=1)

precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average="weighted")
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

In [None]:
model.save('./model-20181012.h5')