In [1]:
import numpy as np
import pandas as pd
import os
import h5py
from keras.utils.io_utils import HDF5Matrix

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Location of the HDF5 bundle and the 14 per-disease indicator datasets to read.
h5_path = './chest_xray.h5'
disease_vec_labels = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule',
    'Pleural_Thickening', 'Pneumonia', 'Pneumothorax',
]

with h5py.File(h5_path, 'r') as h5_data:
    # Inventory: name, shape and dtype of every dataset stored in the file.
    all_fields = list(h5_data.keys())
    for field_name in all_fields:
        dataset = h5_data[field_name]
        print(field_name, dataset.shape, dataset.dtype)
    # `[:]` copies each label dataset into memory, so the arrays remain usable
    # after the file is closed.
    label_columns = [h5_data[label][:] for label in disease_vec_labels]

# One row per record; column i corresponds to disease_vec_labels[i].
disease_vec = np.stack(label_columns, axis=1)
print('Disease Vec:', disease_vec.shape)

Atelectasis (112120,) int64
Cardiomegaly (112120,) int64
Consolidation (112120,) int64
Edema (112120,) int64
Effusion (112120,) int64
Emphysema (112120,) int64
Fibrosis (112120,) int64
Finding Labels (112120,) |S100
Follow-up # (112120,) int64
Height] (112120,) int64
Hernia (112120,) int64
Image Index (112120,) |S16
Infiltration (112120,) int64
Mass (112120,) int64
No Finding (112120,) int64
Nodule (112120,) int64
OriginalImagePixelSpacing[x (112120,) float64
OriginalImage[Width (112120,) int64
Patient Age (112120,) int64
Patient Gender (112120,) |S1
Patient ID (112120,) int64
Pleural_Thickening (112120,) int64
Pneumonia (112120,) int64
Pneumothorax (112120,) int64
Unnamed: 11 (112120,) float64
View Position (112120,) |S2
images (112120, 224, 224, 3) uint8
path (112120,) |S29
y] (112120,) float64
Disease Vec: (112120, 14)


In [27]:
# Generate one-hot binary labels from the multi-label disease matrix:
#   column 0 == 1 -> no disease flagged in the row ("no finding")
#   column 1 == 1 -> at least one disease flagged
# Vectorised: avoids a Python-level loop over every record and always yields a
# 2-D (n_records, 2) array, even when there are zero records.
no_finding = disease_vec.sum(axis=1) == 0
binary_vec = np.stack([no_finding, ~no_finding], axis=1).astype(int)
print(binary_vec.shape)

(112120, 2)


In [28]:
# Handle on the full image dataset; used here for its length and later for the
# per-image shape.
img_ds = HDF5Matrix(h5_path, 'images')
n_images = img_ds.shape[0]

# Positional split: the first 90% of records train, the remaining 10% test.
split_idx = int(n_images * 0.9)

# Images are addressed through HDF5Matrix windows over the same dataset, while
# the labels are sliced from the in-memory array at the same index.
X_train, X_test = (
    HDF5Matrix(h5_path, 'images', end=split_idx),
    HDF5Matrix(h5_path, 'images', start=split_idx),
)
y_train, y_test = binary_vec[:split_idx], binary_vec[split_idx:]
print('Train Shape', X_train.shape, 'test shape', X_test.shape)

Train Shape (100908, 224, 224, 3) test shape (11212, 224, 224, 3)


In [29]:
import os
from keras.applications.densenet import DenseNet121
from keras.layers import Input
from keras.layers.core import Dense
from keras.models import Model
from keras.utils import multi_gpu_model

In [30]:
# Label matrix is (records, classes).
nb_records, nb_classes = binary_vec.shape

# GPU count = number of comma-separated entries in CUDA_VISIBLE_DEVICES;
# when the variable is unset the fallback string "1" yields a count of one.
visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "1")
nb_gpus = len(visible_devices.split(","))

# Per-image shape (everything after the leading record axis) drives the
# network's input tensor.
img_shape = img_ds.shape[1:]
img_input = Input(shape=img_shape)

In [31]:
# DenseNet121 backbone with ImageNet weights, no classification top, and global
# average pooling so the backbone emits one feature vector per image.
base_model = DenseNet121(
    weights='imagenet',
    include_top=False,
    pooling="avg",
    input_tensor=img_input,
    input_shape=img_shape,
)

# Classification head: one sigmoid unit per class on top of the pooled features.
# NOTE(review): targets are one-hot over two mutually exclusive classes, yet the
# head uses independent sigmoids with binary cross-entropy -- confirm this is
# intended rather than softmax + categorical cross-entropy.
classifier_head = Dense(nb_classes, activation="sigmoid", name="predictions")
predictions = classifier_head(base_model.output)
model = Model(inputs=img_input, outputs=predictions)

# NOTE(review): when more than one GPU is configured, `model` is rebound to the
# multi_gpu_model wrapper and the single-device template is no longer reachable
# by name; Keras recommends saving weights via the template -- confirm.
if nb_gpus > 1:
    model = multi_gpu_model(model, gpus=nb_gpus)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_11 (ZeroPadding2 (None, 230, 230, 3)  0           input_9[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_11[0][0]          
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [32]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

# Checkpoint file name embeds the epoch number and that epoch's validation accuracy.
weight_path="weights_{epoch:02d}_{val_acc:.4f}.hdf5"

# Write weights only (not the full model), and only on epochs where the
# validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath=weight_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# NOTE(review): no early stopping is configured, so training always runs for
# the full epoch budget; if it is added, monitoring val_loss would match the
# checkpoint criterion.
callbacks = [checkpoint]

In [34]:
# Training hyper-parameters.
batch_size = 32
nb_epoch = 50

# Fit on the training window, holding out 20% of it for validation
# (80726 train / 20182 validation samples in the recorded run). The checkpoint
# callback persists weights whenever validation loss improves.
model.fit(
    X_train,
    y_train,
    epochs=nb_epoch,
    batch_size=batch_size,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks,
    verbose=1,
)

Train on 80726 samples, validate on 20182 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.63581, saving model to weights_01_0.6644.hdf5
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.63581
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.63581
Epoch 4/50

Epoch 00004: val_loss improved from 0.63581 to 0.62203, saving model to weights_04_0.6838.hdf5
Epoch 5/50

Epoch 00005: val_loss improved from 0.62203 to 0.60666, saving model to weights_05_0.6817.hdf5
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.60666
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.60666
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.60666
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.60666
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.60666
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.60666
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.60666
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.60666


Epoch 00043: val_loss did not improve from 0.60666
Epoch 44/50

Epoch 00044: val_loss did not improve from 0.60666
Epoch 45/50

Epoch 00045: val_loss did not improve from 0.60666
Epoch 46/50

Epoch 00046: val_loss did not improve from 0.60666
Epoch 47/50

Epoch 00047: val_loss did not improve from 0.60666
Epoch 48/50

Epoch 00048: val_loss did not improve from 0.60666
Epoch 49/50

Epoch 00049: val_loss did not improve from 0.60666
Epoch 50/50

Epoch 00050: val_loss did not improve from 0.60666


<keras.callbacks.History at 0x7fece18a9438>

In [35]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Score the held-out test images with the trained model. `y_pred` holds one
# score per class for each test record (shape (11212, 2) in the recorded run),
# matching the one-hot layout of `y_test`.
print("Predicting")
y_pred = model.predict(X_test)
# Sanity check: targets and predictions must have identical shapes.
print(y_test.shape, y_pred.shape)

Predicting
(11212, 2) (11212, 2)


In [37]:
# Collapse the one-hot targets and per-class scores to class indices
# (0 = no finding, 1 = at least one finding).
# The results are stored under NEW names instead of overwriting y_test / y_pred:
# overwriting made this cell non-idempotent, because a second run would call
# argmax(axis=1) on arrays that were already 1-D and raise.
y_test_cls = np.argmax(y_test, axis=1)
y_pred_cls = np.argmax(y_pred, axis=1)

# Support-weighted averages across both classes.
precision = precision_score(y_test_cls, y_pred_cls, average='weighted')
recall = recall_score(y_test_cls, y_pred_cls, average='weighted')
f1 = f1_score(y_test_cls, y_pred_cls, average="weighted")
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

Precision:  0.6182440838245461
Recall:  0.6094363182304674
F1:  0.5988335490280106


In [38]:
# Persist the trained model to an HDF5 file next to the notebook.
# NOTE(review): if more than one GPU was configured, `model` is the
# multi_gpu_model wrapper rather than the single-device template; Keras advises
# saving through the template model -- confirm before reusing this file.
model.save('./model-20181014.h5')