In [1]:
import numpy as np
import pandas as pd
import os
import h5py
from keras.utils.io_utils import HDF5Matrix

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# HDF5 archive holding the images together with their per-record metadata columns.
h5_path = './chest_xray.h5'
# Pathology columns to read; their order here is the column order of `disease_vec`.
disease_vec_labels = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule',
    'Pleural_Thickening', 'Pneumonia', 'Pneumothorax',
]
with h5py.File(h5_path, 'r') as h5_data:
    all_fields = list(h5_data.keys())
    # Inventory of the file: name, shape and dtype of every stored dataset.
    for field_name in all_fields:
        dataset = h5_data[field_name]
        print(field_name, dataset.shape, dataset.dtype)
    # `[:]` copies each label column into memory so it stays usable after the file closes.
    disease_columns = [h5_data[label][:] for label in disease_vec_labels]
# Put the columns side by side: one row per record, one column per pathology.
disease_vec = np.stack(disease_columns, axis=1)
print('Disease Vec:', disease_vec.shape)

Atelectasis (112120,) int64
Cardiomegaly (112120,) int64
Consolidation (112120,) int64
Edema (112120,) int64
Effusion (112120,) int64
Emphysema (112120,) int64
Fibrosis (112120,) int64
Finding Labels (112120,) |S100
Follow-up # (112120,) int64
Height] (112120,) int64
Hernia (112120,) int64
Image Index (112120,) |S16
Infiltration (112120,) int64
Mass (112120,) int64
No Finding (112120,) int64
Nodule (112120,) int64
OriginalImagePixelSpacing[x (112120,) float64
OriginalImage[Width (112120,) int64
Patient Age (112120,) int64
Patient Gender (112120,) |S1
Patient ID (112120,) int64
Pleural_Thickening (112120,) int64
Pneumonia (112120,) int64
Pneumothorax (112120,) int64
Unnamed: 11 (112120,) float64
View Position (112120,) |S2
images (112120, 224, 224, 3) uint8
path (112120,) |S29
y] (112120,) float64
Disease Vec: (112120, 14)


In [3]:
# generate binary label
# Two-column one-hot target: column 0 is set for rows whose disease labels sum to zero,
# column 1 for every other row. Built with array operations rather than a Python loop
# over every row, which also keeps the result 2-D (shape (0, 2)) when there are no rows.
no_finding = disease_vec.sum(axis=1) == 0
binary_vec = np.stack([no_finding, ~no_finding], axis=1).astype(int)
print(binary_vec.shape)

(112120, 2)


In [4]:
# Handle on the complete image dataset; its first dimension is the number of records.
img_ds = HDF5Matrix(h5_path, 'images')
n_images = img_ds.shape[0]
# The first 90% of records (in file order) go to training, the remainder to testing.
split_idx = int(n_images * 0.9)
X_train, X_test = (
    HDF5Matrix(h5_path, 'images', end=split_idx),
    HDF5Matrix(h5_path, 'images', start=split_idx),
)
# Labels are cut at the same index so they stay aligned with the images.
y_train, y_test = binary_vec[:split_idx], binary_vec[split_idx:]
print('Train Shape', X_train.shape, 'test shape', X_test.shape)

Train Shape (100908, 224, 224, 3) test shape (11212, 224, 224, 3)


In [5]:
import os
from keras.applications.densenet import DenseNet121
from keras.layers import Input
from keras.layers.core import Dense
from keras.models import Model
from keras.utils import multi_gpu_model

In [6]:
# Number of records and number of target classes, read off the label matrix.
nb_records, nb_classes = binary_vec.shape
# One GPU is counted per comma-separated entry of CUDA_VISIBLE_DEVICES;
# when the variable is unset the fallback string "1" yields a count of one.
visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "1")
nb_gpus = len(visible_devices.split(","))
# Shape of a single image (every dimension after the first) defines the model input.
img_shape = img_ds.shape[1:]
img_input = Input(shape=img_shape)

In [7]:
# Classifier = DenseNet121 backbone (ImageNet weights, original top removed, average
# pooling applied to the output) followed by a new dense head with one unit per class.
base_model = DenseNet121(include_top=False,
                         input_tensor=img_input,
                         input_shape=img_shape,
                         weights='imagenet',
                         pooling="avg")
x = base_model.output
# The targets are mutually exclusive one-hot rows, so the head uses softmax (one
# distribution over the classes) instead of independent per-class sigmoids.
predictions = Dense(nb_classes, activation="softmax", name="predictions")(x)
model = Model(inputs=img_input, outputs=predictions)
if nb_gpus > 1:
    # More than one visible device: wrap the model for multi-GPU training.
    model = multi_gpu_model(model, gpus=nb_gpus)
# categorical_crossentropy is the loss that pairs with a softmax head. It also makes
# Keras resolve the 'accuracy' metric to categorical (argmax) accuracy rather than
# element-wise binary accuracy, so the reported value agrees with the argmax-based
# evaluation performed on the test predictions.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [8]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

# Checkpoint filename template; the epoch number and validation accuracy are filled in
# each time weights are written.
weight_path="weights_{epoch:02d}_{val_acc:.4f}.hdf5"
# Monitor validation accuracy so the monitored quantity is the same one reported in the
# filename. With save_best_only=False the weights are still written after every epoch.
checkpoint = ModelCheckpoint(weight_path, monitor='val_acc', verbose=1,
                             save_best_only=False, mode='max', save_weights_only=True)
# Early-stopping criterion on validation accuracy (patience of 3 epochs).
early = EarlyStopping(monitor="val_acc", mode="max", patience=3)
# NOTE(review): `early` is constructed but not included below, so early stopping is
# inactive during training — confirm whether that is intentional.
callbacks = [checkpoint,]

In [None]:
# from keras.preprocessing.image import ImageDataGenerator

# batch_size = 32
# nb_epoch = 200

# X_train, X_valid = np.vsplit(X_train, [int(0.8*len(X_train)),])
# y_train, y_valid = np.vsplit(y_train, [int(0.8*len(y_train)),])
# print('X_train shape: {}, X_valid shape: {}'.format(X_train.shape, X_valid.shape))
# print('y_train shape: {}, y_valid shape: {}'.format(y_train.shape, y_valid.shape))

# # define data augmentation configuration
# train_datagen = ImageDataGenerator(horizontal_flip=True)
# valid_datagen = ImageDataGenerator(horizontal_flip=True)
# # fit the data augmentation
# train_datagen.fit(X_train)
# valid_datagen.fit(X_valid)
# # setup generator
# train_generator = train_datagen.flow(X_train, y_train, batch_size=batch_size)
# valid_generator = valid_datagen.flow(X_valid, y_valid, batch_size=batch_size)

# model.fit_generator(train_generator, 
#                     steps_per_epoch=nb_records / batch_size / 10, 
#                     epochs=nb_epoch, 
#                     validation_data=valid_generator, 
#                     validation_steps=nb_records / batch_size / 5)

In [None]:
batch_size = 32
nb_epoch = 200

# Training options gathered in one place: 20% of the training data is held out for
# validation, samples are shuffled, and the configured callbacks run during training.
fit_options = dict(
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks,
)
model.fit(X_train, y_train, **fit_options)

Train on 80726 samples, validate on 20182 samples
Epoch 1/200

Epoch 00001: saving model to weights_01_0.5190.hdf5
Epoch 2/200

Epoch 00002: saving model to weights_02_0.5190.hdf5
Epoch 3/200

Epoch 00003: saving model to weights_03_0.5190.hdf5
Epoch 4/200

Epoch 00004: saving model to weights_04_0.5190.hdf5
Epoch 5/200

Epoch 00005: saving model to weights_05_0.4810.hdf5
Epoch 6/200

Epoch 00006: saving model to weights_06_0.5190.hdf5
Epoch 7/200

Epoch 00007: saving model to weights_07_0.5190.hdf5
Epoch 8/200

Epoch 00008: saving model to weights_08_0.5190.hdf5
Epoch 9/200

Epoch 00009: saving model to weights_09_0.5190.hdf5
Epoch 10/200

Epoch 00010: saving model to weights_10_0.5190.hdf5
Epoch 11/200

Epoch 00011: saving model to weights_11_0.5190.hdf5
Epoch 12/200

Epoch 00012: saving model to weights_12_0.5190.hdf5
Epoch 13/200

Epoch 00013: saving model to weights_13_0.5190.hdf5
Epoch 14/200

Epoch 00014: saving model to weights_14_0.5190.hdf5
Epoch 15/200

Epoch 00015: saving m

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Run the model over the held-out images, then show label and prediction shapes together.
print("Predicting")
y_pred = model.predict(X_test)
print(*(arr.shape for arr in (y_test, y_pred)))

In [None]:
# Reduce one-hot targets and per-class scores to class indices. The ndim guards keep this
# cell idempotent: on a re-run both arrays are already 1-D, and calling argmax with
# axis=1 on them would raise instead of recomputing the metrics.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
if y_pred.ndim > 1:
    y_pred = np.argmax(y_pred, axis=1)

# All three scores use the same `average='weighted'` setting so they are comparable.
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average="weighted")
for metric_label, metric_value in (("Precision: ", precision),
                                   ("Recall: ", recall),
                                   ("F1: ", f1)):
    print(metric_label, metric_value)

In [None]:
# Write the model to './model-20181014.h5' in the working directory.
# NOTE(review): when nb_gpus > 1, `model` is the multi_gpu_model wrapper rather than the
# single-device model it was built from — confirm the saved file reloads as intended.
model.save('./model-20181014.h5')