In [1]:
import numpy as np
import pandas as pd
import os
import h5py
from keras.utils.io_utils import HDF5Matrix

Using TensorFlow backend.


In [2]:
h5_path = './chest_xray.h5'
disease_vec_labels = ['Atelectasis','Cardiomegaly','Consolidation','Edema','Effusion',
                      'Emphysema','Fibrosis','Hernia','Infiltration','Mass','Nodule',
                      'Pleural_Thickening','Pneumonia','Pneumothorax']
disease_vec = []
with h5py.File(h5_path, 'r') as h5_data:
    all_fields = list(h5_data.keys())
    for c_key in all_fields:
        print(c_key, h5_data[c_key].shape, h5_data[c_key].dtype)
    for c_key in disease_vec_labels:
        disease_vec += [h5_data[c_key][:]]
disease_vec = np.stack(disease_vec,1)
print('Disease Vec:', disease_vec.shape)

Atelectasis (112120,) int64
Cardiomegaly (112120,) int64
Consolidation (112120,) int64
Edema (112120,) int64
Effusion (112120,) int64
Emphysema (112120,) int64
Fibrosis (112120,) int64
Finding Labels (112120,) |S100
Follow-up # (112120,) int64
Height] (112120,) int64
Hernia (112120,) int64
Image Index (112120,) |S16
Infiltration (112120,) int64
Mass (112120,) int64
No Finding (112120,) int64
Nodule (112120,) int64
OriginalImagePixelSpacing[x (112120,) float64
OriginalImage[Width (112120,) int64
Patient Age (112120,) int64
Patient Gender (112120,) |S1
Patient ID (112120,) int64
Pleural_Thickening (112120,) int64
Pneumonia (112120,) int64
Pneumothorax (112120,) int64
Unnamed: 11 (112120,) float64
View Position (112120,) |S2
images (112120, 224, 224, 3) uint8
path (112120,) |S29
y] (112120,) float64
Disease Vec: (112120, 14)


In [3]:
img_ds = HDF5Matrix(h5_path, 'images')
split_idx = int(img_ds.shape[0] * 0.9)
X_train = HDF5Matrix(h5_path, 'images', end=split_idx)
X_test = HDF5Matrix(h5_path, 'images', start=split_idx)
y_train = disease_vec[:split_idx]
y_test = disease_vec[split_idx:]
print('Train Shape', X_train.shape, 'test shape', X_test.shape)
print('Train y shape', y_train.shape, 'test y shape', y_test.shape)

Train Shape (100908, 224, 224, 3) test shape (11212, 224, 224, 3)
Train y shape (100908, 14) test y shape (11212, 14)


In [4]:
import os
from keras.applications.densenet import DenseNet121
from keras.layers import Input
from keras.layers.core import Dense
from keras.models import Model
from keras.utils import multi_gpu_model

In [5]:
nb_records, nb_classes = disease_vec.shape
nb_gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
img_shape = img_ds.shape[1:]
img_input = Input(shape=img_shape)

In [6]:
base_model = DenseNet121(include_top=False,
                         input_tensor=img_input, 
                         input_shape=img_shape, 
                         weights='imagenet', 
                         pooling="avg")
x = base_model.output
predictions = Dense(nb_classes, activation="sigmoid", name="predictions")(x)
model = Model(inputs=img_input, outputs=predictions)
if nb_gpus > 1:
    model = multi_gpu_model(model, gpus=nb_gpus)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [7]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

weight_path="weights_{epoch:02d}_{val_acc:.4f}.hdf5"
checkpoint = ModelCheckpoint(weight_path, monitor='acc', verbose=1, 
                             save_best_only=True, mode='max', save_weights_only=True)
# early = EarlyStopping(monitor="acc", mode="max", patience=3)
callbacks = [checkpoint, ]

In [None]:
# from keras.preprocessing.image import ImageDataGenerator

# batch_size = 32
# nb_epoch = 200

# # define data augmentation configuration
# train_datagen = ImageDataGenerator(horizontal_flip=True)
# valid_datagen = ImageDataGenerator(horizontal_flip=True)
# # fit the data augmentation
# train_datagen.fit(X_train)
# valid_datagen.fit(X_valid)
# # setup generator
# train_generator = train_datagen.flow(X_train, y_train, batch_size=batch_size)
# valid_generator = valid_datagen.flow(X_valid, y_valid, batch_size=batch_size)

# model.fit_generator(train_generator, 
#                     steps_per_epoch=nb_records / batch_size / 10, 
#                     epochs=nb_epoch, 
#                     validation_data=valid_generator, 
#                     validation_steps=nb_records / batch_size / 5)

In [None]:
batch_size = 32
nb_epoch = 200

model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epoch,
          verbose=1,
          validation_split=0.2,
          shuffle=True,
          callbacks=callbacks
          )

Train on 80726 samples, validate on 20182 samples
Epoch 1/200

Epoch 00001: acc improved from -inf to 0.94796, saving model to weights_01_0.9470.hdf5
Epoch 2/200

Epoch 00002: acc improved from 0.94796 to 0.94840, saving model to weights_02_0.9477.hdf5
Epoch 3/200

Epoch 00003: acc improved from 0.94840 to 0.94844, saving model to weights_03_0.9471.hdf5
Epoch 4/200

Epoch 00004: acc improved from 0.94844 to 0.94855, saving model to weights_04_0.9458.hdf5
Epoch 5/200

Epoch 00005: acc improved from 0.94855 to 0.94866, saving model to weights_05_0.9414.hdf5
Epoch 6/200

Epoch 00006: acc improved from 0.94866 to 0.94877, saving model to weights_06_0.9172.hdf5
Epoch 7/200

Epoch 00007: acc improved from 0.94877 to 0.94879, saving model to weights_07_0.9475.hdf5
Epoch 8/200

Epoch 00008: acc improved from 0.94879 to 0.94894, saving model to weights_08_0.9474.hdf5
Epoch 9/200

Epoch 00009: acc did not improve from 0.94894
Epoch 10/200

Epoch 00010: acc improved from 0.94894 to 0.94902, savin


Epoch 00036: acc improved from 0.98376 to 0.98470, saving model to weights_36_0.9343.hdf5
Epoch 37/200

Epoch 00037: acc improved from 0.98470 to 0.98572, saving model to weights_37_0.9226.hdf5
Epoch 38/200

Epoch 00038: acc improved from 0.98572 to 0.98614, saving model to weights_38_0.9328.hdf5
Epoch 39/200

Epoch 00039: acc improved from 0.98614 to 0.98699, saving model to weights_39_0.9351.hdf5
Epoch 40/200

Epoch 00040: acc improved from 0.98699 to 0.98750, saving model to weights_40_0.9303.hdf5
Epoch 41/200

Epoch 00041: acc improved from 0.98750 to 0.98831, saving model to weights_41_0.9221.hdf5
Epoch 42/200

Epoch 00042: acc improved from 0.98831 to 0.98855, saving model to weights_42_0.9299.hdf5
Epoch 43/200

Epoch 00043: acc improved from 0.98855 to 0.98904, saving model to weights_43_0.9329.hdf5
Epoch 44/200

Epoch 00044: acc improved from 0.98904 to 0.98969, saving model to weights_44_0.9333.hdf5
Epoch 45/200

Epoch 00045: acc improved from 0.98969 to 0.98975, saving model


Epoch 00073: acc improved from 0.99465 to 0.99465, saving model to weights_73_0.9257.hdf5
Epoch 74/200

Epoch 00074: acc improved from 0.99465 to 0.99498, saving model to weights_74_0.9303.hdf5
Epoch 75/200

Epoch 00075: acc did not improve from 0.99498
Epoch 76/200

Epoch 00076: acc improved from 0.99498 to 0.99499, saving model to weights_76_0.9250.hdf5
Epoch 77/200

Epoch 00077: acc improved from 0.99499 to 0.99520, saving model to weights_77_0.9211.hdf5
Epoch 78/200

Epoch 00078: acc improved from 0.99520 to 0.99533, saving model to weights_78_0.9365.hdf5
Epoch 79/200

Epoch 00079: acc improved from 0.99533 to 0.99537, saving model to weights_79_0.9330.hdf5
Epoch 80/200

Epoch 00080: acc did not improve from 0.99537
Epoch 81/200

Epoch 00081: acc improved from 0.99537 to 0.99562, saving model to weights_81_0.9325.hdf5
Epoch 82/200

Epoch 00082: acc did not improve from 0.99562
Epoch 83/200

Epoch 00083: acc did not improve from 0.99562
Epoch 84/200

Epoch 00084: acc did not improv

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

print("Predicting")
y_pred = model.predict(X_test)
print(y_test.shape, y_pred.shape)

In [None]:
# y_test = np.argmax(y_test, axis=1)
# y_pred = np.argmax(y_pred, axis=1)

precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average="weighted")
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

In [None]:
model.save('./model-20181012.h5')