In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
%matplotlib inline
import matplotlib.pyplot as plt
import h5py
from keras.utils.io_utils import HDF5Matrix

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
h5_path = '../chest_xray.h5'
disease_vec_labels = ['Atelectasis','Cardiomegaly','Consolidation','Edema','Effusion',
                      'Emphysema','Fibrosis','Hernia','Infiltration','Mass','Nodule',
                      'Pleural_Thickening','Pneumonia','Pneumothorax']
disease_vec = []
with h5py.File(h5_path, 'r') as h5_data:
    all_fields = list(h5_data.keys())
    for c_key in all_fields:
        print(c_key, h5_data[c_key].shape, h5_data[c_key].dtype)
    for c_key in disease_vec_labels:
        disease_vec += [h5_data[c_key][:]]
disease_vec = np.stack(disease_vec,1)
print('Disease Vec:', disease_vec.shape)

Atelectasis (112120,) int64
Cardiomegaly (112120,) int64
Consolidation (112120,) int64
Edema (112120,) int64
Effusion (112120,) int64
Emphysema (112120,) int64
Fibrosis (112120,) int64
Finding Labels (112120,) |S100
Follow-up # (112120,) int64
Height] (112120,) int64
Hernia (112120,) int64
Image Index (112120,) |S16
Infiltration (112120,) int64
Mass (112120,) int64
No Finding (112120,) int64
Nodule (112120,) int64
OriginalImagePixelSpacing[x (112120,) float64
OriginalImage[Width (112120,) int64
Patient Age (112120,) int64
Patient Gender (112120,) |S1
Patient ID (112120,) int64
Pleural_Thickening (112120,) int64
Pneumonia (112120,) int64
Pneumothorax (112120,) int64
Unnamed: 11 (112120,) float64
View Position (112120,) |S2
images (112120, 256, 256, 1) uint8
path (112120,) |S26
y] (112120,) float64
Disease Vec: (112120, 14)


In [3]:
img_ds = HDF5Matrix(h5_path, 'images')
split_idx = int(img_ds.shape[0] * 0.8)
X_train = HDF5Matrix(h5_path, 'images', end = split_idx)
X_test = HDF5Matrix(h5_path, 'images', start = split_idx)
y_train = disease_vec[:split_idx]
y_test = disease_vec[split_idx:]
print('Train Shape', X_train.shape, 'test shape', X_test.shape)

Train Shape (89696, 256, 256, 1) test shape (22424, 256, 256, 1)


In [4]:
from keras.callbacks import EarlyStopping
from keras.callbacks import TensorBoard
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.layers import MaxPooling2D
from keras.layers.convolutional import Conv2D
from keras.models import Sequential
from keras.utils import np_utils
from keras.utils import multi_gpu_model
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [10]:
nb_records, nb_classes = disease_vec.shape
img_shape = img_ds.shape[1:]
batch_size = 64
nb_epoch = 20
nb_gpus = 2


# build model
model = Sequential()

'''
First set of three layers
Image size: 256 x 256
nb_filters = 32
kernel_size = (2,2)
'''

nb_filters = 32
kernel_size = (2,2)

model.add(Conv2D(nb_filters, kernel_size,
                 padding='valid',
                 strides=1,
                 input_shape=img_shape))
model.add(Activation('relu'))

model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

'''
Second set of three layers
Image Size: 128 x 128
nb_filters = 64
kernel_size = (4, 4)
'''

nb_filters = 64
kernel_size = (4, 4)


model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

# model.add(Conv2D(nb_filters, kernel_size))
# model.add(Activation('relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

'''
Third set of three layers
Image Size: 64 x 64
nb_filters = 128
kernel_size = (8, 8)
'''

nb_filters = 128
kernel_size = (8, 8)

model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))

# model.add(Conv2D(nb_filters, kernel_size))
# model.add(Activation('relu'))

model.add(MaxPooling2D(pool_size=(12, 12)))

model.add(Flatten())
print("Model flattened out to: ", model.output_shape)

model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(4096))
model.add(Activation("relu"))
model.add(Dropout(0.2))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model = multi_gpu_model(model, gpus=nb_gpus)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print(model.summary())

# train model
stop = EarlyStopping(monitor='acc',
                     min_delta=0.001,
                     patience=2,
                     verbose=1,
                     mode='auto')

tensor_board = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epoch,
          verbose=1,
          validation_split=0.2,
          class_weight='auto',
          callbacks=[stop, tensor_board]
          )

Model flattened out to:  (None, 1152)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv2d_19_input (InputLayer)    (None, 256, 256, 1)  0                                            
__________________________________________________________________________________________________
lambda_5 (Lambda)               (None, 256, 256, 1)  0           conv2d_19_input[0][0]            
__________________________________________________________________________________________________
lambda_6 (Lambda)               (None, 256, 256, 1)  0           conv2d_19_input[0][0]            
__________________________________________________________________________________________________
sequential_3 (Sequential)       (None, 14)           23241326    lambda_5[0][0]                   
                                                                 lambda

<keras.callbacks.History at 0x7f610d0bada0>

In [6]:
# predict
print("Predicting")
y_pred = model.predict(X_test)

y_test = np.argmax(y_test, axis=1)
y_pred = np.argmax(y_pred, axis=1)

precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average="weighted")
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

Predicting
Precision:  0.39375289199788366
Recall:  0.6274973242953978
F1:  0.4838753171755333


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [8]:
model.save('./model20181011.h5')