In [1]:
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import cv2
from skimage.transform import resize
import glob
from sklearn.metrics import confusion_matrix
from sklearn import ensemble
from sklearn.model_selection import cross_val_score
import os
from sklearn.svm import SVC
from keras.applications.vgg16 import VGG16
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
%matplotlib inline
warnings.filterwarnings('ignore')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Data

In [92]:
X = []
y = []
for directory, _, file in os.walk('data/chest_xray/train'):
    # cutting the data in half because it takes too long to run these models on the full data
    # grayscale to better fit in the RF and SVC models
    for f in file[1::2]:
        f = os.path.join(directory, f)
        img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            # resizing to (150, 150)
            img = resize(img, (150, 150, 1))
            img = np.asarray(img)
            label=f.split('/')[-2]
            X.append(img)
            y.append(label)

In [93]:
X_test = []
y_test = []
for directory, _, file in os.walk('data/chest_xray/test'):
    # not cutting this in half because the test set is already small
    for f in file[1:]:
        f = os.path.join(directory, f)
        img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            img = resize(img, (150, 150, 1))
            img = np.asarray(img)
            label=f.split('/')[-2]
            X_test.append(img)
            y_test.append(label)

In [94]:
X, X_test = np.asarray(X), np.asarray(X_test)
y, y_test = np.asarray(y), np.asarray(y_test)

X_rf = []
# flattening to a 1 dimensional array to feed into the RF and SVC models
for i in range(len(X)):
    a = X[i].flatten()
    X_rf.append(a)
    
X_rf = np.asarray(X_rf)

X_rf_test = []
for i in range(len(X_test)):
    a = X_test[i].flatten()
    X_rf_test.append(a)
    
X_rf_test = np.asarray(X_rf_test)

In [95]:
# binarizing
for i in range(len(y)):
    if y[i] == 'PNEUMONIA':
        y[i] = 1
    else:
        y[i] = 0

for i in range(len(y_test)):
    if y_test[i] == 'PNEUMONIA':
        y_test[i] = 1
    else:
        y_test[i] = 0

In [96]:
# computing class weights. this data is imbalanced
from sklearn.utils import class_weight
y_labels = np.argmax(y)
classweight = class_weight.compute_class_weight('balanced', np.unique(y), y)
print(classweight)

[1.94552239 0.67294786]


## Random Forest

In [41]:
rf = ensemble.RandomForestClassifier()
rf.fit(X_rf, y)
print(cross_val_score(rf, X_rf, y))

[0.91954023 0.91254315 0.93317972]


In [44]:
y_pred = rf.predict(X_rf_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.792604501607717
true negative: 115
false negative: 118
true positive: 378
false positive: 11


In [104]:
# employing class weights here
rf = ensemble.RandomForestClassifier(class_weight = {'0':classweight[0], '1':classweight[1]})
rf.fit(X_rf, y)
print(cross_val_score(rf, X_rf, y))

y_pred = rf.predict(X_rf_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

[0.92068966 0.89643268 0.90898618]
accuracy: 0.7781350482315113
true negative: 111
false negative: 122
true positive: 373
false positive: 16


The model does fairly poorly, giving equally likely true and false negatives. The high accuracy is primarily a result of the unbalanced nature of the samples. Sensitivity is our most important metric, and it, at least, it somewhat low. Oddly, class weight balancing appears to make this model perform worse.

## SVC

In [107]:
svc = SVC()
svc.fit(X_rf, y)
y_pred = svc.predict(X_rf_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.7427652733118971
true negative: 79
false negative: 154
true positive: 383
false positive: 6


In [106]:
# employing class weights here
svc = SVC(class_weight = 'balanced')
svc.fit(X_rf, y)
y_pred = svc.predict(X_rf_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.8263665594855305
true negative: 149
false negative: 84
true positive: 365
false positive: 24


Overall accuracy is significantly higher, yet sensitivity is lower than the random forest model. Runtime is also significantly longer.

## Dense

In [146]:
# Manipulating the data to give more robust results
datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=.2,
        height_shift_range=.2,
        rescale=1/255,
        shear_range=.2,
        zoom_range=.2,
        horizontal_flip=True,
        fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1/255)

if K.image_data_format() == 'channels_first':
    input_shape = (3, 150, 150)
else:
    input_shape = (150, 150, 3)

In [147]:
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [148]:
train = datagen.flow_from_directory('data/chest_xray/train',
                                   target_size=(150, 150),
                                   batch_size=16,
                                   class_mode='binary')

test = test_datagen.flow_from_directory('data/chest_xray/test',
                                   target_size=(150, 150),
                                   batch_size=16,
                                   class_mode='binary')

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [112]:
model.fit_generator(train,
                   steps_per_epoch=5216 // 16,
                   epochs = 5,
                   validation_data = test,
                   validation_steps=624 // 16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [113]:
print(model.metrics_names)
model.evaluate(test)

['loss', 'acc']


[5.978394523645059, 0.625]

In [126]:
# For inputting test images in color and for the last model
X = []
y = []
for directory, _, file in os.walk('data/chest_xray/train'):
    for f in file[1::2]:
        f = os.path.join(directory, f)
        img = cv2.imread(f, cv2.IMREAD_COLOR)
        if img is not None:
            img = resize(img, (150, 150, 3))
            img = np.asarray(img)
            label=f.split('/')[-2]
            X.append(img)
            y.append(label)
            
X_test = []
y_test = []
for directory, _, file in os.walk('data/chest_xray/test'):
    for f in file[1:]:
        f = os.path.join(directory, f)
        img = cv2.imread(f, cv2.IMREAD_COLOR)
        if img is not None:
            img = resize(img, (150, 150, 3))
            img = np.asarray(img)
            label=f.split('/')[-2]
            X_test.append(img)
            y_test.append(label)
            
X, X_test = np.asarray(X), np.asarray(X_test)
y, y_test = np.asarray(y), np.asarray(y_test)

In [127]:
for i in range(len(y)):
    if y[i] == 'PNEUMONIA':
        y[i] = 1
    else:
        y[i] = 0

for i in range(len(y_test2)):
    if y_test[i] == 'PNEUMONIA':
        y_test[i] = 1
    else:
        y_test[i] = 0

In [130]:
y = [int(i) for i in y]
y_test = [int(i) for i in y_test]

y_pred = model.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.6254019292604501
true negative: 0
false negative: 233
true positive: 389
false positive: 0


In [133]:
y_pred

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],

Our dense network has one 64 feature layer and drops out half the data to avoid overfitting. This model only ended up predicting 1s, so we'll weight it as well and see if that changes anything.

In [149]:
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              weighted_metrics=['categorical_accuracy'],
              metrics=['accuracy'])

In [150]:
model.fit_generator(train,
                   steps_per_epoch=5216 // 16,
                   epochs = 5,
                   validation_data = test,
                   validation_steps=624 // 16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x14f519390>

In [151]:
y_pred = model.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.6254019292604501
true negative: 0
false negative: 233
true positive: 389
false positive: 0


 It has fairly miserable performance.

## CNN

In [166]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [167]:
model.fit_generator(train,
                   steps_per_epoch=5216 // 16,
                   epochs = 27,
                   validation_data = test,
                   validation_steps=624 // 16)
model.save_weights('weights')

Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27


In [168]:
y_pred = model.predict(X_test)
# binarizing output
y_binary_pred = []
for i in range(len(y_pred)):
    if y_pred[i] >= .5:
        y_binary_pred.append(1)
    else:
        y_binary_pred.append(0)
        
tn, fp, fn, tp = confusion_matrix(y_binary_pred, y_test).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

accuracy: 0.8183279742765274
true negative: 129
false negative: 104
true positive: 380
false positive: 9


Our CNN model uses three convolutional neural networks chained together with pooling layers, and round it out with the same two dense layers around a 50% dropout as before. Performance is better, but still falls short of ever being useful. Weighting it (weighted modeled not included) didn't appear to help, but the unweighted model has a fairly low number of errors, especially type II errors.

## VGG16

In [169]:
model = VGG16(include_top=False, input_shape=(150, 150, 3))
# binarizing our output
x = model.output
x = Flatten()(x)
x = Dense(2, activation='softmax')(x)

model = Model(inputs=model.input, outputs=x)

print(model.summary())

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0   

In [170]:
# freezing the top 18 layers to expedite this training
for layer in model.layers[0:18]:
    layer.trainable=False

In [171]:
print(model.summary())

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0   

In [175]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [178]:
from keras.utils import to_categorical
y2 = y
y_test2 = y_test
y2 = to_categorical(y2)
y_test2 = to_categorical(y_test2)
model.fit(X, y2,
          batch_size = 200,
          epochs = 30,
          validation_data = (X_test, y_test2))

Train on 2607 samples, validate on 622 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x15751c510>

In [180]:
y_pred = model.predict(X_test)
# binarizing output
y_binary_pred = []
for i in range(len(y_pred)):
    if y_pred[i] >= .5:
        y_binary_pred.append(1)
    else:
        y_binary_pred.append(0)
        
tn, fp, fn, tp = confusion_matrix(y_pred, y_test2).ravel()
accuracy = (tn + tp) / (tn + tp + fn + fp)
print('accuracy: {}\ntrue negative: {}\nfalse negative: {}\ntrue positive: {}\nfalse positive: {}'
      .format(accuracy, tn, fn, tp, fp))

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [181]:
y_pred

array([[9.2564187e-05, 9.9990737e-01],
       [2.9377057e-04, 9.9970621e-01],
       [8.2008431e-05, 9.9991798e-01],
       ...,
       [2.7759981e-01, 7.2240019e-01],
       [6.9250260e-03, 9.9307501e-01],
       [1.2869163e-03, 9.9871314e-01]], dtype=float32)

This model performs significanty better, but it overfits and still leaves us with quite a lot of type I error

## Image Examples

In [None]:
# We're taking the sum of y_pred and y_test2 to find examples of misclassified and properly classified images.
# Since the output is binary, 1s will indicate false predictions, and 0s and 2s will indicate true ones.

y_tot = np.add(y_pred, y_test2)
print(y_tot[y_tot == 1].index)
print(y_tot)
plt.imshow(X_test[5])
print(y_tot[y_tot == 2].index)
plt.imshow(X_test[100])
print(y_tot[y_tot == 0].index)
plt.imshow(X_test[300])