In [1]:
import os
import numpy as np
import pandas as pd

In [3]:
# Load the metadata table and attach the on-disk location of every image.
df = pd.read_csv('../../CHESTXRAY/Data_Entry_2017.csv')
image_dir = '../../images'
# File name -> path (joined with image_dir) for every entry found in image_dir.
image_path = {f: os.path.join(image_dir, f) for f in os.listdir(image_dir)}
print("Scans found: {}, total headers: {}".format(len(image_path), df.shape[0]))
df['path'] = df['Image Index'].map(image_path.get)  # add path column

# `dict.get` returns None for file names that are not on disk, and matching
# totals above do not prove that every row found its file. Fail here with a
# clear message instead of passing rows without a path downstream.
missing_path = df['path'].isnull()
if missing_path.any():
    raise FileNotFoundError(
        "{} of {} rows have no matching file in {!r}; first missing: {!r}".format(
            int(missing_path.sum()),
            df.shape[0],
            image_dir,
            df.loc[missing_path, 'Image Index'].iloc[0],
        )
    )

# NOTE: df['Patient Age'] contains values larger than 100.
df.sample(3)

Scans found: 112120, total headers: 112120


Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11,path
12694,00003330_003.png,No Finding,3,3330,44,F,AP,2500,2048,0.168,0.168,,../../images/00003330_003.png
49111,00012460_001.png,Mass,1,12460,63,M,PA,2992,2991,0.143,0.143,,../../images/00012460_001.png
91705,00022870_009.png,No Finding,9,22870,63,M,PA,2862,2991,0.143,0.143,,../../images/00022870_009.png


In [4]:
# Binary targets derived from the label text: 'positive' is 1 for every row
# whose label is anything other than 'No Finding'; 'negtive' is its complement.
# The column name 'negtive' is kept as spelled because the `classes` list
# refers to it by that exact name.
has_finding = df['Finding Labels'] != 'No Finding'
df['positive'] = has_finding.astype('int64')
df['negtive'] = (~has_finding).astype('int64')
df.sample(3)

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11,path,positive,negtive
107983,00029207_008.png,Mass,8,29207,23,M,PA,2021,2021,0.194311,0.194311,,../../images/00029207_008.png,1,0
64063,00015809_015.png,Pneumonia,15,15809,38,F,AP,3056,2544,0.139,0.139,,../../images/00015809_015.png,1,0
63943,00015784_000.png,No Finding,0,15784,63,F,PA,2442,2627,0.143,0.143,,../../images/00015784_000.png,0,1


In [5]:
# Names of the target columns; the spelling (including 'negtive') has to match
# the DataFrame columns created in the previous cell.
classes = ['positive', 'negtive']
nb_records = len(df)        # number of rows in the metadata table
nb_classes = len(classes)   # number of target columns
print(nb_records, nb_classes)

112120 2


In [6]:
# Positional 70% / 20% / 10% split in file order: rows are sliced as they
# appear in `df`, with no shuffling.
train_end = int(nb_records*0.7)
valid_end = int(nb_records*0.9)

train_df = df.iloc[:train_end]
valid_df = df.iloc[train_end:valid_end]
test_df = df.iloc[valid_end:]
print(train_df.shape, valid_df.shape, test_df.shape)

(78484, 15) (22424, 15) (11212, 15)


In [7]:
from models import ModelFactory

# Shape of a single input image handed to the model factory.
image_shape = (224, 224, 3)

# Build the DenseNet-121 variant exposed by the project's ModelFactory and
# print its layer table.
model = ModelFactory(
    nb_classes,
    image_shape,
).densenet121()
model.summary()

Using TensorFlow backend.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [8]:
# Restore previously saved weights from the checkpoint file in the working directory.
weights_path = "weights_024_0.6073.hdf5"
model.load_weights(weights_path)

In [9]:
from generator import DataGenerator

batch_size = 16

# Batch generator over the test rows. shuffle=False presumably keeps the batch
# order aligned with the row order of test_df (verify against DataGenerator),
# which the later comparison against test_df[classes] relies on.
test_generator = DataGenerator(
    test_df,
    path_key="path",
    classes_key=classes,
    batch_size=batch_size,
    shuffle=False,
)
# Number of batches the generator reports.
print(len(test_generator))

701


In [10]:
# Run inference over every batch of the test generator (with a progress bar).
y_pred = model.predict_generator(
    test_generator,
    verbose=1,
)
# Sanity check: container type and number of predictions.
print(type(y_pred), len(y_pred))

<class 'numpy.ndarray'> 11212


In [11]:
# Ground-truth targets for the test rows, taken from the columns named in
# `classes`, as a NumPy array.
y_test = test_df.loc[:, classes].values
# Sanity check: container type and number of label rows.
print(type(y_test), len(y_test))

<class 'numpy.ndarray'> 11212


In [12]:
# Show the first prediction followed by the first ground-truth row, one per line.
print(y_pred[0], y_test[0], sep="\n")

[0.9269635  0.07277189]
[1 0]


In [14]:
from keras.optimizers import Adam

# Attach an optimizer, a loss and an accuracy metric to the model; the next
# cell calls evaluate_generator and reads the loss and metric back.
optimizer = Adam(lr=0.001)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [15]:
# Evaluate the compiled model over the test generator (with a progress bar).
score = model.evaluate_generator(
    test_generator,
    verbose=1,
)
# Sanity check: container type and number of returned values.
print(type(score), len(score))

<class 'list'> 2


In [16]:
# score[0] is reported as the loss and score[1] as the accuracy.
test_loss = score[0]
test_accuracy = score[1]
print("Test loss:", test_loss)
print("Test accuracy", test_accuracy)

Test loss: 0.5643169690233191
Test accuracy 0.7201658937066018
