### load data

In [1]:
import os
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Read the pre-shuffled metadata/label table and preview three random rows.
data_csv = "weights_20181018-1019/Data_Entry_2017_shuffled.csv"
df = pd.read_csv(data_csv)
df.sample(3)

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
4408,56530,00014063_001.png,No Finding,1,14063,58,M,PA,2992,2991,...,0,0,0,0,0,1,0,0,0,0
81795,50497,00012804_005.png,Atelectasis|Effusion|Fibrosis|Infiltration|Mass,5,12804,62,M,PA,2992,2991,...,0,1,0,1,1,0,0,0,0,0
3390,25551,00006717_003.png,Atelectasis|Effusion|Infiltration,3,6717,58,F,PA,2048,2500,...,0,0,0,1,0,0,0,0,0,0


In [3]:
# Label columns used as prediction targets; the "No Finding" column of the
# table is not part of this list.
classes = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule',
    'Pleural_Thickening', 'Pneumonia', 'Pneumothorax',
]
nb_records = len(df)
nb_classes = len(classes)
print(nb_records, nb_classes)


# Positional split of the rows: first 70 % train, next 20 % validation,
# last 10 % test.
# NOTE(review): the split is by row position only, so the same "Patient ID"
# may end up in more than one split -- confirm this is intended.
train_end = int(nb_records * 0.7)
valid_end = int(nb_records * 0.9)
train_df = df.iloc[:train_end]
valid_df = df.iloc[train_end:valid_end]
test_df = df.iloc[valid_end:]
print(train_df.shape, valid_df.shape, test_df.shape)

112120 14
(78484, 29) (22424, 29) (11212, 29)


### build model and load weights

In [4]:
from models import ModelFactory

# Build the DenseNet-121 network from the project's ModelFactory, passing the
# number of target classes and the input image shape, then print its layers.
image_shape = (224, 224, 3)  # input image shape
factory = ModelFactory(nb_classes, image_shape)
model = factory.densenet121()
model.summary()

Using TensorFlow backend.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [5]:
# Load previously saved weights from the .hdf5 checkpoint into the model built above.
# NOTE(review): the file name looks like <epoch>_<validation loss> (034 / 0.3282) --
# confirm against the training run that produced it.
model.load_weights("weights_20181018-1019/weights_034_0.3282.hdf5")

### model predict

In [7]:
from generator import DataGenerator

# Batch generator over the held-out test rows. shuffle=False is passed so the
# generator is asked not to reorder records.
batch_size = 32

test_generator = DataGenerator(
    test_df,
    path_key="path",
    classes_key=classes,
    batch_size=batch_size,
    shuffle=False,
)
print(len(test_generator))

351


In [8]:
# Run the model over everything test_generator yields and collect the outputs.
y_pred = model.predict_generator(test_generator, verbose=1)
# Sanity check: the row count printed here should equal the number of test records.
print(type(y_pred), len(y_pred))

<class 'numpy.ndarray'> 11212


In [9]:
# Ground-truth label matrix: one row per test record, one column per entry in
# `classes`, in test_df row order.
# NOTE(review): comparing this row-by-row with y_pred relies on the generator
# yielding records in test_df order (it was created with shuffle=False) --
# confirm in DataGenerator.
y_test = test_df[classes].values
print(type(y_test), len(y_test))

<class 'numpy.ndarray'> 11212


In [10]:
# Spot-check the first test record: predicted per-class scores, then its true labels.
print(y_pred[0])
print(y_test[0])

[8.1442968e-09 1.2996384e-09 6.5117622e-10 3.1706602e-08 1.1175252e-07
 2.1770479e-06 2.2893023e-07 1.6032443e-14 7.6416931e-03 3.3124425e-10
 1.9012436e-06 1.1459352e-07 2.3358273e-06 3.1633538e-06]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [11]:
# binary classification
#
# Collapse the per-class outputs into a single "any finding" vs. "no finding"
# decision per record and score it:
#   * a record is truly positive when at least one of its labels is set;
#   * a record is predicted positive when its highest class score exceeds
#     `thres`.

thres = 0.5  # a record counts as predicted-positive when its top class score is above this

# Fail loudly on a length mismatch instead of silently scoring only the overlap.
assert len(y_test) == len(y_pred), "y_test and y_pred must have one row per record"

label_counts = y_test.sum(axis=1)   # number of labels set per record
top_scores = y_pred.max(axis=1)     # strongest class score per record

has_finding = label_counts > 0
no_finding = label_counts == 0
flagged = top_scores > thres        # predicted disease
# Kept as an explicit comparison (not ~flagged) so a NaN score falls into
# neither bucket, exactly as in a row-by-row comparison.
cleared = top_scores <= thres

tp = int(np.count_nonzero(has_finding & flagged))
tn = int(np.count_nonzero(no_finding & cleared))
fp = int(np.count_nonzero(no_finding & flagged))
fn = int(np.count_nonzero(has_finding & cleared))
print("tp = {}, tn = {}, fp = {}, fn = {}\n".format(tp, tn, fp, fn))

# Each ratio falls back to 0.0 when its denominator is empty (e.g. nothing was
# predicted positive), rather than raising ZeroDivisionError.
total = tp + tn + fp + fn
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
accuracy = (tp + tn) / total if total else 0.0
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("Accuracy: ", accuracy)

tp = 1635, tn = 4177, fp = 1845, fn = 3555

Precision:  0.4698275862068966
Recall:  0.315028901734104
F1:  0.3771626297577855
Accuracy:  0.5183731716018551
