### load data

In [1]:
import os
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Label table stored in the same directory as the weight checkpoints.
df = pd.read_csv("weights_20181018-1019/Data_Entry_2017_shuffled.csv")
# Fixed seed so the previewed rows are identical on every Restart & Run All.
df.sample(3, random_state=0)

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
63748,13514,00003516_000.png,No Finding,0,3516,48,F,PA,2048,2500,...,0,0,0,0,0,1,0,0,0,0
2996,77600,00019082_000.png,No Finding,0,19082,71,M,PA,3056,2468,...,0,0,0,0,0,1,0,0,0,0
57893,40457,00010531_018.png,Infiltration,18,10531,29,F,AP,3024,2544,...,0,0,0,1,0,0,0,0,0,0


In [3]:
# Label columns used as model targets. The CSV also has a 'No Finding'
# column, which is not part of this list.
classes = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule',
    'Pleural_Thickening', 'Pneumonia', 'Pneumothorax',
]
nb_records = len(df)
nb_classes = len(classes)
print(nb_records, nb_classes)

# Positional 70% / 20% / 10% split taken in file order; nothing is reshuffled
# here (presumably the "_shuffled" CSV is already randomised - confirm).
split_cuts = [0, int(nb_records * 0.7), int(nb_records * 0.9), nb_records]
train_df, valid_df, test_df = (
    df.iloc[start:stop] for start, stop in zip(split_cuts[:-1], split_cuts[1:])
)
print(train_df.shape, valid_df.shape, test_df.shape)

112120 14
(78484, 29) (22424, 29) (11212, 29)


### build model and load weights

In [4]:
from models import ModelFactory

# Network input: 224 x 224 images with 3 channels.
image_shape = (224, 224, 3)

# Build the DenseNet121 variant with one output per entry in `classes`.
factory = ModelFactory(nb_classes, image_shape)
model = factory.densenet121()
model.summary()

Using TensorFlow backend.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [5]:
# Checkpoint to evaluate; restored into the model built above.
weights_path = "weights_20181018-1019/weights_034_0.3282.hdf5"
model.load_weights(weights_path)

### model predict

In [6]:
from generator import DataGenerator

batch_size = 32

# shuffle=False: presumably keeps batches in test_df row order so predictions
# can later be compared row by row with the labels - confirm in DataGenerator.
test_generator = DataGenerator(
    test_df,
    path_key="path",
    classes_key=classes,
    batch_size=batch_size,
    shuffle=False,
)
nb_test_batches = len(test_generator)  # number of batches the generator yields
print(nb_test_batches)

351


In [7]:
# Run inference over the whole test generator (verbose=1 turns on Keras'
# progress output), then report the container type and number of rows.
y_pred = model.predict_generator(test_generator, verbose=1)
pred_type, pred_count = type(y_pred), len(y_pred)
print(pred_type, pred_count)

<class 'numpy.ndarray'> 11212


In [8]:
# Ground-truth label matrix: one row per test record, one column per class,
# in the same column order as `classes`.
y_test = test_df.loc[:, classes].values
label_type, label_count = type(y_test), len(y_test)
print(label_type, label_count)

<class 'numpy.ndarray'> 11212


In [9]:
# Spot check on the first test record: predicted scores, then true labels.
for first_row in (y_pred[0], y_test[0]):
    print(first_row)

[5.1383424e-04 2.5200246e-05 3.8264378e-04 4.3186352e-09 2.5990819e-06
 7.9005345e-04 8.1298343e-04 8.2888384e-08 3.4342325e-04 1.8236385e-08
 5.9591461e-05 7.2733485e-05 3.0600619e-05 1.0399860e-08]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [10]:
# AUROC measurement
from sklearn.metrics import roc_auc_score

# Per-class AUROC is only meaningful when labels and predictions line up
# row by row and column by column.
assert y_test.shape == y_pred.shape, "y_test and y_pred must have the same shape"

test_log_path = "auroc.log"
print("** write auroc to {} **".format(test_log_path))

# Compute every score before opening the log, so an unexpected error cannot
# leave a half-written file behind.
aurocs = []      # defined scores only; these are what the mean is taken over
log_lines = []   # one "<class>: <score>" line per class, in `classes` order
for i in range(nb_classes):
    try:
        score = roc_auc_score(y_test[:, i], y_pred[:, i])
    except ValueError:
        # roc_auc_score raises ValueError when the score is undefined (e.g.
        # only one label value present for this class). Record NaN rather
        # than 0: an AUROC of 0 would read as a perfectly inverted classifier.
        score = float("nan")
    if not np.isnan(score):
        aurocs.append(score)
    log_lines.append("{}: {}\n".format(classes[i], score))

# Mean over the classes with a defined score; NaN if there are none
# (avoids np.mean([]) emitting a RuntimeWarning).
mean_auroc = np.mean(aurocs) if aurocs else float("nan")

with open(test_log_path, "w") as f:
    f.writelines(log_lines)
    f.write("-------------------------\n")
    f.write("mean auroc: {}\n".format(mean_auroc))
print("mean auroc: {}".format(mean_auroc))

# NOTE(review): a mean AUROC close to 0.5 is chance level. If that is what
# this cell reports, verify that the generator yields rows in test_df order
# and applies the same preprocessing that was used for training.

** write auroc to auroc.log **
mean auroc: 0.49451268305196494


In [11]:
# binary classification: "at least one finding" vs. "no finding", per image


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


thres = 0.5  # an image is predicted diseased when any class score exceeds this

label_sums = y_test.sum(axis=1)  # > 0 when at least one finding is labelled
top_scores = y_pred.max(axis=1)  # highest predicted class score per image

# Both sides of each comparison are spelled out (instead of negating a mask)
# so that rows satisfying neither, e.g. NaN scores, are counted nowhere.
has_disease = label_sums > 0
no_disease = label_sums == 0
flagged = top_scores > thres
cleared = top_scores <= thres

tp = int(np.count_nonzero(has_disease & flagged))  # predicted disease, has disease
tn = int(np.count_nonzero(no_disease & cleared))
fp = int(np.count_nonzero(no_disease & flagged))
fn = int(np.count_nonzero(has_disease & cleared))
print("tp = {}, tn = {}, fp = {}, fn = {}\n".format(tp, tn, fp, fn))

# Each ratio is reported as 0.0 when its denominator is zero (no positive
# predictions, no positive labels, or an empty test set) instead of raising
# ZeroDivisionError.
precision = _safe_ratio(tp, tp + fp)
recall = _safe_ratio(tp, tp + fn)
f1 = _safe_ratio(2 * precision * recall, precision + recall)
accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("Accuracy: ", accuracy)

tp = 1596, tn = 4147, fp = 1875, fn = 3594

Precision:  0.45980985306828004
Recall:  0.30751445086705204
F1:  0.36854866643574646
Accuracy:  0.5122190510167678
