In [1]:
import numpy as np
import os
from configparser import ConfigParser
from generator import AugmentedImageSequence
from models.keras import ModelFactory
from sklearn.metrics import roc_auc_score
from utility import get_sample_counts

Using TensorFlow backend.


In [2]:
# parser config
config_file = "./config.ini"
cp = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of
# letting a missing config surface later as an unrelated error.
if not cp.read(config_file):
    raise FileNotFoundError("config file not found: {}".format(config_file))

# default config
output_dir = cp["DEFAULT"].get("output_dir")
base_model_name = cp["DEFAULT"].get("base_model_name")
# comma-separated list in the config; entries are used verbatim (not stripped)
class_names = cp["DEFAULT"].get("class_names").split(",")
image_source_dir = cp["DEFAULT"].get("image_source_dir")

# train config
# evaluation reuses the training image size, hence the TRAIN section
image_dimension = cp["TRAIN"].getint("image_dimension")

# test config
batch_size = cp["TEST"].getint("batch_size")
# kept as a raw string here: it is either "auto" or an integer literal and is
# resolved to an int in a later cell
test_steps = cp["TEST"].get("test_steps")
use_best_weights = cp["TEST"].getboolean("use_best_weights")

# parse weights file path
output_weights_name = cp["TRAIN"].get("output_weights_name")
weights_path = os.path.join(output_dir, output_weights_name)
best_weights_path = os.path.join(output_dir, "best_{}".format(output_weights_name))

# get test sample count
# only the first element of the returned pair is used here
test_counts, _ = get_sample_counts(output_dir, "test", class_names)

  labels = df[class_names].as_matrix()


In [3]:
# compute steps
# `test_steps` arrives as the raw config value: "auto" derives the step count
# from the sample count, anything else must parse as an integer.
if test_steps == "auto":
    # truncating division: a trailing partial batch is not evaluated
    test_steps = int(test_counts / batch_size)
else:
    try:
        test_steps = int(test_steps)
    except (TypeError, ValueError) as err:
        # TypeError covers a missing config key (the value is then None), so
        # that case reports the same actionable message as a malformed value.
        raise ValueError("""
            test_steps: {} is invalid,
            please use 'auto' or integer.
            """.format(test_steps)) from err
print("** test_steps: {} **".format(test_steps))

** test_steps: 47 **


In [4]:
print("** load model **")
# Pick the checkpoint (and the matching banner) in one place: the "best"
# weights file when use_best_weights is truthy, otherwise the last-saved one.
weights_banner, model_weights_path = (
    ("** use best weights **", best_weights_path)
    if use_best_weights
    else ("** use last weights **", weights_path)
)
print(weights_banner)

# Build the network for the configured class list and load the chosen weights.
# NOTE(review): use_base_weights=False presumably skips the pretrained base
# weights because weights_path supplies a full checkpoint; confirm in ModelFactory.
model_factory = ModelFactory()
model = model_factory.get_model(
    class_names,
    model_name=base_model_name,
    use_base_weights=False,
    weights_path=model_weights_path,
)

** load model **
** use best weights **
load model weights_path: ./experiments/1/best_weights.h5


In [5]:
print("** load test generator **")
# NOTE(review): the step count is derived from the "test" split's sample count,
# but the sequence below reads dev.csv. Confirm this pairing is intentional.
eval_csv_path = os.path.join(output_dir, "dev.csv")
eval_image_size = (image_dimension, image_dimension)  # square input

# No augmentation and no shuffling for evaluation.
test_sequence = AugmentedImageSequence(
    dataset_csv_file=eval_csv_path,
    class_names=class_names,
    source_image_dir=image_source_dir,
    batch_size=batch_size,
    target_size=eval_image_size,
    augmenter=None,
    steps=test_steps,
    shuffle_on_epoch_end=False,
)

** load test generator **


  self.x_path, self.y = df["Image Index"].as_matrix(), df[self.class_names].as_matrix()


In [6]:
print("** make prediction **")
# Score every batch produced by test_sequence. The result is indexed as a 2-D
# array (sample, class) by the cells that follow.
y_hat = model.predict_generator(test_sequence, verbose=1)
# Ground-truth labels as reported by the sequence itself.
# NOTE(review): presumably row-aligned with y_hat because the sequence was
# built with shuffle_on_epoch_end=False; confirm in AugmentedImageSequence.
y = test_sequence.get_y_true()

** make prediction **


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "




In [7]:
# Per-class AUROC report, written to a log file in the output directory.
test_log_path = os.path.join(output_dir, "test_bruce.log")
print("** write log to {} **".format(test_log_path))
aurocs = []
with open(test_log_path, "w") as f:
    for col, label in enumerate(class_names):
        try:
            score = roc_auc_score(y[:, col], y_hat[:, col])
        except ValueError:
            # The class is logged with a score of 0 but is left out of
            # `aurocs`, so it does not pull the mean down.
            score = 0
        else:
            aurocs.append(score)
        f.write("{}: {}\n".format(label, score))
    # Mean over the classes whose AUROC could be computed.
    mean_auroc = np.mean(aurocs)
    f.write("-------------------------\n")
    f.write("mean auroc: {}\n".format(mean_auroc))
    print("mean auroc: {}".format(mean_auroc))

** write log to ./experiments/1/test_bruce.log **
mean auroc: 0.8196393137718962


In [8]:
# Sanity check: container types, then the first prediction row (y_pred)
# followed by the first ground-truth row (y_test).
print(*(type(arr) for arr in (y_hat, y)))
print(y_hat[0], y[0], sep="\n")

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
[7.4522257e-05 9.4205710e-10 1.2541806e-05 2.6150180e-03 2.1063430e-07
 5.3130307e-06 9.5761958e-09 4.5514650e-08 5.6060912e-07 2.4599367e-10
 4.3676898e-10 4.0079358e-09 9.9609956e-08 1.7189953e-16]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [21]:
# binary classification evaluation
#
# The multi-label problem is collapsed to one decision per sample:
#   - actual positive:    the ground-truth row of `y` has at least one label set
#   - predicted positive: the highest score in the `y_hat` row exceeds `thres`

thres = 0.001  # a sample counts as "predicted disease" when its top score exceeds this

label_totals = y.sum(axis=1)
has_finding = label_totals > 0
no_finding = label_totals == 0

top_score = y_hat.max(axis=1)
pred_pos = top_score > thres
# Computed separately (not as ~pred_pos) so a NaN score falls in no bucket,
# exactly as with the explicit `>` / `<=` comparisons.
pred_neg = top_score <= thres

tp = int(np.count_nonzero(has_finding & pred_pos))  # predicted disease, has disease
tn = int(np.count_nonzero(no_finding & pred_neg))
fp = int(np.count_nonzero(no_finding & pred_pos))
fn = int(np.count_nonzero(has_finding & pred_neg))
print("tp = {}, tn = {}, fp = {}, fn = {}\n".format(tp, tn, fp, fn))


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Guarded divisions: an empty evaluation set, or a threshold that flags nothing
# (or everything), must not abort the cell with ZeroDivisionError.
precision = _safe_ratio(tp, tp + fp)
recall = _safe_ratio(tp, tp + fn)
f1 = _safe_ratio(2 * precision * recall, precision + recall)
accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("Accuracy: ", accuracy)

tp = 691, tn = 118, fp = 672, fn = 23

Precision:  0.5069699192956714
Recall:  0.9677871148459384
F1:  0.6653827636013481
Accuracy:  0.5378989361702128
