In [None]:
import numpy as np
import tifffile as tiff
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
%cd ..
from evaluate import read_roi, count_matches, calc_f1_scores
%cd -

In [None]:
# IoU threshold at which one representative F1 value is reported
# (marked on the plot and quoted in its title).
REPRESENTATIVE_IOU = 0.4

# Number of evenly spaced IoU thresholds to sweep.
NUM_THRESHOLDS = 100

# Thresholds 1/N, 2/N, ..., N/N (the last one is exactly 1.0).
thresholds = np.arange(1, NUM_THRESHOLDS + 1) / NUM_THRESHOLDS

In [None]:
def evaluate_gt(gt_dir, thresholds):
    """
    Evaluate individual annotations against ground truth masks.

    For each annotator subdirectory, every consensus ``*.tif`` file is
    compared with the annotator's file of the same stem. The match counts
    are summed over all files, and one F1 curve per annotator is computed
    from the summed counts.

    Parameters
    ----------
    gt_dir : string or pathlib.Path
        Directory path to GT labelings. Consensus labels must be placed
        directly under this directory whereas individual labels must be
        placed under its subdirectories.
    thresholds : list of float
        IoU thresholds.

    Returns
    -------
    f1_dict : dictionary
        Dictionary containing F1 scores for the individual annotations,
        keyed by subdirectory name, plus a 'mean' entry holding the
        element-wise average of the individual F1 curves.

    Raises
    ------
    ValueError
        If gt_dir contains no subdirectories, i.e. there are no individual
        annotations to evaluate.

    """
    # Candidate extensions for an annotator's file, tried in this order.
    EXT_LIST = ['.tif', '.roi', '.zip']

    gt_dir = Path(gt_dir)
    gt_files = sorted(gt_dir.glob('*.tif'))
    gt_subdirs = sorted(x for x in gt_dir.iterdir() if x.is_dir())

    # Without annotators the mean below would be 0 / 0 and come back as a
    # silent array of NaNs; fail loudly instead.
    if not gt_subdirs:
        raise ValueError(
            'No annotator subdirectories found under %s' % gt_dir)

    f1_dict = {}
    f1_sum = np.zeros(len(thresholds))
    for subdir in gt_subdirs: # iterate over individual labels

        # One row per threshold, three counters per row as returned by
        # count_matches (presumably TP/FP/FN -- confirm in evaluate.py).
        sum_counts = np.zeros((len(thresholds), 3), dtype=int)
        for gt_file in gt_files:
            gt_masks = tiff.imread(gt_file).astype(bool)

            # Default to an empty mask stack so that an annotator who has
            # no file for this image is still scored against the GT.
            eval_masks = np.zeros((0,) + gt_masks.shape[1:])
            for ext in EXT_LIST:
                eval_file = subdir.joinpath(gt_file.stem + ext)
                if eval_file.exists():
                    eval_masks = read_roi(eval_file, gt_masks.shape[1:])
                    break # first matching extension wins

            counts, _ = count_matches(eval_masks, gt_masks, thresholds)
            sum_counts += np.array(counts)

        f1, _, _ = calc_f1_scores(sum_counts)
        f1_dict[subdir.name] = f1
        f1_sum += f1

    f1_dict['mean'] = f1_sum / len(gt_subdirs)
    return f1_dict

In [None]:
def plot_multiple_F1s(f1_dict, thresholds):
    """
    Plot multiple F1 score curves.

    The 'mean' curve is drawn first, followed by every other entry in
    dictionary order. A dashed vertical line marks REPRESENTATIVE_IOU, and
    the title lists each curve's F1 value at the first threshold that is
    greater than or equal to REPRESENTATIVE_IOU.

    Parameters
    ----------
    f1_dict : dictionary of list of float
        Multiple sets of F1 scores. Each item in the dictionary is a pair of
        a label name and a list of F1 scores for varying IoU thresholds.
        Must contain a 'mean' entry. Lists and NumPy arrays are accepted.
    thresholds : list of float
        IoU thresholds. Lists and NumPy arrays are accepted.

    Returns
    -------
    None.

    Raises
    ------
    KeyError
        If f1_dict has no 'mean' entry.
    IndexError
        If no threshold is greater than or equal to REPRESENTATIVE_IOU.

    """
    # Convert up front so that plain Python lists (as documented) support
    # the element-wise comparison and subtraction used below.
    thresholds = np.asarray(thresholds)
    mean_f1 = np.asarray(f1_dict['mean'])

    offset = 0.005 # so points on y=1 will be visible

    _, ax = plt.subplots(figsize=(5, 5))
    ax.axis('square')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    # Index of the first threshold at or above the representative IoU.
    rep_idx = np.flatnonzero(thresholds >= REPRESENTATIVE_IOU)[0]

    ax.plot(thresholds, mean_f1 - offset, label='mean')
    title = 'F1 = %.2f (mean)' % mean_f1[rep_idx]
    for label, f1 in f1_dict.items():
        if label == 'mean':
            continue # already drawn above
        f1 = np.asarray(f1)
        ax.plot(thresholds, f1 - offset, label=label)
        title += ', %.2f (%s)' % (f1[rep_idx], label)

    ax.legend(loc='lower left')
    ax.set_ylabel('Score')
    ax.set_xlabel('IoU Threshold')
    ax.vlines(REPRESENTATIVE_IOU, 0, 1, colors='gray', linestyles='dashed')
    ax.set_title(title + ' at IoU = %.1f' % REPRESENTATIVE_IOU)
    plt.show()

In [None]:
# Score every annotator found under GT_DIR against the consensus labels and
# plot the resulting F1 curves.
# NOTE(review): absolute, machine-specific path -- point it at your local copy
# of the dataset ("lowmag" presumably means low magnification; confirm).
GT_DIR = '/media/bandy/nvme_data/voltage/datasets_v0.3/lowmag_GT_v20201027'
f1_dict = evaluate_gt(GT_DIR, thresholds)
plot_multiple_F1s(f1_dict, thresholds)

In [None]:
# Score every annotator found under GT_DIR against the consensus labels and
# plot the resulting F1 curves. GT_DIR and f1_dict are rebound here, so any
# values assigned to them by an earlier cell are overwritten.
# NOTE(review): absolute, machine-specific path -- point it at your local copy
# of the dataset ("highmag" presumably means high magnification; confirm).
GT_DIR = '/media/bandy/nvme_data/voltage/datasets_v0.3/highmag_GT_v20201027'
f1_dict = evaluate_gt(GT_DIR, thresholds)
plot_multiple_F1s(f1_dict, thresholds)