In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import ntpath
import os
from evaluate import calc_f1_scores

In [None]:
# Read the run-wide configuration (a JSON document stored in settings.txt).
with open('settings.txt') as f:
    settings = json.loads(f.read())

# Values pulled out of the configuration for use in later cells.
GT_IDS = settings['individual_gt_ids']
REPRESENTATIVE_IOU = settings['representative_iou']
# Directory that receives the aggregated CSV files.
OUT_DIR = '/'.join([settings['output_base_path'], settings['evaluation_result_path']])

In [None]:
def path_leaf(path):
    """Return the last component of ``path``.

    ``ntpath`` is used for splitting, so both ``/`` and ``\\`` are treated as
    separators. When the path ends with a separator (empty tail), the name of
    the component just before it is returned instead.
    """
    parent, leaf = ntpath.split(path)
    if leaf:
        return leaf
    # Trailing separator: fall back to the final component of the parent part.
    return ntpath.basename(parent)

def aggregate_scores(filter_func=lambda x: True):
    """Aggregate per-dataset detection statistics into overall scores.

    Reads ``file_params.txt`` (a JSON mapping of dataset key -> parameter
    dict). For every dataset accepted by ``filter_func`` it loads
    ``<basename>_stats.csv`` from that dataset's evaluation directory, sums the
    ``TruePos`` / ``FalsePos`` / ``FalseNeg`` counts row by row, and passes the
    summed counts to ``calc_f1_scores``.

    The IoU thresholds and the representative row (the first row whose
    ``IoU_Thresh`` is >= ``REPRESENTATIVE_IOU``) are taken from the first
    accepted dataset. Rows of the remaining files are assumed to line up with
    it; this is not verified.

    Parameters
    ----------
    filter_func : callable, optional
        Called with each dataset's parameter dict; datasets for which it
        returns a falsy value are skipped. The default accepts everything.

    Returns
    -------
    f1_rep
        Aggregated F1 value at the representative row.
    df_sum : pandas.DataFrame
        Columns ``IoU_Thresh``, ``TruePos``, ``FalsePos``, ``FalseNeg``,
        ``Precision``, ``Recall``, ``F1`` for the summed counts.
    df_each : pandas.DataFrame
        One row per accepted dataset with ``Dataset``, ``Magnification`` and
        that dataset's ``Precision`` / ``Recall`` / ``F1`` at the
        representative row.

    Raises
    ------
    ValueError
        If no dataset passes ``filter_func``, or if no IoU threshold in the
        first stats file reaches ``REPRESENTATIVE_IOU``.
    """
    count_columns = ['TruePos', 'FalsePos', 'FalseNeg']

    with open('file_params.txt') as f:
        params = json.load(f)

    records = []
    df_sum = None
    thresholds = None
    representative_iou_index = None
    for key, param in params.items():
        if not filter_func(param):
            continue
        eval_dir = settings['output_base_path'] + '/' + param['output_path']
        eval_dir += '/' + settings['evaluation_result_path']
        basename, _ = os.path.splitext(path_leaf(param['filename']))
        df = pd.read_csv(eval_dir + '/' + basename + '_stats.csv')
        if df_sum is None:
            # Explicit copy: df_sum is mutated below (+=, insert, new columns)
            # and must not be tied to the frame it was sliced from.
            df_sum = df[count_columns].copy()
            thresholds = df['IoU_Thresh']
            indices = np.where(thresholds >= REPRESENTATIVE_IOU)[0]
            if len(indices) == 0:
                raise ValueError(
                    'No IoU threshold >= %r found in stats for dataset %r'
                    % (REPRESENTATIVE_IOU, key))
            representative_iou_index = indices[0]
        else:
            # Add only the count columns instead of relying on alignment to
            # drop the threshold / score columns.
            df_sum += df[count_columns]
        # representative_iou_index is a row position, hence .iloc.
        records.append({
            'Dataset': key,
            'Magnification': param['magnification'],
            'Precision': df['Precision'].iloc[representative_iou_index],
            'Recall': df['Recall'].iloc[representative_iou_index],
            'F1': df['F1'].iloc[representative_iou_index],
        })

    if df_sum is None:
        raise ValueError('No datasets matched the given filter; nothing to aggregate')

    f1_all, precision_all, recall_all = calc_f1_scores(df_sum)

    f1_rep = f1_all[representative_iou_index]

    df_sum.insert(0, 'IoU_Thresh', thresholds)
    df_sum['Precision'] = precision_all
    df_sum['Recall'] = recall_all
    df_sum['F1'] = f1_all

    df_each = pd.DataFrame(
        records,
        columns=['Dataset', 'Magnification', 'Precision', 'Recall', 'F1'])

    return f1_rep, df_sum, df_each

In [None]:
# Aggregate over every dataset and persist both result tables.
f1_rep, df_sum, df_each = aggregate_scores()
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(OUT_DIR, exist_ok=True)
df_sum.to_csv(OUT_DIR + '/all_stats.csv', index=False)
df_each.to_csv(OUT_DIR + '/each_stats.csv', index=False)
# IoU thresholds of the aggregated table, kept as a notebook-level variable.
thresholds = df_sum['IoU_Thresh']

In [None]:
# Import `display` explicitly instead of relying on the kernel to provide it.
from IPython.display import Markdown, display
# Show the aggregated representative F1 as a heading.
display(Markdown('# F1 = %.2f' % f1_rep))

In [None]:
# Split point between the two magnification groups; the value itself falls
# into the lower group (<=).
MAGNIFICATION_THRESH = 20

# Aggregated scores for datasets at or below the split point.
f1_rep_16x, df_sum_16x, df_each_16x = aggregate_scores(
    filter_func=lambda param: param['magnification'] <= MAGNIFICATION_THRESH
)
# Aggregated scores for datasets above the split point.
f1_rep_40x, df_sum_40x, df_each_40x = aggregate_scores(
    filter_func=lambda param: param['magnification'] > MAGNIFICATION_THRESH
)

In [None]:
def plot_f1_score(df, rep, title):
    """Plot precision, recall and F1 against IoU threshold on the current axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``IoU_Thresh``, ``Precision``, ``Recall`` and
        ``F1`` (as produced by ``aggregate_scores``).
    rep : float
        Representative F1 value; marked with a dashed horizontal segment and a
        text label next to ``REPRESENTATIVE_IOU``.
    title : str
        Title for the axes.
    """
    # Take the x values from the frame being plotted rather than from a
    # notebook-level variable set in another cell, so the curve always matches
    # its own thresholds.
    iou_thresholds = df['IoU_Thresh']
    plt.axis('square')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.plot(iou_thresholds, df['Precision'], label='Precision')
    plt.plot(iou_thresholds, df['Recall'], label='Recall')
    plt.plot(iou_thresholds, df['F1'], label='F1 score')
    plt.legend(loc='upper right')
    plt.ylabel('Score')
    plt.xlabel('IoU Threshold')
    # Dashed cross-hair marking the representative IoU and its F1 value.
    plt.vlines(REPRESENTATIVE_IOU, 0, 1, colors='gray', linestyles='dashed')
    plt.hlines(rep, REPRESENTATIVE_IOU - 0.1, REPRESENTATIVE_IOU + 0.1, colors='gray', linestyles='dashed')
    plt.text(REPRESENTATIVE_IOU + 0.1, rep, 'F1 = %.2f' % rep)
    plt.title(title)

    
# One row of three panels: all datasets, then the two magnification groups.
plt.figure(figsize=(17, 5))
plt.suptitle('Accuracy Summary', fontsize=16)

summary_panels = [
    (df_sum, f1_rep, 'All datasets'),
    (df_sum_16x, f1_rep_16x, '16x datasets'),
    (df_sum_40x, f1_rep_40x, '40x datasets'),
]
for panel_no, (panel_df, panel_rep, panel_title) in enumerate(summary_panels, start=1):
    plt.subplot(1, 3, panel_no)
    plot_f1_score(panel_df, panel_rep, panel_title)

plt.show()

In [None]:
def plot_per_dataset_score(df, column, label, color):
    """Show a bar chart of one score column, one bar per dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Dataset``, ``Magnification`` and ``column``.
    column : str
        Name of the score column to plot.
    label : str
        Text used for the y-axis label and inside the title.
    color
        Bar colour, passed through to ``plt.bar``.

    Tick labels are coloured by magnification: green for >= 40, blue for
    >= 20, and left at the default colour otherwise.
    """
    values = df[column]
    names = df['Dataset']
    magnifications = df['Magnification']
    positions = list(range(len(values)))

    plt.figure(figsize=(17, 3))
    plt.ylim(0, 1)
    plt.grid(axis='y')
    plt.bar(positions, values, color=color)
    plt.xticks(list(positions), names, rotation='vertical')

    for magnification, tick in zip(magnifications, plt.gca().get_xticklabels()):
        if magnification >= 40:
            tick_color = 'green'
        elif magnification >= 20:
            tick_color = 'blue'
        else:
            tick_color = None
        if tick_color is not None:
            tick.set_color(tick_color)

    plt.ylabel(label)
    plt.xlabel('Dataset  (black 16x, blue 20x, green 40x)')
    iou_suffix = ' at IoU = %.1f' % REPRESENTATIVE_IOU
    plt.title('Per-dataset ' + label + iou_suffix)
    plt.show()

In [None]:
# Per-dataset F1 at the representative IoU.
plot_per_dataset_score(df=df_each, column='F1', label='F1 score', color='C2')

In [None]:
# Per-dataset precision at the representative IoU.
plot_per_dataset_score(df=df_each, column='Precision', label='Precision', color='C0')

In [None]:
# Per-dataset recall at the representative IoU.
plot_per_dataset_score(df=df_each, column='Recall', label='Recall', color='C1')