## Calibrate MegaDetector v4.1

Creates per-category calibration plots, fits an isotonic-regression calibration function on the detector's confidence scores, and saves the parameters of that function. See the last cell for how to load the saved calibration function parameters.

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to them take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import sklearn.isotonic
from tqdm import tqdm

from detection.detector_eval import detector_eval  # requires TF ODAPI

In [None]:
# Ground-truth side: JSON list of results queried from MegaDB.
LABELS_JSON_PATH = 'mdv4_1_labels_on_test.json'

# Detector side: JSON of detections in the Batch API output format.
DETECTIONS_JSON_PATH = 'mdv4_1_detections_on_test.json'

In [None]:
# Load both input files fully into memory.
# The encoding is passed explicitly: JSON is UTF-8 by specification, whereas
# open() without an encoding falls back to the platform locale (e.g. cp1252 on
# Windows) and can fail or mis-decode non-ASCII file names.
with open(DETECTIONS_JSON_PATH, 'r', encoding='utf-8') as f:
    detections_js = json.load(f)

with open(LABELS_JSON_PATH, 'r', encoding='utf-8') as f:
    labels_js = json.load(f)

In [None]:
# Index the ground-truth records by image file name. The key is built as
# download_id + '.jpg' so that it lines up with the detection keys below
# (presumably download_id carries no extension -- confirm against MegaDB).
gt_db_dict = {
    img['download_id'] + '.jpg': img for img in labels_js
}

# Index the detector output by bare file name (directory prefix dropped).
# NOTE(review): two images in different directories that share a basename
# would silently overwrite each other here -- confirm file names are unique.
detection_res = {
    os.path.basename(img['file']): img for img in detections_js['images']
}

# Category IDs are JSON object keys (strings); convert them to int once here
# and build the inverse name -> id mapping.
label_id_to_name = {
    int(cat_id): name for cat_id, name in detections_js['detection_categories'].items()
}
label_map_name_to_id = {name: cat_id for cat_id, name in label_id_to_name.items()}
display(label_map_name_to_id)

# Sanity check: every image with detections must also have ground truth.
# Report which images are missing instead of failing with a bare assertion.
images_without_gt = detection_res.keys() - gt_db_dict.keys()
assert not images_without_gt, (
    f'{len(images_without_gt)} image(s) with detections have no ground-truth '
    f'entry, e.g. {sorted(images_without_gt)[:5]}')

In [None]:
# Turn the two file-name-keyed dicts into the per-image ground-truth and
# per-image detection structures that the metric computation consumes.
_gts_and_detections = detector_eval.get_per_image_gts_and_detections(
    label_map_name_to_id=label_map_name_to_id,
    detection_res=detection_res,
    gt_db_dict=gt_db_dict)
per_image_gts, per_image_detections = _gts_and_detections

In [None]:
# Per-category metrics at a box-matching IoU threshold of 0.5. The result is
# indexed by category ID below and provides, per category, the detection
# 'scores' and the matching 'tp_fp' array used for calibration.
_num_categories = len(detections_js['detection_categories'])
_iou_threshold = 0.5
per_cat_metrics = detector_eval.compute_precision_recall_bbox(
    matching_iou_threshold=_iou_threshold,
    num_gt_classes=_num_categories,
    per_image_gts=per_image_gts,
    per_image_detections=per_image_detections)

In [None]:
# Calibration curve of the raw detector scores, one panel per category.
#
# NOTE(review): `plot_utils` is not imported in any visible cell, so this cell
# raises NameError unless it is imported first (presumably
# `from visualization import plot_utils` -- confirm the module path).

# Derive the panels from the category map instead of hard-coding three panels
# with IDs 1..3, so the figure stays correct if the category list changes.
cat_ids = sorted(label_id_to_name)
fig, axs = plt.subplots(
    nrows=1, ncols=len(cat_ids), figsize=(5 * len(cat_ids), 5),
    facecolor='white', tight_layout=True,
    squeeze=False)  # always get an array of axes, even for a single category
axs = axs.ravel()

for panel_idx, (cat_id, ax) in enumerate(zip(cat_ids, axs)):
    plot_utils.plot_calibration_curve(
        true_scores=per_cat_metrics[cat_id]['tp_fp'],
        pred_scores=per_cat_metrics[cat_id]['scores'],
        num_bins=15, ax=ax)

    # Append the category name below whatever title the plot helper set.
    ax.set_title(ax.get_title() + '\n' + label_id_to_name[cat_id])

    if panel_idx == 0:
        # Create the figure-level legend right after the first panel only:
        # fig.legend() collects labeled artists from the axes drawn so far,
        # so calling it later would repeat the same entries once per panel.
        fig.legend(loc='upper left', bbox_to_anchor=(0.05, 0.85))

In [None]:
# Fit one isotonic calibration function on the detections of all categories,
# save its parameters, and plot uncalibrated vs. calibrated curves.
#
# NOTE(review): `plot_utils` is not imported in any visible cell, so the
# plotting part raises NameError unless it is imported first (presumably
# `from visualization import plot_utils` -- confirm the module path).

# Pool scores and their matching 'tp_fp' values across categories, so that a
# single calibration function is fit for the detector as a whole.
all_pred_scores = np.concatenate([
    per_cat_metrics[cat_id]['scores'] for cat_id in label_id_to_name
])
all_true_scores = np.concatenate([
    per_cat_metrics[cat_id]['tp_fp'] for cat_id in label_id_to_name
])

# Non-decreasing map from raw score to a value bounded to [0, 1].
# out_of_bounds='raise' makes transform() fail on scores outside the range
# seen during fit rather than silently clipping them.
calibrator = sklearn.isotonic.IsotonicRegression(
    y_min=0, y_max=1, increasing=True, out_of_bounds='raise')
calibrator.fit(all_pred_scores, all_true_scores)

# calibrator.f_ is a scipy.interpolate.interp1d object; its x / y arrays are
# all that is needed to rebuild the calibration function later with a linear
# interp1d, so only those two arrays are saved.
np.savez_compressed(
    'mdv4_1_isotonic_calibration.npz',
    x=calibrator.f_.x,
    y=calibrator.f_.y)

# Derive the panels from the category map instead of hard-coding three panels
# with IDs 1..3, so the figure stays correct if the category list changes.
cat_ids = sorted(label_id_to_name)
fig, axs = plt.subplots(
    nrows=1, ncols=len(cat_ids), figsize=(5 * len(cat_ids), 5),
    facecolor='white', tight_layout=True,
    squeeze=False)  # always get an array of axes, even for a single category
axs = axs.ravel()

for panel_idx, (cat_id, ax) in enumerate(zip(cat_ids, axs)):
    true_scores = per_cat_metrics[cat_id]['tp_fp']
    pred_scores = per_cat_metrics[cat_id]['scores']

    # Raw scores first, then the calibrated scores overlaid on the same axes;
    # the overlay is drawn with plot_hist and plot_perf turned off.
    plot_utils.plot_calibration_curve(
        true_scores=true_scores,
        pred_scores=pred_scores,
        num_bins=15, name='uncalibrated outputs', ax=ax)
    plot_utils.plot_calibration_curve(
        true_scores=true_scores,
        pred_scores=calibrator.transform(pred_scores),
        num_bins=15, name='calibrated outputs', ax=ax,
        plot_hist=False, plot_perf=False)

    # Append the category name below whatever title the plot helper set.
    ax.set_title(ax.get_title() + '\n' + label_id_to_name[cat_id])

    if panel_idx == 0:
        # Create the figure-level legend right after the first panel only:
        # fig.legend() collects labeled artists from the axes drawn so far,
        # so calling it later would repeat the same entries once per panel.
        fig.legend(loc='upper left', bbox_to_anchor=(0.05, 0.85))

In [None]:
# Round-trip check that doubles as the loading recipe: the two saved arrays
# are enough to rebuild the calibration function with scipy alone.
import scipy.interpolate

with np.load('mdv4_1_isotonic_calibration.npz') as npz:
    saved_x, saved_y = npz['x'], npz['y']
f = scipy.interpolate.interp1d(x=saved_x, y=saved_y, kind='linear')

# The rebuilt function must reproduce the fitted calibrator's output exactly.
reloaded_scores = f(all_pred_scores)
expected_scores = calibrator.transform(all_pred_scores)
assert (reloaded_scores == expected_scores).all()