In [1]:
import numpy as np
from matplotlib import pyplot as plt
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval, Params
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from collections import defaultdict
import random
import pickle

import supervisely as sly
import supervisely.nn.benchmark

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import supervisely
from supervisely.nn import benchmark

## Loading data

In [3]:
import supervisely.nn.benchmark
import supervisely.nn.benchmark.evaluation
import supervisely.nn.benchmark.evaluation.coco
import json

# Evaluation directories of the two runs being compared. Each one is passed to
# read_eval_data(), which reads cocoGt.json, cocoDt.json and eval_data.pkl from it.
# The two paths differ only by the "_001" suffix on the run folder.
# NOTE(review): paths are relative to the notebook's working directory — confirm
# the data layout before a fresh "Restart & Run All".
eval_dir_1 = "data/41774_COCO-100 (det)/66127_Serve YOLOv8 | v9 | v10/evaluation"
eval_dir_2 = "data/41774_COCO-100 (det)/66127_Serve YOLOv8 | v9 | v10_001/evaluation"


def read_eval_data(eval_dir):
    """Load one evaluation directory and wrap it in a ``MetricProvider``.

    Reads three files from ``eval_dir``: ``cocoGt.json`` (ground truth in COCO
    format), ``cocoDt.json`` (detections; only its ``"annotations"`` list is
    used) and ``eval_data.pkl`` (a pickled mapping with the keys ``"matches"``,
    ``"coco_metrics"`` and ``"params"``).

    Args:
        eval_dir: Directory containing the three files above. A ``str`` or any
            object whose string form is the directory path (e.g. ``pathlib.Path``).

    Returns:
        A ``supervisely.nn.benchmark.evaluation.coco.MetricProvider`` built from
        the loaded data. ``calculate()`` is NOT called here; the caller does that.

    Raises:
        OSError: if one of the files cannot be opened.
        KeyError: if an expected key is missing from the loaded data.
    """
    cocoGt_path = f"{eval_dir}/cocoGt.json"
    cocoDt_path = f"{eval_dir}/cocoDt.json"
    eval_data_path = f"{eval_dir}/eval_data.pkl"

    with open(cocoGt_path, "r") as f:
        cocoGt_dataset = json.load(f)
    with open(cocoDt_path, "r") as f:
        cocoDt_dataset = json.load(f)

    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only point this at evaluation directories produced by a trusted pipeline.
    with open(eval_data_path, "rb") as f:
        eval_data = pickle.load(f)

    # Build the ground-truth index from the in-memory dict, then attach the
    # detections to it via loadRes so both share the same image/category ids.
    cocoGt = COCO()
    cocoGt.dataset = cocoGt_dataset
    cocoGt.createIndex()
    cocoDt = cocoGt.loadRes(cocoDt_dataset["annotations"])

    return supervisely.nn.benchmark.evaluation.coco.MetricProvider(
        eval_data['matches'],
        eval_data['coco_metrics'],
        eval_data['params'],
        cocoGt,
        cocoDt,
    )

# Build one metric provider per evaluation run (run 1 first, then run 2),
# then compute the metrics on each of them.
mp1, mp2 = map(read_eval_data, (eval_dir_1, eval_dir_2))
mp1.calculate()
mp2.calculate()

creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!


  precision = tp / (tp + fp)
  pr = tp / (tp + fp)
  f1 = 2 * pr * rc / (pr + rc)
  precision = tp / (tp + fp)
  pr = tp / (tp + fp)
  f1 = 2 * pr * rc / (pr + rc)


In [4]:
from pycocotools import mask as maskUtils

iouType = 'bbox'
iouThr = 0.75

# Annotation field whose geometry is compared by IoU.
key_name = 'bbox' if iouType == 'bbox' else 'segmentation'


def _group_fp_anns_by_image(mp):
    """Map image_id -> list of detection annotations that ``mp`` lists as false positives.

    Each entry of ``mp.m.fp_matches`` supplies an ``'image_id'`` and a ``'dt_id'``;
    the annotation itself is looked up in ``mp.cocoDt.anns`` by that ``dt_id``.
    """
    grouped = defaultdict(list)
    for match in mp.m.fp_matches:
        grouped[match['image_id']].append(mp.cocoDt.anns[match['dt_id']])
    return grouped


def _greedy_pairs_above_thr(ious, iou_thr):
    """Greedy one-to-one pairing of rows and columns of an IoU matrix.

    Candidate pairs are the entries with IoU strictly greater than ``iou_thr``.
    They are visited from highest to lowest IoU, and a pair is kept only if
    neither its row nor its column was already used by a better pair.

    Returns:
        List of ``(row, col)`` index pairs, in descending IoU order.
    """
    rows, cols = np.nonzero(ious > iou_thr)
    candidates = sorted(
        zip(rows, cols), key=lambda rc: ious[rc[0], rc[1]], reverse=True
    )
    used_rows, used_cols = set(), set()
    pairs = []
    for i, j in candidates:
        if i in used_rows or j in used_cols:
            continue
        used_rows.add(i)
        used_cols.add(j)
        pairs.append((i, j))
    return pairs


imgId2ann1 = _group_fp_anns_by_image(mp1)
imgId2ann2 = _group_fp_anns_by_image(mp2)

# "Same" false positives: FP detections of model 1 and model 2 on the same image
# whose geometries overlap with IoU > iouThr. Category ids are not compared.
# Each item is (annotation from model 1, annotation from model 2, IoU).
same_fp_matches = []
for img_id, anns1 in imgId2ann1.items():
    # .get() instead of indexing: indexing a defaultdict would insert an empty
    # list for every image that only model 1 has false positives on.
    anns2 = imgId2ann2.get(img_id, [])
    if not anns2:
        continue
    geoms1 = [x[key_name] for x in anns1]
    geoms2 = [x[key_name] for x in anns2]

    # Rows index model-1 annotations, columns index model-2 annotations;
    # the third argument marks every model-2 geometry as non-crowd.
    ious = maskUtils.iou(geoms1, geoms2, [0] * len(geoms2))
    if len(ious) == 0:
        continue
    for i, j in _greedy_pairs_above_thr(ious, iouThr):
        same_fp_matches.append((anns1[i], anns2[j], ious[i, j]))

In [5]:
# Find different FP matches for each model.
# id1 / id2: annotation ids of the FP detections that were paired up in
# same_fp_matches (element 0 is the model-1 annotation, element 1 the model-2 one).
# Built with set comprehensions so an empty same_fp_matches yields empty sets
# instead of failing on tuple unpacking.
id1 = {pair[0]['id'] for pair in same_fp_matches}
id2 = {pair[1]['id'] for pair in same_fp_matches}

# FP detection ids of each model that have no counterpart in the other model.
# NOTE(review): assumes an annotation's 'id' equals the 'dt_id' it was looked up by — confirm.
diff_fp_matches_1 = {x['dt_id'] for x in mp1.m.fp_matches} - id1
diff_fp_matches_2 = {x['dt_id'] for x in mp2.m.fp_matches} - id2

In [6]:
# Sanity check: for each model, shared FPs plus model-only FPs must add up to
# the total number of FP matches.
assert len(same_fp_matches) + len(diff_fp_matches_1) == len(mp1.m.fp_matches)
assert len(same_fp_matches) + len(diff_fp_matches_2) == len(mp2.m.fp_matches)

In [7]:
# False negatives are identified by ground-truth id, so the comparison between
# the two models is plain set algebra:
#   - same_fn_matches:   ground-truth ids listed as FN by both models
#   - diff_fn_matches_N: ground-truth ids listed as FN only by model N

id1 = {match['gt_id'] for match in mp1.m.fn_matches}
id2 = {match['gt_id'] for match in mp2.m.fn_matches}

same_fn_matches = id1.intersection(id2)
diff_fn_matches_1 = id1.difference(id2)
diff_fn_matches_2 = id2.difference(id1)

In [8]:
# Sanity check: shared FNs plus model-only FNs must add up to each model's FN total.
assert len(same_fn_matches) + len(diff_fn_matches_1) == len(mp1.m.fn_matches)
assert len(same_fn_matches) + len(diff_fn_matches_2) == len(mp2.m.fn_matches)

In [9]:
# True positives are compared by the ground-truth id they matched:
#   - same_tp_matches:   ground-truth ids listed as TP by both models
#   - diff_tp_matches_N: ground-truth ids listed as TP only by model N

id1 = {match['gt_id'] for match in mp1.m.tp_matches}
id2 = {match['gt_id'] for match in mp2.m.tp_matches}

same_tp_matches = id1.intersection(id2)
diff_tp_matches_1 = id1.difference(id2)
diff_tp_matches_2 = id2.difference(id1)

In [10]:
# Sanity check: shared TPs plus model-only TPs must add up to each model's TP total.
assert len(same_tp_matches) + len(diff_tp_matches_1) == len(mp1.m.tp_matches)
assert len(same_tp_matches) + len(diff_tp_matches_2) == len(mp2.m.tp_matches)

In [34]:
# Outcome counts per model: one horizontal bar per model, split into TP / FN / FP.
count = [
    len(matches)
    for mp in (mp1, mp2)
    for matches in (mp.m.tp_matches, mp.m.fn_matches, mp.m.fp_matches)
]
outcome = ['TP', 'FN', 'FP'] * 2
model = [name for name in ('Model 1', 'Model 2') for _ in range(3)]

# Long-format table: one row per (model, outcome type) pair.
data = dict(count=count, type=outcome, model=model)

df = pd.DataFrame(data)

# Fixed colour per outcome type.
color_map = dict(TP='#1fb466', FN='#dd3f3f', FP='#d5a5a5')

fig = px.bar(
    df,
    x="count",
    y="model",
    color="type",
    orientation='h',
    title="Outcome Counts",
    labels={'count': 'Total Count', "model": ""},
    color_discrete_map=color_map,
)

fig.show()

In [39]:
# Outcome counts split by agreement between the models: one bar for outcomes
# shared by both models and one bar per model for outcomes unique to it.
count = [
    len(group)
    for group in (
        same_tp_matches, same_fn_matches, same_fp_matches,
        diff_tp_matches_1, diff_fn_matches_1, diff_fp_matches_1,
        diff_tp_matches_2, diff_fn_matches_2, diff_fp_matches_2,
    )
]
outcome = ['TP', 'FN', 'FP'] * 3
model = [
    name
    for name in ('The same in both models', 'Model 1 (diff)', 'Model 2 (diff)')
    for _ in range(3)
]

# Long-format table: one row per (bar, outcome type) pair.
data = dict(count=count, type=outcome, model=model)

df = pd.DataFrame(data)

# Fixed colour per outcome type.
color_map = dict(TP='#1fb466', FN='#dd3f3f', FP='#d5a5a5')

# orientation='h' with the category on y draws the bars horizontally.
fig = px.bar(
    df,
    x="count",
    y="model",
    color="type",
    orientation='h',
    title="Outcome Counts Difference",
    labels={'count': 'Total Count', "model": ""},
    color_discrete_map=color_map,
)


fig.show()