Binary file added docs/source/benchmark_annotators_box.png
Binary file added docs/source/benchmark_projects_box.png
Binary file added docs/source/benchmark_scatter.png
2 changes: 2 additions & 0 deletions docs/source/superannotate.sdk.rst
@@ -322,3 +322,5 @@ Utility functions
--------------------------------

.. autofunction:: superannotate.dicom_to_rgb_sequence
.. autofunction:: superannotate.consensus
.. autofunction:: superannotate.benchmark
43 changes: 42 additions & 1 deletion docs/source/tutorial.sdk.rst
@@ -544,4 +544,45 @@ Analogically the box plots of consensus scores for each project are colored according to project name.
The scatter plot of consensus score vs. instance area is separated by project. Hovering over a point reveals its annotator and image name.
The points are colored according to class name, and each annotator is represented with a separate symbol.

.. image:: consensus_scatter.png

----------


Computing benchmark scores for instances between ground truth project and given project list
____________________________________________________________________________________________


Benchmark is a tool for comparing the quality of annotations of the same image that is present in several projects with
the ground truth annotation of that image, which resides in a separate project.

To compute the benchmark scores:

.. code-block:: python

    res_df = sa.benchmark("<ground_truth_project_name>", [project_names], "<path_to_export_folder>", [image_list], "<annotation_type>")

A pandas DataFrame with exactly the same structure as in the case of consensus computation is returned.
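
For example, to get a quick per-project summary from the returned DataFrame (which, like the consensus output, contains projectName and score columns), the scores can be aggregated with plain pandas:

.. code-block:: python

    mean_scores = res_df.groupby("projectName")["score"].mean()
    print(mean_scores)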

Besides the pandas DataFrame, the following plots can also be obtained by setting the show_plots flag to True:

* Box plot of benchmark scores for each annotator
* Box plot of benchmark scores for each project
* Scatter plots of benchmark score vs instance area for each project

.. code-block:: python

sa.benchmark("<ground_truth_project_name>", [project_names], "<path_to_export_folder>", [image_list], "<annotation_type>", show_plots=True)

To the left of each box plot the original score points of that annotator are depicted; the box plots are colored by annotator.

.. image:: benchmark_annotators_box.png

Analogously, the box plots of benchmark scores for each project are colored according to project name.

.. image:: benchmark_projects_box.png

The scatter plot of benchmark score vs. instance area is separated by project. Hovering over a point reveals its annotator and image name.
The points are colored according to class name, and each annotator is represented with a separate symbol.

.. image:: benchmark_scatter.png
1 change: 1 addition & 0 deletions superannotate/__init__.py
@@ -22,6 +22,7 @@
project_type_str_to_int, user_role_str_to_int
)
from .consensus_benchmark.consensus import consensus
from .consensus_benchmark.benchmark import benchmark
from .dataframe_filtering import (
filter_annotation_instances, filter_images_by_comments,
filter_images_by_tags
127 changes: 127 additions & 0 deletions superannotate/consensus_benchmark/benchmark.py
@@ -0,0 +1,127 @@
"""
Main module for benchmark computation
"""
import logging
import tempfile
import pandas as pd
from pathlib import Path

from .helpers import image_consensus, consensus_plot
from ..db.exports import prepare_export, download_export
from ..analytics.common import aggregate_annotations_as_df

logger = logging.getLogger("superannotate-python-sdk")


def benchmark(
gt_project_name,
project_names,
export_root=None,
image_list=None,
annot_type='bbox',
show_plots=False
):
"""Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in project_names list:

:param gt_project_name: Project name that contains the ground truth annotations
:type gt_project_name: str
:param project_names: list of project names to aggregate through
:type project_names: list of str
:param export_root: root export path of the projects
:type export_root: Pathlike (str or Path)
:param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
:type image_list: list
:param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
:type annot_type: str
    :param show_plots: If True, show plots based on results of benchmark computation. Default: False
:type show_plots: bool

"""
def aggregate_attributes(instance_df):
def attribute_to_list(attribute_df):
attribute_names = list(attribute_df["attributeName"])
attribute_df["attributeNames"] = len(attribute_df) * [
attribute_names
]
return attribute_df

attributes = None
if not instance_df["attributeGroupName"].isna().all():
attrib_group_name = instance_df.groupby("attributeGroupName")[[
"attributeGroupName", "attributeName"
]].apply(attribute_to_list)
attributes = dict(
zip(
attrib_group_name["attributeGroupName"],
attrib_group_name["attributeNames"]
)
)

instance_df.drop(
["attributeGroupName", "attributeName"], axis=1, inplace=True
)
instance_df.drop_duplicates(
subset=["imageName", "instanceId", "project"], inplace=True
)
instance_df["attributes"] = [attributes]
return instance_df

supported_types = ['polygon', 'bbox', 'point']
if annot_type not in supported_types:
raise NotImplementedError

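    # Load the ground truth annotations: either download a fresh export into a
    # temporary directory or read an existing export under export_root.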
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
gt_project_meta = prepare_export(gt_project_name)
download_export(gt_project_name, gt_project_meta, export_dir)
gt_project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / gt_project_name
gt_project_df = aggregate_annotations_as_df(export_dir)
gt_project_df["project"] = gt_project_name

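    # Score each project in project_names against the ground truth project,
    # one project at a time.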
benchmark_dfs = []
for project_name in project_names:
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
proj_export_meta = prepare_export(project_name)
download_export(project_name, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / project_name
project_df = aggregate_annotations_as_df(export_dir)

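        # Tag the rows with the project name and pair them with the ground
        # truth rows; only instances that have an instanceId can be matched.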
project_df["project"] = project_name
project_gt_df = pd.concat([project_df, gt_project_df])
project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]

if image_list is not None:
project_gt_df = project_gt_df.loc[
project_gt_df["imageName"].isin(image_list)]

project_gt_df.query("type == '" + annot_type + "'", inplace=True)

project_gt_df = project_gt_df.groupby(
["imageName", "instanceId", "project"]
)
project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
drop=True
)
unique_images = set(project_gt_df["imageName"])
all_benchmark_data = []
for image_name in unique_images:
image_data = image_consensus(project_gt_df, image_name, annot_type)
all_benchmark_data.append(pd.DataFrame(image_data))

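        # Concatenate the per-image results and keep only the rows of the
        # current project; the ground truth project's rows are discarded.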
benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
benchmark_project_df = benchmark_project_df[
benchmark_project_df["projectName"] == project_name]

benchmark_dfs.append(benchmark_project_df)

benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)

if show_plots:
consensus_plot(benchmark_df, project_names)

return benchmark_df
50 changes: 50 additions & 0 deletions tests/consensus_benchmark/test_benchmark.py
@@ -0,0 +1,50 @@
from pathlib import Path
import superannotate as sa

sa.init(Path.home() / ".superannotate" / "config.json")

test_root = Path().resolve() / 'tests'


def test_benchmark():
annot_types = ['polygon', 'bbox', 'point']
gt_project_name = 'consensus_1'
project_names = ['consensus_2', 'consensus_3']
df_column_names = [
'creatorEmail', 'imageName', 'instanceId', 'area', 'className',
'attributes', 'projectName', 'score'
]
export_path = test_root / 'consensus_benchmark'
for annot_type in annot_types:
res_df = sa.benchmark(
gt_project_name,
project_names,
export_root=export_path,
annot_type=annot_type
)
#test content of projectName column
assert sorted(res_df['projectName'].unique()) == project_names

#test structure of resulting DataFrame
assert sorted(res_df.columns) == sorted(df_column_names)

#test lower bound of the score
assert (res_df['score'] >= 0).all()

#test upper bound of the score
assert (res_df['score'] <= 1).all()

image_names = [
'bonn_000000_000019_leftImg8bit.png',
'bielefeld_000000_000321_leftImg8bit.png'
]

#test filtering images with given image names list
res_images = sa.benchmark(
gt_project_name,
project_names,
export_root=export_path,
image_list=image_names
)

assert sorted(res_images['imageName'].unique()) == sorted(image_names)