diff --git a/docs/source/benchmark_annotators_box.png b/docs/source/benchmark_annotators_box.png
new file mode 100644
index 000000000..4f037e571
Binary files /dev/null and b/docs/source/benchmark_annotators_box.png differ
diff --git a/docs/source/benchmark_projects_box.png b/docs/source/benchmark_projects_box.png
new file mode 100644
index 000000000..bdc605440
Binary files /dev/null and b/docs/source/benchmark_projects_box.png differ
diff --git a/docs/source/benchmark_scatter.png b/docs/source/benchmark_scatter.png
new file mode 100644
index 000000000..6ec2e1438
Binary files /dev/null and b/docs/source/benchmark_scatter.png differ
diff --git a/docs/source/superannotate.sdk.rst b/docs/source/superannotate.sdk.rst
index b227c5a55..2101bcf20 100644
--- a/docs/source/superannotate.sdk.rst
+++ b/docs/source/superannotate.sdk.rst
@@ -322,3 +322,5 @@ Utility functions
 --------------------------------
 
 .. autofunction:: superannotate.dicom_to_rgb_sequence
+.. autofunction:: superannotate.consensus
+.. autofunction:: superannotate.benchmark
diff --git a/docs/source/tutorial.sdk.rst b/docs/source/tutorial.sdk.rst
index 00b5c5bc3..e5baef25f 100644
--- a/docs/source/tutorial.sdk.rst
+++ b/docs/source/tutorial.sdk.rst
@@ -544,4 +544,45 @@ Analogically the box plots of consensus scores for each project are colored acco
 Scatter plot of consensus score vs instance area is separated by projects. Hovering on a point reveals its annotator and image name.
 The points are colored according to class name. Each annotator is represented with separate symbol.
 
-.. image:: consensus_scatter.png
\ No newline at end of file
+.. image:: consensus_scatter.png
+
+----------
+
+
+Computing benchmark scores for instances between ground truth project and given project list
+______________________________________________________________________________________________
+
+
+Benchmark is a tool to compare the quality of annotations of the same image that is present in several projects
+against the ground truth annotation of the same image in a separate project.
+
+To compute the benchmark scores:
+
+.. code-block:: python
+
+    res_df = sa.benchmark("", [project_names], "", [image_list], "")
+
+The returned pandas DataFrame has exactly the same structure as in the consensus computation case.
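+
+For example, the returned DataFrame can be summarized with ordinary pandas operations; a minimal sketch
+(the column names follow the structure described above) that aggregates the scores per project and per annotator:
+
+.. code-block:: python
+
+    # mean benchmark score of each project against the ground truth
+    res_df.groupby("projectName")["score"].mean()
+
+    # mean benchmark score of each annotator within each project
+    res_df.groupby(["projectName", "creatorEmail"])["score"].mean()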
+
+Besides the pandas DataFrame, there is an option to get the following plots by setting the show_plots flag to True:
+
+* Box plot of benchmark scores for each annotator
+* Box plot of benchmark scores for each project
+* Scatter plots of benchmark score vs instance area for each project
+
+.. code-block:: python
+
+    sa.benchmark("", [project_names], "", [image_list], "", show_plots=True)
+
+To the left of each box plot the original score points of that annotator are depicted; the box plots are colored by annotator.
+
+.. image:: benchmark_annotators_box.png
+
+Similarly, the box plots of benchmark scores for each project are colored according to project name.
+
+.. image:: benchmark_projects_box.png
+
+The scatter plot of benchmark score vs instance area is separated by project. Hovering on a point reveals its annotator and image name.
+The points are colored according to class name. Each annotator is represented with a separate symbol.
+
+.. image:: benchmark_scatter.png
\ No newline at end of file
diff --git a/superannotate/__init__.py b/superannotate/__init__.py
index 64e0498b3..f5587d450 100644
--- a/superannotate/__init__.py
+++ b/superannotate/__init__.py
@@ -22,6 +22,7 @@ project_type_str_to_int, user_role_str_to_int
 )
 from .consensus_benchmark.consensus import consensus
+from .consensus_benchmark.benchmark import benchmark
 from .dataframe_filtering import (
     filter_annotation_instances, filter_images_by_comments,
     filter_images_by_tags
diff --git a/superannotate/consensus_benchmark/benchmark.py b/superannotate/consensus_benchmark/benchmark.py
new file mode 100644
index 000000000..99d9f6111
--- /dev/null
+++ b/superannotate/consensus_benchmark/benchmark.py
@@ -0,0 +1,127 @@
+"""
+Main module for benchmark computation
+"""
+import logging
+import tempfile
+import pandas as pd
+from pathlib import Path
+
+from .helpers import image_consensus, consensus_plot
+from ..db.exports import prepare_export, download_export
+from ..analytics.common import aggregate_annotations_as_df
+
+logger = logging.getLogger("superannotate-python-sdk")
+
+
+def benchmark(
+    gt_project_name,
+    project_names,
+    export_root=None,
+    image_list=None,
+    annot_type='bbox',
+    show_plots=False
+):
+    """Computes benchmark score for each instance of the given images that are present both in the gt_project_name project and in the projects from the project_names list.
+
+    :param gt_project_name: Project name that contains the ground truth annotations
+    :type gt_project_name: str
+    :param project_names: list of project names to aggregate through
+    :type project_names: list of str
+    :param export_root: root export path of the projects
+    :type export_root: Pathlike (str or Path)
+    :param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
+    :type image_list: list
+    :param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
+    :type annot_type: str
+    :param show_plots: If True, show plots based on the results of the benchmark computation. Default: False
+    :type show_plots: bool
+
+    """
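+    # Implementation outline: export and aggregate the ground truth project once,
+    # then, for every project in project_names, concatenate its annotations with
+    # the ground truth, score the shared instances image by image and keep only
+    # the rows that belong to that project.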
+    # Collapses the attribute rows of one instance into a single row with an "attributes" dict.
+    def aggregate_attributes(instance_df):
+        def attribute_to_list(attribute_df):
+            attribute_names = list(attribute_df["attributeName"])
+            attribute_df["attributeNames"] = len(attribute_df) * [
+                attribute_names
+            ]
+            return attribute_df
+
+        attributes = None
+        if not instance_df["attributeGroupName"].isna().all():
+            attrib_group_name = instance_df.groupby("attributeGroupName")[[
+                "attributeGroupName", "attributeName"
+            ]].apply(attribute_to_list)
+            attributes = dict(
+                zip(
+                    attrib_group_name["attributeGroupName"],
+                    attrib_group_name["attributeNames"]
+                )
+            )
+
+        instance_df.drop(
+            ["attributeGroupName", "attributeName"], axis=1, inplace=True
+        )
+        instance_df.drop_duplicates(
+            subset=["imageName", "instanceId", "project"], inplace=True
+        )
+        instance_df["attributes"] = [attributes]
+        return instance_df
+
+    supported_types = ['polygon', 'bbox', 'point']
+    if annot_type not in supported_types:
+        raise NotImplementedError("annot_type should be one of " + str(supported_types))
+
+    if export_root is None:
+        with tempfile.TemporaryDirectory() as export_dir:
+            gt_project_meta = prepare_export(gt_project_name)
+            download_export(gt_project_name, gt_project_meta, export_dir)
+            gt_project_df = aggregate_annotations_as_df(export_dir)
+    else:
+        export_dir = Path(export_root) / gt_project_name
+        gt_project_df = aggregate_annotations_as_df(export_dir)
+    gt_project_df["project"] = gt_project_name
+
+    benchmark_dfs = []
+    for project_name in project_names:
+        if export_root is None:
+            with tempfile.TemporaryDirectory() as export_dir:
+                proj_export_meta = prepare_export(project_name)
+                download_export(project_name, proj_export_meta, export_dir)
+                project_df = aggregate_annotations_as_df(export_dir)
+        else:
+            export_dir = Path(export_root) / project_name
+            project_df = aggregate_annotations_as_df(export_dir)
+
+        project_df["project"] = project_name
+        project_gt_df = pd.concat([project_df, gt_project_df])
+        project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]
+
+        if image_list is not None:
+            project_gt_df = project_gt_df.loc[
+                project_gt_df["imageName"].isin(image_list)]
+
+        project_gt_df.query("type == '" + annot_type + "'", inplace=True)
+
+        project_gt_df = project_gt_df.groupby(
+            ["imageName", "instanceId", "project"]
+        )
+        project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
+            drop=True
+        )
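+        # Score the shared images one by one: image_consensus() computes
+        # per-instance scores for a single image across the projects present in
+        # the combined DataFrame (here the candidate project and the ground truth).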
+        unique_images = set(project_gt_df["imageName"])
+        all_benchmark_data = []
+        for image_name in unique_images:
+            image_data = image_consensus(project_gt_df, image_name, annot_type)
+            all_benchmark_data.append(pd.DataFrame(image_data))
+
+        benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
+        benchmark_project_df = benchmark_project_df[
+            benchmark_project_df["projectName"] == project_name]
+
+        benchmark_dfs.append(benchmark_project_df)
+
+    benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)
+
+    if show_plots:
+        consensus_plot(benchmark_df, project_names)
+
+    return benchmark_df
\ No newline at end of file
diff --git a/tests/consensus_benchmark/test_benchmark.py b/tests/consensus_benchmark/test_benchmark.py
new file mode 100644
index 000000000..760751c86
--- /dev/null
+++ b/tests/consensus_benchmark/test_benchmark.py
@@ -0,0 +1,50 @@
+from pathlib import Path
+import superannotate as sa
+
+sa.init(Path.home() / ".superannotate" / "config.json")
+
+test_root = Path().resolve() / 'tests'
+
+
+def test_benchmark():
+    annot_types = ['polygon', 'bbox', 'point']
+    gt_project_name = 'consensus_1'
+    project_names = ['consensus_2', 'consensus_3']
+    df_column_names = [
+        'creatorEmail', 'imageName', 'instanceId', 'area', 'className',
+        'attributes', 'projectName', 'score'
+    ]
+    export_path = test_root / 'consensus_benchmark'
+    for annot_type in annot_types:
+        res_df = sa.benchmark(
+            gt_project_name,
+            project_names,
+            export_root=export_path,
+            annot_type=annot_type
+        )
+        # test content of projectName column
+        assert sorted(res_df['projectName'].unique()) == project_names
+
+        # test structure of resulting DataFrame
+        assert sorted(res_df.columns) == sorted(df_column_names)
+
+        # test lower bound of the score
+        assert (res_df['score'] >= 0).all()
+
+        # test upper bound of the score
+        assert (res_df['score'] <= 1).all()
+
+    image_names = [
+        'bonn_000000_000019_leftImg8bit.png',
+        'bielefeld_000000_000321_leftImg8bit.png'
+    ]
+
+    # test filtering images with given image names list
+    res_images = sa.benchmark(
+        gt_project_name,
+        project_names,
+        export_root=export_path,
+        image_list=image_names
+    )
+
+    assert sorted(res_images['imageName'].unique()) == sorted(image_names)