Binary file added docs/source/benchmark_annotators_box.png
Binary file added docs/source/benchmark_projects_box.png
Binary file added docs/source/benchmark_scatter.png
2 changes: 2 additions & 0 deletions docs/source/superannotate.sdk.rst
@@ -322,3 +322,5 @@ Utility functions
--------------------------------

.. autofunction:: superannotate.dicom_to_rgb_sequence
.. autofunction:: superannotate.consensus
.. autofunction:: superannotate.benchmark
43 changes: 42 additions & 1 deletion docs/source/tutorial.sdk.rst
@@ -544,4 +544,45 @@ Analogically the box plots of consensus scores for each project are colored according to project name.
The scatter plot of consensus score vs. instance area is separated by project. Hovering over a point reveals its annotator and image name.
The points are colored according to class name, and each annotator is represented with a separate symbol.

.. image:: consensus_scatter.png

----------


Computing benchmark scores for instances between ground truth project and given project list
____________________________________________________________________________________________


Benchmark is a tool for comparing the quality of annotations of the same image that is present in several projects with
the ground truth annotation of that image, which resides in a separate project.

To compute the benchmark scores:

.. code-block:: python

    res_df = sa.benchmark("<ground_truth_project_name>", [project_names], "<path_to_export_folder>", [image_list], "<annotation_type>")

A pandas DataFrame with exactly the same structure as in the case of consensus computation is returned.
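
For example, to get a quick per-project summary from the returned DataFrame (which, like the consensus output, contains projectName and score columns), the scores can be aggregated with plain pandas:

.. code-block:: python

    mean_scores = res_df.groupby("projectName")["score"].mean()
    print(mean_scores)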

Besides the pandas DataFrame, the following plots can also be obtained by setting the show_plots flag to True:

* Box plot of benchmark scores for each annotator
* Box plot of benchmark scores for each project
* Scatter plots of benchmark score vs instance area for each project

.. code-block:: python

sa.benchmark("<ground_truth_project_name>", [project_names], "<path_to_export_folder>", [image_list], "<annotation_type>", show_plots=True)

To the left of each box plot the original score points of that annotator are depicted; the box plots are colored by annotator.

.. image:: benchmark_annotators_box.png

Analogously, the box plots of benchmark scores for each project are colored according to project name.

.. image:: benchmark_projects_box.png

The scatter plot of benchmark score vs. instance area is separated by project. Hovering over a point reveals its annotator and image name.
The points are colored according to class name, and each annotator is represented with a separate symbol.

.. image:: benchmark_scatter.png
1 change: 1 addition & 0 deletions superannotate/__init__.py
@@ -22,6 +22,7 @@
project_type_str_to_int, user_role_str_to_int
)
from .consensus_benchmark.consensus import consensus
from .consensus_benchmark.benchmark import benchmark
from .dataframe_filtering import (
filter_annotation_instances, filter_images_by_comments,
filter_images_by_tags
127 changes: 127 additions & 0 deletions superannotate/consensus_benchmark/benchmark.py
@@ -0,0 +1,127 @@
"""
Main module for benchmark computation
"""
import logging
import tempfile
import pandas as pd
from pathlib import Path

from .helpers import image_consensus, consensus_plot
from ..db.exports import prepare_export, download_export
from ..analytics.common import aggregate_annotations_as_df

logger = logging.getLogger("superannotate-python-sdk")


def benchmark(
gt_project_name,
project_names,
export_root=None,
image_list=None,
annot_type='bbox',
show_plots=False
):
"""Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in project_names list:

:param gt_project_name: Project name that contains the ground truth annotations
:type gt_project_name: str
:param project_names: list of project names to aggregate through
:type project_names: list of str
:param export_root: root export path of the projects
:type export_root: Pathlike (str or Path)
:param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
:type image_list: list
:param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
:type annot_type: str
    :param show_plots: If True, show plots based on results of benchmark computation. Default: False
:type show_plots: bool

"""
def aggregate_attributes(instance_df):
def attribute_to_list(attribute_df):
attribute_names = list(attribute_df["attributeName"])
attribute_df["attributeNames"] = len(attribute_df) * [
attribute_names
]
return attribute_df

attributes = None
if not instance_df["attributeGroupName"].isna().all():
attrib_group_name = instance_df.groupby("attributeGroupName")[[
"attributeGroupName", "attributeName"
]].apply(attribute_to_list)
attributes = dict(
zip(
attrib_group_name["attributeGroupName"],
attrib_group_name["attributeNames"]
)
)

instance_df.drop(
["attributeGroupName", "attributeName"], axis=1, inplace=True
)
instance_df.drop_duplicates(
subset=["imageName", "instanceId", "project"], inplace=True
)
instance_df["attributes"] = [attributes]
return instance_df

supported_types = ['polygon', 'bbox', 'point']
if annot_type not in supported_types:
raise NotImplementedError

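    # Load the ground truth annotations: either download a fresh export into a
    # temporary directory or read an existing export under export_root.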
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
gt_project_meta = prepare_export(gt_project_name)
download_export(gt_project_name, gt_project_meta, export_dir)
gt_project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / gt_project_name
gt_project_df = aggregate_annotations_as_df(export_dir)
gt_project_df["project"] = gt_project_name

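    # Score each project in project_names against the ground truth project,
    # one project at a time.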
benchmark_dfs = []
for project_name in project_names:
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
proj_export_meta = prepare_export(project_name)
download_export(project_name, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / project_name
project_df = aggregate_annotations_as_df(export_dir)

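        # Tag the rows with the project name and pair them with the ground
        # truth rows; only instances that have an instanceId can be matched.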
project_df["project"] = project_name
project_gt_df = pd.concat([project_df, gt_project_df])
project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]

if image_list is not None:
project_gt_df = project_gt_df.loc[
project_gt_df["imageName"].isin(image_list)]

project_gt_df.query("type == '" + annot_type + "'", inplace=True)

project_gt_df = project_gt_df.groupby(
["imageName", "instanceId", "project"]
)
project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
drop=True
)
unique_images = set(project_gt_df["imageName"])
all_benchmark_data = []
for image_name in unique_images:
image_data = image_consensus(project_gt_df, image_name, annot_type)
all_benchmark_data.append(pd.DataFrame(image_data))

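        # Concatenate the per-image results and keep only the rows of the
        # current project; the ground truth project's rows are discarded.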
benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
benchmark_project_df = benchmark_project_df[
benchmark_project_df["projectName"] == project_name]

benchmark_dfs.append(benchmark_project_df)

benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)

if show_plots:
consensus_plot(benchmark_df, project_names)

return benchmark_df
50 changes: 50 additions & 0 deletions tests/consensus_benchmark/test_benchmark.py
@@ -0,0 +1,50 @@
from pathlib import Path
import superannotate as sa

sa.init(Path.home() / ".superannotate" / "config.json")

test_root = Path().resolve() / 'tests'


def test_benchmark():
annot_types = ['polygon', 'bbox', 'point']
gt_project_name = 'consensus_1'
project_names = ['consensus_2', 'consensus_3']
df_column_names = [
'creatorEmail', 'imageName', 'instanceId', 'area', 'className',
'attributes', 'projectName', 'score'
]
export_path = test_root / 'consensus_benchmark'
for annot_type in annot_types:
res_df = sa.benchmark(
gt_project_name,
project_names,
export_root=export_path,
annot_type=annot_type
)
#test content of projectName column
assert sorted(res_df['projectName'].unique()) == project_names

#test structure of resulting DataFrame
assert sorted(res_df.columns) == sorted(df_column_names)

#test lower bound of the score
assert (res_df['score'] >= 0).all()

#test upper bound of the score
assert (res_df['score'] <= 1).all()

image_names = [
'bonn_000000_000019_leftImg8bit.png',
'bielefeld_000000_000321_leftImg8bit.png'
]

#test filtering images with given image names list
res_images = sa.benchmark(
gt_project_name,
project_names,
export_root=export_path,
image_list=image_names
)

assert sorted(res_images['imageName'].unique()) == sorted(image_names)