33 changes: 26 additions & 7 deletions superannotate/analytics/common.py
@@ -153,7 +153,8 @@ def aggregate_annotations_as_df(
include_classes_wo_annotations=False,
include_comments=False,
include_tags=False,
verbose=True
verbose=True,
folder_names=None
):
"""Aggregate annotations as pandas dataframe from project root.

@@ -166,14 +167,16 @@ def aggregate_annotations_as_df(
:type include_comments: bool
:param include_tags: enables inclusion of tags info as tag column
:type include_tags: bool
:param folder_names: Aggregate the specified folders from project_root. If None, aggregate all folders in the project_root.
:type folder_names: list of str

:return: DataFrame on annotations with columns: "imageName", "instanceId",
"className", "attributeGroupName", "attributeName", "type", "error", "locked",
"visible", "trackingId", "probability", "pointLabels",
"meta" (geometry information as string), "commentResolved", "classColor",
"groupId", "imageWidth", "imageHeight", "imageStatus", "imagePinned",
"createdAt", "creatorRole", "creationType", "creatorEmail", "updatedAt",
"updatorRole", "updatorEmail", "tag"
"updatorRole", "updatorEmail", "tag", "folderName"
:rtype: pandas DataFrame
"""

@@ -208,7 +211,8 @@ def aggregate_annotations_as_df(
"creatorEmail": [],
"updatedAt": [],
"updatorRole": [],
"updatorEmail": []
"updatorEmail": [],
"folderName": []
}

if include_comments:
@@ -283,15 +287,25 @@ def __get_user_metadata(annotation):

annotations_paths = []

for path in Path(project_root).glob('*.json'):
annotations_paths.append(path)
if folder_names is None:
project_dir_content = Path(project_root).glob('*')
for entry in project_dir_content:
if entry.is_file() and entry.suffix == '.json':
annotations_paths.append(entry)
elif entry.is_dir() and entry.name != "classes":
annotations_paths.extend(list(entry.rglob('*.json')))
else:
for folder_name in folder_names:
annotations_paths.extend(
list((Path(project_root) / folder_name).rglob('*.json'))
)

if not annotations_paths:
logger.warning(
"No annotations found in project export root %s", project_root
)
type_postfix = "___objects.json" if glob.glob(
"{}/*___objects.json".format(project_root)
type_postfix = "___objects.json" if annotations_paths[0].match(
"*___objects.json"
) else "___pixel.json"
for annotation_path in annotations_paths:
annotation_json = json.load(open(annotation_path))
@@ -359,6 +373,9 @@ def __get_user_metadata(annotation):
annotation_point_labels = annotation.get("pointLabels")
attributes = annotation.get("attributes")
user_metadata = __get_user_metadata(annotation)
folder_name = None
if annotation_path.parent != Path(project_root):
folder_name = annotation_path.parent.name
num_added = 0
if not attributes:
annotation_dict = {
@@ -375,6 +392,7 @@ def __get_user_metadata(annotation):
"pointLabels": annotation_point_labels,
"classColor": annotation_class_color,
"groupId": annotation_group_id,
"folderName": folder_name,
}
annotation_dict.update(user_metadata)
annotation_dict.update(image_metadata)
@@ -414,6 +432,7 @@ def __get_user_metadata(annotation):
"pointLabels": annotation_point_labels,
"classColor": annotation_class_color,
"groupId": annotation_group_id,
"folderName": folder_name,
}
annotation_dict.update(user_metadata)
annotation_dict.update(image_metadata)
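A minimal usage sketch of the new folder_names argument, assuming an export has already been downloaded locally and that the function is reached through the package-level import used elsewhere in the SDK; the path and folder names below are hypothetical:

import superannotate as sa

# "./export", "batch_1" and "batch_2" are hypothetical placeholders; point
# them at a real downloaded export and folders it actually contains.
df = sa.aggregate_annotations_as_df(
    "./export",
    include_comments=True,
    folder_names=["batch_1", "batch_2"]  # None aggregates every folder
)

# Images in the export root get folderName None; images inside a folder
# carry that folder's name in the new "folderName" column.
print(df["folderName"].value_counts(dropna=False))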
54 changes: 25 additions & 29 deletions superannotate/consensus_benchmark/benchmark.py
@@ -14,19 +14,22 @@


def benchmark(
gt_project_name,
project_names,
project,
gt_folder,
folder_names,
export_root=None,
image_list=None,
annot_type='bbox',
show_plots=False
):
"""Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in project_names list:
"""Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in folder_names list:

:param gt_project_name: Project name that contains the ground truth annotations
:type gt_project_name: str
:param project_names: list of project names to aggregate through
:type project_names: list of str
:param project: project name or metadata of the project
:type project: str or dict
:param gt_folder: project folder name that contains the ground truth annotations
:type gt_folder: str
:param folder_names: list of folder names in the project for which the scores will be computed
:type folder_names: list of str
:param export_root: root export path of the projects
:type export_root: Pathlike (str or Path)
:param image_list: List of image names from the folders that must be used. If None, all images from the folders will be used. Default: None
@@ -36,6 +39,8 @@ def benchmark(
:param show_plots: If True, show plots based on results of consensus computation. Default: False
:type show_plots: bool

:return: Pandas DataFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
:rtype: pandas DataFrame
"""
def aggregate_attributes(instance_df):
def attribute_to_list(attribute_df):
@@ -61,7 +66,7 @@ def attribute_to_list(attribute_df):
["attributeGroupName", "attributeName"], axis=1, inplace=True
)
instance_df.drop_duplicates(
subset=["imageName", "instanceId", "project"], inplace=True
subset=["imageName", "instanceId", "folderName"], inplace=True
)
instance_df["attributes"] = [attributes]
return instance_df
@@ -72,27 +77,18 @@ def attribute_to_list(attribute_df):

if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
gt_project_meta = prepare_export(gt_project_name)
download_export(gt_project_name, gt_project_meta, export_dir)
gt_project_df = aggregate_annotations_as_df(export_dir)
proj_export_meta = prepare_export(project_name)
download_export(project_name, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / gt_project_name
gt_project_df = aggregate_annotations_as_df(export_dir)
gt_project_df["project"] = gt_project_name
project_df = aggregate_annotations_as_df(export_root)

benchmark_dfs = []
for project_name in project_names:
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
proj_export_meta = prepare_export(project_name)
download_export(project_name, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / project_name
project_df = aggregate_annotations_as_df(export_dir)
gt_project_df = project_df[project_df["folderName"] == gt_folder]

project_df["project"] = project_name
project_gt_df = pd.concat([project_df, gt_project_df])
benchmark_dfs = []
for folder_name in folder_names:
folder_df = project_df[project_df["folderName"] == folder_name]
project_gt_df = pd.concat([folder_df, gt_project_df])
project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]

if image_list is not None:
Expand All @@ -102,7 +98,7 @@ def attribute_to_list(attribute_df):
project_gt_df.query("type == '" + annot_type + "'", inplace=True)

project_gt_df = project_gt_df.groupby(
["imageName", "instanceId", "project"]
["imageName", "instanceId", "folderName"]
)
project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
drop=True
@@ -115,13 +111,13 @@ def attribute_to_list(attribute_df):

benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
benchmark_project_df = benchmark_project_df[
benchmark_project_df["projectName"] == project_name]
benchmark_project_df["folderName"] == folder_name]

benchmark_dfs.append(benchmark_project_df)

benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)

if show_plots:
consensus_plot(benchmark_df, project_names)
consensus_plot(benchmark_df, folder_names)

return benchmark_df
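With the move from cross-project to cross-folder comparison, a benchmark call now names one project, the ground-truth folder, and the annotator folders to score. A hedged sketch (project and folder names are hypothetical, and the package-level export is assumed):

import superannotate as sa

# Hypothetical setup: one project whose "GT" folder holds ground-truth
# annotations and whose other folders hold per-annotator copies.
benchmark_df = sa.benchmark(
    "Vehicle Annotation",            # project name or metadata dict
    "GT",                            # gt_folder
    ["annotator_a", "annotator_b"],  # folder_names scored against GT
    annot_type="polygon"
)

# Mean benchmark score per folder, using the returned folderName/score columns.
print(benchmark_df.groupby("folderName")["score"].mean())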
39 changes: 20 additions & 19 deletions superannotate/consensus_benchmark/consensus.py
@@ -14,16 +14,19 @@


def consensus(
project_names,
project,
folder_names,
export_root=None,
image_list=None,
annot_type='bbox',
show_plots=False
):
"""Computes consensus score for each instance of given images that are present in at least 2 of the given projects:

:param project_names: list of project names to aggregate through
:type project_names: list of str
:param project: project name or metadata of the project
:type project: str or dict
:param folder_names: list of folder names in the project for which the scores will be computed
:type folder_names: list of str
:param export_root: root export path of the projects
:type export_root: Pathlike (str or Path)
:param image_list: List of image names from the folders that must be used. If None, all images from the folders will be used. Default: None
@@ -33,26 +36,24 @@ def consensus(
:param show_plots: If True, show plots based on results of consensus computation. Default: False
:type show_plots: bool

:return: Pandas DataFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
:rtype: pandas DataFrame
"""
supported_types = ['polygon', 'bbox', 'point']
if annot_type not in supported_types:
raise NotImplementedError

project_dfs = []
for project_name in project_names:
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
proj_export_meta = prepare_export(project_name)
download_export(project_name, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
else:
export_dir = Path(export_root) / project_name
if export_root is None:
with tempfile.TemporaryDirectory() as export_dir:
proj_export_meta = prepare_export(project)
download_export(project, proj_export_meta, export_dir)
project_df = aggregate_annotations_as_df(export_dir)
project_df["project"] = project_name
project_dfs.append(project_df)
else:
project_df = aggregate_annotations_as_df(export_root)

all_projects_df = pd.concat(project_dfs)
all_projects_df = all_projects_df[all_projects_df["instanceId"].notna()]
all_projects_df = project_df[project_df["instanceId"].notna()]
all_projects_df = all_projects_df.loc[
all_projects_df["folderName"].isin(folder_names)]

if image_list is not None:
all_projects_df = all_projects_df.loc[
Expand Down Expand Up @@ -84,13 +85,13 @@ def attribute_to_list(attribute_df):
["attributeGroupName", "attributeName"], axis=1, inplace=True
)
instance_df.drop_duplicates(
subset=["imageName", "instanceId", "project"], inplace=True
subset=["imageName", "instanceId", "folderName"], inplace=True
)
instance_df["attributes"] = [attributes]
return instance_df

all_projects_df = all_projects_df.groupby(
["imageName", "instanceId", "project"]
["imageName", "instanceId", "folderName"]
)
all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index(
drop=True
@@ -105,6 +106,6 @@ def attribute_to_list(attribute_df):
consensus_df = pd.concat(all_consensus_data, ignore_index=True)

if show_plots:
consensus_plot(consensus_df, project_names)
consensus_plot(consensus_df, folder_names)

return consensus_df
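The same single-project, multi-folder pattern applies to consensus. A sketch under the same assumptions (hypothetical names, package-level export):

import superannotate as sa

# Hypothetical: three folders in one project, annotated independently.
consensus_df = sa.consensus(
    "Vehicle Annotation",
    ["annotator_a", "annotator_b", "annotator_c"],
    annot_type="bbox"
)

# Instances only score when an image appears in at least two folders.
print(consensus_df.groupby("folderName")["score"].describe())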
22 changes: 11 additions & 11 deletions superannotate/consensus_benchmark/helpers.py
@@ -39,10 +39,10 @@ def image_consensus(df, image_name, annot_type):

"""
image_df = df[df["imageName"] == image_name]
all_projects = list(set(df["project"]))
all_projects = list(set(df["folderName"]))
column_names = [
"creatorEmail", "imageName", "instanceId", "area", "className",
"attributes", "projectName", "score"
"attributes", "folderName", "score"
]
instance_id = 0
image_data = {}
@@ -52,8 +52,8 @@ def image_consensus(df, image_name, annot_type):
projects_shaply_objs = {}
# generate shapely objects of instances
for _, row in image_df.iterrows():
if row["project"] not in projects_shaply_objs:
projects_shaply_objs[row["project"]] = []
if row["folderName"] not in projects_shaply_objs:
projects_shaply_objs[row["folderName"]] = []
inst_data = row["meta"]
if annot_type == 'bbox':
inst_coords = inst_data["points"]
@@ -69,7 +69,7 @@ def image_consensus(df, image_name, annot_type):
elif annot_type == 'point':
inst = Point(inst_data["x"], inst_data["y"])
if inst.is_valid:
projects_shaply_objs[row["project"]].append(
projects_shaply_objs[row["folderName"]].append(
(
inst, row["className"], row["creatorEmail"],
row["attributes"]
@@ -113,7 +113,7 @@ def image_consensus(df, image_name, annot_type):
image_data["imageName"].append(image_name)
image_data["instanceId"].append(instance_id)
image_data["className"].append(max_instances[0][2])
image_data["projectName"].append(max_instances[0][0])
image_data["folderName"].append(max_instances[0][0])
image_data["score"].append(0)
else:
for curr_match_data in max_instances:
@@ -130,7 +130,7 @@ def image_consensus(df, image_name, annot_type):
image_data["imageName"].append(image_name)
image_data["instanceId"].append(instance_id)
image_data["className"].append(curr_match_data[2])
image_data["projectName"].append(curr_match_data[0])
image_data["folderName"].append(curr_match_data[0])
image_data["score"].append(
proj_cons / (len(all_projects) - 1)
)
@@ -156,10 +156,10 @@ def consensus_plot(consensus_df, projects):
#project-wise boxplot
project_box_fig = px.box(
plot_data,
x="projectName",
x="folderName",
y="score",
points="all",
color="projectName",
color="folderName",
color_discrete_sequence=px.colors.qualitative.Dark24
)
project_box_fig.show()
@@ -171,12 +171,12 @@ def consensus_plot(consensus_df, projects):
y="score",
color="className",
symbol="creatorEmail",
facet_col="projectName",
facet_col="folderName",
color_discrete_sequence=px.colors.qualitative.Dark24,
hover_data={
"className": False,
"imageName": True,
"projectName": False,
"folderName": False,
"area": False,
"score": False
},
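image_consensus matches shapely geometries across folders and, per the hunk above, divides an instance's summed pairwise agreement by len(all_projects) - 1. A toy reconstruction of a single pairwise bbox comparison, assuming intersection-over-union as the agreement measure (the coordinates are made up):

from shapely.geometry import box

# Two hypothetical bboxes for the same object from two different folders.
inst_a = box(10, 10, 110, 60)
inst_b = box(15, 12, 112, 58)

# Assumed pairwise agreement: intersection-over-union of the two shapes.
iou = inst_a.intersection(inst_b).area / inst_a.union(inst_b).area

# With N folders, an instance's score averages its pairwise agreements:
# score = summed_pairwise_agreement / (N - 1), matching the diff above.
print(round(iou, 3))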