diff --git a/src/superannotate/lib/app/analytics/aggregators.py b/src/superannotate/lib/app/analytics/aggregators.py index ec9ccdad5..173a05d91 100644 --- a/src/superannotate/lib/app/analytics/aggregators.py +++ b/src/superannotate/lib/app/analytics/aggregators.py @@ -16,19 +16,52 @@ logger = get_default_logger() +@dataclass +class ImageRowData: + itemName: str = None + itemHeight: int = None + itemWidth: int = None + itemStatus: str = None + itemPinned: bool = None + instanceId: int = None + className: str = None + attributeGroupName: str = None + attributeName: str = None + type: str = None + error: str = None + locked: bool = None + visible: bool = None + trackingId: int = None + probability: int = None + pointLabels: str = None + meta: str = None + classColor: str = None + groupId: int = None + createdAt: str = None + creatorRole: str = None + creationType: str = None + creatorEmail: str = None + updatedAt: str = None + updatorRole: str = None + updatorEmail: str = None + folderName: str = None + itemAnnotator: str = None + itemQA: str = None + commentResolved: str = None + tag: str = None @dataclass class VideoRawData: - videoName: str = None + itemName: str = None folderName: str = None - videoHeight: int = None - videoWidth: int = None - videoStatus: str = None - videoUrl: str = None - videoDuration: int = None - videoError: str = None - videoAnnotator: str = None - videoQA: str = None + itemHeight: int = None + itemWidth: int = None + itemStatus: str = None + itemURL: str = None + itemDuration: int = None + error: str = None + itemAnnotator: str = None + itemQA: str = None # tag tagId: int = None tag: str = None @@ -57,12 +90,12 @@ class VideoRawData: class DocumentRawData: - docName: str = None + itemName: str = None folderName: str = None - docStatus: str = None - docUrl: str = None - docAnnotator: str = None - docQA: str = None + itemStatus: str = None + itemURL: str = None + itemAnnotator: str = None + itemQA: str = None # tag tagId: int = None tag: str = None @@ -91,9 +124,9 @@ class DataAggregator: "polygon": lambda annotation: annotation["points"], "polyline": lambda annotation: annotation["points"], "cuboid": lambda annotation: annotation["points"], - "comment": lambda annotation: annotation["points"], + "comment": lambda annotation: annotation["correspondence"], "point": lambda annotation: {"x": annotation["x"], "y": annotation["y"]}, - "annotation_type": lambda annotation: dict( + "ellipse": lambda annotation: dict( cx=annotation["cx"], cy=annotation["cy"], rx=annotation["rx"], @@ -101,6 +134,10 @@ class DataAggregator: angle=annotation["angle"], ), "tag": lambda annotation: None, + "mask": lambda annotation: {"parts": annotation["parts"]}, + "template": lambda annotation : None, + "rbbox": lambda annotation: annotation["points"], + "comment_inst": lambda annotation: annotation["points"] } def __init__( @@ -110,42 +147,46 @@ def __init__( folder_names: Optional[List[Union[Path, str]]] = None, ): self.project_type = project_type + if isinstance(project_type, str): + self.project_type = constances.ProjectType(project_type) self.project_root = Path(project_root) self.folder_names = folder_names self._annotation_suffix = None self.classes_path = self.project_root / "classes" / "classes.json" - @property - def annotation_suffix(self): - if not self._annotation_suffix: - if self.project_type == constances.ProjectType.VECTOR.name: - self._annotation_suffix = VECTOR_ANNOTATION_POSTFIX - elif self.project_type == constances.ProjectType.PIXEL.name: - self._annotation_suffix = PIXEL_ANNOTATION_POSTFIX - else: - self._annotation_suffix = ATTACHED_VIDEO_ANNOTATION_POSTFIX - return ATTACHED_VIDEO_ANNOTATION_POSTFIX + def _set_annotation_suffix(self, path): + + fname = next((x for x in path.glob("*.json")), None) + if not fname: + self._annotation_suffix = ".json" + elif VECTOR_ANNOTATION_POSTFIX in fname.name: + self._annotation_suffix = VECTOR_ANNOTATION_POSTFIX + elif PIXEL_ANNOTATION_POSTFIX in fname.name: + self._annotation_suffix = PIXEL_ANNOTATION_POSTFIX + else: + self._annotation_suffix = ".json" def get_annotation_paths(self): annotations_paths = [] if self.folder_names is None: + self._set_annotation_suffix(self.project_root) for path in self.project_root.glob("*"): - if path.is_file() and path.suffix == self.annotation_suffix: + if path.is_file() and self._annotation_suffix in path.name: annotations_paths.append(path) elif path.is_dir() and path.name != "classes": annotations_paths.extend( - list(path.rglob(f"*{self.annotation_suffix}")) + list(path.rglob(f"*{self._annotation_suffix}")) ) else: for folder_name in self.folder_names: + self._set_annotation_suffix(self.project_root / folder_name) annotations_paths.extend( list( (self.project_root / folder_name).rglob( - f"*{self.annotation_suffix:}" + f"*{self._annotation_suffix}" ) ) ) - if not annotations_paths: logger.warning(f"Could not find annotations in {self.project_root}.") return annotations_paths @@ -163,16 +204,18 @@ def aggregate_annotations_as_df(self): self.check_classes_path() annotation_paths = self.get_annotation_paths() + if self.project_type in ( - constances.ProjectType.VECTOR.name, - constances.ProjectType.PIXEL.name, + constances.ProjectType.VECTOR, + constances.ProjectType.PIXEL ): return self.aggregate_image_annotations_as_df(annotation_paths) - elif self.project_type == constances.ProjectType.VIDEO.name: + elif self.project_type is constances.ProjectType.VIDEO: return self.aggregate_video_annotations_as_df(annotation_paths) - elif self.project_type == constances.ProjectType.DOCUMENT.name: + elif self.project_type is constances.ProjectType.DOCUMENT: return self.aggregate_document_annotations_as_df(annotation_paths) + def __add_attributes_to_raws(self, raws, attributes, element_raw): for attribute_id, attribute in enumerate(attributes): attribute_raw = copy.copy(element_raw) @@ -182,7 +225,6 @@ def __add_attributes_to_raws(self, raws, attributes, element_raw): raws.append(attribute_raw) if not attributes: raws.append(element_raw) - return raws def aggregate_video_annotations_as_df(self, annotation_paths: List[str]): @@ -192,21 +234,21 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]): annotation_data = json.load(open(annotation_path)) raw_data = VideoRawData() # metadata - raw_data.videoName = annotation_data["metadata"]["name"] + raw_data.itemName = annotation_data["metadata"]["name"] raw_data.folderName = ( annotation_path.parent.name if annotation_path.parent != self.project_root else None ) - raw_data.videoHeight = annotation_data["metadata"].get("height") - raw_data.videoWidth = annotation_data["metadata"].get("width") - raw_data.videoStatus = annotation_data["metadata"].get("status") - raw_data.videoUrl = annotation_data["metadata"].get("url") - raw_data.videoDuration = annotation_data["metadata"].get("duration") - - raw_data.videoError = annotation_data["metadata"].get("error") - raw_data.videoAnnotator = annotation_data["metadata"].get("annotatorEmail") - raw_data.videoQA = annotation_data["metadata"].get("qaEmail") + raw_data.itemHeight = annotation_data["metadata"].get("height") + raw_data.itemWidth = annotation_data["metadata"].get("width") + raw_data.itemStatus = annotation_data["metadata"].get("status") + raw_data.itemURL = annotation_data["metadata"].get("url") + raw_data.itemDuration = annotation_data["metadata"].get("duration") + + raw_data.error = annotation_data["metadata"].get("error") + raw_data.itemAnnotator = annotation_data["metadata"].get("annotatorEmail") + raw_data.itemQA = annotation_data["metadata"].get("qaEmail") # append tags for idx, tag in enumerate(annotation_data.get("tags", [])): tag_row = copy.copy(raw_data) @@ -217,6 +259,8 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]): instances = annotation_data.get("instances", []) for idx, instance in enumerate(instances): instance_type = instance["meta"].get("type", "event") + if instance_type == "comment": + instance_type = "comment_inst" instance_raw = copy.copy(raw_data) instance_raw.instanceId = int(idx) instance_raw.instanceStart = instance["meta"].get("start") @@ -272,16 +316,16 @@ def aggregate_document_annotations_as_df(self, annotation_paths: List[str]): annotation_data = json.load(open(annotation_path)) raw_data = DocumentRawData() # metadata - raw_data.docName = annotation_data["metadata"]["name"] + raw_data.itemName = annotation_data["metadata"]["name"] raw_data.folderName = ( annotation_path.parent.name if annotation_path.parent != self.project_root else None ) - raw_data.docStatus = annotation_data["metadata"].get("status") - raw_data.docUrl = annotation_data["metadata"].get("url") - raw_data.docAnnotator = annotation_data["metadata"].get("annotatorEmail") - raw_data.docQA = annotation_data["metadata"].get("qaEmail") + raw_data.itemStatus = annotation_data["metadata"].get("status") + raw_data.itemURL = annotation_data["metadata"].get("url") + raw_data.itemAnnotator = annotation_data["metadata"].get("annotatorEmail") + raw_data.itemQA = annotation_data["metadata"].get("qaEmail") # append tags for idx, tag in enumerate(annotation_data.get("tags", [])): tag_row = copy.copy(raw_data) @@ -319,95 +363,54 @@ def aggregate_document_annotations_as_df(self, annotation_paths: List[str]): return df.where(pd.notnull(df), None) def aggregate_image_annotations_as_df(self, annotations_paths: List[str]): - annotation_data = { - "imageName": [], - "imageHeight": [], - "imageWidth": [], - "imageStatus": [], - "imagePinned": [], - "instanceId": [], - "className": [], - "attributeGroupName": [], - "attributeName": [], - "type": [], - "error": [], - "locked": [], - "visible": [], - "trackingId": [], - "probability": [], - "pointLabels": [], - "meta": [], - "classColor": [], - "groupId": [], - "createdAt": [], - "creatorRole": [], - "creationType": [], - "creatorEmail": [], - "updatedAt": [], - "updatorRole": [], - "updatorEmail": [], - "folderName": [], - "imageAnnotator": [], - "imageQA": [], - "commentResolved": [], - "tag": [], - } classes_json = json.load(open(self.classes_path)) class_name_to_color = {} class_group_name_to_values = {} + rows = [] + freestyle_attributes = set() for annotation_class in classes_json: name = annotation_class["name"] color = annotation_class["color"] class_name_to_color[name] = color class_group_name_to_values[name] = {} for attribute_group in annotation_class["attribute_groups"]: + group_type = attribute_group.get("group_type") + group_id = attribute_group.get("id") + if group_type and group_type in ["text", "numeric"]: + freestyle_attributes.add(group_id) class_group_name_to_values[name][attribute_group["name"]] = [] for attribute in attribute_group["attributes"]: class_group_name_to_values[name][attribute_group["name"]].append( attribute["name"] ) - def __append_annotation(annotation_dict): - for annotation_key in annotation_data: - if annotation_key in annotation_dict: - annotation_data[annotation_key].append( - annotation_dict[annotation_key] - ) - else: - annotation_data[annotation_key].append(None) - for annotation_path in annotations_paths: - annotation_json = json.load(open(annotation_path)) - parts = Path(annotation_path).name.split(self.annotation_suffix) - if len(parts) != 2: - continue - image_name = parts[0] - image_metadata = self.__get_image_metadata(image_name, annotation_json) + row_data = ImageRowData() + annotation_json = None + with open(annotation_path, 'r') as fp: + annotation_json = json.load(fp) + parts = Path(annotation_path).name.split(self._annotation_suffix) + row_data = self.__fill_image_metadata(row_data, annotation_json['metadata']) annotation_instance_id = 0 + # include comments for annotation in annotation_json["comments"]: - comment_resolved = annotation["resolved"] - comment_meta = { - "x": annotation["x"], - "y": annotation["y"], - "comments": annotation["correspondence"], - } - annotation_dict = { - "type": "comment", - "meta": comment_meta, - "commentResolved": comment_resolved, - } - user_metadata = self.__get_user_metadata(annotation) - annotation_dict.update(user_metadata) - annotation_dict.update(image_metadata) - __append_annotation(annotation_dict) + comment_row = copy.copy(row_data) + comment_row.comment_resolved = annotation["resolved"] + comment_row.comment = DataAggregator.MAPPERS["comment"](annotation) + comment_row = self.__fill_user_metadata(row_data, annotation) + rows.append(comment_row) # include tags - for annotation in annotation_json["tags"]: - annotation_dict = {"type": "tag", "tag": annotation} - annotation_dict.update(image_metadata) - __append_annotation(annotation_dict) - for annotation in annotation_json["instances"]: + for idx, tag in enumerate(annotation_json["tags"]): + tag_row = copy.copy(row_data) + tag_row.tagId = idx + tag_row.rag = tag + rows.append(tag_row) + + #Instances + for idx, annotation in enumerate(annotation_json["instances"]): + instance_row = copy.copy(row_data) annotation_type = annotation.get("type", "mask") annotation_class_name = annotation.get("className") if ( @@ -419,64 +422,32 @@ def __append_annotation(annotation_dict): annotation_class_name, ) continue - annotation_class_color = class_name_to_color[annotation_class_name] - annotation_group_id = annotation.get("groupId") - annotation_locked = annotation.get("locked") - annotation_visible = annotation.get("visible") - annotation_tracking_id = annotation.get("trackingId") - annotation_meta = None - if annotation_type in ["bbox", "polygon", "polyline", "cuboid"]: - annotation_meta = {"points": annotation["points"]} - elif annotation_type == "point": - annotation_meta = {"x": annotation["x"], "y": annotation["y"]} - elif annotation_type == "ellipse": - annotation_meta = { - "cx": annotation["cx"], - "cy": annotation["cy"], - "rx": annotation["rx"], - "ry": annotation["ry"], - "angle": annotation["angle"], - } - elif annotation_type == "mask": - annotation_meta = {"parts": annotation["parts"]} - elif annotation_type == "template": - annotation_meta = { - "connections": annotation["connections"], - "points": annotation["points"], - } - annotation_error = annotation.get("error") - annotation_probability = annotation.get("probability") - annotation_point_labels = annotation.get("pointLabels") + instance_row.classColor = class_name_to_color[annotation_class_name] + instance_row.groupId = annotation.get("groupId") + instance_row.locked = annotation.get("locked") + instance_row.visible = annotation.get("visible") + instance_row.trackingId = annotation.get("trackingId") + instance_row.type = annotation.get("type") + instance_row.meta = DataAggregator.MAPPERS[annotation_type](annotation) + instance_row.error = annotation.get("error") + instance_row.probability = annotation.get("probability") + instance_row.pointLabels = annotation.get("pointLabels") + instance_row.instanceId = idx attributes = annotation.get("attributes") - user_metadata = self.__get_user_metadata(annotation) + instance_row = self.__fill_user_metadata(instance_row, annotation) folder_name = None if Path(annotation_path).parent != Path(self.project_root): folder_name = Path(annotation_path).parent.name + instance_row.folderName = folder_name num_added = 0 if not attributes: - annotation_dict = { - "imageName": image_name, - "instanceId": annotation_instance_id, - "className": annotation_class_name, - "type": annotation_type, - "locked": annotation_locked, - "visible": annotation_visible, - "trackingId": annotation_tracking_id, - "meta": annotation_meta, - "error": annotation_error, - "probability": annotation_probability, - "pointLabels": annotation_point_labels, - "classColor": annotation_class_color, - "groupId": annotation_group_id, - "folderName": folder_name, - } - annotation_dict.update(user_metadata) - annotation_dict.update(image_metadata) - __append_annotation(annotation_dict) + rows.append(instance_row) num_added = 1 else: for attribute in attributes: + attribute_row = copy.copy(instance_row) attribute_group = attribute.get("groupName") + group_id = attribute.get("groupId") attribute_name = attribute.get("name") if ( attribute_group @@ -491,56 +462,41 @@ def __append_annotation(annotation_dict): attribute_name not in class_group_name_to_values[annotation_class_name][ attribute_group - ] + ] and group_id not in freestyle_attributes ): logger.warning( f"Annotation class group value {attribute_name} not in classes json. Skipping." ) continue - annotation_dict = { - "imageName": image_name, - "instanceId": annotation_instance_id, - "className": annotation_class_name, - "attributeGroupName": attribute_group, - "attributeName": attribute_name, - "type": annotation_type, - "locked": annotation_locked, - "visible": annotation_visible, - "trackingId": annotation_tracking_id, - "meta": annotation_meta, - "error": annotation_error, - "probability": annotation_probability, - "pointLabels": annotation_point_labels, - "classColor": annotation_class_color, - "groupId": annotation_group_id, - "folderName": folder_name, - } - annotation_dict.update(user_metadata) - annotation_dict.update(image_metadata) - __append_annotation(annotation_dict) + + else: + attribute_row.attributeGroupName = attribute_group + attribute_row.attributeName = attribute_name + + + rows.append(attribute_row) num_added += 1 if num_added > 0: annotation_instance_id += 1 - df = pd.DataFrame(annotation_data) + df = pd.DataFrame([row.__dict__ for row in rows], dtype=object) df = df.astype({"probability": float}) return df @staticmethod - def __get_image_metadata(image_name, annotations): - image_metadata = {"imageName": image_name} - - image_metadata["imageHeight"] = annotations["metadata"].get("height") - image_metadata["imageWidth"] = annotations["metadata"].get("width") - image_metadata["imageStatus"] = annotations["metadata"].get("status") - image_metadata["imagePinned"] = annotations["metadata"].get("pinned") - image_metadata["imageAnnotator"] = annotations["metadata"].get("annotatorEmail") - image_metadata["imageQA"] = annotations["metadata"].get("qaEmail") - return image_metadata + def __fill_image_metadata(raw_data, metadata): + raw_data.itemName = metadata.get('name') + raw_data.itemHeight = metadata.get("height") + raw_data.itemWidth = metadata.get("width") + raw_data.itemStatus = metadata.get("status") + raw_data.itemPinned = metadata.get("pinned") + raw_data.itemAnnotator = metadata.get("annotatorEmail") + raw_data.itemQA = metadata.get("qaEmail") + return raw_data @staticmethod - def __get_user_metadata(annotation): + def __fill_user_metadata(row_data, annotation): annotation_created_at = pd.to_datetime(annotation.get("createdAt")) annotation_created_by = annotation.get("createdBy") annotation_creator_email = None @@ -556,13 +512,11 @@ def __get_user_metadata(annotation): if annotation_updated_by: annotation_updator_email = annotation_updated_by.get("email") annotation_updator_role = annotation_updated_by.get("role") - user_metadata = { - "createdAt": annotation_created_at, - "creatorRole": annotation_creator_role, - "creatorEmail": annotation_creator_email, - "creationType": annotation_creation_type, - "updatedAt": annotation_updated_at, - "updatorRole": annotation_updator_role, - "updatorEmail": annotation_updator_email, - } - return user_metadata + row_data.createdAt = annotation_created_at + row_data.creatorRole = annotation_creator_role + row_data.creatorEmail = annotation_creator_email + row_data.creationType = annotation_creation_type + row_data.updatedAt = annotation_updated_at + row_data.updatorRole = annotation_updator_role + row_data.updatorEmail = annotation_updator_email + return row_data diff --git a/src/superannotate/lib/app/analytics/common.py b/src/superannotate/lib/app/analytics/common.py index 4f1877658..b5f753792 100644 --- a/src/superannotate/lib/app/analytics/common.py +++ b/src/superannotate/lib/app/analytics/common.py @@ -33,7 +33,7 @@ def aggregate_image_annotations_as_df( :type folder_names: (list of str) :return: DataFrame on annotations with columns: - "imageName", "instanceId", + "itemName", "instanceId", "className", "attributeGroupName", "attributeName", "type", "error", "locked", "visible", "trackingId", "probability", "pointLabels", "meta" (geometry information as string), "commentResolved", "classColor", @@ -382,10 +382,10 @@ def instance_consensus(inst_1, inst_2): """Helper function that computes consensus score between two instances: :param inst_1: First instance for consensus score. - :type inst_1: shapely object + :type inst_1: shapely object or a tag :param inst_2: Second instance for consensus score. - :type inst_2: shapely object + :type inst_2: shapely object or a tag """ if inst_1.type == inst_2.type == "Polygon": intersect = inst_1.intersection(inst_2) @@ -398,8 +398,35 @@ def instance_consensus(inst_1, inst_2): return score +def calculate_tag_consensus(image_df): + column_names = [ + "creatorEmail", + "itemName", + "instanceId", + "className", + "folderName", + "attributeGroupName", + "attributeName" + ] + + image_data = {} + for column_name in column_names: + image_data[column_name] = [] + + image_df=image_df.reset_index() + image_data["score"] = [] + for i, irow in image_df.iterrows(): + for c in column_names: + image_data[c].append(irow[c]) + image_data["score"].append(0) + for j, jrow in image_df.iterrows(): + if i == j: + continue + if (irow["className"] == jrow["className"]) and irow["attributeGroupName"] == jrow["attributeGroupName"] and irow["attributeName"] == jrow["attributeName"]: + image_data["score"][i]+=1 + return image_data -def image_consensus(df, image_name, annot_type): +def consensus(df, item_name, annot_type): """Helper function that computes consensus score for instances of a single image: :param df: Annotation data of all images @@ -419,11 +446,11 @@ def image_consensus(df, image_name, annot_type): "To use superannotate.benchmark or superannotate.consensus functions please install shapely package." ) - image_df = df[df["imageName"] == image_name] + image_df = df[df["itemName"] == item_name] all_projects = list(set(df["folderName"])) column_names = [ "creatorEmail", - "imageName", + "itemName", "instanceId", "area", "className", @@ -436,6 +463,8 @@ def image_consensus(df, image_name, annot_type): for column_name in column_names: image_data[column_name] = [] + if annot_type == "tag": + return calculate_tag_consensus(image_df) projects_shaply_objs = {} # generate shapely objects of instances for _, row in image_df.iterrows(): @@ -443,19 +472,19 @@ def image_consensus(df, image_name, annot_type): projects_shaply_objs[row["folderName"]] = [] inst_data = row["meta"] if annot_type == "bbox": - inst_coords = inst_data["points"] + inst_coords = inst_data x1, x2 = inst_coords["x1"], inst_coords["x2"] y1, y2 = inst_coords["y1"], inst_coords["y2"] inst = box(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)) elif annot_type == "polygon": - inst_coords = inst_data["points"] + inst_coords = inst_data shapely_format = [] for i in range(0, len(inst_coords) - 1, 2): shapely_format.append((inst_coords[i], inst_coords[i + 1])) inst = Polygon(shapely_format) elif annot_type == "point": inst = Point(inst_data["x"], inst_data["y"]) - if inst.is_valid: + if annot_type != "tag" and inst.is_valid: projects_shaply_objs[row["folderName"]].append( (inst, row["className"], row["creatorEmail"], row["attributes"]) ) @@ -479,7 +508,7 @@ def image_consensus(df, image_name, annot_type): max_instances.append((curr_proj, *curr_inst_data)) visited_instances[curr_proj][curr_id] = True else: - if annot_type in ["polygon", "bbox"]: + if annot_type in ["polygon", "bbox", "tag"]: max_score = 0 else: max_score = float("-inf") @@ -501,7 +530,7 @@ def image_consensus(df, image_name, annot_type): image_data["creatorEmail"].append(max_instances[0][3]) image_data["attributes"].append(max_instances[0][4]) image_data["area"].append(max_instances[0][1].area) - image_data["imageName"].append(image_name) + image_data["itemName"].append(item_name) image_data["instanceId"].append(instance_id) image_data["className"].append(max_instances[0][2]) image_data["folderName"].append(max_instances[0][0]) @@ -518,7 +547,7 @@ def image_consensus(df, image_name, annot_type): image_data["creatorEmail"].append(curr_match_data[3]) image_data["attributes"].append(curr_match_data[4]) image_data["area"].append(curr_match_data[1].area) - image_data["imageName"].append(image_name) + image_data["itemName"].append(item_name) image_data["instanceId"].append(instance_id) image_data["className"].append(curr_match_data[2]) image_data["folderName"].append(curr_match_data[0]) @@ -564,7 +593,7 @@ def consensus_plot(consensus_df, *_, **__): color_discrete_sequence=px.colors.qualitative.Dark24, hover_data={ "className": False, - "imageName": True, + "itemName": True, "folderName": False, "area": False, "score": False, diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 55db1ea84..0f9d29313 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -1697,7 +1697,7 @@ def consensus( folder_names: List[NotEmptyStr], export_root: Optional[Union[NotEmptyStr, Path]] = None, image_list: Optional[List[NotEmptyStr]] = None, - annot_type: Optional[AnnotationType] = "bbox", + annotation_type: Optional[AnnotationType] = "bbox", show_plots: Optional[StrictBool] = False, ): """Computes consensus score for each instance of given images that are present in at least 2 of the given projects: @@ -1719,29 +1719,14 @@ def consensus( :rtype: pandas DataFrame """ - if export_root is None: - with tempfile.TemporaryDirectory() as temp_dir: - export_root = temp_dir - response = self.controller.consensus( - project_name=project, - folder_names=folder_names, - export_path=export_root, - image_list=image_list, - annot_type=annot_type, - show_plots=show_plots, - ) - - else: - response = self.controller.consensus( - project_name=project, - folder_names=folder_names, - export_path=export_root, - image_list=image_list, - annot_type=annot_type, - show_plots=show_plots, - ) - if response.errors: - raise AppException(response.errors) + response = self.controller.consensus( + project_name=project, + folder_names=folder_names, + image_list=image_list, + annot_type=annotation_type, + ) + if response.errors: + raise AppException(response.errors) return response.data def run_prediction( @@ -1950,32 +1935,17 @@ def aggregate_annotations_as_df( :return: DataFrame on annotations :rtype: pandas DataFrame """ - if project_type in ( - constants.ProjectType.VECTOR.name, - constants.ProjectType.PIXEL.name, - ): - from superannotate.lib.app.analytics.common import ( - aggregate_image_annotations_as_df, - ) + from superannotate.lib.app.analytics.aggregators import DataAggregator - return aggregate_image_annotations_as_df( - project_root=project_root, - include_classes_wo_annotations=False, - include_comments=True, - include_tags=True, - folder_names=folder_names, - ) - elif project_type in ( - constants.ProjectType.VIDEO.name, - constants.ProjectType.DOCUMENT.name, - ): - from superannotate.lib.app.analytics.aggregators import DataAggregator - - return DataAggregator( - project_type=project_type, - project_root=project_root, - folder_names=folder_names, - ).aggregate_annotations_as_df() + try: + ProjectTypes.validate(project_type) + except TypeError as e: + raise AppException(e) + return DataAggregator( + project_type=project_type, + project_root=project_root, + folder_names=folder_names, + ).aggregate_annotations_as_df() def delete_annotations( self, project: NotEmptyStr, item_names: Optional[List[NotEmptyStr]] = None diff --git a/src/superannotate/lib/app/interface/types.py b/src/superannotate/lib/app/interface/types.py index cb76193a1..cea32d521 100644 --- a/src/superannotate/lib/app/interface/types.py +++ b/src/superannotate/lib/app/interface/types.py @@ -98,7 +98,7 @@ def validate(cls, value: Union[str]) -> Union[str]: class AnnotationType(StrictStr): - VALID_TYPES = ["bbox", "polygon", "point"] + VALID_TYPES = ["bbox", "polygon", "point", "tag"] @classmethod def validate(cls, value: Union[str]) -> Union[str]: diff --git a/src/superannotate/lib/core/usecases/models.py b/src/superannotate/lib/core/usecases/models.py index 33b6db449..263392dde 100644 --- a/src/superannotate/lib/core/usecases/models.py +++ b/src/superannotate/lib/core/usecases/models.py @@ -5,15 +5,17 @@ import zipfile from pathlib import Path from typing import List +from tempfile import TemporaryDirectory import boto3 import lib.core as constances import pandas as pd import requests from botocore.exceptions import ClientError +from lib.app.analytics.aggregators import DataAggregator from lib.app.analytics.common import aggregate_image_annotations_as_df from lib.app.analytics.common import consensus_plot -from lib.app.analytics.common import image_consensus +from lib.app.analytics.common import consensus from lib.core.conditions import Condition from lib.core.conditions import CONDITION_EQ as EQ from lib.core.entities import FolderEntity @@ -27,6 +29,10 @@ from lib.core.serviceproviders import BaseServiceProvider from lib.core.usecases.base import BaseReportableUseCase from lib.core.usecases.base import BaseUseCase +from lib.core.usecases.folders import GetFolderUseCase +from lib.core.usecases.annotations import GetAnnotations, DownloadAnnotations +from lib.core.usecases.classes import DownloadAnnotationClassesUseCase +from lib.core.reporter import Reporter from superannotate.logger import get_default_logger logger = get_default_logger() @@ -345,13 +351,13 @@ def execute(self): if self._image_list is not None: project_gt_df = project_gt_df.loc[ - project_gt_df["imageName"].isin(self._image_list) + project_gt_df["itemName"].isin(self._image_list) ] project_gt_df.query("type == '" + self._annotation_type + "'", inplace=True) project_gt_df = project_gt_df.groupby( - ["imageName", "instanceId", "folderName"] + ["itemName", "instanceId", "folderName"] ) def aggregate_attributes(instance_df): @@ -378,7 +384,7 @@ def attribute_to_list(attribute_df): ["attributeGroupName", "attributeName"], axis=1, inplace=True ) instance_df.drop_duplicates( - subset=["imageName", "instanceId", "folderName"], inplace=True + subset=["itemName", "instanceId", "folderName"], inplace=True ) instance_df["attributes"] = [attributes] return instance_df @@ -386,7 +392,7 @@ def attribute_to_list(attribute_df): project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index( drop=True ) - unique_images = set(project_gt_df["imageName"]) + unique_images = set(project_gt_df["itemName"]) all_benchmark_data = [] for image_name in unique_images: image_data = image_consensus( @@ -410,21 +416,74 @@ def __init__( self, project: ProjectEntity, folder_names: list, - export_dir: str, image_list: list, annotation_type: str, - show_plots: bool, + service_provider: BaseServiceProvider, ): super().__init__() self._project = project - self._folder_names = folder_names - self._export_dir = export_dir self._image_list = image_list - self._annota_type_type = annotation_type - self._show_plots = show_plots + self._instance_type = annotation_type + self._folders = [] + self._folder_names = folder_names + self.service_provider = service_provider + + for folder_name in folder_names: + get_folder_uc = GetFolderUseCase( + project = self._project, + service_provider = service_provider, + folder_name=folder_name + ) + folder = get_folder_uc.execute().data + if not folder: + raise AppException(f"Can't find folder {folder_name}") + + self._folders.append(folder) + + def _download_annotations(self, destination): + reporter = Reporter( + log_info=False, + log_warning=False, + log_debug=False, + disable_progress_bar=True + ) + + classes_dir = Path(destination) / "classes" + classes_dir.mkdir() + + DownloadAnnotationClassesUseCase( + reporter = reporter, + download_path = classes_dir, + project = self._project, + service_provider = self.service_provider + ).execute() + + for folder in self._folders: + download_annotations_uc = DownloadAnnotations( + reporter = reporter, + project = self._project, + folder = folder, + item_names = self._image_list, + destination = destination, #Destination unknown known known + service_provider = self.service_provider, + recursive = False + ) + tmp = download_annotations_uc.execute() + if tmp.errors: + raise AppException(tmp.errors) + return tmp.data + def execute(self): - project_df = aggregate_image_annotations_as_df(self._export_dir) + with TemporaryDirectory() as temp_dir: + export_path = self._download_annotations(temp_dir) + aggregator = DataAggregator( + project_type = self._project.type, + folder_names = self._folder_names, + project_root = export_path, + ) + project_df = aggregator.aggregate_annotations_as_df() + all_projects_df = project_df[project_df["instanceId"].notna()] all_projects_df = all_projects_df.loc[ all_projects_df["folderName"].isin(self._folder_names) @@ -432,10 +491,12 @@ def execute(self): if self._image_list is not None: all_projects_df = all_projects_df.loc[ - all_projects_df["imageName"].isin(self._image_list) + all_projects_df["itemName"].isin(self._image_list) ] - all_projects_df.query("type == '" + self._annota_type_type + "'", inplace=True) + + all_projects_df.query("type == '" + self._instance_type + "'", inplace=True) + def aggregate_attributes(instance_df): def attribute_to_list(attribute_df): @@ -459,29 +520,31 @@ def attribute_to_list(attribute_df): ["attributeGroupName", "attributeName"], axis=1, inplace=True ) instance_df.drop_duplicates( - subset=["imageName", "instanceId", "folderName"], inplace=True + subset=["itemName", "instanceId", "folderName"], inplace=True ) instance_df["attributes"] = [attributes] return instance_df - all_projects_df = all_projects_df.groupby( - ["imageName", "instanceId", "folderName"] - ) - all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index( - drop=True - ) - unique_images = set(all_projects_df["imageName"]) + if self._instance_type != "tag": + all_projects_df = all_projects_df.groupby( + ["itemName", "instanceId", "folderName"] + ) + all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index( + drop=True + ) + unique_images = set(all_projects_df["itemName"]) + else: + unique_images = all_projects_df["itemName"].unique() all_consensus_data = [] for image_name in unique_images: - image_data = image_consensus( - all_projects_df, image_name, self._annota_type_type + image_data = consensus( + all_projects_df, image_name, self._instance_type ) all_consensus_data.append(pd.DataFrame(image_data)) consensus_df = pd.concat(all_consensus_data, ignore_index=True) - - if self._show_plots: - consensus_plot(consensus_df, self._folder_names) + if self._instance_type == "tag": + consensus_df["score"]/=(len(self._folder_names) - 1) self._response.data = consensus_df return self._response diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index 61eb814ae..004a29763 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -1128,38 +1128,19 @@ def consensus( self, project_name: str, folder_names: list, - export_path: str, + # export_path: str, image_list: list, annot_type: str, - show_plots: bool, + # show_plots: bool, ): project = self.get_project(project_name) - export_response = self.prepare_export( - project.name, - folder_names=folder_names, - include_fuse=False, - only_pinned=False, - ) - if export_response.errors: - return export_response - - response = self.download_export( - project_name=project.name, - export_name=export_response.data["name"], - folder_path=export_path, - extract_zip_contents=True, - to_s3_bucket=False, - ) - if response.errors: - raise AppException(response.errors) use_case = usecases.ConsensusUseCase( project=project, folder_names=folder_names, - export_dir=export_path, image_list=image_list, annotation_type=annot_type, - show_plots=show_plots, + service_provider = self.service_provider ) return use_case.execute()