From 8c75a0480eb787628d8b120da1b381f01d3b4329 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Tue, 22 Feb 2022 14:47:38 +0400 Subject: [PATCH] Fixed streamed data chinks handeling --- .../lib/app/interface/sdk_interface.py | 2 +- src/superannotate/lib/core/reporter.py | 4 +- .../lib/core/usecases/annotations.py | 59 ++++++++++++------- src/superannotate/lib/core/usecases/images.py | 4 +- .../lib/infrastructure/controller.py | 5 +- .../lib/infrastructure/services.py | 1 + .../lib/infrastructure/stream_data_handler.py | 10 ++-- .../annotations/test_get_annotations.py | 24 ++++++-- tests/integration/test_df_processing.py | 9 ++- tests/profiling/profiling.py | 0 10 files changed, 79 insertions(+), 39 deletions(-) delete mode 100644 tests/profiling/profiling.py diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 2e61157c3..472a3b47a 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2850,7 +2850,7 @@ def invite_contributors_to_team( @Trackable @validate_arguments -def get_annotations(project: NotEmptyStr, items: Optional[List[NotEmptyStr]]): +def get_annotations(project: NotEmptyStr, items: Optional[List[NotEmptyStr]] = None): """Returns annotations for the given list of items. :param project: project name diff --git a/src/superannotate/lib/core/reporter.py b/src/superannotate/lib/core/reporter.py index 835f4d265..b8e140e27 100644 --- a/src/superannotate/lib/core/reporter.py +++ b/src/superannotate/lib/core/reporter.py @@ -46,9 +46,9 @@ def log_debug(self, value: str): self.debug_messages.append(value) def start_progress( - self, iterations: Union[int, range], description: str = "Processing" + self, iterations: Union[int, range], description: str = "Processing", disable=False ): - self.progress_bar = self.get_progress_bar(iterations, description) + self.progress_bar = self.get_progress_bar(iterations, description, disable) @staticmethod def get_progress_bar( diff --git a/src/superannotate/lib/core/usecases/annotations.py b/src/superannotate/lib/core/usecases/annotations.py index f4ee6f812..3bcf1eb39 100644 --- a/src/superannotate/lib/core/usecases/annotations.py +++ b/src/superannotate/lib/core/usecases/annotations.py @@ -9,6 +9,8 @@ import boto3 import lib.core as constances +from lib.core.conditions import Condition +from lib.core.conditions import CONDITION_EQ as EQ from lib.core.data_handlers import ChainedAnnotationHandlers from lib.core.data_handlers import DocumentTagHandler from lib.core.data_handlers import LastActionHandler @@ -494,14 +496,18 @@ def __init__( reporter: Reporter, project: ProjectEntity, folder: FolderEntity, + images: BaseManageableRepository, item_names: Optional[List[str]], - backend_service_provider: SuerannotateServiceProvider + backend_service_provider: SuerannotateServiceProvider, + show_process: bool = True ): super().__init__(reporter) self._project = project self._folder = folder + self._images = images self._item_names = item_names self._client = backend_service_provider + self._show_process = show_process def validate_item_names(self): if self._item_names: @@ -512,27 +518,36 @@ def validate_item_names(self): f"Dropping duplicates. Found {len_unique_items}/{len_items} unique items." ) self._item_names = item_names + else: + condition = ( + Condition("team_id", self._project.team_id, EQ) + & Condition("project_id", self._project.uuid, EQ) + & Condition("folder_id", self._folder.uuid, EQ) + ) + + self._item_names = [item.name for item in self._images.get_all(condition)] def execute(self): - items_count = len(self._item_names) - self.reporter.log_info( - f"Getting {items_count} annotations from " - f"{self._project.name}{f'/{self._folder.name}' if self._folder else ''}." - ) - self.reporter.start_progress(items_count) - annotations = self._client.get_annotations( - team_id=self._project.team_id, - project_id=self._project.uuid, - folder_id=self._folder.uuid, - items=self._item_names, - reporter=self.reporter - ) - received_items_count = len(annotations) - if items_count > received_items_count: - self.reporter.log_warning( - f"Could not find annotations for {items_count - received_items_count}/{items_count} items." + if self.is_valid(): + items_count = len(self._item_names) + self.reporter.log_info( + f"Getting {items_count} annotations from " + f"{self._project.name}{f'/{self._folder.name}' if self._folder else ''}." ) - self._response.data = annotations + self.reporter.start_progress(items_count, disable=not self._show_process) + annotations = self._client.get_annotations( + team_id=self._project.team_id, + project_id=self._project.uuid, + folder_id=self._folder.uuid, + items=self._item_names, + reporter=self.reporter + ) + received_items_count = len(annotations) + if items_count > received_items_count: + self.reporter.log_warning( + f"Could not find annotations for {items_count - received_items_count}/{items_count} items." + ) + self._response.data = annotations return self._response @@ -542,6 +557,7 @@ def __init__( reporter: Reporter, project: ProjectEntity, folder: FolderEntity, + images: BaseManageableRepository, video_name: str, fps: int, backend_service_provider: SuerannotateServiceProvider @@ -549,6 +565,7 @@ def __init__( super().__init__(reporter) self._project = project self._folder = folder + self._images = images self._video_name = video_name self._fps = fps self._client = backend_service_provider @@ -558,8 +575,10 @@ def execute(self): reporter=self.reporter, project=self._project, folder=self._folder, + images=self._images, item_names=[self._video_name], - backend_service_provider=self._client + backend_service_provider=self._client, + show_process=False ).execute() generator = VideoFrameGenerator(response.data[0], fps=self._fps) self.reporter.log_info(f"Getting annotations for {generator.frames_count} frames from {self._video_name}.") diff --git a/src/superannotate/lib/core/usecases/images.py b/src/superannotate/lib/core/usecases/images.py index eb04047ff..0e583e6c2 100644 --- a/src/superannotate/lib/core/usecases/images.py +++ b/src/superannotate/lib/core/usecases/images.py @@ -1928,8 +1928,8 @@ def execute(self): project=self._project, folder=self._folder, attachments=self._attachments[ - i : i + self.CHUNK_SIZE - ], # noqa: E203 + i : i + self.CHUNK_SIZE # noqa: E203 + ], backend_service_provider=self._backend_service, annotation_status=self._annotation_status, upload_state_code=self._upload_state_code, diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index 7b6bde783..d637e839f 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -74,8 +74,9 @@ def __init__(self, config_path: str = None, token: str = None): self.initialize_backend_client() def retrieve_configs(self, path: Path, raise_exception=True): + token, backend_url, ssl_verify = None, None, None - if not path.is_file() or not os.access(path, os.R_OK): + if not Path(path).is_file() or not os.access(path, os.R_OK): if raise_exception: raise AppException( f"SuperAnnotate config file {str(path)} not found." @@ -1626,6 +1627,7 @@ def get_annotations(self, project_name: str, folder_name: str, item_names: List[ reporter=self.default_reporter, project=project, folder=folder, + images=self.images, item_names=item_names, backend_service_provider=self.backend_client ) @@ -1639,6 +1641,7 @@ def get_annotations_per_frame(self, project_name: str, folder_name: str, video_n reporter=self.default_reporter, project=project, folder=folder, + images=self.images, video_name=video_name, fps=fps, backend_service_provider=self.backend_client diff --git a/src/superannotate/lib/infrastructure/services.py b/src/superannotate/lib/infrastructure/services.py index 1844ba3c5..b1eecc2b7 100644 --- a/src/superannotate/lib/infrastructure/services.py +++ b/src/superannotate/lib/infrastructure/services.py @@ -176,6 +176,7 @@ class SuperannotateBackendService(BaseBackendService): DEFAULT_CHUNK_SIZE = 1000 URL_USERS = "users" + URL_LIST_ALL_IMAGES = "/images/getImagesWithAnnotationPaths" URL_LIST_PROJECTS = "projects" URL_FOLDERS_IMAGES = "images-folders" URL_CREATE_PROJECT = "project" diff --git a/src/superannotate/lib/infrastructure/stream_data_handler.py b/src/superannotate/lib/infrastructure/stream_data_handler.py index 5c4abf548..a28ae0679 100644 --- a/src/superannotate/lib/infrastructure/stream_data_handler.py +++ b/src/superannotate/lib/infrastructure/stream_data_handler.py @@ -25,18 +25,21 @@ async def fetch(self, method: str, session: aiohttp.ClientSession, url: str, dat params: dict = None): response = await session._request(method, url, json=data, params=params) buffer = b"" - async for line in response.content: + async for line in response.content.iter_any(): slices = line.split(self.DELIMITER) if len(slices) == 1: buffer += slices[0] continue elif slices[0]: self._annotations.append(json.loads(buffer + slices[0])) + self._reporter.update_progress() for data in slices[1:-1]: self._annotations.append(json.loads(data)) + self._reporter.update_progress() buffer = slices[-1] if buffer: self._annotations.append(json.loads(buffer)) + self._reporter.update_progress() return self._annotations async def get_data( @@ -54,9 +57,8 @@ async def get_data( if chunk_size: for i in range(0, len(data), chunk_size): - await self.fetch(method, session, url, map_function(data[i:i + chunk_size]), params=params) - self._reporter.update_progress(chunk_size) + data_to_process = data[i:i + chunk_size] + await self.fetch(method, session, url, map_function(data_to_process), params=params) else: await self.fetch(method, session, url, map_function(data), params=params) - self._reporter.update_progress(len(data)) return self._annotations diff --git a/tests/integration/annotations/test_get_annotations.py b/tests/integration/annotations/test_get_annotations.py index 9eb87ad3f..8a73fb0b5 100644 --- a/tests/integration/annotations/test_get_annotations.py +++ b/tests/integration/annotations/test_get_annotations.py @@ -1,12 +1,13 @@ -from pathlib import Path +import json import os +from pathlib import Path from typing import List -import json -import pytest -import src.superannotate as sa +import pytest from pydantic import parse_obj_as from superannotate_schemas.schemas.internal import VectorAnnotation + +import src.superannotate as sa from tests.integration.base import BaseTestCase @@ -40,3 +41,18 @@ def test_get_annotations(self): annotation_data = json.load(annotation_file) self.assertEqual(len(annotation_data["instances"]), len(annotations[0]["instances"])) parse_obj_as(List[VectorAnnotation], annotations) + + @pytest.mark.flaky(reruns=3) + def test_get_annotations_all(self): + sa.init() + sa.upload_images_from_folder_to_project( + self.PROJECT_NAME, self.folder_path, annotation_status="InProgress" + ) + sa.create_annotation_classes_from_classes_json( + self.PROJECT_NAME, f"{self.folder_path}/classes/classes.json" + ) + _, _, _ = sa.upload_annotations_from_folder_to_project( + self.PROJECT_NAME, self.folder_path + ) + annotations = sa.get_annotations(f"{self.PROJECT_NAME}") + self.assertEqual(len(annotations), 4) diff --git a/tests/integration/test_df_processing.py b/tests/integration/test_df_processing.py index 15cd43e72..6b1707e72 100644 --- a/tests/integration/test_df_processing.py +++ b/tests/integration/test_df_processing.py @@ -19,7 +19,7 @@ def folder_path(self): ) def test_filter_instances(self): - df = sa.aggregate_annotations_as_df(self.folder_path,self.PROJECT_TYPE) + df = sa.aggregate_annotations_as_df(self.folder_path, self.PROJECT_TYPE) df = df[~(df.duplicated(["instanceId", "imageName"]))] df = df[df.duplicated(["trackingId"], False) & df["trackingId"].notnull()] self.assertEqual(len(df), 2) @@ -28,6 +28,7 @@ def test_filter_instances(self): {"example_image_1.jpg", "example_image_2.jpg"}, ) + class TestDFWithTagInstace(BaseTestCase): PROJECT_TYPE = "Vector" TEST_FOLDER_PATH = "data_set/sample_project_vector_with_tag" @@ -38,13 +39,11 @@ def folder_path(self): Path(os.path.join(dirname(dirname(__file__)), self.TEST_FOLDER_PATH)) ) - def test_filter_instances(self): df = sa.aggregate_annotations_as_df(self.folder_path, self.PROJECT_TYPE) self.assertEqual(df.iloc[0]["type"], "tag") - class TestClassDistibutionWithTagInstance(BaseTestCase): PROJECT_TYPE = "Vector" EXPORT_ROOT_PATH = "data_set" @@ -57,6 +56,6 @@ def root_path(self): ) def test_filter_instances(self): - df = sa.class_distribution(export_root=self.root_path,project_names=[self.PROJECT_NAME]) + df = sa.class_distribution(export_root=self.root_path, project_names=[self.PROJECT_NAME]) self.assertEqual(df.iloc[0]['count'], 1) - self.assertEqual(df.iloc[0]['className'], "Weather") \ No newline at end of file + self.assertEqual(df.iloc[0]['className'], "Weather") diff --git a/tests/profiling/profiling.py b/tests/profiling/profiling.py deleted file mode 100644 index e69de29bb..000000000