From dca0c551f9005154737e1449bd3eaff2b0edf983 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Sat, 18 Sep 2021 00:35:01 +0400 Subject: [PATCH 1/6] Changed validation message --- pytest.ini | 2 +- src/superannotate/__init__.py | 2 +- .../lib/app/interface/sdk_interface.py | 3 +- src/superannotate/lib/app/interface/types.py | 25 ++++--- .../lib/core/serviceproviders.py | 4 ++ src/superannotate/lib/core/types.py | 4 +- src/superannotate/lib/core/usecases.py | 72 ++++++++++--------- .../lib/infrastructure/controller.py | 19 +++-- .../lib/infrastructure/helpers.py | 1 + .../lib/infrastructure/repositories.py | 4 +- .../lib/infrastructure/services.py | 3 +- tests/integration/test_annotation_adding.py | 4 +- tests/integration/test_folders.py | 8 +-- tests/integration/test_interface.py | 1 + 14 files changed, 89 insertions(+), 63 deletions(-) diff --git a/pytest.ini b/pytest.ini index f14f931f9..730acbdc6 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,4 +2,4 @@ minversion = 3.0 log_cli=true python_files = test_*.py -;addopts = -n 32 --dist=loadscope \ No newline at end of file +addopts = -n 32 --dist=loadscope \ No newline at end of file diff --git a/src/superannotate/__init__.py b/src/superannotate/__init__.py index 0770a4f82..aca0ecaad 100644 --- a/src/superannotate/__init__.py +++ b/src/superannotate/__init__.py @@ -299,4 +299,4 @@ logging.config.fileConfig( os.path.join(WORKING_DIR, "logging.conf"), disable_existing_loggers=False ) -sys.tracebacklimit = 1 +sys.tracebacklimit = 0 diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 92276e373..15b8e497e 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -74,7 +74,6 @@ def init(path_to_config_json: str): @validate_arguments def set_auth_token(token: str): controller.set_token(token) - controller.init(controller.config_path) @Trackable @@ -2185,6 +2184,8 @@ def set_image_annotation_status( ) if response.errors: raise AppException(response.errors) + image = controller.get_image_metadata(project_name, folder_name, image_name).data + return ImageSerializer(image).serialize() @Trackable diff --git a/src/superannotate/lib/app/interface/types.py b/src/superannotate/lib/app/interface/types.py index 49003ce6b..d7c24f479 100644 --- a/src/superannotate/lib/app/interface/types.py +++ b/src/superannotate/lib/app/interface/types.py @@ -34,22 +34,29 @@ def validate(cls, value: Union[str]) -> Union[str]: return value +def to_chunks(t, size=2): + it = iter(t) + return zip(*[it] * size) + + def validate_arguments(func): @wraps(func) def wrapped(*args, **kwargs): try: return pydantic_validate_arguments(func)(*args, **kwargs) except ValidationError as e: - messages = defaultdict(list) + error_messages = defaultdict(list) for error in e.errors(): - messages[error["loc"][0]].append(f"{error['loc'][-1]} {error['msg']}") - raise AppException( - "\n".join( - [ - f"Invalid {message}: {','.join(text)}" - for message, text in messages.items() - ] + error_messages[error["loc"][0]].append( + f"{''.join([f' {i[0]} -> {i[1]}' for i in to_chunks(error['loc'])])} {error['loc'][-1]} {error['msg']}" ) - ) + texts = ["\n"] + for error, text in error_messages.items(): + texts.append( + "{} {}{}".format( + error, " " * (21 - len(error)), f"\n {' ' * 21}".join(text) + ) + ) + raise AppException("\n".join(texts)) return wrapped diff --git a/src/superannotate/lib/core/serviceproviders.py b/src/superannotate/lib/core/serviceproviders.py index 9cc6e14e0..56bba4ed1 100644 --- a/src/superannotate/lib/core/serviceproviders.py +++ b/src/superannotate/lib/core/serviceproviders.py @@ -16,6 +16,10 @@ def __call__(cls, *args, **kwargs): SingleInstanceMetaClass._instances[cls] = super().__call__(*args, **kwargs) return SingleInstanceMetaClass._instances[cls] + def get_instance(cls): + if cls._instances: + return cls._instances[cls] + class SuerannotateServiceProvider(metaclass=SingleInstanceMetaClass): @abstractmethod diff --git a/src/superannotate/lib/core/types.py b/src/superannotate/lib/core/types.py index ab45e2da2..61d673221 100644 --- a/src/superannotate/lib/core/types.py +++ b/src/superannotate/lib/core/types.py @@ -22,8 +22,8 @@ class AttributeGroup(BaseModel): class ClassesJson(BaseModel): name: StrictStr - color: Optional[StrictStr] - attribute_groups: Optional[List[AttributeGroup]] + color: StrictStr + attribute_groups: List[AttributeGroup] class Metadata(BaseModel): diff --git a/src/superannotate/lib/core/usecases.py b/src/superannotate/lib/core/usecases.py index 6e3cb4ba6..867a38875 100644 --- a/src/superannotate/lib/core/usecases.py +++ b/src/superannotate/lib/core/usecases.py @@ -811,13 +811,13 @@ def execute(self): ) if "error" in response: raise AppException(response["error"]) - folder_str = ( - "" if self._folder_names is None else "/".join(self._folder_names) - ) + report_message = self._project.name + if self._folder_names: + report_message = f"[{', '.join(self._folder_names)}]" logger.info( f"Prepared export {response['name']} for project " - f"{self._project.name}/{folder_str} (project ID {self._project.uuid})." + f"{report_message} (project ID {self._project.uuid})." ) self._response.data = response @@ -1018,10 +1018,10 @@ def validate_folder(self): def execute(self): if self.is_valid(): - is_updated = self._folders.update(self._folder) - if not is_updated: + folder = self._folders.update(self._folder) + if not folder: self._response.errors = AppException("Couldn't rename folder.") - self._response.data = self._folder + self._response.data = folder return self._response @@ -2281,9 +2281,7 @@ def fill_classes_data(self, annotations: dict): for annotation in ( i for i in annotations["instances"] if i.get("type", None) == "template" ): - template_name = templates.get( - annotation.get("templateId"), None - ) + template_name = templates.get(annotation.get("templateId"), None) if template_name: annotation["templateName"] = template_name @@ -2292,9 +2290,9 @@ def fill_classes_data(self, annotations: dict): if annotation_class_id not in annotation_classes: continue annotation["className"] = annotation_classes[annotation_class_id]["name"] - for attribute in annotation["attributes"]: + for attribute in [i for i in annotation["attributes"] if "groupId" in i]: if ( - attribute["groupName"] + attribute["groupId"] not in annotation_classes[annotation_class_id]["attribute_groups"] ): continue @@ -2302,16 +2300,16 @@ def fill_classes_data(self, annotations: dict): "attribute_groups" ][attribute["groupId"]]["name"] if ( - attribute["name"] + attribute["groupId"] not in annotation_classes[annotation_class_id]["attribute_groups"][ attribute["groupId"] ]["attributes"] ): - del attribute["groupName"] + del attribute["groupId"] continue attribute["name"] = annotation_classes[annotation_class_id][ "attribute_groups" - ][attribute["groupId"]]["attributes"] + ][attribute["groupId"]]["name"] def execute(self): if self.is_valid(): @@ -3336,7 +3334,7 @@ def fill_classes_data(self, annotations: dict): continue attribute["id"] = annotation_classes[annotation_class_name][ "attribute_groups" - ][attribute["groupName"]]["attributes"] + ][attribute["groupName"]]["attributes"][attribute["name"]] def execute(self): if self.is_valid(): @@ -3424,6 +3422,8 @@ def __init__( self._annotations_to_upload = None self._missing_annotations = None self.missing_attribute_groups = set() + self.missing_classes = set() + self.missing_attributes = set() @property def s3_client(self): @@ -3474,6 +3474,7 @@ def fill_classes_data(self, annotations: dict): annotation_class_name = annotation["className"] if annotation_class_name not in annotation_classes: if annotation_class_name not in unknown_classes: + self.missing_classes.add(annotation_class_name) unknown_classes[annotation_class_name] = { "id": -(len(unknown_classes) + 1), "attribute_groups": {}, @@ -3516,6 +3517,7 @@ def fill_classes_data(self, annotations: dict): ][attribute["groupName"]]["attributes"] ): del attribute["groupId"] + self.missing_attributes.add(attribute["name"]) continue attribute["id"] = annotation_classes[annotation_class_name][ "attribute_groups" @@ -3661,10 +3663,7 @@ def execute(self): failed_annotations, missing_annotations, ) - if self.missing_attribute_groups: - logger.warning( - f"Couldn't find annotation groups [{', '.join(self.missing_attribute_groups)}]" - ) + self.report_missing_data() return self._response def upload_to_s3( @@ -3706,6 +3705,18 @@ def upload_to_s3( bucket.put_object(Key=image_info["annotation_bluemap_path"], Body=file) return image_id_name_map[image_id], True + def report_missing_data(self): + if self.missing_classes: + logger.warning(f"Couldn't find classes [{', '.join(self.missing_classes)}]") + if self.missing_attribute_groups: + logger.warning( + f"Couldn't find annotation groups [{', '.join(self.missing_attribute_groups)}]" + ) + if self.missing_attributes: + logger.warning( + f"Couldn't find attributes [{', '.join(self.missing_attributes)}]" + ) + class CreateModelUseCase(BaseUseCase): def __init__( @@ -4295,6 +4306,7 @@ def execute(self): image_ids=image_ids, ) if not res.ok: + # todo add error message in the response return self._response success_images = [] @@ -4782,24 +4794,14 @@ def images_to_upload(self): duplicated_paths.append(path) filtered_paths = [ path - for path in filtered_paths + for path in paths if not any( - [ - path.endswith(extension) - for extension in self.exclude_file_patterns - ] - ) - ] - duplicated_paths = [ - path - for path in duplicated_paths - if not any( - [ - path.endswith(extension) - for extension in self.exclude_file_patterns - ] + [extension in path for extension in self.exclude_file_patterns] ) ] + excluded_paths = [path for path in paths if path not in filtered_paths] + if excluded_paths: + logger.info(f"Excluded paths {', '.join(excluded_paths)}") image_entities = ( GetBulkImages( diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index 693555395..23c348de0 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -89,6 +89,7 @@ def init(self, config_path): else: self._backend_client.api_url = main_endpoint self._backend_client._auth_token = token + self._backend_client.get_session.cache_clear() self._team_id = int(self.configs.get_one("token").value.split("=")[-1]) self._team = None @@ -108,13 +109,21 @@ def team_name(self): _, self._team_name = self.get_team() return self._team_name + @staticmethod + def _validate_token(token: str): + try: + int(token.split("=")[-1]) + except ValueError: + raise AppException("Invalid token.") + def set_token(self, token): + self._validate_token(token) + self._team_id = int(token.split("=")[-1]) self.configs.insert(ConfigEntity("token", token)) - self._backend_client = SuperannotateBackendService( - api_url=self.configs.get_one("main_endpoint").value, - auth_token=self.configs.get_one("token").value, - logger=self._logger, - ) + self._backend_client = SuperannotateBackendService.get_instance() + self._backend_client._api_url = self.configs.get_one("main_endpoint").value + self._backend_client._auth_token = self.configs.get_one("token").value + self._backend_client.get_session.cache_clear() @property def projects(self): diff --git a/src/superannotate/lib/infrastructure/helpers.py b/src/superannotate/lib/infrastructure/helpers.py index c414816b8..e18d22b66 100644 --- a/src/superannotate/lib/infrastructure/helpers.py +++ b/src/superannotate/lib/infrastructure/helpers.py @@ -12,6 +12,7 @@ def wrapper_cache(func): @wraps(func) def wrapped_func(*args, **kwargs): + wrapped_func.cache_clear = func.cache_clear if datetime.utcnow() >= func.expiration: func.cache_clear() func.expiration = datetime.utcnow() + func.lifetime diff --git a/src/superannotate/lib/infrastructure/repositories.py b/src/superannotate/lib/infrastructure/repositories.py index 36016ba6f..e84fb2a79 100644 --- a/src/superannotate/lib/infrastructure/repositories.py +++ b/src/superannotate/lib/infrastructure/repositories.py @@ -269,7 +269,9 @@ def insert(self, entity: FolderEntity) -> FolderEntity: def update(self, entity: FolderEntity): project_id = entity.project_id team_id = entity.team_id - return self._service.update_folder(project_id, team_id, entity.to_dict()) + response = self._service.update_folder(project_id, team_id, entity.to_dict()) + if response: + return self.dict2entity(response) def delete(self, entity: FolderEntity): return self._service.delete_folders( diff --git a/src/superannotate/lib/infrastructure/services.py b/src/superannotate/lib/infrastructure/services.py index 85bf50f18..b503f6b69 100644 --- a/src/superannotate/lib/infrastructure/services.py +++ b/src/superannotate/lib/infrastructure/services.py @@ -346,7 +346,8 @@ def update_folder(self, project_id: int, team_id: int, folder_data: dict): ) params = {"project_id": project_id, "team_id": team_id} res = self._request(update_folder_url, "put", data=folder_data, params=params) - return res.ok + if res.ok: + return res.json() def get_project_settings(self, project_id: int, team_id: int): get_settings_url = urljoin( diff --git a/tests/integration/test_annotation_adding.py b/tests/integration/test_annotation_adding.py index 6d19780eb..f232b1a29 100644 --- a/tests/integration/test_annotation_adding.py +++ b/tests/integration/test_annotation_adding.py @@ -37,9 +37,9 @@ def test_upload_invalid_annotations(self): self.PROJECT_NAME, self.invalid_json_path ) - self.assertEqual(len(uploaded_annotations),3) + self.assertEqual(len(uploaded_annotations), 3) self.assertEqual(len(failed_annotations), 1) - self.assertEqual(len(missing_annotations),0) + self.assertEqual(len(missing_annotations), 0) def test_upload_annotations(self): sa.upload_images_from_folder_to_project( diff --git a/tests/integration/test_folders.py b/tests/integration/test_folders.py index 61542de78..a69232186 100644 --- a/tests/integration/test_folders.py +++ b/tests/integration/test_folders.py @@ -514,9 +514,7 @@ def test_create_folder_with_special_chars(self): def test_rename_folder_to_existing_name(self): sa.create_folder(self.PROJECT_NAME, self.TEST_FOLDER_NAME_1) - sa.create_folder(self.PROJECT_NAME, "_"*len(self.SPECIAL_CHARS)) - sa.rename_folder(f"{self.PROJECT_NAME}/{self.TEST_FOLDER_NAME_1}", self.SPECIAL_CHARS) - folder = sa.get_folder_metadata(self.PROJECT_NAME, "_" * len(self.SPECIAL_CHARS) + " (1)") + sa.create_folder(self.PROJECT_NAME, self.TEST_FOLDER_NAME_2) + sa.rename_folder(f"{self.PROJECT_NAME}/{self.TEST_FOLDER_NAME_1}", self.TEST_FOLDER_NAME_2) + folder = sa.get_folder_metadata(self.PROJECT_NAME, self.TEST_FOLDER_NAME_2 + " (1)") self.assertIsNotNone(folder) - - diff --git a/tests/integration/test_interface.py b/tests/integration/test_interface.py index 00928e032..d7079ab45 100644 --- a/tests/integration/test_interface.py +++ b/tests/integration/test_interface.py @@ -1,6 +1,7 @@ import os from os.path import dirname import tempfile +import unittest import src.superannotate as sa from src.superannotate.lib.app.exceptions import AppException From f79923f355832a91b30376aca788628dd0952017 Mon Sep 17 00:00:00 2001 From: shab Date: Thu, 23 Sep 2021 17:19:11 +0400 Subject: [PATCH 2/6] Fix prepare export argument --- .../lib/app/interface/sdk_interface.py | 69 +++++++++++-------- src/superannotate/lib/app/interface/types.py | 10 +++ src/superannotate/lib/core/enums.py | 4 ++ 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index dff57607f..f10280bbd 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -29,6 +29,7 @@ from lib.app.helpers import get_annotation_paths from lib.app.helpers import get_paths_and_duplicated_from_csv from lib.app.helpers import reformat_metrics_json +from lib.app.interface.types import AnnotationStatuses from lib.app.interface.types import AnnotationType from lib.app.interface.types import NotEmptyStr from lib.app.interface.types import Status @@ -1646,7 +1647,7 @@ def upload_images_from_s3_bucket_to_project( def prepare_export( project: Union[NotEmptyStr, dict], folder_names: Optional[List[NotEmptyStr]] = None, - annotation_statuses: Optional[List[NotEmptyStr]] = None, + annotation_statuses: Optional[List[AnnotationStatuses]] = None, include_fuse: Optional[StrictBool] = False, only_pinned=False, ): @@ -2152,38 +2153,50 @@ def download_export( """ project_name, folder_name = extract_project_folder(project) export_name = export["name"] if isinstance(export, dict) else export - response = controller.download_export( - project_name=project_name, - export_name=export_name, - folder_path=folder_path, - extract_zip_contents=extract_zip_contents, - to_s3_bucket=to_s3_bucket, - ) - downloaded_folder_path = response.data if to_s3_bucket: - to_s3_bucket = boto3.Session().resource("s3").Bucket(to_s3_bucket) - - files_to_upload = [] - for file in Path(downloaded_folder_path).rglob("*.*"): - files_to_upload.append(file) - - def _upload_file_to_s3(to_s3_bucket, path, s3_key) -> None: - controller.upload_file_to_s3( - to_s3_bucket=to_s3_bucket, path=path, s3_key=s3_key + with tempfile.TemporaryDirectory() as tmp: + response = controller.download_export( + project_name=project_name, + export_name=export_name, + folder_path=tmp, + extract_zip_contents=extract_zip_contents, + to_s3_bucket=to_s3_bucket, ) + downloaded_folder_path = response.data + if to_s3_bucket: + to_s3_bucket = boto3.Session().resource("s3").Bucket(to_s3_bucket) + files_to_upload = [] + for file in Path(downloaded_folder_path).rglob("*.*"): + files_to_upload.append(file) + + def _upload_file_to_s3(to_s3_bucket, path, s3_key) -> None: + controller.upload_file_to_s3( + to_s3_bucket=to_s3_bucket, path=path, s3_key=s3_key + ) - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - results = [] - for path in files_to_upload: - s3_key = f"{path.as_posix()}" - results.append( - executor.submit(_upload_file_to_s3, to_s3_bucket, str(path), s3_key) + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + results = [] + for path in files_to_upload: + s3_key = f"{folder_path}/{path.name}" + results.append( + executor.submit( + _upload_file_to_s3, to_s3_bucket, str(path), s3_key + ) + ) + for future in concurrent.futures.as_completed(results): + future.result() + logger.info( + "Exported to AWS %s/%s", to_s3_bucket.name, str(folder_path) ) - - for future in concurrent.futures.as_completed(results): - future.result() - logger.info("Exported to AWS %s/%s", to_s3_bucket, str(path)) + else: + controller.download_export( + project_name=project_name, + export_name=export_name, + folder_path=folder_path, + extract_zip_contents=extract_zip_contents, + to_s3_bucket=to_s3_bucket, + ) @Trackable diff --git a/src/superannotate/lib/app/interface/types.py b/src/superannotate/lib/app/interface/types.py index d7c24f479..8f44ad615 100644 --- a/src/superannotate/lib/app/interface/types.py +++ b/src/superannotate/lib/app/interface/types.py @@ -34,6 +34,16 @@ def validate(cls, value: Union[str]) -> Union[str]: return value +class AnnotationStatuses(StrictStr): + @classmethod + def validate(cls, value: Union[str]) -> Union[str]: + if value.lower() not in AnnotationStatus.values(): + raise TypeError( + f"Available annotation_statuses are {', '.join(AnnotationStatus.titles())}. " + ) + return value + + def to_chunks(t, size=2): it = iter(t) return zip(*[it] * size) diff --git a/src/superannotate/lib/core/enums.py b/src/superannotate/lib/core/enums.py index 94454bfa5..bcc714328 100644 --- a/src/superannotate/lib/core/enums.py +++ b/src/superannotate/lib/core/enums.py @@ -27,6 +27,10 @@ def get_value(cls, name): def values(cls): return [enum.name.lower() for enum in list(cls)] + @classmethod + def titles(cls): + return [enum.name for enum in list(cls)] + class ProjectType(BaseTitledEnum): VECTOR = "Vector", 1 From 65d5dae04f52aa4f0288475d402f13f006880539 Mon Sep 17 00:00:00 2001 From: shab Date: Thu, 23 Sep 2021 19:21:10 +0400 Subject: [PATCH 3/6] Fix download export --- .../lib/app/interface/sdk_interface.py | 51 ++------- src/superannotate/lib/core/usecases.py | 108 +++++++++++------- 2 files changed, 74 insertions(+), 85 deletions(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 0f50dd401..149a074db 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2153,50 +2153,13 @@ def download_export( """ project_name, folder_name = extract_project_folder(project) export_name = export["name"] if isinstance(export, dict) else export - - if to_s3_bucket: - with tempfile.TemporaryDirectory() as tmp: - response = controller.download_export( - project_name=project_name, - export_name=export_name, - folder_path=tmp, - extract_zip_contents=extract_zip_contents, - to_s3_bucket=to_s3_bucket, - ) - downloaded_folder_path = response.data - if to_s3_bucket: - to_s3_bucket = boto3.Session().resource("s3").Bucket(to_s3_bucket) - files_to_upload = [] - for file in Path(downloaded_folder_path).rglob("*.*"): - files_to_upload.append(file) - - def _upload_file_to_s3(to_s3_bucket, path, s3_key) -> None: - controller.upload_file_to_s3( - to_s3_bucket=to_s3_bucket, path=path, s3_key=s3_key - ) - - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - results = [] - for path in files_to_upload: - s3_key = f"{folder_path}/{path.name}" - results.append( - executor.submit( - _upload_file_to_s3, to_s3_bucket, str(path), s3_key - ) - ) - for future in concurrent.futures.as_completed(results): - future.result() - logger.info( - "Exported to AWS %s/%s", to_s3_bucket.name, str(folder_path) - ) - else: - controller.download_export( - project_name=project_name, - export_name=export_name, - folder_path=folder_path, - extract_zip_contents=extract_zip_contents, - to_s3_bucket=to_s3_bucket, - ) + controller.download_export( + project_name=project_name, + export_name=export_name, + folder_path=folder_path, + extract_zip_contents=extract_zip_contents, + to_s3_bucket=to_s3_bucket, + ) @Trackable diff --git a/src/superannotate/lib/core/usecases.py b/src/superannotate/lib/core/usecases.py index 16a942883..888f0fb72 100644 --- a/src/superannotate/lib/core/usecases.py +++ b/src/superannotate/lib/core/usecases.py @@ -21,6 +21,7 @@ import numpy as np import pandas as pd import requests +import tempfile from boto3.exceptions import Boto3Error from lib.app.analytics.common import aggregate_annotations_as_df from lib.app.analytics.common import consensus_plot @@ -3966,51 +3967,76 @@ def validate_project_type(self): constances.LIMITED_FUNCTIONS[self._project.project_type] ) - def execute(self): - if self.is_valid(): - exports = self._service.get_exports( - team_id=self._project.team_id, project_id=self._project.uuid - ) - export_id = None - for export in exports: - if export["name"] == self._export_name: - export_id = export["id"] - break - if not export_id: - raise AppException("Export not found.") + def upload_to_s3_from_folder(self, folder_path: str): + to_s3_bucket = boto3.Session().resource("s3").Bucket(self._to_s3_bucket) + files_to_upload = list(Path(folder_path).rglob("*.*")) - while True: - export = self._service.get_export( - team_id=self._project.team_id, - project_id=self._project.uuid, - export_id=export_id, + def _upload_file_to_s3(_to_s3_bucket, _path, _s3_key) -> None: + _to_s3_bucket.upload_file(_path, _s3_key) + + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + results = [] + for path in files_to_upload: + s3_key = f"{self._folder_path}/{path.name}" + results.append( + executor.submit( + _upload_file_to_s3, to_s3_bucket, str(path), s3_key + ) ) - if export["status"] == ExportStatus.IN_PROGRESS.value: - logger.info("Waiting 5 seconds for export to finish on server.") - time.sleep(5) - continue - if export["status"] == ExportStatus.ERROR.value: - raise AppException("Couldn't download export.") - pass - break - - filename = Path(export["path"]).name - filepath = Path(self._folder_path) / filename - with requests.get(export["download"], stream=True) as r: - r.raise_for_status() - with open(filepath, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - if self._extract_zip_contents: - with zipfile.ZipFile(filepath, "r") as f: - f.extractall(self._folder_path) - Path.unlink(filepath) - logger.info(f"Extracted {filepath} to folder {self._folder_path}") - else: - logger.info(f"Downloaded export ID {export['id']} to {filepath}") + def download_to_local_storage(self, destination: str): + exports = self._service.get_exports( + team_id=self._project.team_id, project_id=self._project.uuid + ) + export = next(filter(lambda i: i["name"] == self._export_name, exports), None) + export = self._service.get_export( + team_id=self._project.team_id, + project_id=self._project.uuid, + export_id=export["id"], + ) + if not export: + raise AppException("Export not found.") + export_status = export["status"] + + while export_status != ExportStatus.COMPLETE.value: + logger.info("Waiting 5 seconds for export to finish on server.") + time.sleep(5) + + export = self._service.get_export( + team_id=self._project.team_id, + project_id=self._project.uuid, + export_id=export["id"], + ) + export_status = export["status"] + if export_status in (ExportStatus.ERROR.value, ExportStatus.CANCELED.value): + raise AppException("Couldn't download export.") + + filename = Path(export["path"]).name + filepath = Path(destination) / filename + with requests.get(export["download"], stream=True) as response: + response.raise_for_status() + with open(filepath, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + if self._extract_zip_contents: + with zipfile.ZipFile(filepath, "r") as f: + f.extractall(destination) + Path.unlink(filepath) + return export["id"], filepath, destination - self._response.data = self._folder_path + def execute(self): + if self.is_valid(): + if self._to_s3_bucket: + with tempfile.TemporaryDirectory() as tmp: + self.download_to_local_storage(tmp) + self.upload_to_s3_from_folder(tmp) + logger.info(f"Exported to AWS {self._to_s3_bucket}/{self._folder_path}") + else: + export_id, filepath, destination = self.download_to_local_storage(self._folder_path) + if self._extract_zip_contents: + logger.info(f"Extracted {filepath} to folder {destination}") + else: + logger.info(f"Downloaded export ID {export_id} to {filepath}") return self._response From a687ce2a3b974338b86c21154b177ad092d5d7f5 Mon Sep 17 00:00:00 2001 From: shab Date: Fri, 24 Sep 2021 11:41:01 +0400 Subject: [PATCH 4/6] Add download export progress --- .../lib/app/interface/sdk_interface.py | 12 ++++++- src/superannotate/lib/core/usecases.py | 32 +++++++++++++------ .../lib/infrastructure/controller.py | 3 +- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 149a074db..01300c1b7 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2153,13 +2153,23 @@ def download_export( """ project_name, folder_name = extract_project_folder(project) export_name = export["name"] if isinstance(export, dict) else export - controller.download_export( + + use_case = controller.download_export( project_name=project_name, export_name=export_name, folder_path=folder_path, extract_zip_contents=extract_zip_contents, to_s3_bucket=to_s3_bucket, ) + if use_case.is_valid(): + if to_s3_bucket: + with tqdm( + total=use_case.get_upload_files_count(), desc="Uploading" + ) as progress_bar: + for _ in use_case.execute(): + progress_bar.update(1) + else: + use_case.execute() @Trackable diff --git a/src/superannotate/lib/core/usecases.py b/src/superannotate/lib/core/usecases.py index 888f0fb72..372647d66 100644 --- a/src/superannotate/lib/core/usecases.py +++ b/src/superannotate/lib/core/usecases.py @@ -5,6 +5,7 @@ import logging import os.path import random +import tempfile import time import uuid import zipfile @@ -21,7 +22,6 @@ import numpy as np import pandas as pd import requests -import tempfile from boto3.exceptions import Boto3Error from lib.app.analytics.common import aggregate_annotations_as_df from lib.app.analytics.common import consensus_plot @@ -382,7 +382,9 @@ def execute(self): if self._include_workflow: new_workflows = self._workflows_repo(self._backend_service, project) for workflow in self.workflows.get_all(): - existing_workflow_ids = list(map(lambda i: i.uuid, new_workflows.get_all())) + existing_workflow_ids = list( + map(lambda i: i.uuid, new_workflows.get_all()) + ) workflow_data = copy.copy(workflow) workflow_data.project_id = project.uuid workflow_data.class_id = annotation_classes_mapping[ @@ -391,7 +393,9 @@ def execute(self): new_workflows.insert(workflow_data) workflows = new_workflows.get_all() new_workflow = [ - work_flow for work_flow in workflows if work_flow.uuid not in existing_workflow_ids + work_flow + for work_flow in workflows + if work_flow.uuid not in existing_workflow_ids ][0] workflow_attributes = [] for attribute in workflow_data.attribute: @@ -3960,6 +3964,7 @@ def __init__( self._folder_path = folder_path self._extract_zip_contents = extract_zip_contents self._to_s3_bucket = to_s3_bucket + self._temp_dir = None def validate_project_type(self): if self._project.project_type in constances.LIMITED_FUNCTIONS: @@ -3979,10 +3984,9 @@ def _upload_file_to_s3(_to_s3_bucket, _path, _s3_key) -> None: for path in files_to_upload: s3_key = f"{self._folder_path}/{path.name}" results.append( - executor.submit( - _upload_file_to_s3, to_s3_bucket, str(path), s3_key - ) + executor.submit(_upload_file_to_s3, to_s3_bucket, str(path), s3_key) ) + yield def download_to_local_storage(self, destination: str): exports = self._service.get_exports( @@ -4024,15 +4028,23 @@ def download_to_local_storage(self, destination: str): Path.unlink(filepath) return export["id"], filepath, destination + def get_upload_files_count(self): + if not self._temp_dir: + self._temp_dir = tempfile.TemporaryDirectory() + self.download_to_local_storage(self._temp_dir.name) + return len(list(Path(self._temp_dir.name).rglob("*.*"))) + def execute(self): if self.is_valid(): if self._to_s3_bucket: - with tempfile.TemporaryDirectory() as tmp: - self.download_to_local_storage(tmp) - self.upload_to_s3_from_folder(tmp) + self.get_upload_files_count() + yield from self.upload_to_s3_from_folder(self._temp_dir.name) logger.info(f"Exported to AWS {self._to_s3_bucket}/{self._folder_path}") + self._temp_dir.cleanup() else: - export_id, filepath, destination = self.download_to_local_storage(self._folder_path) + export_id, filepath, destination = self.download_to_local_storage( + self._folder_path + ) if self._extract_zip_contents: logger.info(f"Extracted {filepath} to folder {destination}") else: diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index 23c348de0..2c82047c9 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -1382,7 +1382,7 @@ def download_export( to_s3_bucket: bool, ): project = self._get_project(project_name) - use_case = usecases.DownloadExportUseCase( + return usecases.DownloadExportUseCase( service=self._backend_client, project=project, export_name=export_name, @@ -1390,7 +1390,6 @@ def download_export( extract_zip_contents=extract_zip_contents, to_s3_bucket=to_s3_bucket, ) - return use_case.execute() def download_ml_model(self, model_data: dict, download_path: str): model = MLModelEntity( From 3b97a4c71bbc498b9e121c3009e2e77a8ff9534f Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Fri, 24 Sep 2021 12:00:15 +0400 Subject: [PATCH 5/6] Fixed tests --- tests/integration/test_depricated_functions_video.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/integration/test_depricated_functions_video.py b/tests/integration/test_depricated_functions_video.py index 79c53f630..6377499fa 100644 --- a/tests/integration/test_depricated_functions_video.py +++ b/tests/integration/test_depricated_functions_video.py @@ -22,24 +22,18 @@ class TestDeprecatedFunctionsVideo(BaseTestCase): EXCEPTION_MESSAGE = LIMITED_FUNCTIONS[ProjectType.VIDEO.value] EXCEPTION_MESSAGE_DOCUMENT_VIDEO = DEPRICATED_DOCUMENT_VIDEO_MESSAGE - def setUp(self, *args, **kwargs): self.tearDown() - sa.create_project( self.PROJECT_NAME, self.PROJECT_DESCRIPTION, self.PROJECT_TYPE ) - sa.create_project( self.PROJECT_NAME_2, self.PROJECT_DESCRIPTION_2, self.PROJECT_TYPE_2 ) def tearDown(self) -> None: projects = sa.search_projects(self.PROJECT_NAME, return_metadata=True) - for project in projects: - sa.delete_project(project) - - projects = sa.search_projects(self.PROJECT_NAME_2, return_metadata=True) + projects.extend(sa.search_projects(self.PROJECT_NAME_2, return_metadata=True)) for project in projects: sa.delete_project(project) From 5575d0e60654f4183c874141d78e6c40b5347fca Mon Sep 17 00:00:00 2001 From: shab Date: Fri, 24 Sep 2021 12:11:28 +0400 Subject: [PATCH 6/6] Fix use case --- src/superannotate/lib/app/interface/sdk_interface.py | 3 ++- src/superannotate/lib/core/usecases.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 69c239238..21be13444 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2169,7 +2169,8 @@ def download_export( for _ in use_case.execute(): progress_bar.update(1) else: - use_case.execute() + for _ in use_case.execute(): + continue @Trackable diff --git a/src/superannotate/lib/core/usecases.py b/src/superannotate/lib/core/usecases.py index 22aced638..987d9d11c 100644 --- a/src/superannotate/lib/core/usecases.py +++ b/src/superannotate/lib/core/usecases.py @@ -4049,6 +4049,7 @@ def execute(self): logger.info(f"Extracted {filepath} to folder {destination}") else: logger.info(f"Downloaded export ID {export_id} to {filepath}") + yield return self._response