From 3696b47b6eca0fdca4c0366e08b264410007cf20 Mon Sep 17 00:00:00 2001 From: Erik Date: Wed, 17 Mar 2021 16:54:11 +0400 Subject: [PATCH 01/22] public links implemented --- docs/source/superannotate.sdk.rst | 1 + superannotate/__init__.py | 4 +- superannotate/__main__.py | 27 ++++ superannotate/db/projects.py | 220 ++++++++++++++++++++++++++++++ tests/attach_urls.csv | 10 ++ tests/test_attach_image_urls.py | 32 +++++ 6 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 tests/attach_urls.csv create mode 100644 tests/test_attach_image_urls.py diff --git a/docs/source/superannotate.sdk.rst b/docs/source/superannotate.sdk.rst index 3a71627bf..3459a48d9 100644 --- a/docs/source/superannotate.sdk.rst +++ b/docs/source/superannotate.sdk.rst @@ -37,6 +37,7 @@ ________ .. autofunction:: superannotate.delete_folders .. autofunction:: superannotate.rename_folder .. autofunction:: superannotate.upload_images_to_project +.. autofunction:: superannotate.attach_image_urls_to_project .. autofunction:: superannotate.upload_images_from_public_urls_to_project .. autofunction:: superannotate.upload_images_from_google_cloud_to_project .. 
autofunction:: superannotate.upload_images_from_azure_blob_to_project diff --git a/superannotate/__init__.py b/superannotate/__init__.py index 7be9ad8b2..d5483db84 100644 --- a/superannotate/__init__.py +++ b/superannotate/__init__.py @@ -72,8 +72,8 @@ def consensus(*args, **kwargs): upload_images_from_google_cloud_to_project, upload_images_from_public_urls_to_project, upload_images_from_s3_bucket_to_project, upload_images_to_project, - upload_preannotations_from_folder_to_project, upload_video_to_project, - upload_videos_from_folder_to_project + attach_image_urls_to_project, upload_preannotations_from_folder_to_project, + upload_video_to_project, upload_videos_from_folder_to_project ) from .db.search_projects import search_projects from .db.teams import ( diff --git a/superannotate/__main__.py b/superannotate/__main__.py index 3d3832d40..9d46d76a7 100644 --- a/superannotate/__main__.py +++ b/superannotate/__main__.py @@ -62,6 +62,8 @@ def main(): create_folder(command, further_args) elif command == "upload-images": image_upload(command, further_args) + elif command == "attach-image-urls": + attach_image_urls(command, further_args) elif command == "upload-videos": video_upload(command, further_args) elif command in ["upload-preannotations", "upload-annotations"]: @@ -288,6 +290,31 @@ def image_upload(command_name, args): ) +def attach_image_urls(command_name, args): + parser = argparse.ArgumentParser(prog=_CLI_COMMAND + " " + command_name) + parser.add_argument( + '--project', required=True, help='Project name to upload' + ) + parser.add_argument( + '--attachments', + required=True, + help='path to csv file on attachments metadata' + ) + parser.add_argument( + '--annotation_status', + required=False, + default="NotStarted", + help= + 'Set images\' annotation statuses after upload. 
Default is NotStarted' + ) + args = parser.parse_args(args) + sa.attach_image_urls_to_project( + project=args.project, + attachments=args.attachments, + annotation_status=args.annotation_status + ) + + def export_project(command_name, args): parser = argparse.ArgumentParser(prog=_CLI_COMMAND + " " + command_name) parser.add_argument( diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index a26708486..fa0aa5b34 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -9,6 +9,7 @@ import threading import time import uuid +import pandas as pd from os.path import basename from pathlib import Path from urllib.parse import urlparse @@ -895,6 +896,225 @@ def _tqdm_download( break +def attach_image_urls_to_project( + project, attachments, annotation_status="NotStarted" +): + """Link images on external storage to SuperAnnotate. + + :param project: project name or project folder path + :type project: str or dict + :param attachments: path to csv file on attachments metadata + :type attachments: Pathlike (str or Path) + :param annotation_status: value to set the annotation statuses of the linked images: NotStarted InProgress QualityCheck Returned Completed Skipped + :type annotation_status: str + + :return: list of linked image urls, list of unreachable image urls + :rtype: tuple + """ + + project, project_folder = get_project_and_folder_metadata(project) + project_folder_name = project["name"] + ( + f'/{project_folder["name"]}' if project_folder else "" + ) + upload_state = project.get("upload_state") + if upload_state == "basic": + raise SABaseException( + 0, + "You cannot attach URLs in this type of project. 
Please attach it in an external storage project" + ) + upload_state = "external" + annotation_status = common.annotation_status_str_to_int(annotation_status) + team_id, project_id = project["team_id"], project["id"] + image_data = pd.read_csv(attachments) + existing_names = image_data[~image_data["name"].isnull()] + existing_images = search_images((project, project_folder)) + duplicate_idx = [] + for ind, _ in image_data[image_data["name"].isnull()].iterrows(): + while True: + name_try = str(uuid.uuid4()) + if name_try not in existing_images: + image_data.at[ind, "name"] = name_try + existing_images.append(name_try) + break + for ind, row in existing_names.iterrows(): + if row["name"] in existing_images: + duplicate_idx.append(ind) + duplicate_images = image_data.loc[duplicate_idx]["name"].tolist() + image_data.drop(labels=duplicate_idx, inplace=True) + if len(duplicate_images) != 0: + logger.warning( + "%s already existing images found that won't be uploaded.", + len(duplicate_images) + ) + image_data = pd.DataFrame(image_data, columns=["name", "url"]) + img_names_urls = image_data.values.tolist() + len_img_names_urls = len(img_names_urls) + logger.info( + "Uploading %s images to project %s.", len_img_names_urls, + project_folder_name + ) + if len_img_names_urls == 0: + return ([], [], duplicate_images) + params = {'team_id': team_id} + uploaded = [[] for _ in range(_NUM_THREADS)] + tried_upload = [[] for _ in range(_NUM_THREADS)] + couldnt_upload = [[] for _ in range(_NUM_THREADS)] + finish_event = threading.Event() + chunksize = int(math.ceil(len_img_names_urls / _NUM_THREADS)) + response = _api.send_request( + req_type='GET', + path=f'/project/{project_id}/sdkImageUploadToken', + params=params + ) + if not response.ok: + raise SABaseException( + response.status_code, "Couldn't get upload token " + response.text + ) + if project_folder is not None: + project_folder_id = project_folder["id"] + else: + project_folder_id = None + res = response.json() + prefix = 
res['filePath'] + tqdm_thread = threading.Thread( + target=__tqdm_thread_image_upload, + args=(len_img_names_urls, tried_upload, finish_event), + daemon=True + ) + tqdm_thread.start() + + threads = [] + for thread_id in range(_NUM_THREADS): + t = threading.Thread( + target=__attach_image_urls_to_project_thread, + args=( + res, img_names_urls, project, annotation_status, prefix, + thread_id, chunksize, couldnt_upload, uploaded, tried_upload, + project_folder_id + ), + daemon=True + ) + threads.append(t) + t.start() + for t in threads: + t.join() + finish_event.set() + tqdm_thread.join() + list_of_not_uploaded = [] + for couldnt_upload_thread in couldnt_upload: + for f in couldnt_upload_thread: + list_of_not_uploaded.append(str(f)) + list_of_uploaded = [] + for upload_thread in uploaded: + for f in upload_thread: + list_of_uploaded.append(str(f)) + + return (list_of_uploaded, list_of_not_uploaded, duplicate_images) + + +def __attach_image_urls_to_project_thread( + res, img_names_urls, project, annotation_status, prefix, thread_id, + chunksize, couldnt_upload, uploaded, tried_upload, project_folder_id +): + len_img_paths = len(img_names_urls) + start_index = thread_id * chunksize + end_index = start_index + chunksize + if start_index >= len_img_paths: + return + s3_session = boto3.Session( + aws_access_key_id=res['accessKeyId'], + aws_secret_access_key=res['secretAccessKey'], + aws_session_token=res['sessionToken'] + ) + s3_resource = s3_session.resource('s3') + bucket = s3_resource.Bucket(res["bucket"]) + prefix = res['filePath'] + uploaded_imgs = [] + uploaded_imgs_info = ([], []) + for i in range(start_index, end_index): + if i >= len_img_paths: + break + name, url = img_names_urls[i] + tried_upload[thread_id].append(name) + img_name_hash = str(uuid.uuid4()) + Path(name).suffix + key = prefix + img_name_hash + try: + bucket.put_object( + Body=json.dumps(create_empty_annotation((None, None), name)), + Key=key + ".json" + ) + except Exception as e: + 
logger.warning("Unable to upload image %s. %s", name, e) + couldnt_upload[thread_id].append(name) + continue + else: + uploaded_imgs.append(name) + uploaded_imgs_info[0].append(img_names_urls[i]) + uploaded_imgs_info[1].append(key) + if len(uploaded_imgs) >= 100: + try: + __create_image_url( + uploaded_imgs_info[0], uploaded_imgs_info[1], project, + annotation_status, project_folder_id + ) + except SABaseException as e: + couldnt_upload[thread_id] += uploaded_imgs + logger.warning(e) + else: + uploaded[thread_id] += uploaded_imgs + uploaded_imgs = [] + uploaded_imgs_info = ([], []) + try: + __create_image_url( + uploaded_imgs_info[0], uploaded_imgs_info[1], project, + annotation_status, project_folder_id + ) + except SABaseException as e: + couldnt_upload[thread_id] += uploaded_imgs + logger.warning(e) + else: + uploaded[thread_id] += uploaded_imgs + + +def __create_image_url( + img_names_urls, remote_paths, project, annotation_status, project_folder_id +): + if len(remote_paths) == 0: + return + team_id, project_id = project["team_id"], project["id"] + data = { + "project_id": str(project_id), + "team_id": str(team_id), + "images": [], + "annotation_status": annotation_status, + "meta": {}, + "upload_state": 2 + } + if project_folder_id is not None: + data["folder_id"] = project_folder_id + for img_name_url, remote_path in zip(img_names_urls, remote_paths): + data["images"].append( + { + "name": img_name_url[0], + "path": img_name_url[1] + } + ) + data["meta"][img_name_url[0]] = { + "width": None, + "height": None, + "annotation_json_path": remote_path + ".json", + "annotation_bluemap_path": remote_path + ".png" + } + + response = _api.send_request( + req_type='POST', path='/image/ext-create', json_req=data + ) + if not response.ok: + raise SABaseException( + response.status_code, "Couldn't ext-create image " + response.text + ) + + def upload_images_from_public_urls_to_project( project, img_urls, diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv new file 
mode 100644 index 000000000..0a7577d28 --- /dev/null +++ b/tests/attach_urls.csv @@ -0,0 +1,10 @@ +url,name +https://drive.google.com/uc?export=download&id=1vwfCpTzcjxoEA4hhDxqapPOVvLVeS7ZS,6022a74d5384c50017c366b3 +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366ad +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU, +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 + diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py new file mode 100644 index 000000000..d8cc122f9 --- /dev/null +++ b/tests/test_attach_image_urls.py @@ -0,0 +1,32 @@ +from pathlib import Path + +import pytest + +import superannotate as sa + +PROJECT_NAME_VECTOR = "test attach image urls" +PATH_TO_URLS = Path("./tests/attach_urls.csv") + + +def test_attach_image_urls(): + projects = sa.search_projects(PROJECT_NAME_VECTOR, return_metadata=True) + for project in projects: + sa.delete_project(project) + + project = sa.create_project(PROJECT_NAME_VECTOR, "test", "Vector") + + uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( + project, PATH_TO_URLS + ) + + assert len(uploaded) == 8 + assert len(could_not_upload) == 0 + assert len(existing_images) == 0 + + uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( + project, PATH_TO_URLS + ) + + assert len(uploaded) == 2 + assert len(could_not_upload) == 0 + assert len(existing_images) == 6 From 2c6a94610d113069ebe29e7fc173d7bd1437415d Mon Sep 
17 00:00:00 2001 From: Erik Date: Mon, 22 Mar 2021 11:40:12 +0400 Subject: [PATCH 02/22] refactored to avoid code duplication in attach image urls --- superannotate/common.py | 7 ++ superannotate/db/projects.py | 112 +++++++++++++++----------------- tests/attach_urls.csv | 1 - tests/test_attach_image_urls.py | 4 +- 4 files changed, 63 insertions(+), 61 deletions(-) diff --git a/superannotate/common.py b/superannotate/common.py index 6f3159f39..77961321b 100644 --- a/superannotate/common.py +++ b/superannotate/common.py @@ -29,6 +29,9 @@ "Completed": 5, "Skipped": 6 } + +_UPLOAD_STATES = {"Initial": 1, "Basic": 2, "External": 3} + _USER_ROLES = {"Admin": 2, "Annotator": 3, "QA": 4, "Customer": 5, "Viewer": 6} _AVAILABLE_SEGMENTATION_MODELS = ['autonomous', 'generic'] _MODEL_TRAINING_STATUSES = { @@ -118,6 +121,10 @@ def annotation_status_str_to_int(annotation_status): return _ANNOTATION_STATUSES[annotation_status] +def upload_state_str_to_int(upload_state): + return _UPLOAD_STATES[upload_state] + + def annotation_status_int_to_str(annotation_status): """Converts metadata annotation_status int value to a string diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index fa0aa5b34..fee6abad9 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -694,9 +694,14 @@ def __upload_images_to_aws_thread( if len(uploaded_imgs) >= 100: try: __create_image( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, prefix, uploaded_imgs_info[2], - project_folder_id + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="Basic" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -707,8 +712,14 @@ def __upload_images_to_aws_thread( uploaded_imgs_info = ([], [], []) try: __create_image( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, prefix, uploaded_imgs_info[2], 
project_folder_id + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="Basic" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -718,12 +729,19 @@ def __upload_images_to_aws_thread( def __create_image( - img_names, img_paths, project, annotation_status, remote_dir, sizes, - project_folder_id + img_names, + img_paths, + project, + annotation_status, + remote_dir, + sizes, + project_folder_id, + upload_state="Initial" ): if len(img_paths) == 0: return team_id, project_id = project["team_id"], project["id"] + upload_state_code = common.upload_state_str_to_int(upload_state) data = { "project_id": str(project_id), "team_id": str(team_id), @@ -732,14 +750,19 @@ def __create_image( "team_id": str(team_id), "images": [], "annotation_status": annotation_status, - "meta": {} + "meta": {}, + "upload_state": upload_state_code } if project_folder_id is not None: data["folder_id"] = project_folder_id - for img_name, img_path, size in zip(img_names, img_paths, sizes): + for img_data, img_path, size in zip(img_names, img_paths, sizes): img_name_uuid = Path(img_path).name remote_path = remote_dir + f"{img_name_uuid}" - data["images"].append({"name": img_name, "path": remote_path}) + if upload_state == "External": + img_name, img_url = img_data + else: + img_name, img_url = img_data, remote_path + data["images"].append({"name": img_name, "path": img_url}) data["meta"][img_name] = { "width": size[0], "height": size[1], @@ -982,7 +1005,6 @@ def attach_image_urls_to_project( daemon=True ) tqdm_thread.start() - threads = [] for thread_id in range(_NUM_THREADS): t = threading.Thread( @@ -1030,7 +1052,7 @@ def __attach_image_urls_to_project_thread( bucket = s3_resource.Bucket(res["bucket"]) prefix = res['filePath'] uploaded_imgs = [] - uploaded_imgs_info = ([], []) + uploaded_imgs_info = ([], [], []) for i in range(start_index, end_index): if i >= len_img_paths: 
break @@ -1051,11 +1073,18 @@ def __attach_image_urls_to_project_thread( uploaded_imgs.append(name) uploaded_imgs_info[0].append(img_names_urls[i]) uploaded_imgs_info[1].append(key) + uploaded_imgs_info[2].append((None, None)) if len(uploaded_imgs) >= 100: try: - __create_image_url( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, project_folder_id + __create_image( + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="External" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -1063,11 +1092,17 @@ def __attach_image_urls_to_project_thread( else: uploaded[thread_id] += uploaded_imgs uploaded_imgs = [] - uploaded_imgs_info = ([], []) + uploaded_imgs_info = ([], [], []) try: - __create_image_url( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, project_folder_id + __create_image( + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="External" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -1076,45 +1111,6 @@ def __attach_image_urls_to_project_thread( uploaded[thread_id] += uploaded_imgs -def __create_image_url( - img_names_urls, remote_paths, project, annotation_status, project_folder_id -): - if len(remote_paths) == 0: - return - team_id, project_id = project["team_id"], project["id"] - data = { - "project_id": str(project_id), - "team_id": str(team_id), - "images": [], - "annotation_status": annotation_status, - "meta": {}, - "upload_state": 2 - } - if project_folder_id is not None: - data["folder_id"] = project_folder_id - for img_name_url, remote_path in zip(img_names_urls, remote_paths): - data["images"].append( - { - "name": img_name_url[0], - "path": img_name_url[1] - } - ) - data["meta"][img_name_url[0]] = { - "width": None, - "height": None, - 
"annotation_json_path": remote_path + ".json", - "annotation_bluemap_path": remote_path + ".png" - } - - response = _api.send_request( - req_type='POST', path='/image/ext-create', json_req=data - ) - if not response.ok: - raise SABaseException( - response.status_code, "Couldn't ext-create image " + response.text - ) - - def upload_images_from_public_urls_to_project( project, img_urls, diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv index 0a7577d28..354f3465a 100644 --- a/tests/attach_urls.csv +++ b/tests/attach_urls.csv @@ -6,5 +6,4 @@ https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm -https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py index d8cc122f9..82be922fd 100644 --- a/tests/test_attach_image_urls.py +++ b/tests/test_attach_image_urls.py @@ -19,7 +19,7 @@ def test_attach_image_urls(): project, PATH_TO_URLS ) - assert len(uploaded) == 8 + assert len(uploaded) == 7 assert len(could_not_upload) == 0 assert len(existing_images) == 0 @@ -29,4 +29,4 @@ def test_attach_image_urls(): assert len(uploaded) == 2 assert len(could_not_upload) == 0 - assert len(existing_images) == 6 + assert len(existing_images) == 5 From c4e142cbc205eae356a3ecce108cdf410ae5c790 Mon Sep 17 00:00:00 2001 From: Erik Date: Tue, 23 Mar 2021 10:19:12 +0400 Subject: [PATCH 03/22] Disabled some SDK functions for projects with URL attached images --- superannotate/common.py | 9 ++++-- superannotate/db/images.py | 13 ++++++++ superannotate/db/project_images.py | 14 ++++++-- superannotate/db/projects.py | 52 +++++++++++++++++++++++++++--- 
superannotate/ml/ml_funcs.py | 23 +++++++++++-- 5 files changed, 101 insertions(+), 10 deletions(-) diff --git a/superannotate/common.py b/superannotate/common.py index 77961321b..aa591d74e 100644 --- a/superannotate/common.py +++ b/superannotate/common.py @@ -30,7 +30,8 @@ "Skipped": 6 } -_UPLOAD_STATES = {"Initial": 1, "Basic": 2, "External": 3} +_UPLOAD_STATES_STR_TO_CODES = {"Initial": 1, "Basic": 2, "External": 3} +_UPLOAD_STATES_CODES_TO_STR = {1: "Initial", 2: "Basic", 3: "External"} _USER_ROLES = {"Admin": 2, "Annotator": 3, "QA": 4, "Customer": 5, "Viewer": 6} _AVAILABLE_SEGMENTATION_MODELS = ['autonomous', 'generic'] @@ -122,7 +123,11 @@ def annotation_status_str_to_int(annotation_status): def upload_state_str_to_int(upload_state): - return _UPLOAD_STATES[upload_state] + return _UPLOAD_STATES_STR_TO_CODES[upload_state] + + +def upload_state_int_to_str(upload_state): + return _UPLOAD_STATES_CODES_TO_STR[upload_state] def annotation_status_int_to_str(annotation_status): diff --git a/superannotate/db/images.py b/superannotate/db/images.py index ee795f773..df809434b 100644 --- a/superannotate/db/images.py +++ b/superannotate/db/images.py @@ -631,6 +631,12 @@ def download_image( ) project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) img = get_image_bytes( (project, project_folder), image_name, variant=variant ) @@ -698,6 +704,13 @@ def get_image_bytes(project, image_name, variant='original'): :return: io.BytesIO() of the image :rtype: io.BytesIO() """ + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached 
with URLs" + ) if variant not in ["original", "lores"]: raise SABaseException( 0, "Image download variant should be either original or lores" diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 3469a2d6d..3b054cf06 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -51,6 +51,12 @@ def upload_image_to_project( :type image_quality_in_editor: str """ project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) annotation_status = common.annotation_status_str_to_int(annotation_status) if image_quality_in_editor is None: image_quality_in_editor = get_project_default_image_quality_in_editor( @@ -121,8 +127,12 @@ def upload_image_to_project( else: project_folder_id = None __create_image( - [img_name], [key], project, annotation_status, prefix, - [images_info_and_array[2]], project_folder_id + [img_name], [key], + project, + annotation_status, + prefix, [images_info_and_array[2]], + project_folder_id, + upload_state="Basic" ) while True: diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index fee6abad9..d69172d7c 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -237,6 +237,13 @@ def upload_video_to_project( :return: filenames of uploaded images :rtype: list of strs """ + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) logger.info("Uploading from video %s.", str(video_path)) rotate_code = None try: @@ -390,6 +397,12 @@ def upload_videos_from_folder_to_project( :rtype: tuple of 
list of strs """ project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if recursive_subfolders: logger.warning( "When using recursive subfolder parsing same name videos in different subfolders will overwrite each other." @@ -480,6 +493,12 @@ def upload_images_from_folder_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if recursive_subfolders: logger.info( "When using recursive subfolder parsing same name images in different subfolders will overwrite each other." @@ -812,6 +831,12 @@ def upload_images_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if not isinstance(img_paths, list): raise SABaseException( 0, "img_paths argument to upload_images_to_project should be a list" @@ -939,13 +964,12 @@ def attach_image_urls_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) - upload_state = project.get("upload_state") - if upload_state == "basic": + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "Basic": raise SABaseException( 0, "You cannot attach URLs in this type of project. 
Please attach it in an external storage project" ) - upload_state = "external" annotation_status = common.annotation_status_str_to_int(annotation_status) team_id, project_id = project["team_id"], project["id"] image_data = pd.read_csv(attachments) @@ -1144,7 +1168,13 @@ def upload_images_from_public_urls_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} - + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) finish_event = threading.Event() tqdm_thread = threading.Thread( target=_tqdm_download, @@ -1246,6 +1276,13 @@ def upload_images_from_google_cloud_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) cloud_client = storage.Client(project=google_project) bucket = cloud_client.get_bucket(bucket_name) image_blobs = bucket.list_blobs(prefix=folder_path) @@ -1322,6 +1359,13 @@ def upload_images_from_azure_blob_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) connect_key = os.getenv('AZURE_STORAGE_CONNECTION_STRING') blob_service_client = BlobServiceClient.from_connection_string(connect_key) container_client = 
blob_service_client.get_container_client(container_name) diff --git a/superannotate/ml/ml_funcs.py b/superannotate/ml/ml_funcs.py index d7678e43c..484d37957 100644 --- a/superannotate/ml/ml_funcs.py +++ b/superannotate/ml/ml_funcs.py @@ -17,7 +17,7 @@ from ..api import API from ..common import ( _AVAILABLE_SEGMENTATION_MODELS, model_training_status_int_to_str, - project_type_str_to_int + project_type_str_to_int, upload_state_int_to_str ) from ..db.images import get_image_metadata, search_images from ..exceptions import SABaseException @@ -56,7 +56,12 @@ def run_prediction(project, images_list, model): f"Specified project has type {project['type']}, and does not correspond to the type of provided model" ) project_id = project["id"] - + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) images_metadata = get_image_metadata(project, images_list) if isinstance(images_metadata, dict): images_metadata = [images_metadata] @@ -126,6 +131,13 @@ def run_segmentation(project, images_list, model): ) raise SABaseException(0, "Model Does not exist") + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) + images_metadata = get_image_metadata(project, images_list) images_metadata.sort(key=lambda x: x["name"]) @@ -216,6 +228,13 @@ def run_training( raise SABaseException(0, "Invalid project types") project_type = types.pop() + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) + base_model = base_model.get(project_type, None) if not base_model: logger.error( From 
8a372c7f56705f469b71b591197ea7434c31c1d2 Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Wed, 24 Mar 2021 11:27:53 +0400 Subject: [PATCH 04/22] add num to send limit --- superannotate/db/project_images.py | 55 ++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 3469a2d6d..6b1b24877 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -268,9 +268,25 @@ def delete_images(project, image_names): :param image_names: to be deleted images' names. If None, all the images will be deleted :type image_names: list of strs """ + project, project_folder = get_project_and_folder_metadata(project) + params = {"team_id": project["team_id"], "project_id": project["id"]} if image_names is None: - images = search_images(project, return_metadata=True) + if project_folder is not None: + data = {"folder_id": project_folder["id"]} + else: + data = {"folder_id": get_project_root_folder_id(project)} + response = _api.send_request( + req_type='PUT', + path='/image/delete/images', + params=params, + json_req=data + ) + if not response.ok: + raise SABaseException( + response.status_code, "Couldn't delete images " + response.text + ) else: + NUM_TO_SEND = 1000 if not isinstance(image_names, list): raise SABaseException( 0, "image_names should be a list of strs or None" @@ -278,21 +294,30 @@ def delete_images(project, image_names): images = get_image_metadata( project, image_names, return_dict_on_single_output=False ) - project, _ = get_project_and_folder_metadata(project) - - params = {"team_id": project["team_id"], "project_id": project["id"]} - data = {"image_ids": [image["id"] for image in images]} - response = _api.send_request( - req_type='PUT', - path='/image/delete/images', - params=params, - json_req=data + for start_index in range(0, len(image_names), NUM_TO_SEND): + data = { + "image_ids": + [ + image["id"] + for image 
in images[start_index:start_index + + NUM_TO_SEND] + ] + } + response = _api.send_request( + req_type='PUT', + path='/image/delete/images', + params=params, + json_req=data + ) + if not response.ok: + raise SABaseException( + response.status_code, + "Couldn't delete images " + response.text + ) + logger.info( + "Images deleted in project %s%s", project["name"], + "" if project_folder is None else "/" + project_folder["name"] ) - if not response.ok: - raise SABaseException( - response.status_code, "Couldn't delete images " + response.text - ) - logger.info("Images %s deleted in project %s", image_names, project["name"]) def move_images( From b98fd063cac08300f6b74ae4d338d1160e4a3a7f Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Wed, 24 Mar 2021 16:25:08 +0400 Subject: [PATCH 05/22] Send 1000 images to delete --- superannotate/db/project_images.py | 53 +++++++++++------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 6b1b24877..0aeb471cf 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -268,13 +268,29 @@ def delete_images(project, image_names): :param image_names: to be deleted images' names. 
If None, all the images will be deleted :type image_names: list of strs """ + NUM_TO_SEND = 1000 project, project_folder = get_project_and_folder_metadata(project) params = {"team_id": project["team_id"], "project_id": project["id"]} if image_names is None: - if project_folder is not None: - data = {"folder_id": project_folder["id"]} - else: - data = {"folder_id": get_project_root_folder_id(project)} + images = search_images((project, project_folder), return_metadata=True) + else: + if not isinstance(image_names, list): + raise SABaseException( + 0, "image_names should be a list of strs or None" + ) + images = get_image_metadata( + (project, project_folder), + image_names, + return_dict_on_single_output=False + ) + for start_index in range(0, len(images), NUM_TO_SEND): + data = { + "image_ids": + [ + image["id"] + for image in images[start_index:start_index + NUM_TO_SEND] + ] + } response = _api.send_request( req_type='PUT', path='/image/delete/images', @@ -285,35 +301,6 @@ def delete_images(project, image_names): raise SABaseException( response.status_code, "Couldn't delete images " + response.text ) - else: - NUM_TO_SEND = 1000 - if not isinstance(image_names, list): - raise SABaseException( - 0, "image_names should be a list of strs or None" - ) - images = get_image_metadata( - project, image_names, return_dict_on_single_output=False - ) - for start_index in range(0, len(image_names), NUM_TO_SEND): - data = { - "image_ids": - [ - image["id"] - for image in images[start_index:start_index + - NUM_TO_SEND] - ] - } - response = _api.send_request( - req_type='PUT', - path='/image/delete/images', - params=params, - json_req=data - ) - if not response.ok: - raise SABaseException( - response.status_code, - "Couldn't delete images " + response.text - ) logger.info( "Images deleted in project %s%s", project["name"], "" if project_folder is None else "/" + project_folder["name"] From 299af4bcc074e169264199ee32e3b56d5ff001c0 Mon Sep 17 00:00:00 2001 From: Hovnatan 
Karapetyan Date: Wed, 24 Mar 2021 16:32:48 +0400 Subject: [PATCH 06/22] Version bump --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 703970876..72aa75832 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.0" +__version__ = "4.1.1" From ed537d5b50fe4c853c9efe3b637f52a560278cd5 Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Wed, 24 Mar 2021 16:33:35 +0400 Subject: [PATCH 07/22] Version bump --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 72aa75832..8ab5f7d37 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1" +__version__ = "4.1.1b1" From 5cdb71484ae87673de3d73321125968a9d145f1e Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Thu, 25 Mar 2021 15:20:00 +0400 Subject: [PATCH 08/22] Fix fuse creation --- superannotate/db/images.py | 17 +++++++++++++---- superannotate/version.py | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/superannotate/db/images.py b/superannotate/db/images.py index ee795f773..e006ab003 100644 --- a/superannotate/db/images.py +++ b/superannotate/db/images.py @@ -621,6 +621,11 @@ def download_image( :return: paths of downloaded image and annotations if included :rtype: tuple """ + if (include_fuse or include_overlay) and not include_annotations: + raise SABaseException( + 0, + "To download fuse or overlay image need to set include_annotations=True in download_image" + ) if not Path(local_dir_path).is_dir(): raise SABaseException( 0, f"local_dir_path {local_dir_path} is not an existing directory" @@ -818,7 +823,7 @@ def _get_image_pre_or_annotations(project, image_name, pre): fill_class_and_attribute_names(res_json, annotation_classes_dict) result = { f"{pre}annotation_json": - response.json(), + res_json, 
f"{pre}annotation_json_filename": common.get_annotation_json_name(image_name, project_type) } @@ -1041,12 +1046,11 @@ def create_fuse_image( (image_size[1], image_size[0], 4), [0, 0, 0, 255], np.uint8 ) fi_ovl[:, :, :3] = np.array(pil_image) + fi_pil_ovl = Image.fromarray(fi_ovl) + draw_ovl = ImageDraw.Draw(fi_pil_ovl) if project_type == "Vector": fi_pil = Image.fromarray(fi) draw = ImageDraw.Draw(fi_pil) - if output_overlay: - fi_pil_ovl = Image.fromarray(fi_ovl) - draw_ovl = ImageDraw.Draw(fi_pil_ovl) for annotation in annotation_json["instances"]: if "className" not in annotation: continue @@ -1159,6 +1163,11 @@ def create_fuse_image( temp_mask = np.alltrue(annotation_mask == part_color, axis=2) fi[temp_mask] = fill_color fi_pil = Image.fromarray(fi) + alpha = 0.5 # transparency measure + if output_overlay: + fi_pil_ovl = Image.fromarray( + cv2.addWeighted(fi, alpha, fi_ovl, 1 - alpha, 0) + ) if in_memory: if output_overlay: diff --git a/superannotate/version.py b/superannotate/version.py index 8ab5f7d37..8994fb500 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1b1" +__version__ = "4.1.1b2" From 74bf115098287c7aaacdd769b3594bbaee15e7bb Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Fri, 26 Mar 2021 10:53:04 +0400 Subject: [PATCH 09/22] Code cleanup --- superannotate/db/project_images.py | 82 +++++++++++++----------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 0aeb471cf..0c0904f5a 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -179,37 +179,11 @@ def _copy_images( res['skipped'] += response.json()['skipped'] for image_name in image_names: - if include_annotations: - annotations = get_image_annotations( - (source_project, source_project_folder), image_name - ) - if annotations["annotation_json"] is not None: - if "annotation_mask" in annotations: - if 
annotations["annotation_mask"] is not None: - upload_image_annotations( - (destination_project, destination_project_folder), - image_name, annotations["annotation_json"], - annotations["annotation_mask"] - ) - else: - upload_image_annotations( - (destination_project, destination_project_folder), - image_name, annotations["annotation_json"] - ) - if copy_annotation_status or copy_pin: - img_metadata = get_image_metadata( - (source_project, source_project_folder), image_name - ) - if copy_annotation_status: - set_image_annotation_status( - (destination_project, destination_project_folder), - image_name, img_metadata["annotation_status"] - ) - if copy_pin: - pin_image( - (destination_project, destination_project_folder), - image_name, img_metadata["is_pinned"] - ) + _copy_metadata( + source_project, source_project_folder, image_name, + destination_project, destination_project_folder, image_name, + include_annotations, copy_annotation_status, copy_pin + ) return res @@ -385,9 +359,6 @@ def copy_image( destination_project, destination_project_folder = get_project_and_folder_metadata( destination_project ) - img_metadata = get_image_metadata( - (source_project, source_project_folder), image_name - ) img_b = get_image_bytes((source_project, source_project_folder), image_name) new_name = image_name extension = Path(image_name).suffix @@ -416,6 +387,22 @@ def copy_image( upload_image_to_project( (destination_project, destination_project_folder), img_b, new_name ) + _copy_metadata( + source_project, source_project_folder, image_name, destination_project, + destination_project_folder, new_name, include_annotations, + copy_annotation_status, copy_pin + ) + logger.info( + "Copied image %s/%s to %s/%s.", source_project["name"], image_name, + destination_project["name"], new_name + ) + + +def _copy_metadata( + source_project, source_project_folder, image_name, destination_project, + destination_project_folder, new_name, include_annotations, + copy_annotation_status, copy_pin +): 
if include_annotations: annotations = get_image_annotations( (source_project, source_project_folder), image_name @@ -433,21 +420,20 @@ def copy_image( (destination_project, destination_project_folder), new_name, annotations["annotation_json"] ) - if copy_annotation_status: - set_image_annotation_status( - (destination_project, destination_project_folder), new_name, - img_metadata["annotation_status"] - ) - if copy_pin: - pin_image( - (destination_project, destination_project_folder), new_name, - img_metadata["is_pinned"] + if copy_annotation_status or copy_pin: + img_metadata = get_image_metadata( + (source_project, source_project_folder), image_name ) - - logger.info( - "Copied image %s/%s to %s/%s.", source_project["name"], image_name, - destination_project["name"], new_name - ) + if copy_annotation_status: + set_image_annotation_status( + (destination_project, destination_project_folder), new_name, + img_metadata["annotation_status"] + ) + if copy_pin: + pin_image( + (destination_project, destination_project_folder), new_name, + img_metadata["is_pinned"] + ) def move_image( From 2c75dfb7a205be5fc2232dcdb7f7471c8e0aa1e2 Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Fri, 26 Mar 2021 10:55:12 +0400 Subject: [PATCH 10/22] Code cleanup --- superannotate/db/project_images.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 0c0904f5a..24cfb1e63 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -179,7 +179,7 @@ def _copy_images( res['skipped'] += response.json()['skipped'] for image_name in image_names: - _copy_metadata( + _copy_annotations_and_metadata( source_project, source_project_folder, image_name, destination_project, destination_project_folder, image_name, include_annotations, copy_annotation_status, copy_pin @@ -387,7 +387,7 @@ def copy_image( upload_image_to_project( (destination_project, 
destination_project_folder), img_b, new_name ) - _copy_metadata( + _copy_annotations_and_metadata( source_project, source_project_folder, image_name, destination_project, destination_project_folder, new_name, include_annotations, copy_annotation_status, copy_pin @@ -398,7 +398,7 @@ def copy_image( ) -def _copy_metadata( +def _copy_annotations_and_metadata( source_project, source_project_folder, image_name, destination_project, destination_project_folder, new_name, include_annotations, copy_annotation_status, copy_pin From cad92873e68bfab0185cf59d644c1fa124234740 Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Fri, 26 Mar 2021 11:00:46 +0400 Subject: [PATCH 11/22] Update makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3df8de506..dfe67dc16 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ PYTESTS=pytest COVERAGE=coverage tests: check_formatting docs - $(PYTESTS) -n auto --full-trace --verbose tests + $(PYTESTS) -n auto --full-trace tests stress-tests: SA_STRESS_TESTS=1 stress-tests: tests From 15fed8ca5b013a442875b6aa311e22fc17108fdd Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Fri, 26 Mar 2021 11:04:32 +0400 Subject: [PATCH 12/22] Version bump --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 8994fb500..15b9d866e 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1b2" +__version__ = "4.1.1b3" From bf5045c034e5789c57c592f388882a4e01434b1f Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Fri, 26 Mar 2021 14:49:32 +0400 Subject: [PATCH 13/22] Docs update --- docs/source/tutorial.sdk.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/tutorial.sdk.rst b/docs/source/tutorial.sdk.rst index 4f06ec5a4..f34836b51 100644 --- a/docs/source/tutorial.sdk.rst +++ b/docs/source/tutorial.sdk.rst @@ -98,6 +98,10 @@ To 
create a new "Vector" project with name "Example Project 1" and description sa.create_project(project, "test", "Vector") +.. warning:: + + In general, SDK functions are not thread-safe. + Creating a folder in a project ______________________________ From 35bc63f739e58c7df1ad8886e80774fed7059e33 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 26 Mar 2021 15:51:29 +0400 Subject: [PATCH 14/22] handling duplicates in csv in attach URLs, disabling fuse generation in exports for projects with attached URLs --- superannotate/db/exports.py | 7 ++++++- superannotate/db/projects.py | 7 ++++++- tests/attach_urls.csv | 3 ++- tests/test_attach_image_urls.py | 4 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/superannotate/db/exports.py b/superannotate/db/exports.py index dc0a4f74c..dabbe1967 100644 --- a/superannotate/db/exports.py +++ b/superannotate/db/exports.py @@ -12,7 +12,7 @@ from tqdm import tqdm from ..api import API -from ..common import annotation_status_str_to_int +from ..common import annotation_status_str_to_int, upload_state_int_to_str from ..exceptions import ( SABaseException, SAExistingExportNameException, SANonExistingExportNameException @@ -123,6 +123,11 @@ def prepare_export( """ if not isinstance(project, dict): project = get_project_metadata_bare(project) + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External" and include_fuse == True: + logger.info( + "Include fuse functionality is not supported for projects containing images attached with URLs" + ) team_id, project_id = project["team_id"], project["id"] if annotation_statuses is None: annotation_statuses = [2, 3, 4, 5] diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index d69172d7c..be91fce08 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -973,7 +973,11 @@ def attach_image_urls_to_project( annotation_status = common.annotation_status_str_to_int(annotation_status) team_id, project_id 
= project["team_id"], project["id"] image_data = pd.read_csv(attachments) + image_data = image_data[~image_data["url"].isnull()] existing_names = image_data[~image_data["name"].isnull()] + duplicate_idx_csv = existing_names.duplicated(subset="name", keep="first") + duplicate_images = existing_names[duplicate_idx_csv]["name"].tolist() + existing_names = existing_names[~duplicate_idx_csv] existing_images = search_images((project, project_folder)) duplicate_idx = [] for ind, _ in image_data[image_data["name"].isnull()].iterrows(): @@ -983,10 +987,11 @@ def attach_image_urls_to_project( image_data.at[ind, "name"] = name_try existing_images.append(name_try) break + image_data.drop_duplicates(subset="name", keep="first", inplace=True) for ind, row in existing_names.iterrows(): if row["name"] in existing_images: duplicate_idx.append(ind) - duplicate_images = image_data.loc[duplicate_idx]["name"].tolist() + duplicate_images.extend(image_data.loc[duplicate_idx]["name"].tolist()) image_data.drop(labels=duplicate_idx, inplace=True) if len(duplicate_images) != 0: logger.warning( diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv index 354f3465a..702c509dd 100644 --- a/tests/attach_urls.csv +++ b/tests/attach_urls.csv @@ -6,4 +6,5 @@ https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm - +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv +,fjvnvjsvnsdjcndsjcndjcs diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py index 82be922fd..4d417990d 100644 --- a/tests/test_attach_image_urls.py +++ b/tests/test_attach_image_urls.py @@ -21,7 +21,7 @@ def test_attach_image_urls(): 
assert len(uploaded) == 7 assert len(could_not_upload) == 0 - assert len(existing_images) == 0 + assert len(existing_images) == 1 uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( project, PATH_TO_URLS @@ -29,4 +29,4 @@ def test_attach_image_urls(): assert len(uploaded) == 2 assert len(could_not_upload) == 0 - assert len(existing_images) == 5 + assert len(existing_images) == 6 From 49a083a6e7c6eaa5da9977507590e65f36a333a9 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 26 Mar 2021 15:56:50 +0400 Subject: [PATCH 15/22] fixed docstring to include dupe images list on return --- superannotate/db/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index be91fce08..5713414df 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -956,7 +956,7 @@ def attach_image_urls_to_project( :param annotation_status: value to set the annotation statuses of the linked images: NotStarted InProgress QualityCheck Returned Completed Skipped :type annotation_status: str - :return: list of linked image urls, list of unreachable image urls + :return: list of linked image names, list of failed image names, list of duplicate image names :rtype: tuple """ From d32cb8d120c29267fd0a76467f862112e776fadc Mon Sep 17 00:00:00 2001 From: rcmanga <62347044+rcmanga@users.noreply.github.com> Date: Mon, 29 Mar 2021 10:55:57 +0400 Subject: [PATCH 16/22] Update version.py --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 15b9d866e..782e92bc6 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1b3" +__version__ = "4.1.1b4" From 27b6720bbb085c6a0c8661f80e3f7d47ce3244d8 Mon Sep 17 00:00:00 2001 From: Erik Harutyunyan Date: Tue, 30 Mar 2021 15:31:22 +0400 Subject: [PATCH 17/22] fixed bug in run_training disabling --- 
superannotate/ml/ml_funcs.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/superannotate/ml/ml_funcs.py b/superannotate/ml/ml_funcs.py index 484d37957..75b34159f 100644 --- a/superannotate/ml/ml_funcs.py +++ b/superannotate/ml/ml_funcs.py @@ -228,12 +228,15 @@ def run_training( raise SABaseException(0, "Invalid project types") project_type = types.pop() - upload_state = upload_state_int_to_str(project.get("upload_state")) - if upload_state == "External": - raise SABaseException( - 0, - "The function does not support projects containing images attached with URLs" + for single_project in project: + upload_state = upload_state_int_to_str( + single_project.get("upload_state") ) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) base_model = base_model.get(project_type, None) if not base_model: From 5bf077c8c1882acef4d7bdc219219ab9d133d120 Mon Sep 17 00:00:00 2001 From: Erik Harutyunyan Date: Tue, 30 Mar 2021 15:45:13 +0400 Subject: [PATCH 18/22] added URL attaching to cli docs --- docs/source/cli.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/source/cli.rst b/docs/source/cli.rst index e8ee357aa..ac8cd5eb2 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -79,6 +79,19 @@ to look for. If the argument is not given then value *jpg,jpeg,png,tif,tiff,webp ---------- +.. _ref_attach_image_urls: + +Attaching image URLs +~~~~~~~~~~~~~~~~~~~~ + +To attach image URLs to project use: + +.. code-block:: bash + + superannotatecli attach-image-urls --project <project_name> --attachments <csv_path> [--annotation_status <annotation_status>] + +---------- + ..
_ref_upload_videos: Uploading videos From 798821bcf0059a10594b0f9467b1e5ab1f78bb14 Mon Sep 17 00:00:00 2001 From: rcmanga <62347044+rcmanga@users.noreply.github.com> Date: Tue, 30 Mar 2021 15:59:19 +0400 Subject: [PATCH 19/22] Update version.py --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 782e92bc6..32bdee404 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1b4" +__version__ = "4.1.1b5" From 5cd1775a9381b7c7b1bc9701c4aa50261669ec22 Mon Sep 17 00:00:00 2001 From: Erik Harutyunyan Date: Wed, 31 Mar 2021 10:14:27 +0400 Subject: [PATCH 20/22] prepare_export enhancement for projects with attached URLs --- superannotate/db/exports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/superannotate/db/exports.py b/superannotate/db/exports.py index dabbe1967..24d0dc057 100644 --- a/superannotate/db/exports.py +++ b/superannotate/db/exports.py @@ -128,6 +128,7 @@ def prepare_export( logger.info( "Include fuse functionality is not supported for projects containing images attached with URLs" ) + include_fuse = False team_id, project_id = project["team_id"], project["id"] if annotation_statuses is None: annotation_statuses = [2, 3, 4, 5] From 1ec95ee66870173dbbba96ed1743f06cf0df0a6e Mon Sep 17 00:00:00 2001 From: Hovnatan Karapetyan Date: Wed, 31 Mar 2021 12:10:40 +0400 Subject: [PATCH 21/22] Download files in chunks --- superannotate/db/exports.py | 45 +++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/superannotate/db/exports.py b/superannotate/db/exports.py index 24d0dc057..2282e94fd 100644 --- a/superannotate/db/exports.py +++ b/superannotate/db/exports.py @@ -6,6 +6,7 @@ import zipfile from datetime import datetime from pathlib import Path +import shutil import boto3 import requests @@ -209,6 +210,15 @@ def __upload_files_to_aws_thread( already_uploaded[i] = True 
+def _download_file(url, local_filename): + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(local_filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + return local_filename + + def download_export( project, export, folder_path, extract_zip_contents=True, to_s3_bucket=None ): @@ -243,25 +253,26 @@ def download_export( break filename = Path(res['path']).name - r = requests.get(res['download'], allow_redirects=True) - if to_s3_bucket is None: - filepath = Path(folder_path) / filename - open(filepath, 'wb').write(r.content) - if extract_zip_contents: - with zipfile.ZipFile(filepath, 'r') as f: - f.extractall(folder_path) - Path.unlink(filepath) - logger.info("Extracted %s to folder %s", filepath, folder_path) - else: - logger.info("Downloaded export ID %s to %s", res['id'], filepath) - else: - with tempfile.TemporaryDirectory() as tmpdirname: - filepath = Path(tmpdirname) / filename - open(filepath, 'wb').write(r.content) + with tempfile.TemporaryDirectory() as tmpdirname: + temp_filepath = Path(tmpdirname) / filename + _download_file(res['download'], temp_filepath) + if to_s3_bucket is None: + filepath = Path(folder_path) / filename + shutil.copyfile(temp_filepath, filepath) if extract_zip_contents: with zipfile.ZipFile(filepath, 'r') as f: - f.extractall(tmpdirname) + f.extractall(folder_path) Path.unlink(filepath) + logger.info("Extracted %s to folder %s", filepath, folder_path) + else: + logger.info( + "Downloaded export ID %s to %s", res['id'], filepath + ) + else: + if extract_zip_contents: + with zipfile.ZipFile(temp_filepath, 'r') as f: + f.extractall(tmpdirname) + Path.unlink(temp_filepath) files_to_upload = [] for file in Path(tmpdirname).rglob("*.*"): if not file.is_file(): @@ -296,4 +307,4 @@ def download_export( t.join() finish_event.set() tqdm_thread.join() - logger.info("Exported to AWS %s/%s", to_s3_bucket, folder_path) + logger.info("Exported to AWS %s/%s", to_s3_bucket, folder_path) From 
8d63a50ddadd5be679a69fb84a47304c6bd06bb5 Mon Sep 17 00:00:00 2001 From: rcmanga <62347044+rcmanga@users.noreply.github.com> Date: Wed, 31 Mar 2021 18:55:23 +0400 Subject: [PATCH 22/22] Update version.py --- superannotate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/version.py b/superannotate/version.py index 32bdee404..7dd313ec8 100644 --- a/superannotate/version.py +++ b/superannotate/version.py @@ -1 +1 @@ -__version__ = "4.1.1b5" +__version__ = "4.1.1b6"