From 3696b47b6eca0fdca4c0366e08b264410007cf20 Mon Sep 17 00:00:00 2001 From: Erik Date: Wed, 17 Mar 2021 16:54:11 +0400 Subject: [PATCH 1/5] public links implemented --- docs/source/superannotate.sdk.rst | 1 + superannotate/__init__.py | 4 +- superannotate/__main__.py | 27 ++++ superannotate/db/projects.py | 220 ++++++++++++++++++++++++++++++ tests/attach_urls.csv | 10 ++ tests/test_attach_image_urls.py | 32 +++++ 6 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 tests/attach_urls.csv create mode 100644 tests/test_attach_image_urls.py diff --git a/docs/source/superannotate.sdk.rst b/docs/source/superannotate.sdk.rst index 3a71627bf..3459a48d9 100644 --- a/docs/source/superannotate.sdk.rst +++ b/docs/source/superannotate.sdk.rst @@ -37,6 +37,7 @@ ________ .. autofunction:: superannotate.delete_folders .. autofunction:: superannotate.rename_folder .. autofunction:: superannotate.upload_images_to_project +.. autofunction:: superannotate.attach_image_urls_to_project .. autofunction:: superannotate.upload_images_from_public_urls_to_project .. autofunction:: superannotate.upload_images_from_google_cloud_to_project .. autofunction:: superannotate.upload_images_from_azure_blob_to_project diff --git a/superannotate/__init__.py b/superannotate/__init__.py index 7be9ad8b2..d5483db84 100644 --- a/superannotate/__init__.py +++ b/superannotate/__init__.py @@ -72,8 +72,8 @@ def consensus(*args, **kwargs): upload_images_from_google_cloud_to_project, upload_images_from_public_urls_to_project, upload_images_from_s3_bucket_to_project, upload_images_to_project, - upload_preannotations_from_folder_to_project, upload_video_to_project, - upload_videos_from_folder_to_project + attach_image_urls_to_project, upload_preannotations_from_folder_to_project, + upload_video_to_project, upload_videos_from_folder_to_project ) from .db.search_projects import search_projects from .db.teams import ( diff --git a/superannotate/__main__.py b/superannotate/__main__.py index 3d3832d40..9d46d76a7 100644 --- a/superannotate/__main__.py +++ b/superannotate/__main__.py @@ -62,6 +62,8 @@ def main(): create_folder(command, further_args) elif command == "upload-images": image_upload(command, further_args) + elif command == "attach-image-urls": + attach_image_urls(command, further_args) elif command == "upload-videos": video_upload(command, further_args) elif command in ["upload-preannotations", "upload-annotations"]: @@ -288,6 +290,31 @@ def image_upload(command_name, args): ) +def attach_image_urls(command_name, args): + parser = argparse.ArgumentParser(prog=_CLI_COMMAND + " " + command_name) + parser.add_argument( + '--project', required=True, help='Project name to upload' + ) + parser.add_argument( + '--attachments', + required=True, + help='path to csv file on attachments metadata' + ) + parser.add_argument( + '--annotation_status', + required=False, + default="NotStarted", + help= + 'Set images\' annotation statuses after upload. Default is NotStarted' + ) + args = parser.parse_args(args) + sa.attach_image_urls_to_project( + project=args.project, + attachments=args.attachments, + annotation_status=args.annotation_status + ) + + def export_project(command_name, args): parser = argparse.ArgumentParser(prog=_CLI_COMMAND + " " + command_name) parser.add_argument( diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index a26708486..fa0aa5b34 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -9,6 +9,7 @@ import threading import time import uuid +import pandas as pd from os.path import basename from pathlib import Path from urllib.parse import urlparse @@ -895,6 +896,225 @@ def _tqdm_download( break +def attach_image_urls_to_project( + project, attachments, annotation_status="NotStarted" +): + """Link images on external storage to SuperAnnotate. + + :param project: project name or project folder path + :type project: str or dict + :param attachments: path to csv file on attachments metadata + :type attachments: Pathlike (str or Path) + :param annotation_status: value to set the annotation statuses of the linked images: NotStarted InProgress QualityCheck Returned Completed Skipped + :type annotation_status: str + + :return: list of linked image urls, list of unreachable image urls + :rtype: tuple + """ + + project, project_folder = get_project_and_folder_metadata(project) + project_folder_name = project["name"] + ( + f'/{project_folder["name"]}' if project_folder else "" + ) + upload_state = project.get("upload_state") + if upload_state == "basic": + raise SABaseException( + 0, + "You cannot attach URLs in this type of project. Please attach it in an external storage project" + ) + upload_state = "external" + annotation_status = common.annotation_status_str_to_int(annotation_status) + team_id, project_id = project["team_id"], project["id"] + image_data = pd.read_csv(attachments) + existing_names = image_data[~image_data["name"].isnull()] + existing_images = search_images((project, project_folder)) + duplicate_idx = [] + for ind, _ in image_data[image_data["name"].isnull()].iterrows(): + while True: + name_try = str(uuid.uuid4()) + if name_try not in existing_images: + image_data.at[ind, "name"] = name_try + existing_images.append(name_try) + break + for ind, row in existing_names.iterrows(): + if row["name"] in existing_images: + duplicate_idx.append(ind) + duplicate_images = image_data.loc[duplicate_idx]["name"].tolist() + image_data.drop(labels=duplicate_idx, inplace=True) + if len(duplicate_images) != 0: + logger.warning( + "%s already existing images found that won't be uploaded.", + len(duplicate_images) + ) + image_data = pd.DataFrame(image_data, columns=["name", "url"]) + img_names_urls = image_data.values.tolist() + len_img_names_urls = len(img_names_urls) + logger.info( + "Uploading %s images to project %s.", len_img_names_urls, + project_folder_name + ) + if len_img_names_urls == 0: + return ([], [], duplicate_images) + params = {'team_id': team_id} + uploaded = [[] for _ in range(_NUM_THREADS)] + tried_upload = [[] for _ in range(_NUM_THREADS)] + couldnt_upload = [[] for _ in range(_NUM_THREADS)] + finish_event = threading.Event() + chunksize = int(math.ceil(len_img_names_urls / _NUM_THREADS)) + response = _api.send_request( + req_type='GET', + path=f'/project/{project_id}/sdkImageUploadToken', + params=params + ) + if not response.ok: + raise SABaseException( + response.status_code, "Couldn't get upload token " + response.text + ) + if project_folder is not None: + project_folder_id = project_folder["id"] + else: + project_folder_id = None + res = response.json() + prefix = res['filePath'] + tqdm_thread = threading.Thread( + target=__tqdm_thread_image_upload, + args=(len_img_names_urls, tried_upload, finish_event), + daemon=True + ) + tqdm_thread.start() + + threads = [] + for thread_id in range(_NUM_THREADS): + t = threading.Thread( + target=__attach_image_urls_to_project_thread, + args=( + res, img_names_urls, project, annotation_status, prefix, + thread_id, chunksize, couldnt_upload, uploaded, tried_upload, + project_folder_id + ), + daemon=True + ) + threads.append(t) + t.start() + for t in threads: + t.join() + finish_event.set() + tqdm_thread.join() + list_of_not_uploaded = [] + for couldnt_upload_thread in couldnt_upload: + for f in couldnt_upload_thread: + list_of_not_uploaded.append(str(f)) + list_of_uploaded = [] + for upload_thread in uploaded: + for f in upload_thread: + list_of_uploaded.append(str(f)) + + return (list_of_uploaded, list_of_not_uploaded, duplicate_images) + + +def __attach_image_urls_to_project_thread( + res, img_names_urls, project, annotation_status, prefix, thread_id, + chunksize, couldnt_upload, uploaded, tried_upload, project_folder_id +): + len_img_paths = len(img_names_urls) + start_index = thread_id * chunksize + end_index = start_index + chunksize + if start_index >= len_img_paths: + return + s3_session = boto3.Session( + aws_access_key_id=res['accessKeyId'], + aws_secret_access_key=res['secretAccessKey'], + aws_session_token=res['sessionToken'] + ) + s3_resource = s3_session.resource('s3') + bucket = s3_resource.Bucket(res["bucket"]) + prefix = res['filePath'] + uploaded_imgs = [] + uploaded_imgs_info = ([], []) + for i in range(start_index, end_index): + if i >= len_img_paths: + break + name, url = img_names_urls[i] + tried_upload[thread_id].append(name) + img_name_hash = str(uuid.uuid4()) + Path(name).suffix + key = prefix + img_name_hash + try: + bucket.put_object( + Body=json.dumps(create_empty_annotation((None, None), name)), + Key=key + ".json" + ) + except Exception as e: + logger.warning("Unable to upload image %s. %s", name, e) + couldnt_upload[thread_id].append(name) + continue + else: + uploaded_imgs.append(name) + uploaded_imgs_info[0].append(img_names_urls[i]) + uploaded_imgs_info[1].append(key) + if len(uploaded_imgs) >= 100: + try: + __create_image_url( + uploaded_imgs_info[0], uploaded_imgs_info[1], project, + annotation_status, project_folder_id + ) + except SABaseException as e: + couldnt_upload[thread_id] += uploaded_imgs + logger.warning(e) + else: + uploaded[thread_id] += uploaded_imgs + uploaded_imgs = [] + uploaded_imgs_info = ([], []) + try: + __create_image_url( + uploaded_imgs_info[0], uploaded_imgs_info[1], project, + annotation_status, project_folder_id + ) + except SABaseException as e: + couldnt_upload[thread_id] += uploaded_imgs + logger.warning(e) + else: + uploaded[thread_id] += uploaded_imgs + + +def __create_image_url( + img_names_urls, remote_paths, project, annotation_status, project_folder_id +): + if len(remote_paths) == 0: + return + team_id, project_id = project["team_id"], project["id"] + data = { + "project_id": str(project_id), + "team_id": str(team_id), + "images": [], + "annotation_status": annotation_status, + "meta": {}, + "upload_state": 2 + } + if project_folder_id is not None: + data["folder_id"] = project_folder_id + for img_name_url, remote_path in zip(img_names_urls, remote_paths): + data["images"].append( + { + "name": img_name_url[0], + "path": img_name_url[1] + } + ) + data["meta"][img_name_url[0]] = { + "width": None, + "height": None, + "annotation_json_path": remote_path + ".json", + "annotation_bluemap_path": remote_path + ".png" + } + + response = _api.send_request( + req_type='POST', path='/image/ext-create', json_req=data + ) + if not response.ok: + raise SABaseException( + response.status_code, "Couldn't ext-create image " + response.text + ) + + def upload_images_from_public_urls_to_project( project, img_urls, diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv new file mode 100644 index 000000000..0a7577d28 --- /dev/null +++ b/tests/attach_urls.csv @@ -0,0 +1,10 @@ +url,name +https://drive.google.com/uc?export=download&id=1vwfCpTzcjxoEA4hhDxqapPOVvLVeS7ZS,6022a74d5384c50017c366b3 +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366ad +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU, +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm +https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 + diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py new file mode 100644 index 000000000..d8cc122f9 --- /dev/null +++ b/tests/test_attach_image_urls.py @@ -0,0 +1,32 @@ +from pathlib import Path + +import pytest + +import superannotate as sa + +PROJECT_NAME_VECTOR = "test attach image urls" +PATH_TO_URLS = Path("./tests/attach_urls.csv") + + +def test_attach_image_urls(): + projects = sa.search_projects(PROJECT_NAME_VECTOR, return_metadata=True) + for project in projects: + sa.delete_project(project) + + project = sa.create_project(PROJECT_NAME_VECTOR, "test", "Vector") + + uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( + project, PATH_TO_URLS + ) + + assert len(uploaded) == 8 + assert len(could_not_upload) == 0 + assert len(existing_images) == 0 + + uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( + project, PATH_TO_URLS + ) + + assert len(uploaded) == 2 + assert len(could_not_upload) == 0 + assert len(existing_images) == 6 From 2c6a94610d113069ebe29e7fc173d7bd1437415d Mon Sep 17 00:00:00 2001 From: Erik Date: Mon, 22 Mar 2021 11:40:12 +0400 Subject: [PATCH 2/5] refactored to avoid code duplication in attach image urls --- superannotate/common.py | 7 ++ superannotate/db/projects.py | 112 +++++++++++++++----------------- tests/attach_urls.csv | 1 - tests/test_attach_image_urls.py | 4 +- 4 files changed, 63 insertions(+), 61 deletions(-) diff --git a/superannotate/common.py b/superannotate/common.py index 6f3159f39..77961321b 100644 --- a/superannotate/common.py +++ b/superannotate/common.py @@ -29,6 +29,9 @@ "Completed": 5, "Skipped": 6 } + +_UPLOAD_STATES = {"Initial": 1, "Basic": 2, "External": 3} + _USER_ROLES = {"Admin": 2, "Annotator": 3, "QA": 4, "Customer": 5, "Viewer": 6} _AVAILABLE_SEGMENTATION_MODELS = ['autonomous', 'generic'] _MODEL_TRAINING_STATUSES = { @@ -118,6 +121,10 @@ def annotation_status_str_to_int(annotation_status): return _ANNOTATION_STATUSES[annotation_status] +def upload_state_str_to_int(upload_state): + return _UPLOAD_STATES[upload_state] + + def annotation_status_int_to_str(annotation_status): """Converts metadata annotation_status int value to a string diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index fa0aa5b34..fee6abad9 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -694,9 +694,14 @@ def __upload_images_to_aws_thread( if len(uploaded_imgs) >= 100: try: __create_image( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, prefix, uploaded_imgs_info[2], - project_folder_id + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="Basic" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -707,8 +712,14 @@ def __upload_images_to_aws_thread( uploaded_imgs_info = ([], [], []) try: __create_image( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, prefix, uploaded_imgs_info[2], project_folder_id + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="Basic" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -718,12 +729,19 @@ def __upload_images_to_aws_thread( def __create_image( - img_names, img_paths, project, annotation_status, remote_dir, sizes, - project_folder_id + img_names, + img_paths, + project, + annotation_status, + remote_dir, + sizes, + project_folder_id, + upload_state="Initial" ): if len(img_paths) == 0: return team_id, project_id = project["team_id"], project["id"] + upload_state_code = common.upload_state_str_to_int(upload_state) data = { "project_id": str(project_id), "team_id": str(team_id), @@ -732,14 +750,19 @@ def __create_image( "team_id": str(team_id), "images": [], "annotation_status": annotation_status, - "meta": {} + "meta": {}, + "upload_state": upload_state_code } if project_folder_id is not None: data["folder_id"] = project_folder_id - for img_name, img_path, size in zip(img_names, img_paths, sizes): + for img_data, img_path, size in zip(img_names, img_paths, sizes): img_name_uuid = Path(img_path).name remote_path = remote_dir + f"{img_name_uuid}" - data["images"].append({"name": img_name, "path": remote_path}) + if upload_state == "External": + img_name, img_url = img_data + else: + img_name, img_url = img_data, remote_path + data["images"].append({"name": img_name, "path": img_url}) data["meta"][img_name] = { "width": size[0], "height": size[1], @@ -982,7 +1005,6 @@ def attach_image_urls_to_project( daemon=True ) tqdm_thread.start() - threads = [] for thread_id in range(_NUM_THREADS): t = threading.Thread( @@ -1030,7 +1052,7 @@ def __attach_image_urls_to_project_thread( bucket = s3_resource.Bucket(res["bucket"]) prefix = res['filePath'] uploaded_imgs = [] - uploaded_imgs_info = ([], []) + uploaded_imgs_info = ([], [], []) for i in range(start_index, end_index): if i >= len_img_paths: break @@ -1051,11 +1073,18 @@ def __attach_image_urls_to_project_thread( uploaded_imgs.append(name) uploaded_imgs_info[0].append(img_names_urls[i]) uploaded_imgs_info[1].append(key) + uploaded_imgs_info[2].append((None, None)) if len(uploaded_imgs) >= 100: try: - __create_image_url( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, project_folder_id + __create_image( + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="External" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -1063,11 +1092,17 @@ def __attach_image_urls_to_project_thread( else: uploaded[thread_id] += uploaded_imgs uploaded_imgs = [] - uploaded_imgs_info = ([], []) + uploaded_imgs_info = ([], [], []) try: - __create_image_url( - uploaded_imgs_info[0], uploaded_imgs_info[1], project, - annotation_status, project_folder_id + __create_image( + uploaded_imgs_info[0], + uploaded_imgs_info[1], + project, + annotation_status, + prefix, + uploaded_imgs_info[2], + project_folder_id, + upload_state="External" ) except SABaseException as e: couldnt_upload[thread_id] += uploaded_imgs @@ -1076,45 +1111,6 @@ def __attach_image_urls_to_project_thread( uploaded[thread_id] += uploaded_imgs -def __create_image_url( - img_names_urls, remote_paths, project, annotation_status, project_folder_id -): - if len(remote_paths) == 0: - return - team_id, project_id = project["team_id"], project["id"] - data = { - "project_id": str(project_id), - "team_id": str(team_id), - "images": [], - "annotation_status": annotation_status, - "meta": {}, - "upload_state": 2 - } - if project_folder_id is not None: - data["folder_id"] = project_folder_id - for img_name_url, remote_path in zip(img_names_urls, remote_paths): - data["images"].append( - { - "name": img_name_url[0], - "path": img_name_url[1] - } - ) - data["meta"][img_name_url[0]] = { - "width": None, - "height": None, - "annotation_json_path": remote_path + ".json", - "annotation_bluemap_path": remote_path + ".png" - } - - response = _api.send_request( - req_type='POST', path='/image/ext-create', json_req=data - ) - if not response.ok: - raise SABaseException( - response.status_code, "Couldn't ext-create image " + response.text - ) - - def upload_images_from_public_urls_to_project( project, img_urls, diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv index 0a7577d28..354f3465a 100644 --- a/tests/attach_urls.csv +++ b/tests/attach_urls.csv @@ -6,5 +6,4 @@ https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm -https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9e8 diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py index d8cc122f9..82be922fd 100644 --- a/tests/test_attach_image_urls.py +++ b/tests/test_attach_image_urls.py @@ -19,7 +19,7 @@ def test_attach_image_urls(): project, PATH_TO_URLS ) - assert len(uploaded) == 8 + assert len(uploaded) == 7 assert len(could_not_upload) == 0 assert len(existing_images) == 0 @@ -29,4 +29,4 @@ def test_attach_image_urls(): assert len(uploaded) == 2 assert len(could_not_upload) == 0 - assert len(existing_images) == 6 + assert len(existing_images) == 5 From c4e142cbc205eae356a3ecce108cdf410ae5c790 Mon Sep 17 00:00:00 2001 From: Erik Date: Tue, 23 Mar 2021 10:19:12 +0400 Subject: [PATCH 3/5] Disabled some SDK functions for projects with URL attached images --- superannotate/common.py | 9 ++++-- superannotate/db/images.py | 13 ++++++++ superannotate/db/project_images.py | 14 ++++++-- superannotate/db/projects.py | 52 +++++++++++++++++++++++++++--- superannotate/ml/ml_funcs.py | 23 +++++++++++-- 5 files changed, 101 insertions(+), 10 deletions(-) diff --git a/superannotate/common.py b/superannotate/common.py index 77961321b..aa591d74e 100644 --- a/superannotate/common.py +++ b/superannotate/common.py @@ -30,7 +30,8 @@ "Skipped": 6 } -_UPLOAD_STATES = {"Initial": 1, "Basic": 2, "External": 3} +_UPLOAD_STATES_STR_TO_CODES = {"Initial": 1, "Basic": 2, "External": 3} +_UPLOAD_STATES_CODES_TO_STR = {1: "Initial", 2: "Basic", 3: "External"} _USER_ROLES = {"Admin": 2, "Annotator": 3, "QA": 4, "Customer": 5, "Viewer": 6} _AVAILABLE_SEGMENTATION_MODELS = ['autonomous', 'generic'] @@ -122,7 +123,11 @@ def annotation_status_str_to_int(annotation_status): def upload_state_str_to_int(upload_state): - return _UPLOAD_STATES[upload_state] + return _UPLOAD_STATES_STR_TO_CODES[upload_state] + + +def upload_state_int_to_str(upload_state): + return _UPLOAD_STATES_CODES_TO_STR[upload_state] def annotation_status_int_to_str(annotation_status): diff --git a/superannotate/db/images.py b/superannotate/db/images.py index ee795f773..df809434b 100644 --- a/superannotate/db/images.py +++ b/superannotate/db/images.py @@ -631,6 +631,12 @@ def download_image( ) project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) img = get_image_bytes( (project, project_folder), image_name, variant=variant ) @@ -698,6 +704,13 @@ def get_image_bytes(project, image_name, variant='original'): :return: io.BytesIO() of the image :rtype: io.BytesIO() """ + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if variant not in ["original", "lores"]: raise SABaseException( 0, "Image download variant should be either original or lores" diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py index 3469a2d6d..3b054cf06 100644 --- a/superannotate/db/project_images.py +++ b/superannotate/db/project_images.py @@ -51,6 +51,12 @@ def upload_image_to_project( :type image_quality_in_editor: str """ project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) annotation_status = common.annotation_status_str_to_int(annotation_status) if image_quality_in_editor is None: image_quality_in_editor = get_project_default_image_quality_in_editor( @@ -121,8 +127,12 @@ def upload_image_to_project( else: project_folder_id = None __create_image( - [img_name], [key], project, annotation_status, prefix, - [images_info_and_array[2]], project_folder_id + [img_name], [key], + project, + annotation_status, + prefix, [images_info_and_array[2]], + project_folder_id, + upload_state="Basic" ) while True: diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index fee6abad9..d69172d7c 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -237,6 +237,13 @@ def upload_video_to_project( :return: filenames of uploaded images :rtype: list of strs """ + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) logger.info("Uploading from video %s.", str(video_path)) rotate_code = None try: @@ -390,6 +397,12 @@ def upload_videos_from_folder_to_project( :rtype: tuple of list of strs """ project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if recursive_subfolders: logger.warning( "When using recursive subfolder parsing same name videos in different subfolders will overwrite each other." @@ -480,6 +493,12 @@ def upload_images_from_folder_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if recursive_subfolders: logger.info( "When using recursive subfolder parsing same name images in different subfolders will overwrite each other." @@ -812,6 +831,12 @@ def upload_images_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) if not isinstance(img_paths, list): raise SABaseException( 0, "img_paths argument to upload_images_to_project should be a list" @@ -939,13 +964,12 @@ def attach_image_urls_to_project( project_folder_name = project["name"] + ( f'/{project_folder["name"]}' if project_folder else "" ) - upload_state = project.get("upload_state") - if upload_state == "basic": + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "Basic": raise SABaseException( 0, "You cannot attach URLs in this type of project. Please attach it in an external storage project" ) - upload_state = "external" annotation_status = common.annotation_status_str_to_int(annotation_status) team_id, project_id = project["team_id"], project["id"] image_data = pd.read_csv(attachments) @@ -1144,7 +1168,13 @@ def upload_images_from_public_urls_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} - + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) finish_event = threading.Event() tqdm_thread = threading.Thread( target=_tqdm_download, @@ -1246,6 +1276,13 @@ def upload_images_from_google_cloud_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) cloud_client = storage.Client(project=google_project) bucket = cloud_client.get_bucket(bucket_name) image_blobs = bucket.list_blobs(prefix=folder_path) @@ -1322,6 +1359,13 @@ def upload_images_from_azure_blob_to_project( images_to_upload = [] duplicate_images_filenames = [] path_to_url = {} + project, project_folder = get_project_and_folder_metadata(project) + upload_state = common.upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) connect_key = os.getenv('AZURE_STORAGE_CONNECTION_STRING') blob_service_client = BlobServiceClient.from_connection_string(connect_key) container_client = blob_service_client.get_container_client(container_name) diff --git a/superannotate/ml/ml_funcs.py b/superannotate/ml/ml_funcs.py index d7678e43c..484d37957 100644 --- a/superannotate/ml/ml_funcs.py +++ b/superannotate/ml/ml_funcs.py @@ -17,7 +17,7 @@ from ..api import API from ..common import ( _AVAILABLE_SEGMENTATION_MODELS, model_training_status_int_to_str, - project_type_str_to_int + project_type_str_to_int, upload_state_int_to_str ) from ..db.images import get_image_metadata, search_images from ..exceptions import SABaseException @@ -56,7 +56,12 @@ def run_prediction(project, images_list, model): f"Specified project has type {project['type']}, and does not correspond to the type of provided model" ) project_id = project["id"] - + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) images_metadata = get_image_metadata(project, images_list) if isinstance(images_metadata, dict): images_metadata = [images_metadata] @@ -126,6 +131,13 @@ def run_segmentation(project, images_list, model): ) raise SABaseException(0, "Model Does not exist") + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) + images_metadata = get_image_metadata(project, images_list) images_metadata.sort(key=lambda x: x["name"]) @@ -216,6 +228,13 @@ def run_training( raise SABaseException(0, "Invalid project types") project_type = types.pop() + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External": + raise SABaseException( + 0, + "The function does not support projects containing images attached with URLs" + ) + base_model = base_model.get(project_type, None) if not base_model: logger.error( From 35bc63f739e58c7df1ad8886e80774fed7059e33 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 26 Mar 2021 15:51:29 +0400 Subject: [PATCH 4/5] handling duplicates in csv in attach URLs, disabling fuse generation in exports for projects with attached URLs --- superannotate/db/exports.py | 7 ++++++- superannotate/db/projects.py | 7 ++++++- tests/attach_urls.csv | 3 ++- tests/test_attach_image_urls.py | 4 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/superannotate/db/exports.py b/superannotate/db/exports.py index dc0a4f74c..dabbe1967 100644 --- a/superannotate/db/exports.py +++ b/superannotate/db/exports.py @@ -12,7 +12,7 @@ from tqdm import tqdm from ..api import API -from ..common import annotation_status_str_to_int +from ..common import annotation_status_str_to_int, upload_state_int_to_str from ..exceptions import ( SABaseException, SAExistingExportNameException, SANonExistingExportNameException @@ -123,6 +123,11 @@ def prepare_export( """ if not isinstance(project, dict): project = get_project_metadata_bare(project) + upload_state = upload_state_int_to_str(project.get("upload_state")) + if upload_state == "External" and include_fuse == True: + logger.info( + "Include fuse functionality is not supported for projects containing images attached with URLs" + ) team_id, project_id = project["team_id"], project["id"] if annotation_statuses is None: annotation_statuses = [2, 3, 4, 5] diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index d69172d7c..be91fce08 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -973,7 +973,11 @@ def attach_image_urls_to_project( annotation_status = common.annotation_status_str_to_int(annotation_status) team_id, project_id = project["team_id"], project["id"] image_data = pd.read_csv(attachments) + image_data = image_data[~image_data["url"].isnull()] existing_names = image_data[~image_data["name"].isnull()] + duplicate_idx_csv = existing_names.duplicated(subset="name", keep="first") + duplicate_images = existing_names[duplicate_idx_csv]["name"].tolist() + existing_names = existing_names[~duplicate_idx_csv] existing_images = search_images((project, project_folder)) duplicate_idx = [] for ind, _ in image_data[image_data["name"].isnull()].iterrows(): @@ -983,10 +987,11 @@ def attach_image_urls_to_project( image_data.at[ind, "name"] = name_try existing_images.append(name_try) break + image_data.drop_duplicates(subset="name", keep="first", inplace=True) for ind, row in existing_names.iterrows(): if row["name"] in existing_images: duplicate_idx.append(ind) - duplicate_images = image_data.loc[duplicate_idx]["name"].tolist() + duplicate_images.extend(image_data.loc[duplicate_idx]["name"].tolist()) image_data.drop(labels=duplicate_idx, inplace=True) if len(duplicate_images) != 0: logger.warning( diff --git a/tests/attach_urls.csv b/tests/attach_urls.csv index 354f3465a..702c509dd 100644 --- a/tests/attach_urls.csv +++ b/tests/attach_urls.csv @@ -6,4 +6,5 @@ https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V, https://drive.google.com/uc?export=download&id=1geS2YtQiTYuiduEirKVYxBujHJaIWA3V,6022a74b26aec4002575b9rm - +https://drive.google.com/uc?export=download&id=1SfGcn9hdkVM35ZP0S93eStsE7Ti4GtHU,6022a74b5384c50017c366cv +,fjvnvjsvnsdjcndsjcndjcs diff --git a/tests/test_attach_image_urls.py b/tests/test_attach_image_urls.py index 82be922fd..4d417990d 100644 --- a/tests/test_attach_image_urls.py +++ b/tests/test_attach_image_urls.py @@ -21,7 +21,7 @@ def test_attach_image_urls(): assert len(uploaded) == 7 assert len(could_not_upload) == 0 - assert len(existing_images) == 0 + assert len(existing_images) == 1 uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( project, PATH_TO_URLS @@ -29,4 +29,4 @@ def test_attach_image_urls(): assert len(uploaded) == 2 assert len(could_not_upload) == 0 - assert len(existing_images) == 5 + assert len(existing_images) == 6 From 49a083a6e7c6eaa5da9977507590e65f36a333a9 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 26 Mar 2021 15:56:50 +0400 Subject: [PATCH 5/5] fixed docstring to include dupe images list on return --- superannotate/db/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py index be91fce08..5713414df 100644 --- a/superannotate/db/projects.py +++ b/superannotate/db/projects.py @@ -956,7 +956,7 @@ def attach_image_urls_to_project( :param annotation_status: value to set the annotation statuses of the linked images: NotStarted InProgress QualityCheck Returned Completed Skipped :type annotation_status: str - :return: list of linked image urls, list of unreachable image urls + :return: list of linked image names, list of failed image names, list of duplicate image names :rtype: tuple """