diff --git a/superannotate/db/images.py b/superannotate/db/images.py
index f0ba8de12..5572c686c 100644
--- a/superannotate/db/images.py
+++ b/superannotate/db/images.py
@@ -231,30 +231,32 @@ def get_image_metadata(project, image_names, return_dict_on_single_output=True):
     else:
         project_folder_id = None
 
+    chunk_size = 500
+    chunks = [image_names[i:i + chunk_size] for i in range(0, len(image_names), chunk_size)]
+
     json_req = {
         'project_id': project['id'],
         'team_id': _api.team_id,
-        'names': image_names,
     }
+
     if project_folder_id is not None:
         json_req["folder_id"] = project_folder_id
-    response = _api.send_request(
-        req_type='POST',
-        path='/images/getBulk',
-        json_req=json_req,
-    )
-    if not response.ok:
-        raise SABaseException(
-            response.status_code,
-            "Couldn't get image metadata. " + response.text
-        )
-    metadata_raw = response.json()
+    metadata_raw = []
+    for chunk in chunks:
+        json_req['names'] = chunk
+        response = _api.send_request(
+            req_type='POST',
+            path='/images/getBulk',
+            json_req=json_req,
+        )
+        if not response.ok:
+            raise SABaseException(response.status_code,"Couldn't get image metadata. " + response.text)
+        metadata_raw += response.json()
+
     metadata_without_deleted = []
-    for im_metadata in metadata_raw:
-        if 'delete' in im_metadata and im_metadata['delete'] == 1:
-            continue
-        metadata_without_deleted.append(im_metadata)
+    metadata_without_deleted = [ i for i in metadata_raw if i['delete'] != 1 ]
+
     if len(metadata_without_deleted) == 0:
         raise SABaseException(
             0,
diff --git a/superannotate/db/project_images.py b/superannotate/db/project_images.py
index 156791b46..437856820 100644
--- a/superannotate/db/project_images.py
+++ b/superannotate/db/project_images.py
@@ -19,6 +19,7 @@
     __create_image, get_image_array_to_upload,
     get_project_default_image_quality_in_editor, upload_image_array_to_s3
 )
+from .utils import _get_upload_auth_token
 
 logger = logging.getLogger("superannotate-python-sdk")
 _api = API.get_instance()
@@ -50,7 +51,7 @@ def upload_image_to_project(
        Can be either "compressed" or "original". If None then the default value in project settings will be used.
     :type image_quality_in_editor: str
     """
-    project, project_folder = get_project_and_folder_metadata(project)
+    project, folder = get_project_and_folder_metadata(project)
     upload_state = common.upload_state_int_to_str(project.get("upload_state"))
     if upload_state == "External":
         raise SABaseException(
@@ -92,21 +93,18 @@
             0, "Image name img_name should be set if img is not Pathlike"
         )
+    if folder:
+        folder_id = folder["id"]
+    else:
+        folder_id = get_project_root_folder_id(project)
+
     team_id, project_id = project["team_id"], project["id"]
     params = {
         'team_id': team_id,
+        'folder_id' : folder_id
     }
-    response = _api.send_request(
-        req_type='GET',
-        path=f'/project/{project_id}/sdkImageUploadToken',
-        params=params
-    )
-    if not response.ok:
-        raise SABaseException(
-            response.status_code, "Couldn't get upload token " + response.text
-        )
-    res = response.json()
-    prefix = res['filePath']
+    res = _get_upload_auth_token(params=params,project_id=project_id)
+    prefix = res['filePath']
     s3_session = boto3.Session(
         aws_access_key_id=res['accessKeyId'],
         aws_secret_access_key=res['secretAccessKey'],
@@ -122,16 +120,12 @@
     except Exception as e:
         raise SABaseException(0, "Couldn't upload to data server.") from e
 
-    if project_folder is not None:
-        project_folder_id = project_folder["id"]
-    else:
-        project_folder_id = None
     __create_image(
         [img_name], [key], project, annotation_status, prefix,
         [images_info_and_array[2]],
-        project_folder_id,
+        folder_id,
         upload_state="Basic"
     )
@@ -171,7 +165,7 @@ def _copy_images(
         destination_folder_id = get_project_root_folder_id(destination_project)
         json_req["destination_folder_id"] = destination_folder_id
     res = {}
-    res['skipped'] = 0
+    res['skipped'] = []
     for start_index in range(0, len(image_names), NUM_TO_SEND):
         json_req["image_names"] = image_names[start_index:start_index +
                                               NUM_TO_SEND]
@@ -239,7 +233,7 @@ def copy_images(
         source_project["name"] + "" if source_project_folder is None else "/" +
         source_project_folder["name"], destination_project["name"] + ""
         if destination_project_folder is None else "/" +
-        destination_project_folder["name"], res["skipped"]
+        destination_project_folder["name"], len(res["skipped"])
     )
     return res["skipped"]
diff --git a/superannotate/db/projects.py b/superannotate/db/projects.py
index 7719a9f0b..1c664440f 100644
--- a/superannotate/db/projects.py
+++ b/superannotate/db/projects.py
@@ -34,12 +34,13 @@
     fill_class_and_attribute_ids, get_annotation_classes_name_to_id,
     search_annotation_classes
 )
-from .images import get_image_metadata, search_images, search_images_all_folders
+from .images import get_image_metadata, search_images, search_images_all_folders, get_project_root_folder_id
 from .project_api import (
     get_project_and_folder_metadata, get_project_metadata_bare,
     get_project_metadata_with_users
 )
 from .users import get_team_contributor_metadata
+from .utils import _get_upload_auth_token
 
 logger = logging.getLogger("superannotate-python-sdk")
@@ -825,9 +826,9 @@ def upload_images_to_project(
     :return: uploaded, could-not-upload, existing-images filepaths
     :rtype: tuple (3 members) of list of strs
     """
-    project, project_folder = get_project_and_folder_metadata(project)
-    project_folder_name = project["name"] + (
-        f'/{project_folder["name"]}' if project_folder else ""
+    project, folder = get_project_and_folder_metadata(project)
+    folder_name = project["name"] + (
+        f'/{folder["name"]}' if folder else ""
     )
     upload_state = common.upload_state_int_to_str(project.get("upload_state"))
     if upload_state == "External":
@@ -845,7 +846,7 @@
         project
     )
     team_id, project_id = project["team_id"], project["id"]
-    existing_images = search_images((project, project_folder))
+    existing_images = search_images((project, folder))
     duplicate_images = []
     for existing_image in existing_images:
         i = -1
@@ -863,31 +864,31 @@
     )
     len_img_paths = len(img_paths)
     logger.info(
-        "Uploading %s images to project %s.", len_img_paths, project_folder_name
+        "Uploading %s images to project %s.", len_img_paths, folder_name
     )
     if len_img_paths == 0:
         return ([], [], duplicate_images)
-    params = {'team_id': team_id}
+
+
+    if folder:
+        folder_id = folder["id"]
+    else:
+        folder_id = get_project_root_folder_id(project)
+
+    params = {'team_id': team_id , 'folder_id' : folder_id }
     uploaded = [[] for _ in range(_NUM_THREADS)]
     tried_upload = [[] for _ in range(_NUM_THREADS)]
     couldnt_upload = [[] for _ in range(_NUM_THREADS)]
     finish_event = threading.Event()
-    chunksize = int(math.ceil(len(img_paths) / _NUM_THREADS))
-    response = _api.send_request(
-        req_type='GET',
-        path=f'/project/{project_id}/sdkImageUploadToken',
-        params=params
-    )
-    if not response.ok:
-        raise SABaseException(
-            response.status_code, "Couldn't get upload token " + response.text
-        )
-    if project_folder is not None:
-        project_folder_id = project_folder["id"]
-    else:
-        project_folder_id = None
-    res = response.json()
+
+    res = _get_upload_auth_token(params=params,project_id=project_id)
+    prefix = res['filePath']
+    limit = res['availableImageCount']
+    images_to_upload = img_paths[:limit]
+    images_to_skip = img_paths[limit:]
+    chunksize = int(math.ceil(len(images_to_upload) / _NUM_THREADS))
+
     tqdm_thread = threading.Thread(
         target=__tqdm_thread_image_upload,
         args=(len_img_paths, tried_upload, finish_event),
@@ -900,9 +901,9 @@
         t = threading.Thread(
             target=__upload_images_to_aws_thread,
             args=(
-                res, img_paths, project, annotation_status, prefix, thread_id,
+                res, images_to_upload, project, annotation_status, prefix, thread_id,
                 chunksize, couldnt_upload, uploaded, tried_upload,
-                image_quality_in_editor, from_s3_bucket, project_folder_id
+                image_quality_in_editor, from_s3_bucket, folder_id
             ),
             daemon=True
         )
@@ -921,6 +922,7 @@
         for f in upload_thread:
             list_of_uploaded.append(str(f))
 
+    list_of_not_uploaded += images_to_skip
     return (list_of_uploaded, list_of_not_uploaded, duplicate_images)
@@ -958,9 +960,9 @@ def attach_image_urls_to_project(
     :rtype: tuple
     """
-    project, project_folder = get_project_and_folder_metadata(project)
-    project_folder_name = project["name"] + (
-        f'/{project_folder["name"]}' if project_folder else ""
+    project, folder = get_project_and_folder_metadata(project)
+    folder_name = project["name"] + (
+        f'/{folder["name"]}' if folder else ""
     )
     upload_state = common.upload_state_int_to_str(project.get("upload_state"))
     if upload_state == "Basic":
@@ -976,7 +978,7 @@
     duplicate_idx_csv = existing_names.duplicated(subset="name", keep="first")
     duplicate_images = existing_names[duplicate_idx_csv]["name"].tolist()
     existing_names = existing_names[~duplicate_idx_csv]
-    existing_images = search_images((project, project_folder))
+    existing_images = search_images((project, folder))
     duplicate_idx = []
     for ind, _ in image_data[image_data["name"].isnull()].iterrows():
         while True:
@@ -998,37 +1000,35 @@
     )
     image_data = pd.DataFrame(image_data, columns=["name", "url"])
     img_names_urls = image_data.values.tolist()
-    len_img_names_urls = len(img_names_urls)
     logger.info(
-        "Attaching %s images to project %s.", len_img_names_urls,
-        project_folder_name
+        "Uploading %s images to project %s.", len(img_names_urls),
+        folder_name
     )
-    if len_img_names_urls == 0:
+    if len(img_names_urls) == 0:
         return ([], [], duplicate_images)
-    params = {'team_id': team_id}
+
+    if folder:
+        folder_id = folder["id"]
+    else:
+        folder_id = get_project_root_folder_id(project)
+
+    params = {'team_id': team_id , 'folder_id' : folder_id }
     uploaded = [[] for _ in range(_NUM_THREADS)]
     tried_upload = [[] for _ in range(_NUM_THREADS)]
    couldnt_upload = [[] for _ in range(_NUM_THREADS)]
     finish_event = threading.Event()
-    chunksize = int(math.ceil(len_img_names_urls / _NUM_THREADS))
-    response = _api.send_request(
-        req_type='GET',
-        path=f'/project/{project_id}/sdkImageUploadToken',
-        params=params
-    )
-    if not response.ok:
-        raise SABaseException(
-            response.status_code, "Couldn't get upload token " + response.text
-        )
-    if project_folder is not None:
-        project_folder_id = project_folder["id"]
-    else:
-        project_folder_id = None
-    res = response.json()
+
+    res = _get_upload_auth_token(params=params,project_id=project_id)
+    prefix = res['filePath']
+    limit = res['availableImageCount']
+    images_to_upload = img_names_urls[:limit]
+    images_to_skip = img_names_urls[limit:]
+    chunksize = int(math.ceil(len(images_to_upload) / _NUM_THREADS))
+
     tqdm_thread = threading.Thread(
         target=__tqdm_thread_image_upload,
-        args=(len_img_names_urls, tried_upload, finish_event),
+        args=(len(images_to_upload), tried_upload, finish_event),
         daemon=True
     )
     tqdm_thread.start()
@@ -1037,9 +1037,9 @@
         t = threading.Thread(
             target=__attach_image_urls_to_project_thread,
             args=(
-                res, img_names_urls, project, annotation_status, prefix,
+                res, images_to_upload, project, annotation_status, prefix,
                 thread_id, chunksize, couldnt_upload, uploaded, tried_upload,
-                project_folder_id
+                folder_id
            ),
            daemon=True
        )
@@ -1058,6 +1058,7 @@
         for f in upload_thread:
             list_of_uploaded.append(str(f))
 
+    list_of_not_uploaded += [i[0] for i in images_to_skip ]
     return (list_of_uploaded, list_of_not_uploaded, duplicate_images)
diff --git a/superannotate/db/utils.py b/superannotate/db/utils.py
new file mode 100644
index 000000000..41078b942
--- /dev/null
+++ b/superannotate/db/utils.py
@@ -0,0 +1,19 @@
+from ..api import API
+from ..exceptions import SABaseException, SAImageSizeTooLarge
+
+_api = API.get_instance()
+
+
+def _get_upload_auth_token(params,project_id):
+    response = _api.send_request(
+        req_type='GET',
+        path=f'/project/{project_id}/sdkImageUploadToken',
+        params=params
+    )
+    if not response.ok:
+        raise SABaseException(
+            response.status_code, "Couldn't get upload token " + response.text
+        )
+
+    res = response.json()
+    return res