diff --git a/src/superannotate/lib/app/helpers.py b/src/superannotate/lib/app/helpers.py index 64ce8d1c9..98242cf19 100644 --- a/src/superannotate/lib/app/helpers.py +++ b/src/superannotate/lib/app/helpers.py @@ -1,3 +1,4 @@ +import uuid from ast import literal_eval from pathlib import Path from typing import List @@ -120,3 +121,33 @@ def metric_is_plottable(key): if key == "total_loss" or "mIoU" in key or "mAP" in key or key == "iteration": return True return False + + +def get_paths_and_duplicated_from_csv(csv_path): + image_data = pd.read_csv(csv_path, dtype=str) + image_data = image_data[~image_data["url"].isnull()] + if "name" in image_data.columns: + image_data["name"] = ( + image_data["name"] + .fillna("") + .apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4())) + ) + else: + image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))] + + image_data = pd.DataFrame(image_data, columns=["name", "url"]) + img_names_urls = image_data.rename(columns={"url": "path"}).to_dict( + orient="records" + ) + duplicate_images = [] + seen = [] + images_to_upload = [] + for i in img_names_urls: + temp = i["name"] + i["name"] = i["name"].strip() + if i["name"] not in seen: + seen.append(i["name"]) + images_to_upload.append(i) + else: + duplicate_images.append(temp) + return images_to_upload, duplicate_images diff --git a/src/superannotate/lib/app/interface/cli_interface.py b/src/superannotate/lib/app/interface/cli_interface.py index 0be6a0633..b336a368b 100644 --- a/src/superannotate/lib/app/interface/cli_interface.py +++ b/src/superannotate/lib/app/interface/cli_interface.py @@ -3,12 +3,10 @@ import os import sys import tempfile -import uuid from typing import Any from typing import Optional import lib.core as constances -import pandas as pd from lib import __file__ as lib_path from lib.app.helpers import split_project_path from lib.app.input_converters.conversion import import_annotation @@ -22,7 +20,6 @@ from 
lib.app.interface.sdk_interface import upload_images_from_folder_to_project from lib.app.interface.sdk_interface import upload_preannotations_from_folder_to_project from lib.app.interface.sdk_interface import upload_videos_from_folder_to_project -from lib.app.serializers import ImageSerializer from lib.core.entities import ConfigEntity from lib.infrastructure.controller import Controller from lib.infrastructure.repositories import ConfigRepository @@ -263,43 +260,6 @@ def attach_document_urls( ) sys.exit(0) - def _attach_urls( - self, project: str, attachments: str, annotation_status: Optional[Any] = None - ): - project_name, folder_name = split_project_path(project) - - image_data = pd.read_csv(attachments, dtype=str) - image_data = image_data[~image_data["url"].isnull()] - for ind, _ in image_data[image_data["name"].isnull()].iterrows(): - image_data.at[ind, "name"] = str(uuid.uuid4()) - - image_data = pd.DataFrame(image_data, columns=["name", "url"]) - img_names_urls = image_data.rename(columns={"url": "path"}).to_dict( - orient="records" - ) - list_of_not_uploaded = [] - duplicate_images = [] - for i in range(0, len(img_names_urls), 500): - response = self.controller.attach_urls( - project_name=project_name, - folder_name=folder_name, - files=ImageSerializer.deserialize( - img_names_urls[i : i + 500] # noqa: E203 - ), - annotation_status=annotation_status, - ) - if response.errors: - list_of_not_uploaded.append(response.data[0]) - duplicate_images.append(response.data[1]) - - list_of_uploaded = [ - image["name"] - for image in img_names_urls - if image["name"] not in list_of_not_uploaded - ] - - return list_of_uploaded, list_of_not_uploaded, duplicate_images - def upload_videos( self, project, diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index 7c0ddad4a..92276e373 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py 
@@ -5,7 +5,6 @@ import os import tempfile import time -import uuid from collections import Counter from collections import namedtuple from pathlib import Path @@ -17,7 +16,6 @@ import boto3 import lib.core as constances -import pandas as pd import plotly.graph_objects as go from lib.app.annotation_helpers import add_annotation_bbox_to_json from lib.app.annotation_helpers import add_annotation_comment_to_json @@ -29,6 +27,7 @@ from lib.app.annotation_helpers import add_annotation_template_to_json from lib.app.helpers import extract_project_folder from lib.app.helpers import get_annotation_paths +from lib.app.helpers import get_paths_and_duplicated_from_csv from lib.app.helpers import reformat_metrics_json from lib.app.interface.types import AnnotationType from lib.app.interface.types import NotEmptyStr @@ -2291,6 +2290,7 @@ def download_image( ) if response.errors: raise AppException(response.errors) + logger.info(f"Downloaded image {image_name} to {local_dir_path} ") return response.data @@ -2314,47 +2314,26 @@ def attach_image_urls_to_project( :rtype: tuple """ project_name, folder_name = extract_project_folder(project) - project = controller.get_project_metadata(project_name).data - if project["project"].project_type == constances.ProjectType.VIDEO.value: - raise AppException( - "The function does not support projects containing videos attached with URLs" - ) - - image_data = pd.read_csv(attachments, dtype=str) - image_data = image_data[~image_data["url"].isnull()] - if "name" in image_data.columns: - image_data["name"] = ( - image_data["name"] - .fillna("") - .apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4())) - ) - else: - image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))] - - image_data = pd.DataFrame(image_data, columns=["name", "url"]) - img_names_urls = image_data.rename(columns={"url": "path"}).to_dict( - orient="records" - ) + images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments) 
list_of_not_uploaded = [] - duplicate_images = [] - for i in range(0, len(img_names_urls), 500): - response = controller.attach_urls( - project_name=project_name, - folder_name=folder_name, - files=ImageSerializer.deserialize( - img_names_urls[i : i + 500] # noqa: E203 - ), - annotation_status=annotation_status, - ) - if response.errors: - logger.error(response.errors) - else: - list_of_not_uploaded.append(response.data[0]) - duplicate_images.append(response.data[1]) + with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar: + for i in range(0, len(images_to_upload), 500): + response = controller.attach_urls( + project_name=project_name, + folder_name=folder_name, + files=ImageSerializer.deserialize( + images_to_upload[i : i + 500] # noqa: E203 + ), + annotation_status=annotation_status, + ) + if response.errors: + list_of_not_uploaded.append(response.data[0]) + duplicate_images.append(response.data[1]) + progress_bar.update(len(images_to_upload[i : i + 500])) list_of_uploaded = [ image["name"] - for image in img_names_urls + for image in images_to_upload if image["name"] not in list_of_not_uploaded ] @@ -2378,43 +2357,26 @@ def attach_video_urls_to_project( :rtype: (list, list, list) """ project_name, folder_name = extract_project_folder(project) - project = controller.get_project_metadata(project_name).data - if project["project"].project_type != constances.ProjectType.VIDEO.value: - raise AppException("The function does not support") - - image_data = pd.read_csv(attachments, dtype=str) - image_data = image_data[~image_data["url"].isnull()] - if "name" in image_data.columns: - image_data["name"] = ( - image_data["name"] - .fillna("") - .apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4())) - ) - else: - image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))] - - image_data = pd.DataFrame(image_data, columns=["name", "url"]) - img_names_urls = image_data.rename(columns={"url": "path"}).to_dict( - 
orient="records" - ) + images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments) list_of_not_uploaded = [] - duplicate_images = [] - for i in range(0, len(img_names_urls), 500): - response = controller.attach_urls( - project_name=project_name, - folder_name=folder_name, - files=ImageSerializer.deserialize( - img_names_urls[i : i + 500] # noqa: E203 - ), - annotation_status=annotation_status, - ) - if not response.errors: - list_of_not_uploaded.append(response.data[0]) - duplicate_images.append(response.data[1]) + with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar: + for i in range(0, len(images_to_upload), 500): + response = controller.attach_urls( + project_name=project_name, + folder_name=folder_name, + files=ImageSerializer.deserialize( + images_to_upload[i : i + 500] # noqa: E203 + ), + annotation_status=annotation_status, + ) + if response.errors: + list_of_not_uploaded.append(response.data[0]) + duplicate_images.append(response.data[1]) + progress_bar.update(len(images_to_upload[i : i + 500])) list_of_uploaded = [ image["name"] - for image in img_names_urls + for image in images_to_upload if image["name"] not in list_of_not_uploaded ] @@ -3672,40 +3634,26 @@ def attach_document_urls_to_project( :rtype: tuple """ project_name, folder_name = extract_project_folder(project) - - image_data = pd.read_csv(attachments, dtype=str) - image_data = image_data[~image_data["url"].isnull()] - if "name" in image_data.columns: - image_data["name"] = ( - image_data["name"] - .fillna("") - .apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4())) - ) - else: - image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))] - - image_data = pd.DataFrame(image_data, columns=["name", "url"]) - img_names_urls = image_data.rename(columns={"url": "path"}).to_dict( - orient="records" - ) + images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments) list_of_not_uploaded = [] - 
duplicate_images = [] - for i in range(0, len(img_names_urls), 500): - response = controller.attach_urls( - project_name=project_name, - folder_name=folder_name, - files=ImageSerializer.deserialize( - img_names_urls[i : i + 500] # noqa: E203 - ), - annotation_status=annotation_status, - ) - if response.errors: - list_of_not_uploaded.append(response.data[0]) - duplicate_images.append(response.data[1]) + with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar: + for i in range(0, len(images_to_upload), 500): + response = controller.attach_urls( + project_name=project_name, + folder_name=folder_name, + files=ImageSerializer.deserialize( + images_to_upload[i : i + 500] # noqa: E203 + ), + annotation_status=annotation_status, + ) + if response.errors: + list_of_not_uploaded.append(response.data[0]) + duplicate_images.append(response.data[1]) + progress_bar.update(len(images_to_upload[i : i + 500])) list_of_uploaded = [ image["name"] - for image in img_names_urls + for image in images_to_upload if image["name"] not in list_of_not_uploaded ] diff --git a/src/superannotate/lib/core/usecases.py b/src/superannotate/lib/core/usecases.py index 65b38d7f0..6e3cb4ba6 100644 --- a/src/superannotate/lib/core/usecases.py +++ b/src/superannotate/lib/core/usecases.py @@ -2281,9 +2281,11 @@ def fill_classes_data(self, annotations: dict): for annotation in ( i for i in annotations["instances"] if i.get("type", None) == "template" ): - annotation["templateName"] = templates.get( - annotation.get("templateId", ""), -1 + template_name = templates.get( + annotation.get("templateId"), None ) + if template_name: + annotation["templateName"] = template_name for annotation in [i for i in annotations["instances"] if "classId" in i]: annotation_class_id = annotation["classId"] diff --git a/tests/data_set/csv_files/text_urls.csv b/tests/data_set/csv_files/text_urls.csv index 1f7edaccd..7d9ff0b33 100644 --- a/tests/data_set/csv_files/text_urls.csv +++ 
b/tests/data_set/csv_files/text_urls.csv @@ -9,5 +9,5 @@ https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/text1_%D0%9B%D0%B https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/%D5%B6%D5%A1%D6%80%20%D5%A4%D5%B8%D5%BD.txt, textՆարԴոս https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt, https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt, -https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt, -https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt, +https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,same_name +https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,same_name diff --git a/tests/integration/base.py b/tests/integration/base.py index 050793d4b..f4f2e467d 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -23,4 +23,7 @@ def setUp(self, *args, **kwargs): def tearDown(self) -> None: projects = sa.search_projects(self.PROJECT_NAME, return_metadata=True) for project in projects: - sa.delete_project(project) + try: + sa.delete_project(project) + except Exception: + pass diff --git a/tests/integration/test_attach_document_urls.py b/tests/integration/test_attach_document_urls.py index 9fe20cf9a..a4491c264 100644 --- a/tests/integration/test_attach_document_urls.py +++ b/tests/integration/test_attach_document_urls.py @@ -5,7 +5,7 @@ from tests.integration.base import BaseTestCase -class TestImageUrls(BaseTestCase): +class TestDocumentUrls(BaseTestCase): PROJECT_NAME = "test attach document urls" PATH_TO_URLS = "data_set/csv_files/text_urls.csv" PROJECT_DESCRIPTION = "desc" @@ -16,7 +16,38 @@ def test_attach_image_urls(self): self.PROJECT_NAME, os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS), ) - self.assertEqual(len(uploaded), 12) + self.assertEqual(len(uploaded), 11) 
self.assertEqual(len(could_not_upload), 0) - self.assertEqual(len(existing_images), 0) + self.assertEqual(len(existing_images), 1) + + +class TestImageUrls(BaseTestCase): + PROJECT_NAME = "test attach image urls" + PATH_TO_URLS = "data_set/csv_files/text_urls.csv" + PROJECT_DESCRIPTION = "desc" + PROJECT_TYPE = "Vector" + def test_attach_image_urls(self): + uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project( + self.PROJECT_NAME, + os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS), + ) + self.assertEqual(len(uploaded), 11) + self.assertEqual(len(could_not_upload), 0) + self.assertEqual(len(existing_images), 1) + + +class TestVideoUrls(BaseTestCase): + PROJECT_NAME = "test attach video urls" + PATH_TO_URLS = "data_set/csv_files/text_urls.csv" + PROJECT_DESCRIPTION = "desc" + PROJECT_TYPE = "Video" + + def test_attach_image_urls(self): + uploaded, could_not_upload, existing_images = sa.attach_video_urls_to_project( + self.PROJECT_NAME, + os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS), + ) + self.assertEqual(len(uploaded), 11) + self.assertEqual(len(could_not_upload), 0) + self.assertEqual(len(existing_images), 1) diff --git a/tests/integration/test_attach_image_urls.py b/tests/integration/test_attach_image_urls.py index c583ba6c0..f9d62467e 100644 --- a/tests/integration/test_attach_image_urls.py +++ b/tests/integration/test_attach_image_urls.py @@ -21,8 +21,8 @@ def test_attach_image_urls(self): self.assertEqual(UploadState.EXTERNAL.name, project_metadata["upload_state"]) - self.assertEqual(len(uploaded), 8) - self.assertEqual(len(could_not_upload), 1) + self.assertEqual(len(uploaded), 7) + self.assertEqual(len(could_not_upload), 0) self.assertEqual(len(existing_images), 1) images = sa.search_images(project=self.PROJECT_NAME, return_metadata=True) self.assertTrue(all([image["name"] for image in images])) diff --git a/tests/integration/test_attach_video_urls.py b/tests/integration/test_attach_video_urls.py index 
ddebc8989..58897c168 100644 --- a/tests/integration/test_attach_video_urls.py +++ b/tests/integration/test_attach_video_urls.py @@ -25,8 +25,8 @@ def test_attach_video_urls(self): self.PROJECT_NAME, self.csv_path, ) - self.assertEqual(len(uploaded), 8) - self.assertEqual(len(could_not_upload), 1) + self.assertEqual(len(uploaded), 7) + self.assertEqual(len(could_not_upload), 0) self.assertEqual(len(existing_images), 1) def test_attach_video_urls_without_name_column(self): @@ -34,6 +34,6 @@ def test_attach_video_urls_without_name_column(self): self.PROJECT_NAME, self.csv_path_without_name_column ) - self.assertEqual(len(uploaded), 8) - self.assertEqual(len(could_not_upload), 1) + self.assertEqual(len(uploaded), 7) + self.assertEqual(len(could_not_upload), 0) self.assertEqual(len(existing_images), 1) diff --git a/tests/integration/test_single_annotation_download.py b/tests/integration/test_single_annotation_download.py index 94a01ee79..2d26e9e5f 100644 --- a/tests/integration/test_single_annotation_download.py +++ b/tests/integration/test_single_annotation_download.py @@ -54,6 +54,7 @@ def test_annotation_download_upload_vector(self): for j in i["attributes"]: j.pop("groupId", None) j.pop("id", None) + # TODO: check templateId -1 handling before asserting equality assert downloaded_json == uploaded_json