Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/superannotate/lib/app/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import uuid
from ast import literal_eval
from pathlib import Path
from typing import List
Expand Down Expand Up @@ -120,3 +121,33 @@ def metric_is_plottable(key):
if key == "total_loss" or "mIoU" in key or "mAP" in key or key == "iteration":
return True
return False


def get_paths_and_duplicated_from_csv(csv_path):
    """Read an attachments CSV and split rows into uploadables and duplicates.

    The CSV must have a ``url`` column and may have a ``name`` column.
    Rows without a URL are dropped. Missing/blank names are replaced with a
    random ``uuid4`` string so every row gets a unique name.

    :param csv_path: path to the CSV file
    :return: tuple ``(images_to_upload, duplicate_images)`` where
        ``images_to_upload`` is a list of ``{"name": ..., "path": ...}`` dicts
        (first occurrence of each stripped name wins) and
        ``duplicate_images`` is a list of the original (un-stripped) names of
        the rows that were skipped as duplicates.
    """
    # NOTE(review): relies on module-level `pd` (pandas) — not visible in
    # this hunk, confirm helpers.py imports it.
    image_data = pd.read_csv(csv_path, dtype=str)
    image_data = image_data[~image_data["url"].isnull()]
    if "name" in image_data.columns:
        # Blank or whitespace-only names get a generated unique name.
        image_data["name"] = (
            image_data["name"]
            .fillna("")
            .apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4()))
        )
    else:
        image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))]

    image_data = pd.DataFrame(image_data, columns=["name", "url"])
    img_names_urls = image_data.rename(columns={"url": "path"}).to_dict(
        orient="records"
    )
    duplicate_images = []
    seen = set()  # set gives O(1) membership checks (was a list → O(n²) overall)
    images_to_upload = []
    for record in img_names_urls:
        original_name = record["name"]
        record["name"] = record["name"].strip()
        if record["name"] not in seen:
            seen.add(record["name"])
            images_to_upload.append(record)
        else:
            # Report the duplicate under its original, un-stripped spelling.
            duplicate_images.append(original_name)
    return images_to_upload, duplicate_images
40 changes: 0 additions & 40 deletions src/superannotate/lib/app/interface/cli_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
import os
import sys
import tempfile
import uuid
from typing import Any
from typing import Optional

import lib.core as constances
import pandas as pd
from lib import __file__ as lib_path
from lib.app.helpers import split_project_path
from lib.app.input_converters.conversion import import_annotation
Expand All @@ -22,7 +20,6 @@
from lib.app.interface.sdk_interface import upload_images_from_folder_to_project
from lib.app.interface.sdk_interface import upload_preannotations_from_folder_to_project
from lib.app.interface.sdk_interface import upload_videos_from_folder_to_project
from lib.app.serializers import ImageSerializer
from lib.core.entities import ConfigEntity
from lib.infrastructure.controller import Controller
from lib.infrastructure.repositories import ConfigRepository
Expand Down Expand Up @@ -263,43 +260,6 @@ def attach_document_urls(
)
sys.exit(0)

def _attach_urls(
    self, project: str, attachments: str, annotation_status: Optional[Any] = None
):
    """Attach the URLs listed in a CSV file to a project/folder in batches.

    :param project: "project" or "project/folder" path string
    :param attachments: path to a CSV with ``url`` and (optionally) ``name`` columns
    :param annotation_status: status to assign to the attached items, if any
    :return: tuple ``(list_of_uploaded, list_of_not_uploaded, duplicate_images)``
    """
    project_name, folder_name = split_project_path(project)

    # Drop rows without a URL; fill missing names with random unique names.
    image_data = pd.read_csv(attachments, dtype=str)
    image_data = image_data[~image_data["url"].isnull()]
    for ind, _ in image_data[image_data["name"].isnull()].iterrows():
        image_data.at[ind, "name"] = str(uuid.uuid4())

    # Keep only the two relevant columns and rename "url" -> "path" for the API.
    image_data = pd.DataFrame(image_data, columns=["name", "url"])
    img_names_urls = image_data.rename(columns={"url": "path"}).to_dict(
        orient="records"
    )
    list_of_not_uploaded = []
    duplicate_images = []
    # Attach in chunks of 500 records per controller call.
    for i in range(0, len(img_names_urls), 500):
        response = self.controller.attach_urls(
            project_name=project_name,
            folder_name=folder_name,
            files=ImageSerializer.deserialize(
                img_names_urls[i : i + 500]  # noqa: E203
            ),
            annotation_status=annotation_status,
        )
        if response.errors:
            # NOTE(review): presumably response.data is (not_uploaded, duplicated)
            # on error — confirm against the controller's attach_urls contract.
            list_of_not_uploaded.append(response.data[0])
            duplicate_images.append(response.data[1])

    # Everything not reported as failed is considered uploaded.
    list_of_uploaded = [
        image["name"]
        for image in img_names_urls
        if image["name"] not in list_of_not_uploaded
    ]

    return list_of_uploaded, list_of_not_uploaded, duplicate_images

def upload_videos(
self,
project,
Expand Down
152 changes: 50 additions & 102 deletions src/superannotate/lib/app/interface/sdk_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import tempfile
import time
import uuid
from collections import Counter
from collections import namedtuple
from pathlib import Path
Expand All @@ -17,7 +16,6 @@

import boto3
import lib.core as constances
import pandas as pd
import plotly.graph_objects as go
from lib.app.annotation_helpers import add_annotation_bbox_to_json
from lib.app.annotation_helpers import add_annotation_comment_to_json
Expand All @@ -29,6 +27,7 @@
from lib.app.annotation_helpers import add_annotation_template_to_json
from lib.app.helpers import extract_project_folder
from lib.app.helpers import get_annotation_paths
from lib.app.helpers import get_paths_and_duplicated_from_csv
from lib.app.helpers import reformat_metrics_json
from lib.app.interface.types import AnnotationType
from lib.app.interface.types import NotEmptyStr
Expand Down Expand Up @@ -2291,6 +2290,7 @@ def download_image(
)
if response.errors:
raise AppException(response.errors)
logger.info(f"Downloaded image {image_name} to {local_dir_path} ")
return response.data


Expand All @@ -2314,47 +2314,26 @@ def attach_image_urls_to_project(
:rtype: tuple
"""
project_name, folder_name = extract_project_folder(project)
project = controller.get_project_metadata(project_name).data
if project["project"].project_type == constances.ProjectType.VIDEO.value:
raise AppException(
"The function does not support projects containing videos attached with URLs"
)

image_data = pd.read_csv(attachments, dtype=str)
image_data = image_data[~image_data["url"].isnull()]
if "name" in image_data.columns:
image_data["name"] = (
image_data["name"]
.fillna("")
.apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4()))
)
else:
image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))]

image_data = pd.DataFrame(image_data, columns=["name", "url"])
img_names_urls = image_data.rename(columns={"url": "path"}).to_dict(
orient="records"
)
images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments)
list_of_not_uploaded = []
duplicate_images = []
for i in range(0, len(img_names_urls), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
img_names_urls[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if response.errors:
logger.error(response.errors)
else:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])

with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar:
for i in range(0, len(images_to_upload), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
images_to_upload[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if response.errors:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])
progress_bar.update(len(images_to_upload[i : i + 500]))
list_of_uploaded = [
image["name"]
for image in img_names_urls
for image in images_to_upload
if image["name"] not in list_of_not_uploaded
]

Expand All @@ -2378,43 +2357,26 @@ def attach_video_urls_to_project(
:rtype: (list, list, list)
"""
project_name, folder_name = extract_project_folder(project)
project = controller.get_project_metadata(project_name).data
if project["project"].project_type != constances.ProjectType.VIDEO.value:
raise AppException("The function does not support")

image_data = pd.read_csv(attachments, dtype=str)
image_data = image_data[~image_data["url"].isnull()]
if "name" in image_data.columns:
image_data["name"] = (
image_data["name"]
.fillna("")
.apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4()))
)
else:
image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))]

image_data = pd.DataFrame(image_data, columns=["name", "url"])
img_names_urls = image_data.rename(columns={"url": "path"}).to_dict(
orient="records"
)
images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments)
list_of_not_uploaded = []
duplicate_images = []
for i in range(0, len(img_names_urls), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
img_names_urls[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if not response.errors:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])

with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar:
for i in range(0, len(images_to_upload), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
images_to_upload[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if response.errors:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])
progress_bar.update(len(images_to_upload[i : i + 500]))
list_of_uploaded = [
image["name"]
for image in img_names_urls
for image in images_to_upload
if image["name"] not in list_of_not_uploaded
]

Expand Down Expand Up @@ -3672,40 +3634,26 @@ def attach_document_urls_to_project(
:rtype: tuple
"""
project_name, folder_name = extract_project_folder(project)

image_data = pd.read_csv(attachments, dtype=str)
image_data = image_data[~image_data["url"].isnull()]
if "name" in image_data.columns:
image_data["name"] = (
image_data["name"]
.fillna("")
.apply(lambda cell: cell if str(cell).strip() else str(uuid.uuid4()))
)
else:
image_data["name"] = [str(uuid.uuid4()) for _ in range(len(image_data.index))]

image_data = pd.DataFrame(image_data, columns=["name", "url"])
img_names_urls = image_data.rename(columns={"url": "path"}).to_dict(
orient="records"
)
images_to_upload, duplicate_images = get_paths_and_duplicated_from_csv(attachments)
list_of_not_uploaded = []
duplicate_images = []
for i in range(0, len(img_names_urls), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
img_names_urls[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if response.errors:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])

with tqdm(total=len(images_to_upload), desc="Attaching urls") as progress_bar:
for i in range(0, len(images_to_upload), 500):
response = controller.attach_urls(
project_name=project_name,
folder_name=folder_name,
files=ImageSerializer.deserialize(
images_to_upload[i : i + 500] # noqa: E203
),
annotation_status=annotation_status,
)
if response.errors:
list_of_not_uploaded.append(response.data[0])
duplicate_images.append(response.data[1])
progress_bar.update(len(images_to_upload[i : i + 500]))
list_of_uploaded = [
image["name"]
for image in img_names_urls
for image in images_to_upload
if image["name"] not in list_of_not_uploaded
]

Expand Down
6 changes: 4 additions & 2 deletions src/superannotate/lib/core/usecases.py
Original file line number Diff line number Diff line change
Expand Up @@ -2281,9 +2281,11 @@ def fill_classes_data(self, annotations: dict):
for annotation in (
i for i in annotations["instances"] if i.get("type", None) == "template"
):
annotation["templateName"] = templates.get(
annotation.get("templateId", ""), -1
template_name = templates.get(
annotation.get("templateId"), None
)
if template_name:
annotation["templateName"] = template_name

for annotation in [i for i in annotations["instances"] if "classId" in i]:
annotation_class_id = annotation["classId"]
Expand Down
4 changes: 2 additions & 2 deletions tests/data_set/csv_files/text_urls.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/text1_%D0%9B%D0%B
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/%D5%B6%D5%A1%D6%80%20%D5%A4%D5%B8%D5%BD.txt, textՆարԴոս
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,same_name
https://sa-public-text-files.s3.us-west-2.amazonaws.com/grmnda/small%20gods_%20tortoise.txt,same_name
5 changes: 4 additions & 1 deletion tests/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,7 @@ def setUp(self, *args, **kwargs):
def tearDown(self) -> None:
projects = sa.search_projects(self.PROJECT_NAME, return_metadata=True)
for project in projects:
sa.delete_project(project)
try:
sa.delete_project(project)
except Exception:
pass
37 changes: 34 additions & 3 deletions tests/integration/test_attach_document_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tests.integration.base import BaseTestCase


class TestImageUrls(BaseTestCase):
class TestDocumentUrls(BaseTestCase):
PROJECT_NAME = "test attach document urls"
PATH_TO_URLS = "data_set/csv_files/text_urls.csv"
PROJECT_DESCRIPTION = "desc"
Expand All @@ -16,7 +16,38 @@ def test_attach_image_urls(self):
self.PROJECT_NAME,
os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS),
)
self.assertEqual(len(uploaded), 12)
self.assertEqual(len(uploaded), 11)
self.assertEqual(len(could_not_upload), 0)
self.assertEqual(len(existing_images), 0)
self.assertEqual(len(existing_images), 1)


class TestImageUrls(BaseTestCase):
    """Attach a CSV of URLs to a Vector project and verify the dedup counts."""

    PROJECT_NAME = "test attach image urls"
    PATH_TO_URLS = "data_set/csv_files/text_urls.csv"
    PROJECT_DESCRIPTION = "desc"
    PROJECT_TYPE = "Vector"

    def test_attach_image_urls(self):
        # Resolve the fixture CSV relative to the tests/ directory.
        csv_path = os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS)
        uploaded, could_not_upload, existing_images = sa.attach_image_urls_to_project(
            self.PROJECT_NAME, csv_path
        )
        self.assertEqual(len(uploaded), 11)
        self.assertEqual(len(could_not_upload), 0)
        self.assertEqual(len(existing_images), 1)


class TestVideoUrls(BaseTestCase):
    """Attach a CSV of URLs to a Video project and verify the dedup counts."""

    PROJECT_NAME = "test attach video urls"
    PATH_TO_URLS = "data_set/csv_files/text_urls.csv"
    PROJECT_DESCRIPTION = "desc"
    PROJECT_TYPE = "Video"

    # Renamed from copy-pasted "test_attach_image_urls" — this test exercises
    # attach_video_urls_to_project (still discovered by pytest via test_ prefix).
    def test_attach_video_urls(self):
        uploaded, could_not_upload, existing_images = sa.attach_video_urls_to_project(
            self.PROJECT_NAME,
            os.path.join(dirname(dirname(__file__)), self.PATH_TO_URLS),
        )
        # 12 rows with a URL in the fixture; one duplicated name ("same_name"),
        # so 11 attach and 1 is reported as a duplicate.
        self.assertEqual(len(uploaded), 11)
        self.assertEqual(len(could_not_upload), 0)
        self.assertEqual(len(existing_images), 1)
Loading