18 changes: 11 additions & 7 deletions src/superannotate/lib/core/usecases/items.py
@@ -1017,20 +1017,19 @@ def __distribute_to_results(self, item_id, response, item):

         if item_id in response.data["success"]:
             self.results["succeeded"].append(item)
+            response.data["success"].discard(item_id)
         elif item_id in response.data["skipped"]:
             self.results["skipped"].append(item)
-        else:
+            response.data["skipped"].discard(item_id)
+        elif item_id in response.data["failed"]:
             self.results["failed"].append(item)
+            response.data["failed"].discard(item_id)

     def validate_items(
         self,
     ):

-        filtered_items = self.__filter_duplicates()
-        if len(filtered_items) != len(self.items):
-            self.reporter.log_info(
-                f"Dropping duplicates. Found {len(filtered_items)} / {len(self.items)} unique items."
-            )
-        self.items = filtered_items
         self.items = self.__filter_invalid_items()
         self.__separate_to_paths()
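For context on this hunk: each id is now discarded from its response bucket as soon as it is routed, and an explicit `elif` on `"failed"` replaces the old bare `else`, so an id the backend never reported is no longer misfiled as a failure. A minimal self-contained sketch of that pattern (the names `distribute` and `buckets` are illustrative, not the SDK's):

```python
def distribute(item_id, item, buckets, results):
    """Route an item by id into succeeded/skipped/failed and consume the id.

    `buckets` mirrors response.data: three sets of ids reported by the
    backend. Discarding each id after routing leaves behind exactly the
    ids that were reported but never matched.
    """
    if item_id in buckets["success"]:
        results["succeeded"].append(item)
        buckets["success"].discard(item_id)
    elif item_id in buckets["skipped"]:
        results["skipped"].append(item)
        buckets["skipped"].discard(item_id)
    elif item_id in buckets["failed"]:
        results["failed"].append(item)
        buckets["failed"].discard(item_id)
    # An id found in none of the buckets now falls through, instead of
    # being counted as failed by the old bare `else`.


buckets = {"success": {1}, "skipped": set(), "failed": {2}}
results = {"succeeded": [], "skipped": [], "failed": []}
distribute(1, {"id": 1}, buckets, results)
distribute(2, {"id": 2}, buckets, results)
assert results["succeeded"] == [{"id": 1}]
assert results["failed"] == [{"id": 2}]
assert buckets["success"] == set() and buckets["failed"] == set()
```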
@@ -1077,10 +1076,15 @@ def execute(

         response = None

-        for i in range(0, len(self.item_ids), self.CHUNK_SIZE):
+        unique_item_ids = list(set(self.item_ids))
+        if len(self.items) > len(unique_item_ids):
+            self.reporter.log_info(
+                f"Dropping duplicates. Found {len(unique_item_ids)} / {len(self.items)} unique items."
+            )
+        for i in range(0, len(unique_item_ids), self.CHUNK_SIZE):
             tmp_response = self._service_provider.subsets.add_items(
                 project=self.project,
-                item_ids=self.item_ids[i : i + self.CHUNK_SIZE], # noqa
+                item_ids=unique_item_ids[i : i + self.CHUNK_SIZE], # noqa
                 subset=subset,
             )
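This hunk moves deduplication out of `validate_items` and into `execute`, dropping duplicate ids immediately before the chunked submission. A minimal sketch of the dedupe-then-chunk pattern under the same assumptions (`CHUNK_SIZE` and the `add_items` callable are stand-ins for the use case's internals):

```python
CHUNK_SIZE = 2  # stand-in for the use case's real chunk size


def add_in_chunks(item_ids, add_items):
    """Drop duplicate ids, then submit the remainder in fixed-size chunks."""
    unique_item_ids = list(set(item_ids))  # note: set() does not preserve input order
    if len(item_ids) > len(unique_item_ids):
        print(
            f"Dropping duplicates. Found {len(unique_item_ids)} / {len(item_ids)} unique items."
        )
    return [
        add_items(unique_item_ids[i : i + CHUNK_SIZE])
        for i in range(0, len(unique_item_ids), CHUNK_SIZE)
    ]


# Four ids with one duplicate: three unique ids go out, in two chunks.
chunks = add_in_chunks([1, 1, 2, 3], lambda chunk: chunk)
assert sorted(id_ for chunk in chunks for id_ in chunk) == [1, 2, 3]
```

One design note: `list(set(...))` discards the caller's ordering, which is presumably harmless here since subset membership is set-like rather than ordered.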
17 changes: 16 additions & 1 deletion tests/integration/subsets/test_subsets.py
@@ -18,4 +18,19 @@ def test_add_items_to_subset(self):
         subset_data = []
         for i in item_names:
             subset_data.append({"name": i["name"], "path": self.PROJECT_NAME})
-        sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+        result = sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+        assert len(subset_data) == len(result["succeeded"])
+
+    def test_add_to_subset_with_duplicates_items(self):
+        with self.assertLogs("sa", level="INFO") as cm:
+            sa.attach_items(self.PROJECT_NAME, [{"name": "earth_mov_001.jpg", "url": "url_1"}]) # noqa
+            item_metadata = sa.get_item_metadata(self.PROJECT_NAME, "earth_mov_001.jpg")
+            subset_data = [{"name": "earth_mov_001.jpg", "path": self.PROJECT_NAME}, {"id": item_metadata["id"]}]
+            result = sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+            assert len(result["succeeded"]) == 1
+            assert (
+                "INFO:sa:Dropping duplicates. Found 1 / 2 unique items." == cm.output[2]
+            )
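For reference, this is the user-visible behavior the new test pins down, sketched as SDK usage; the project and subset names are illustrative, the item id is hypothetical, and the result buckets and log text follow the test's assertions rather than any further API guarantee:

```python
from superannotate import SAClient  # assumes the SDK's current client entry point

sa = SAClient()

# The same item referenced twice: once by name/path, once by (hypothetical) id.
subset_data = [
    {"name": "earth_mov_001.jpg", "path": "My Project"},
    {"id": 123},
]
result = sa.add_items_to_subset("My Project", "My Subset", subset_data)

# Duplicates are dropped before submission: only one item succeeds, and an
# INFO log reports 'Dropping duplicates. Found 1 / 2 unique items.'
assert len(result["succeeded"]) == 1
```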