diff --git a/src/superannotate/lib/core/usecases/items.py b/src/superannotate/lib/core/usecases/items.py
index 27dd3824b..c1205484d 100644
--- a/src/superannotate/lib/core/usecases/items.py
+++ b/src/superannotate/lib/core/usecases/items.py
@@ -1017,20 +1017,19 @@
     def __distribute_to_results(self, item_id, response, item):
         if item_id in response.data["success"]:
             self.results["succeeded"].append(item)
+            response.data["success"].discard(item_id)
         elif item_id in response.data["skipped"]:
             self.results["skipped"].append(item)
-        else:
+            response.data["skipped"].discard(item_id)
+        elif item_id in response.data["failed"]:
             self.results["failed"].append(item)
+            response.data["failed"].discard(item_id)
 
     def validate_items(
         self,
     ):
         filtered_items = self.__filter_duplicates()
-        if len(filtered_items) != len(self.items):
-            self.reporter.log_info(
-                f"Dropping duplicates. Found {len(filtered_items)} / {len(self.items)} unique items."
-            )
         self.items = filtered_items
         self.items = self.__filter_invalid_items()
         self.__separate_to_paths()
 
@@ -1077,10 +1076,15 @@ def execute(
         response = None
-        for i in range(0, len(self.item_ids), self.CHUNK_SIZE):
+        unique_item_ids = list(set(self.item_ids))
+        if len(self.items) > len(unique_item_ids):
+            self.reporter.log_info(
+                f"Dropping duplicates. Found {len(unique_item_ids)} / {len(self.items)} unique items."
+            )
+        for i in range(0, len(unique_item_ids), self.CHUNK_SIZE):
             tmp_response = self._service_provider.subsets.add_items(
                 project=self.project,
-                item_ids=self.item_ids[i : i + self.CHUNK_SIZE],  # noqa
+                item_ids=unique_item_ids[i : i + self.CHUNK_SIZE],  # noqa
                 subset=subset,
             )
 
 
 
diff --git a/tests/integration/subsets/test_subsets.py b/tests/integration/subsets/test_subsets.py
index a3a06bd9b..e3ceffb41 100644
--- a/tests/integration/subsets/test_subsets.py
+++ b/tests/integration/subsets/test_subsets.py
@@ -18,4 +18,19 @@ def test_add_items_to_subset(self):
         subset_data = []
         for i in item_names:
             subset_data.append({"name": i["name"], "path": self.PROJECT_NAME})
-        sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+        result = sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+        assert len(subset_data) == len(result["succeeded"])
+
+    def test_add_to_subset_with_duplicates_items(self):
+        with self.assertLogs("sa", level="INFO") as cm:
+            sa.attach_items(self.PROJECT_NAME, [{"name": "earth_mov_001.jpg", "url": "url_1"}])  # noqa
+            item_metadata = sa.get_item_metadata(self.PROJECT_NAME, "earth_mov_001.jpg")
+            subset_data = [{"name": "earth_mov_001.jpg", "path": self.PROJECT_NAME}, {"id": item_metadata["id"]}]
+            result = sa.add_items_to_subset(self.PROJECT_NAME, self.SUBSET_NAME, subset_data)
+            assert len(result["succeeded"]) == 1
+            assert (
+                "INFO:sa:Dropping duplicates. Found 1 / 2 unique items." == cm.output[2]
+            )
+
+
+