def delete_duplicates_from_list(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """Return a new list without duplicate elements, keeping first-occurrence order.

    The input is never modified.

    Args:
        list_with_potential_duplicates: The elements to de-duplicate. ``None`` is
            tolerated and treated like an empty list, because callers pass values
            read with ``dict.get(key, [])``, which still yields ``None`` when the
            key is present with an explicit null value.

    Returns:
        A new list containing each distinct element once, in the order in which
        it first appeared.
    """
    if list_with_potential_duplicates is None:
        return []

    try:
        # Fast path: dict keys are unique and preserve insertion order.
        return list(dict.fromkeys(list_with_potential_duplicates))
    except TypeError:
        # At least one element is unhashable (e.g. a nested list or dict from
        # malformed input). Fall back to an equality-based scan so the elements
        # are kept as they are instead of crashing here.
        # NOTE(review): presumably later validation reports such elements - confirm.
        list_without_duplicates: List[Any] = []
        for element in list_with_potential_duplicates:
            if element not in list_without_duplicates:
                list_without_duplicates.append(element)
        return list_without_duplicates
@pytest.mark.parametrize(
    "document_describes,relationships,parsed_relationships",
    [
        # Explicit relationships already cover both "documentDescribes" entries.
        (
            ["SPDXRef-Package", "SPDXRef-File"],
            [
                {
                    "spdxElementId": "SPDXRef-DOCUMENT",
                    "relatedSpdxElement": "SPDXRef-Package",
                    "relationshipType": "DESCRIBES",
                    "comment": "This relationship has a comment.",
                },
                {
                    "spdxElementId": "SPDXRef-File",
                    "relatedSpdxElement": "SPDXRef-DOCUMENT",
                    "relationshipType": "DESCRIBED_BY",
                    "comment": "This relationship has a comment.",
                },
            ],
            [
                Relationship(
                    spdx_element_id="SPDXRef-DOCUMENT",
                    relationship_type=RelationshipType.DESCRIBES,
                    related_spdx_element_id="SPDXRef-Package",
                    comment="This relationship has a comment.",
                ),
                Relationship(
                    spdx_element_id="SPDXRef-File",
                    relationship_type=RelationshipType.DESCRIBED_BY,
                    related_spdx_element_id="SPDXRef-DOCUMENT",
                    comment="This relationship has a comment.",
                ),
            ],
        ),
        # "documentDescribes" itself lists the same element twice.
        (
            ["SPDXRef-Package", "SPDXRef-File", "SPDXRef-Package"],
            [],
            [
                Relationship(
                    spdx_element_id="SPDXRef-DOCUMENT",
                    relationship_type=RelationshipType.DESCRIBES,
                    related_spdx_element_id="SPDXRef-Package",
                ),
                Relationship(
                    spdx_element_id="SPDXRef-DOCUMENT",
                    relationship_type=RelationshipType.DESCRIBES,
                    related_spdx_element_id="SPDXRef-File",
                ),
            ],
        ),
    ],
)
def test_parse_document_describes_without_duplicating_relationships(document_describes, relationships,
                                                                    parsed_relationships):
    """Parsing a document must yield exactly the expected relationships, each once."""
    parser = RelationshipParser()
    document = {
        "SPDXID": "SPDXRef-DOCUMENT",
        "documentDescribes": document_describes,
        "relationships": relationships,
    }

    result = parser.parse_all_relationships(document)

    assert len(result) == len(parsed_relationships)
    TestCase().assertCountEqual(result, parsed_relationships)


@pytest.mark.parametrize(
    "has_files,existing_relationships,contains_relationships",
    [
        # Both files are already covered by existing relationships.
        (
            ["SPDXRef-File1", "SPDXRef-File2"],
            [
                Relationship(
                    spdx_element_id="SPDXRef-Package",
                    relationship_type=RelationshipType.CONTAINS,
                    related_spdx_element_id="SPDXRef-File1",
                    comment="This relationship has a comment.",
                ),
                Relationship(
                    spdx_element_id="SPDXRef-File2",
                    relationship_type=RelationshipType.CONTAINED_BY,
                    related_spdx_element_id="SPDXRef-Package",
                ),
            ],
            [],
        ),
        # "hasFiles" itself lists the same file twice.
        (
            ["SPDXRef-File1", "SPDXRef-File2", "SPDXRef-File1"],
            [],
            [
                Relationship(
                    spdx_element_id="SPDXRef-Package",
                    relationship_type=RelationshipType.CONTAINS,
                    related_spdx_element_id="SPDXRef-File1",
                ),
                Relationship(
                    spdx_element_id="SPDXRef-Package",
                    relationship_type=RelationshipType.CONTAINS,
                    related_spdx_element_id="SPDXRef-File2",
                ),
            ],
        ),
    ],
)
def test_parse_has_files_without_duplicating_relationships(has_files, existing_relationships,
                                                           contains_relationships):
    """Parsing "hasFiles" must yield exactly the expected CONTAINS relationships, each once."""
    parser = RelationshipParser()
    package_dicts = [{"SPDXID": "SPDXRef-Package", "hasFiles": has_files}]

    result = parser.parse_has_files(package_dicts, existing_relationships=existing_relationships)

    assert len(result) == len(contains_relationships)
    TestCase().assertCountEqual(result, contains_relationships)