Skip to content

Commit

Permalink
[issue-400] ignore duplicated relationships while parsing
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Jan 12, 2023
1 parent 7b71d90 commit 3fbc88b
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 28 deletions.
5 changes: 5 additions & 0 deletions src/spdx/parser/jsonlikedict/dict_parsing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,8 @@ def parse_list_of_elements(list_of_elements: List[Dict], method_to_parse_element
method_to_parse_element)
raise_parsing_error_if_logger_has_messages(logger)
return parsed_elements


def delete_duplicates_from_list(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """Return a new list without duplicates, keeping the first occurrence of each element.

    The relative order of the remaining elements is preserved and the input
    list is not modified.

    Args:
        list_with_potential_duplicates: The elements to deduplicate. ``None``
            (e.g. a JSON ``null`` read via ``dict.get``) is treated like an
            empty list.

    Returns:
        A new list containing each distinct element exactly once.
    """
    if list_with_potential_duplicates is None:
        return []

    try:
        # dicts keep insertion order, so this drops later duplicates in O(n).
        return list(dict.fromkeys(list_with_potential_duplicates))
    except TypeError:
        # At least one element is unhashable (for instance a dict or list coming
        # from parsed JSON). Fall back to an equality-based scan so the values
        # are passed on unchanged instead of failing here with a TypeError.
        list_without_duplicates: List[Any] = []
        for element in list_with_potential_duplicates:
            if element not in list_without_duplicates:
                list_without_duplicates.append(element)
        return list_without_duplicates
9 changes: 5 additions & 4 deletions src/spdx/parser/jsonlikedict/relationship_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
from spdx.model.relationship import Relationship, RelationshipType
from spdx.model.typing.constructor_type_errors import ConstructorTypeErrors
from spdx.parser.error import SPDXParsingError
from spdx.parser.jsonlikedict.dict_parsing_functions import raise_parsing_error_if_logger_has_messages, json_str_to_enum_name, \
from spdx.parser.jsonlikedict.dict_parsing_functions import raise_parsing_error_if_logger_has_messages, \
json_str_to_enum_name, \
construct_or_raise_parsing_error, \
parse_field_or_log_error, parse_field_or_no_assertion_or_none
parse_field_or_log_error, parse_field_or_no_assertion_or_none, delete_duplicates_from_list
from spdx.parser.logger import Logger


Expand All @@ -31,7 +32,7 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
relationships.extend(
parse_field_or_log_error(self.logger, relationship_dicts, self.parse_relationship, [], True))

document_describes: List[str] = input_doc_dict.get("documentDescribes", [])
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")

relationships.extend(
Expand Down Expand Up @@ -102,7 +103,7 @@ def parse_has_files(self, package_dicts: List[Dict], existing_relationships: Lis
contains_relationships = []
for package in package_dicts:
package_spdx_id: Optional[str] = package.get("SPDXID")
contained_files: Optional[str] = package.get("hasFiles")
contained_files: List[str] = delete_duplicates_from_list(package.get("hasFiles", []))
if not contained_files:
continue
for file_spdx_id in contained_files:
Expand Down
2 changes: 1 addition & 1 deletion tests/parser/json/test_json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_parse_json_with_2_3_example():
assert len(doc.files) == 5
assert len(doc.packages) == 4
assert len(doc.snippets) == 1
assert len(doc.relationships) == 23
assert len(doc.relationships) == 13
assert len(doc.extracted_licensing_info) == 5

def test_parse_json_with_2_2_example():
Expand Down
71 changes: 48 additions & 23 deletions tests/parser/jsonlikedict/test_relationship_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,26 +79,39 @@ def test_parse_document_describes():
Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Snippet")])


def test_parse_document_describes_without_duplicating_relationships():
@pytest.mark.parametrize("document_describes,relationships,parsed_relationships",
[(["SPDXRef-Package", "SPDXRef-File"], [
{"spdxElementId": "SPDXRef-DOCUMENT", "relatedSpdxElement": "SPDXRef-Package",
"relationshipType": "DESCRIBES", "comment": "This relationship has a comment."},
{"spdxElementId": "SPDXRef-File", "relatedSpdxElement": "SPDXRef-DOCUMENT",
"relationshipType": "DESCRIBED_BY", "comment": "This relationship has a comment."}], [
Relationship(related_spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.DESCRIBES,
spdx_element_id="SPDXRef-DOCUMENT",
comment="This relationship has a comment."),
Relationship(related_spdx_element_id="SPDXRef-DOCUMENT",
relationship_type=RelationshipType.DESCRIBED_BY,
spdx_element_id="SPDXRef-File",
comment="This relationship has a comment.")]),
(["SPDXRef-Package", "SPDXRef-File", "SPDXRef-Package"], [], [
Relationship(related_spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.DESCRIBES,
spdx_element_id="SPDXRef-DOCUMENT"),
Relationship(related_spdx_element_id="SPDXRef-File",
relationship_type=RelationshipType.DESCRIBES,
spdx_element_id="SPDXRef-DOCUMENT")])])
def test_parse_document_describes_without_duplicating_relationships(document_describes, relationships,
parsed_relationships):
relationship_parser = RelationshipParser()
document_dict = {
"SPDXID": "SPDXRef-DOCUMENT",
"documentDescribes": ["SPDXRef-Package", "SPDXRef-File"],
"relationships": [{"spdxElementId": "SPDXRef-DOCUMENT", "relatedSpdxElement": "SPDXRef-Package",
"relationshipType": "DESCRIBES",
"comment": "This relationship has a comment."},
{"spdxElementId": "SPDXRef-File", "relatedSpdxElement": "SPDXRef-DOCUMENT",
"relationshipType": "DESCRIBED_BY", "comment": "This relationship has a comment."}
]}
"documentDescribes": document_describes,
"relationships": relationships}

relationships = relationship_parser.parse_all_relationships(document_dict)

assert len(relationships) == 2
TestCase().assertCountEqual(relationships, [
Relationship(related_spdx_element_id="SPDXRef-Package", relationship_type=RelationshipType.DESCRIBES,
spdx_element_id="SPDXRef-DOCUMENT", comment="This relationship has a comment."),
Relationship(related_spdx_element_id="SPDXRef-DOCUMENT", relationship_type=RelationshipType.DESCRIBED_BY,
spdx_element_id="SPDXRef-File", comment="This relationship has a comment.")])
assert len(relationships) == len(parsed_relationships)
TestCase().assertCountEqual(relationships, parsed_relationships)


def test_parse_has_files():
Expand All @@ -121,22 +134,34 @@ def test_parse_has_files():
related_spdx_element_id="SPDXRef-File2")])


def test_parse_has_files_without_duplicating_relationships():
@pytest.mark.parametrize("has_files,existing_relationships,contains_relationships",
[(["SPDXRef-File1", "SPDXRef-File2"], [
Relationship(spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.CONTAINS,
related_spdx_element_id="SPDXRef-File1",
comment="This relationship has a comment."),
Relationship(spdx_element_id="SPDXRef-File2",
relationship_type=RelationshipType.CONTAINED_BY,
related_spdx_element_id="SPDXRef-Package")], []),
(["SPDXRef-File1", "SPDXRef-File2", "SPDXRef-File1"], [], [
Relationship(spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.CONTAINS,
related_spdx_element_id="SPDXRef-File1"),
Relationship(spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.CONTAINS,
related_spdx_element_id="SPDXRef-File2")])])
def test_parse_has_files_without_duplicating_relationships(has_files, existing_relationships,
contains_relationships):
relationship_parser = RelationshipParser()
document_dict = {
"packages":
[{
"SPDXID": "SPDXRef-Package",
"hasFiles": ["SPDXRef-File1", "SPDXRef-File2"]
"hasFiles": has_files
}]
}
existing_relationships = [
Relationship(spdx_element_id="SPDXRef-Package", relationship_type=RelationshipType.CONTAINS,
related_spdx_element_id="SPDXRef-File1", comment="This relationship has a comment."),
Relationship(spdx_element_id="SPDXRef-File2", relationship_type=RelationshipType.CONTAINED_BY,
related_spdx_element_id="SPDXRef-Package")]

relationships = relationship_parser.parse_has_files(document_dict.get("packages"),
existing_relationships=existing_relationships)

assert len(relationships) == 0
assert len(relationships) == len(contains_relationships)
TestCase().assertCountEqual(relationships, contains_relationships)

0 comments on commit 3fbc88b

Please sign in to comment.