Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make relationship parsing more efficient through precomputation #743

Merged
merged 4 commits into from Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 16 additions & 7 deletions src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
Expand Up @@ -35,24 +35,34 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")

existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
relationships
)
relationships.extend(
parse_field_or_log_error(
self.logger,
document_describes,
lambda x: self.parse_document_describes(
doc_spdx_id=doc_spdx_id, described_spdx_ids=x, existing_relationships=relationships
doc_spdx_id=doc_spdx_id,
described_spdx_ids=x,
existing_relationships=existing_relationships_without_comments,
),
[],
)
)

package_dicts: List[Dict] = input_doc_dict.get("packages", [])
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
relationships
)

relationships.extend(
parse_field_or_log_error(
self.logger,
package_dicts,
lambda x: self.parse_has_files(package_dicts=x, existing_relationships=relationships),
lambda x: self.parse_has_files(
package_dicts=x, existing_relationships=existing_relationships_without_comments
),
[],
)
)
Expand Down Expand Up @@ -123,6 +133,7 @@ def parse_document_describes(
def parse_has_files(
self, package_dicts: List[Dict], existing_relationships: List[Relationship]
) -> List[Relationship]:
# assume existing relationships are stripped of comments
logger = Logger()
contains_relationships = []
for package in package_dicts:
Expand Down Expand Up @@ -151,13 +162,11 @@ def parse_has_files(
def check_if_relationship_exists(
self, relationship: Relationship, existing_relationships: List[Relationship]
) -> bool:
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
existing_relationships
)
if relationship in existing_relationships_without_comments:
# assume existing relationships are stripped of comments
if relationship in existing_relationships:
return True
relationship_inverted: Relationship = self.invert_relationship(relationship)
if relationship_inverted in existing_relationships_without_comments:
if relationship_inverted in existing_relationships:
return True

return False
Expand Down
4 changes: 2 additions & 2 deletions tests/spdx/parser/all_formats/test_parse_from_file.py
Expand Up @@ -36,7 +36,7 @@ def test_parse_from_file_with_2_3_example(self, parser, format_name, extension):
doc = parser.parse_from_file(
os.path.join(os.path.dirname(__file__), f"../../data/SPDX{format_name}Example-v2.3.spdx{extension}")
)
assert type(doc) == Document
assert isinstance(doc, Document)
assert len(doc.annotations) == 5
assert len(doc.files) == 5
assert len(doc.packages) == 4
Expand All @@ -48,7 +48,7 @@ def test_parse_json_with_2_2_example(self, parser, format_name, extension):
doc = parser.parse_from_file(
os.path.join(os.path.dirname(__file__), f"../../data/SPDX{format_name}Example-v2.2.spdx{extension}")
)
assert type(doc) == Document
assert isinstance(doc, Document)
assert len(doc.annotations) == 5
assert len(doc.files) == 4
assert len(doc.packages) == 4
Expand Down
4 changes: 2 additions & 2 deletions tests/spdx/parser/jsonlikedict/test_dict_parsing_functions.py
Expand Up @@ -34,7 +34,7 @@ def test_invalid_json_str_to_enum(invalid_json_str, expected_message):
def test_parse_field_or_no_assertion(input_str, expected_type):
resulting_value = parse_field_or_no_assertion(input_str, lambda x: x)

assert type(resulting_value) == expected_type
assert isinstance(resulting_value, expected_type)


@pytest.mark.parametrize(
Expand All @@ -43,4 +43,4 @@ def test_parse_field_or_no_assertion(input_str, expected_type):
def test_parse_field_or_no_assertion_or_none(input_str, expected_type):
resulting_value = parse_field_or_no_assertion_or_none(input_str, lambda x: x)

assert type(resulting_value) == expected_type
assert isinstance(resulting_value, expected_type)
2 changes: 1 addition & 1 deletion tests/spdx/parser/jsonlikedict/test_relationship_parser.py
Expand Up @@ -169,14 +169,14 @@ def test_parse_has_files():
@pytest.mark.parametrize(
"has_files,existing_relationships,contains_relationships",
[
# precondition of parse_has_files: comments must already be stripped from the existing relationships
(
["SPDXRef-File1", "SPDXRef-File2"],
[
Relationship(
spdx_element_id="SPDXRef-Package",
relationship_type=RelationshipType.CONTAINS,
related_spdx_element_id="SPDXRef-File1",
comment="This relationship has a comment.",
),
Relationship(
spdx_element_id="SPDXRef-File2",
Expand Down