from copy import deepcopy
from typing import TYPE_CHECKING, Any, List

if TYPE_CHECKING:  # only needed for the annotations below, never at runtime
    from spdx.model.document import Document


def create_document_without_duplicates(document: "Document") -> "Document":
    """Return a deep copy of ``document`` whose list-valued attributes contain no duplicates.

    The document passed in is never modified; all changes are made on the copy. On the copy, every
    attribute that holds a ``list`` on the creation info and on each file, package, snippet and
    extracted licensing info is replaced by a list that keeps only the first occurrence of each
    element (see ``create_list_without_duplicates``). The document's own top-level collections
    (``files``, ``packages``, ``snippets``, ``extracted_licensing_info``) are not deduplicated.

    :param document: the document to clean up
    :return: a new, independent document with duplicate list entries removed
    """
    document_without_duplicates = deepcopy(document)
    element_groups = [
        [document_without_duplicates.creation_info],  # wrapped so it can be handled like the lists below
        document_without_duplicates.files,
        document_without_duplicates.packages,
        document_without_duplicates.snippets,
        document_without_duplicates.extracted_licensing_info,
    ]
    for elements in element_groups:
        for element in elements:
            # Iterate over a snapshot: setattr() below writes to the very object whose __dict__ would
            # otherwise still be under iteration, and a setter that stores an additional key would
            # abort the loop with "dictionary changed size during iteration".
            for key, value in list(element.__dict__.items()):
                if isinstance(value, list):
                    setattr(element, key, create_list_without_duplicates(value))

    return document_without_duplicates


def create_list_without_duplicates(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """Return a new list holding the first occurrence of every element, in the original order.

    Elements are compared with ``==`` (list membership) rather than collected in a ``set``, so the
    function also works for unhashable elements. The input list is left unchanged.

    :param list_with_potential_duplicates: the list to deduplicate
    :return: a new list without repeated elements
    """
    list_without_duplicates: List[Any] = []
    for element in list_with_potential_duplicates:
        if element not in list_without_duplicates:
            list_without_duplicates.append(element)

    return list_without_duplicates
from spdx.model.document import Document from spdx.validation.document_validator import validate_full_spdx_document @@ -11,7 +12,11 @@ def write_document_to_file( - document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None + document: Document, + file_name: str, + validate: bool = True, + converter: DocumentConverter = None, + drop_duplicates: bool = True, ): """ Serializes the provided document to json and writes it to a file with the provided name. Unless validate is set @@ -22,6 +27,8 @@ def write_document_to_file( validation_messages: List[ValidationMessage] = validate_full_spdx_document(document) if validation_messages: raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}") + if drop_duplicates: + document = create_document_without_duplicates(document) if converter is None: converter = DocumentConverter() document_dict = converter.convert(document) diff --git a/src/spdx/writer/rdf/rdf_writer.py b/src/spdx/writer/rdf/rdf_writer.py index 0e07a5818..78d30f6df 100644 --- a/src/spdx/writer/rdf/rdf_writer.py +++ b/src/spdx/writer/rdf/rdf_writer.py @@ -6,6 +6,7 @@ from rdflib import DOAP, Graph from rdflib.compare import to_isomorphic +from spdx.document_utils import create_document_without_duplicates from spdx.model.document import Document from spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE from spdx.validation.document_validator import validate_full_spdx_document @@ -19,12 +20,13 @@ from spdx.writer.rdf.snippet_writer import add_snippet_to_graph -def write_document_to_file(document: Document, file_name: str, validate: bool): +def write_document_to_file(document: Document, file_name: str, validate: bool, drop_duplicates: bool = True): if validate: validation_messages: List[ValidationMessage] = validate_full_spdx_document(document) if validation_messages: raise ValueError(f"Document is not valid. 
The following errors were detected: {validation_messages}") - + if drop_duplicates: + document = create_document_without_duplicates(document) graph = Graph() doc_namespace = document.creation_info.document_namespace external_doc_ref_to_namespace: Dict[str, str] = { diff --git a/src/spdx/writer/tagvalue/tagvalue_writer.py b/src/spdx/writer/tagvalue/tagvalue_writer.py index 3c94e3dc4..418d3afee 100644 --- a/src/spdx/writer/tagvalue/tagvalue_writer.py +++ b/src/spdx/writer/tagvalue/tagvalue_writer.py @@ -10,6 +10,7 @@ # limitations under the License. from typing import List, TextIO +from spdx.document_utils import create_document_without_duplicates from spdx.model.document import Document from spdx.validation.document_validator import validate_full_spdx_document from spdx.validation.validation_message import ValidationMessage @@ -29,11 +30,13 @@ ) -def write_document_to_file(document: Document, file_name: str, validate: bool = True): +def write_document_to_file(document: Document, file_name: str, validate: bool = True, drop_duplicates: bool = True): if validate: validation_messages: List[ValidationMessage] = validate_full_spdx_document(document) if validation_messages: raise ValueError(f"Document is not valid. 
The following errors were detected: {validation_messages}") + if drop_duplicates: + document = create_document_without_duplicates(document) with open(file_name, "w") as out: write_document(document, out) diff --git a/src/spdx/writer/xml/xml_writer.py b/src/spdx/writer/xml/xml_writer.py index 678010183..d9176cccc 100644 --- a/src/spdx/writer/xml/xml_writer.py +++ b/src/spdx/writer/xml/xml_writer.py @@ -5,6 +5,7 @@ import xmltodict +from spdx.document_utils import create_document_without_duplicates from spdx.jsonschema.document_converter import DocumentConverter from spdx.model.document import Document from spdx.validation.document_validator import validate_full_spdx_document @@ -12,7 +13,11 @@ def write_document_to_file( - document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None + document: Document, + file_name: str, + validate: bool = True, + converter: DocumentConverter = None, + drop_duplicates: bool = True, ): """ Serializes the provided document to XML and writes it to a file with the provided name. Unless validate is set @@ -23,6 +28,9 @@ def write_document_to_file( validation_messages: List[ValidationMessage] = validate_full_spdx_document(document) if validation_messages: raise ValueError(f"Document is not valid. 
The following errors were detected: {validation_messages}") + if drop_duplicates: + document = create_document_without_duplicates(document) + if converter is None: converter = DocumentConverter() document_dict = {"Document": converter.convert(document)} diff --git a/src/spdx/writer/yaml/yaml_writer.py b/src/spdx/writer/yaml/yaml_writer.py index de3e27571..e93915f21 100644 --- a/src/spdx/writer/yaml/yaml_writer.py +++ b/src/spdx/writer/yaml/yaml_writer.py @@ -5,6 +5,7 @@ import yaml +from spdx.document_utils import create_document_without_duplicates from spdx.jsonschema.document_converter import DocumentConverter from spdx.model.document import Document from spdx.validation.document_validator import validate_full_spdx_document @@ -12,7 +13,11 @@ def write_document_to_file( - document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None + document: Document, + file_name: str, + validate: bool = True, + converter: DocumentConverter = None, + drop_duplicates: bool = True, ): """ Serializes the provided document to yaml and writes it to a file with the provided name. Unless validate is set @@ -23,6 +28,8 @@ def write_document_to_file( validation_messages: List[ValidationMessage] = validate_full_spdx_document(document) if validation_messages: raise ValueError(f"Document is not valid. 
The following errors were detected: {validation_messages}") + if drop_duplicates: + document = create_document_without_duplicates(document) if converter is None: converter = DocumentConverter() document_dict = converter.convert(document) diff --git a/tests/spdx/test_document_utils.py b/tests/spdx/test_document_utils.py index a81a39bd8..bb40da4ff 100644 --- a/tests/spdx/test_document_utils.py +++ b/tests/spdx/test_document_utils.py @@ -5,8 +5,28 @@ import pytest -from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id -from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture +from spdx.document_utils import ( + create_document_without_duplicates, + create_list_without_duplicates, + get_contained_spdx_element_ids, + get_contained_spdx_elements, + get_element_from_spdx_id, +) +from spdx.model.file import FileType +from spdx.model.spdx_no_assertion import SpdxNoAssertion +from spdx.model.spdx_none import SpdxNone +from tests.spdx.fixtures import ( + actor_fixture, + checksum_fixture, + creation_info_fixture, + document_fixture, + external_document_ref_fixture, + external_package_ref_fixture, + extracted_licensing_info_fixture, + file_fixture, + package_fixture, + snippet_fixture, +) @pytest.fixture @@ -34,3 +54,74 @@ def test_get_contained_spdx_elements(variables): assert contained_elements[package.spdx_id] == package assert contained_elements[file.spdx_id] == file assert contained_elements[snippet.spdx_id] == snippet + + +def test_create_list_without_duplicates(): + list_with_duplicates = [1, 2, 3, 5, 1, 67, 9, 67] + + list_without_duplicates = create_list_without_duplicates(list_with_duplicates) + + assert list_without_duplicates == [1, 2, 3, 5, 67, 9] + + +def test_create_document_without_duplicates(): + document = document_fixture( + creation_info=creation_info_fixture( + creators=[actor_fixture(name="creatorName"), actor_fixture(name="creatorName")], + 
external_document_refs=[external_document_ref_fixture(), external_document_ref_fixture()], + ), + packages=[ + package_fixture( + checksums=[checksum_fixture(), checksum_fixture()], + license_info_from_files=[SpdxNoAssertion(), SpdxNoAssertion()], + external_references=[external_package_ref_fixture(), external_package_ref_fixture()], + attribution_texts=["duplicated text", "duplicated text"], + ) + ], + files=[ + file_fixture( + checksums=[checksum_fixture(), checksum_fixture()], + file_types=[FileType.TEXT, FileType.TEXT], + license_info_in_file=[SpdxNoAssertion(), SpdxNoAssertion()], + contributors=["duplicated contributor", "duplicated contributor"], + attribution_texts=["duplicated text", "duplicated text"], + ) + ], + snippets=[ + snippet_fixture( + license_info_in_snippet=[SpdxNone(), SpdxNone()], + attribution_texts=["duplicated text", "duplicated text"], + ) + ], + extracted_licensing_info=[ + extracted_licensing_info_fixture(cross_references=["duplicated reference", "duplicated reference"]) + ], + ) + expected_document = document_fixture( + creation_info=creation_info_fixture( + creators=[actor_fixture(name="creatorName")], external_document_refs=[external_document_ref_fixture()] + ), + packages=[ + package_fixture( + checksums=[checksum_fixture()], + license_info_from_files=[SpdxNoAssertion()], + external_references=[external_package_ref_fixture()], + attribution_texts=["duplicated text"], + ) + ], + files=[ + file_fixture( + checksums=[checksum_fixture()], + file_types=[FileType.TEXT], + license_info_in_file=[SpdxNoAssertion()], + contributors=["duplicated contributor"], + attribution_texts=["duplicated text"], + ) + ], + snippets=[snippet_fixture(license_info_in_snippet=[SpdxNone()], attribution_texts=["duplicated text"])], + extracted_licensing_info=[extracted_licensing_info_fixture(cross_references=["duplicated reference"])], + ) + + document_without_duplicates = create_document_without_duplicates(document) + + assert document_without_duplicates == 
expected_document