Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[issue_508] add methods to remove duplicated items from list properties #591

Merged
merged 1 commit into from Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 29 additions & 1 deletion src/spdx/document_utils.py
@@ -1,7 +1,8 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Union
from copy import deepcopy
from typing import Any, Dict, List, Union

from spdx.model.document import Document
from spdx.model.file import File
Expand Down Expand Up @@ -29,3 +30,30 @@ def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package,
contained_spdx_elements.update({snippet.spdx_id: snippet for snippet in document.snippets})

return contained_spdx_elements


def create_document_without_duplicates(document: Document) -> Document:
    """
    Build a copy of the given document in which list-valued attributes of its elements hold no repeated entries.

    The input document is deep-copied first and is never modified. On the copy, every attribute found in the
    instance dictionary of the creation info and of each file, package, snippet and extracted licensing info
    is inspected; attributes whose value is a list are replaced by the result of
    create_list_without_duplicates. Attributes of any other type are left as they are.
    """
    deduplicated_document = deepcopy(document)

    # The creation info is a single object; wrap it so it can be handled like the element lists.
    element_groups = (
        [deduplicated_document.creation_info],
        deduplicated_document.files,
        deduplicated_document.packages,
        deduplicated_document.snippets,
        deduplicated_document.extracted_licensing_info,
    )

    for group in element_groups:
        for spdx_element in group:
            for attribute_name, attribute_value in spdx_element.__dict__.items():
                if not isinstance(attribute_value, list):
                    continue
                setattr(spdx_element, attribute_name, create_list_without_duplicates(attribute_value))

    return deduplicated_document


def create_list_without_duplicates(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """
    Return a new list holding the entries of the given list with repeated entries left out.

    Only the first occurrence of each entry is kept, so the original ordering is preserved. Entries are
    compared with the ``in`` operator against the entries already kept, which means they do not have to be
    hashable. The input list itself is not modified.
    """
    unique_entries: List[Any] = []
    for candidate in list_with_potential_duplicates:
        if candidate in unique_entries:
            continue
        unique_entries.append(candidate)

    return unique_entries
9 changes: 8 additions & 1 deletion src/spdx/writer/json/json_writer.py
Expand Up @@ -4,14 +4,19 @@
import json
from typing import List

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to json and writes it to a file with the provided name. Unless validate is set
Expand All @@ -22,6 +27,8 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)
if converter is None:
converter = DocumentConverter()
document_dict = converter.convert(document)
Expand Down
6 changes: 4 additions & 2 deletions src/spdx/writer/rdf/rdf_writer.py
Expand Up @@ -6,6 +6,7 @@
from rdflib import DOAP, Graph
from rdflib.compare import to_isomorphic

from spdx.document_utils import create_document_without_duplicates
from spdx.model.document import Document
from spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE
from spdx.validation.document_validator import validate_full_spdx_document
Expand All @@ -19,12 +20,13 @@
from spdx.writer.rdf.snippet_writer import add_snippet_to_graph


def write_document_to_file(document: Document, file_name: str, validate: bool):
def write_document_to_file(document: Document, file_name: str, validate: bool, drop_duplicates: bool = True):
if validate:
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")

if drop_duplicates:
document = create_document_without_duplicates(document)
graph = Graph()
doc_namespace = document.creation_info.document_namespace
external_doc_ref_to_namespace: Dict[str, str] = {
Expand Down
5 changes: 4 additions & 1 deletion src/spdx/writer/tagvalue/tagvalue_writer.py
Expand Up @@ -10,6 +10,7 @@
# limitations under the License.
from typing import List, TextIO

from spdx.document_utils import create_document_without_duplicates
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage
Expand All @@ -29,11 +30,13 @@
)


def write_document_to_file(document: Document, file_name: str, validate: bool = True):
def write_document_to_file(document: Document, file_name: str, validate: bool = True, drop_duplicates: bool = True):
if validate:
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)

with open(file_name, "w") as out:
write_document(document, out)
Expand Down
10 changes: 9 additions & 1 deletion src/spdx/writer/xml/xml_writer.py
Expand Up @@ -5,14 +5,19 @@

import xmltodict

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to XML and writes it to a file with the provided name. Unless validate is set
Expand All @@ -23,6 +28,9 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)

if converter is None:
converter = DocumentConverter()
document_dict = {"Document": converter.convert(document)}
Expand Down
9 changes: 8 additions & 1 deletion src/spdx/writer/yaml/yaml_writer.py
Expand Up @@ -5,14 +5,19 @@

import yaml

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to yaml and writes it to a file with the provided name. Unless validate is set
Expand All @@ -23,6 +28,8 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)
if converter is None:
converter = DocumentConverter()
document_dict = converter.convert(document)
Expand Down
95 changes: 93 additions & 2 deletions tests/spdx/test_document_utils.py
Expand Up @@ -5,8 +5,28 @@

import pytest

from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture
from spdx.document_utils import (
create_document_without_duplicates,
create_list_without_duplicates,
get_contained_spdx_element_ids,
get_contained_spdx_elements,
get_element_from_spdx_id,
)
from spdx.model.file import FileType
from spdx.model.spdx_no_assertion import SpdxNoAssertion
from spdx.model.spdx_none import SpdxNone
from tests.spdx.fixtures import (
actor_fixture,
checksum_fixture,
creation_info_fixture,
document_fixture,
external_document_ref_fixture,
external_package_ref_fixture,
extracted_licensing_info_fixture,
file_fixture,
package_fixture,
snippet_fixture,
)


@pytest.fixture
Expand Down Expand Up @@ -34,3 +54,74 @@ def test_get_contained_spdx_elements(variables):
assert contained_elements[package.spdx_id] == package
assert contained_elements[file.spdx_id] == file
assert contained_elements[snippet.spdx_id] == snippet


def test_create_list_without_duplicates():
    """Repeated entries are dropped and the first occurrence of each entry keeps its position."""
    expected_list = [1, 2, 3, 5, 67, 9]

    actual_list = create_list_without_duplicates([1, 2, 3, 5, 1, 67, 9, 67])

    assert actual_list == expected_list


def test_create_document_without_duplicates():
    """A document whose list properties each hold the same entry twice is reduced to one entry per list."""
    # Input: every list property below carries two equal entries.
    duplicated_creation_info = creation_info_fixture(
        creators=[actor_fixture(name="creatorName"), actor_fixture(name="creatorName")],
        external_document_refs=[external_document_ref_fixture(), external_document_ref_fixture()],
    )
    duplicated_package = package_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion(), SpdxNoAssertion()],
        external_references=[external_package_ref_fixture(), external_package_ref_fixture()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_file = file_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        file_types=[FileType.TEXT, FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion(), SpdxNoAssertion()],
        contributors=["duplicated contributor", "duplicated contributor"],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_snippet = snippet_fixture(
        license_info_in_snippet=[SpdxNone(), SpdxNone()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_licensing_info = extracted_licensing_info_fixture(
        cross_references=["duplicated reference", "duplicated reference"]
    )
    document_with_duplicates = document_fixture(
        creation_info=duplicated_creation_info,
        packages=[duplicated_package],
        files=[duplicated_file],
        snippets=[duplicated_snippet],
        extracted_licensing_info=[duplicated_licensing_info],
    )

    # Expectation: the same document with exactly one entry left in each list property.
    unique_creation_info = creation_info_fixture(
        creators=[actor_fixture(name="creatorName")],
        external_document_refs=[external_document_ref_fixture()],
    )
    unique_package = package_fixture(
        checksums=[checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion()],
        external_references=[external_package_ref_fixture()],
        attribution_texts=["duplicated text"],
    )
    unique_file = file_fixture(
        checksums=[checksum_fixture()],
        file_types=[FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion()],
        contributors=["duplicated contributor"],
        attribution_texts=["duplicated text"],
    )
    unique_snippet = snippet_fixture(
        license_info_in_snippet=[SpdxNone()],
        attribution_texts=["duplicated text"],
    )
    unique_licensing_info = extracted_licensing_info_fixture(cross_references=["duplicated reference"])
    expected_document = document_fixture(
        creation_info=unique_creation_info,
        packages=[unique_package],
        files=[unique_file],
        snippets=[unique_snippet],
        extracted_licensing_info=[unique_licensing_info],
    )

    actual_document = create_document_without_duplicates(document_with_duplicates)

    assert actual_document == expected_document