Skip to content

Commit

Permalink
[issue_508] add methods to remove duplicated items from list properties
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Apr 18, 2023
1 parent d9a2e01 commit 87c3578
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 9 deletions.
30 changes: 29 additions & 1 deletion src/spdx/document_utils.py
@@ -1,7 +1,8 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Union
from copy import deepcopy
from typing import Any, Dict, List, Union

from spdx.model.document import Document
from spdx.model.file import File
Expand Down Expand Up @@ -29,3 +30,30 @@ def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package,
contained_spdx_elements.update({snippet.spdx_id: snippet for snippet in document.snippets})

return contained_spdx_elements


def create_document_without_duplicates(document: Document) -> Document:
    """Return a deep copy of ``document`` whose list attributes hold no repeated entries.

    The passed document is never modified; all work happens on a copy made with
    ``deepcopy``. The elements visited are the creation info, the files, the
    packages, the snippets and the extracted licensing info. For each of them,
    every instance attribute whose value is a ``list`` is replaced by the result
    of ``create_list_without_duplicates``. Only the element's own attributes are
    inspected; objects nested inside those lists are not traversed.
    """
    deduplicated_document = deepcopy(document)

    # creation_info is a single object, so wrap it to treat it like the other groups.
    element_groups = [
        [deduplicated_document.creation_info],
        deduplicated_document.files,
        deduplicated_document.packages,
        deduplicated_document.snippets,
        deduplicated_document.extracted_licensing_info,
    ]

    for group in element_groups:
        for spdx_element in group:
            for attribute_name, attribute_value in vars(spdx_element).items():
                if not isinstance(attribute_value, list):
                    continue
                # Re-assigning an existing key keeps the dict size stable while iterating.
                setattr(spdx_element, attribute_name, create_list_without_duplicates(attribute_value))

    return deduplicated_document


def create_list_without_duplicates(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """Return a new list with the first occurrence of each distinct item, in original order.

    Items are compared with ``==`` (via the ``in`` operator), so unhashable items
    are supported. Every kept item is a deep copy, which means the result shares
    no objects with the input list.
    """
    unique_items: List[Any] = []
    for candidate in list_with_potential_duplicates:
        if candidate in unique_items:
            continue
        unique_items.append(deepcopy(candidate))

    return unique_items
9 changes: 8 additions & 1 deletion src/spdx/writer/json/json_writer.py
Expand Up @@ -4,14 +4,19 @@
import json
from typing import List

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to json and writes it to a file with the provided name. Unless validate is set
Expand All @@ -22,6 +27,8 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)
if converter is None:
converter = DocumentConverter()
document_dict = converter.convert(document)
Expand Down
6 changes: 4 additions & 2 deletions src/spdx/writer/rdf/rdf_writer.py
Expand Up @@ -6,6 +6,7 @@
from rdflib import DOAP, Graph
from rdflib.compare import to_isomorphic

from spdx.document_utils import create_document_without_duplicates
from spdx.model.document import Document
from spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE
from spdx.validation.document_validator import validate_full_spdx_document
Expand All @@ -19,12 +20,13 @@
from spdx.writer.rdf.snippet_writer import add_snippet_to_graph


def write_document_to_file(document: Document, file_name: str, validate: bool):
def write_document_to_file(document: Document, file_name: str, validate: bool, drop_duplicates: bool = True):
if validate:
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")

if drop_duplicates:
document = create_document_without_duplicates(document)
graph = Graph()
doc_namespace = document.creation_info.document_namespace
external_doc_ref_to_namespace: Dict[str, str] = {
Expand Down
5 changes: 4 additions & 1 deletion src/spdx/writer/tagvalue/tagvalue_writer.py
Expand Up @@ -10,6 +10,7 @@
# limitations under the License.
from typing import List, TextIO

from spdx.document_utils import create_document_without_duplicates
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage
Expand All @@ -29,11 +30,13 @@
)


def write_document_to_file(document: Document, file_name: str, validate: bool = True):
def write_document_to_file(document: Document, file_name: str, validate: bool = True, drop_duplicates: bool = True):
if validate:
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)

with open(file_name, "w") as out:
write_document(document, out)
Expand Down
10 changes: 9 additions & 1 deletion src/spdx/writer/xml/xml_writer.py
Expand Up @@ -5,14 +5,19 @@

import xmltodict

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to XML and writes it to a file with the provided name. Unless validate is set
Expand All @@ -23,6 +28,9 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)

if converter is None:
converter = DocumentConverter()
document_dict = {"Document": converter.convert(document)}
Expand Down
9 changes: 8 additions & 1 deletion src/spdx/writer/yaml/yaml_writer.py
Expand Up @@ -5,14 +5,19 @@

import yaml

from spdx.document_utils import create_document_without_duplicates
from spdx.jsonschema.document_converter import DocumentConverter
from spdx.model.document import Document
from spdx.validation.document_validator import validate_full_spdx_document
from spdx.validation.validation_message import ValidationMessage


def write_document_to_file(
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
document: Document,
file_name: str,
validate: bool = True,
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
"""
Serializes the provided document to yaml and writes it to a file with the provided name. Unless validate is set
Expand All @@ -23,6 +28,8 @@ def write_document_to_file(
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
if validation_messages:
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
if drop_duplicates:
document = create_document_without_duplicates(document)
if converter is None:
converter = DocumentConverter()
document_dict = converter.convert(document)
Expand Down
95 changes: 93 additions & 2 deletions tests/spdx/test_document_utils.py
Expand Up @@ -5,8 +5,28 @@

import pytest

from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture
from spdx.document_utils import (
create_document_without_duplicates,
create_list_without_duplicates,
get_contained_spdx_element_ids,
get_contained_spdx_elements,
get_element_from_spdx_id,
)
from spdx.model.file import FileType
from spdx.model.spdx_no_assertion import SpdxNoAssertion
from spdx.model.spdx_none import SpdxNone
from tests.spdx.fixtures import (
actor_fixture,
checksum_fixture,
creation_info_fixture,
document_fixture,
external_document_ref_fixture,
external_package_ref_fixture,
extracted_licensing_info_fixture,
file_fixture,
package_fixture,
snippet_fixture,
)


@pytest.fixture
Expand Down Expand Up @@ -34,3 +54,74 @@ def test_get_contained_spdx_elements(variables):
assert contained_elements[package.spdx_id] == package
assert contained_elements[file.spdx_id] == file
assert contained_elements[snippet.spdx_id] == snippet


def test_create_list_without_duplicates():
    """Repeated integers are dropped while the order of first occurrences is kept."""
    values_with_repeats = [1, 2, 3, 5, 1, 67, 9, 67]

    deduplicated_values = create_list_without_duplicates(values_with_repeats)

    assert deduplicated_values == [1, 2, 3, 5, 67, 9]


def test_create_document_without_duplicates():
    """A document whose list properties each hold one value twice is reduced to single entries."""
    # Input document: every list property exercised here contains the same value twice.
    duplicated_creation_info = creation_info_fixture(
        creators=[actor_fixture(name="creatorName"), actor_fixture(name="creatorName")],
        external_document_refs=[external_document_ref_fixture(), external_document_ref_fixture()],
    )
    duplicated_package = package_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion(), SpdxNoAssertion()],
        external_references=[external_package_ref_fixture(), external_package_ref_fixture()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_file = file_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        file_types=[FileType.TEXT, FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion(), SpdxNoAssertion()],
        contributors=["duplicated contributor", "duplicated contributor"],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_snippet = snippet_fixture(
        license_info_in_snippet=[SpdxNone(), SpdxNone()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    duplicated_licensing_info = extracted_licensing_info_fixture(
        cross_references=["duplicated reference", "duplicated reference"]
    )
    input_document = document_fixture(
        creation_info=duplicated_creation_info,
        packages=[duplicated_package],
        files=[duplicated_file],
        snippets=[duplicated_snippet],
        extracted_licensing_info=[duplicated_licensing_info],
    )

    # Reference document: identical, except each list holds its value only once.
    unique_creation_info = creation_info_fixture(
        creators=[actor_fixture(name="creatorName")],
        external_document_refs=[external_document_ref_fixture()],
    )
    unique_package = package_fixture(
        checksums=[checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion()],
        external_references=[external_package_ref_fixture()],
        attribution_texts=["duplicated text"],
    )
    unique_file = file_fixture(
        checksums=[checksum_fixture()],
        file_types=[FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion()],
        contributors=["duplicated contributor"],
        attribution_texts=["duplicated text"],
    )
    unique_snippet = snippet_fixture(
        license_info_in_snippet=[SpdxNone()],
        attribution_texts=["duplicated text"],
    )
    unique_licensing_info = extracted_licensing_info_fixture(cross_references=["duplicated reference"])
    reference_document = document_fixture(
        creation_info=unique_creation_info,
        packages=[unique_package],
        files=[unique_file],
        snippets=[unique_snippet],
        extracted_licensing_info=[unique_licensing_info],
    )

    result_document = create_document_without_duplicates(input_document)

    assert result_document == reference_document

0 comments on commit 87c3578

Please sign in to comment.