From f830ebae9f6e29fdb3d66fe25c70d4c3abf858c4 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 28 Nov 2022 13:49:03 +0100 Subject: [PATCH] refactor Checksum identifier Signed-off-by: Meret Behrens --- spdx/checksum.py | 35 ++++++++++++++++------------- spdx/parsers/jsonyamlxmlbuilders.py | 7 +++--- spdx/parsers/rdf.py | 2 +- spdx/parsers/rdfbuilders.py | 17 ++++++++------ spdx/parsers/tagvaluebuilders.py | 8 +++---- tests/test_checksum.py | 25 ++++++++++++--------- tests/test_document.py | 22 +++++++++--------- tests/test_jsonyamlxml_writer.py | 4 ++-- 8 files changed, 66 insertions(+), 54 deletions(-) diff --git a/spdx/checksum.py b/spdx/checksum.py index 3193cdfb8..0cfd8dca8 100644 --- a/spdx/checksum.py +++ b/spdx/checksum.py @@ -39,34 +39,39 @@ def checksum_to_rdf(self): return "checksumAlgorithm_" + self.name.lower() @classmethod - def checksum_from_rdf(cls, identifier: str) -> str: + def checksum_from_rdf(cls, identifier: str) -> 'ChecksumAlgorithm': identifier = identifier.split('_', 1)[-1].upper() blake_checksum = re.compile(r"^(BLAKE2B)(256|384|512)$", re.UNICODE) match = blake_checksum.match(identifier) if match: identifier = match[1] + '_' + match[2] - return identifier + if identifier not in ChecksumAlgorithm.__members__: + raise ValueError(f"Invalid algorithm for checksum: {identifier}") + return ChecksumAlgorithm[identifier] @classmethod - def checksum_algorithm_from_string(cls, value: str) -> Optional['Checksum']: - CHECKSUM_RE = re.compile("(ADLER32|BLAKE2b-256|BLAKE2b-384|BLAKE2b-512|BLAKE3|MD2|MD4|MD5|MD6|" \ - "SHA1|SHA224|SHA256|SHA384|SHA512|SHA3-256|SHA3-384|SHA3-512):\\s*([a-fA-F0-9]*)") - match = CHECKSUM_RE.match(value) - if match: - return Checksum(identifier=match.group(1), value=match.group(2)) - else: - return None + def checksum_algorithm_from_string(cls, identifier: str) -> 'ChecksumAlgorithm': + identifier.replace("-", "_").upper() + if identifier not in ChecksumAlgorithm.__members__: + raise ValueError(f"Invalid algorithm for checksum: {identifier}") + return ChecksumAlgorithm[identifier] class Checksum(object): """Generic checksum algorithm.""" - def __init__(self, identifier: str, value: str): - reformated_identifier = identifier - if reformated_identifier not in ChecksumAlgorithm.__members__: - raise ValueError('Invalid algorithm for Checksum: {}'.format(identifier)) - self.identifier = ChecksumAlgorithm[reformated_identifier] + def __init__(self, identifier: ChecksumAlgorithm, value: str): + self.identifier = identifier self.value = value + @classmethod + def checksum_from_string(cls, value: str) -> 'Checksum': + CHECKSUM_RE = re.compile("(ADLER32|BLAKE2b-256|BLAKE2b-384|BLAKE2b-512|BLAKE3|MD2|MD4|MD5|MD6|" \ + "SHA1|SHA224|SHA256|SHA384|SHA512|SHA3-256|SHA3-384|SHA3-512):\\s*([a-fA-F0-9]*)") + match = CHECKSUM_RE.match(value) + identifier = ChecksumAlgorithm.checksum_algorithm_from_string(match.group(1)) + return Checksum(identifier=identifier, value=match.group(2)) + + def to_tv(self): return "{0}: {1}".format(self.identifier.name, self.value) diff --git a/spdx/parsers/jsonyamlxmlbuilders.py b/spdx/parsers/jsonyamlxmlbuilders.py index 8ab318652..d514af7ab 100644 --- a/spdx/parsers/jsonyamlxmlbuilders.py +++ b/spdx/parsers/jsonyamlxmlbuilders.py @@ -14,7 +14,7 @@ from spdx.parsers import rdfbuilders from spdx.parsers import tagvaluebuilders from spdx.parsers import validations -from spdx.checksum import Checksum +from spdx.checksum import Checksum, ChecksumAlgorithm from spdx.parsers.builderexceptions import SPDXValueError from spdx.parsers.builderexceptions import CardinalityError from spdx.parsers.builderexceptions import OrderError @@ -173,11 +173,12 @@ def set_file_checksum(self, doc: Document, checksum: Union[Dict, Checksum, str]) if isinstance(checksum, dict): algo = checksum.get('algorithm') or 'SHA1' - self.file(doc).set_checksum(Checksum(algo, checksum.get('checksumValue'))) + identifier = ChecksumAlgorithm.checksum_algorithm_from_string(algo) + self.file(doc).set_checksum(Checksum(identifier, checksum.get('checksumValue'))) elif isinstance(checksum, Checksum): self.file(doc).set_checksum(checksum) elif isinstance(checksum, str): - self.file(doc).set_checksum(Checksum("SHA1", checksum)) + self.file(doc).set_checksum(Checksum(ChecksumAlgorithm.SHA1, checksum)) return True def set_file_notice(self, doc, text): diff --git a/spdx/parsers/rdf.py b/spdx/parsers/rdf.py index 9e29341dc..279156867 100644 --- a/spdx/parsers/rdf.py +++ b/spdx/parsers/rdf.py @@ -67,7 +67,7 @@ } -def convert_rdf_checksum_algorithm(rdf_checksum_algorithm: str) -> str: +def convert_rdf_checksum_algorithm(rdf_checksum_algorithm: str) -> ChecksumAlgorithm: split_string = rdf_checksum_algorithm.split('#') if len(split_string) != 2: raise SPDXValueError('Unknown checksum algorithm {}'.format(rdf_checksum_algorithm)) diff --git a/spdx/parsers/rdfbuilders.py b/spdx/parsers/rdfbuilders.py index dc04aefd1..c7845ae4d 100644 --- a/spdx/parsers/rdfbuilders.py +++ b/spdx/parsers/rdfbuilders.py @@ -16,7 +16,7 @@ from spdx import license from spdx import package from spdx import version -from spdx.checksum import Checksum +from spdx.checksum import Checksum, ChecksumAlgorithm from spdx.document import Document from spdx.parsers.builderexceptions import CardinalityError from spdx.parsers.builderexceptions import OrderError @@ -146,7 +146,7 @@ def set_chksum(self, doc, chk_sum): """ if chk_sum: doc.ext_document_references[-1].checksum = Checksum( - "SHA1", chk_sum + ChecksumAlgorithm.SHA1, chk_sum ) else: raise SPDXValueError("ExternalDocumentRef::Checksum") @@ -197,15 +197,17 @@ def set_pkg_checksum(self, doc, checksum: Union[Checksum, Dict]): """ self.assert_package_exists() if isinstance(checksum, dict): - algo = checksum.get('algorithm') or 'SHA1' + algo = checksum.get('algorithm') or ChecksumAlgorithm.SHA1 if algo.startswith('checksumAlgorithm_'): - algo = convert_rdf_checksum_algorithm(algo) or 'SHA1' + algo = convert_rdf_checksum_algorithm(algo) or ChecksumAlgorithm.SHA1 + else: + algo = ChecksumAlgorithm.checksum_algorithm_from_string(algo) doc.packages[-1].set_checksum(Checksum(identifier=algo, value=checksum.get('checksumValue'))) elif isinstance(checksum, Checksum): doc.packages[-1].set_checksum(checksum) elif isinstance(checksum, str): # kept for backwards compatibility - doc.packages[-1].set_checksum(Checksum(identifier="SHA1", value=checksum)) + doc.packages[-1].set_checksum(Checksum(identifier=ChecksumAlgorithm.SHA1, value=checksum)) else: raise SPDXValueError("Invalid value for package checksum.") @@ -398,13 +400,14 @@ def set_file_checksum(self, doc: Document, chk_sum: Union[Checksum, Dict, str]): """ if self.has_file(doc): if isinstance(chk_sum, dict): - self.file(doc).set_checksum(Checksum(chk_sum.get('algorithm'), + identifier = ChecksumAlgorithm.checksum_algorithm_from_string(chk_sum.get('algorithm')) + self.file(doc).set_checksum(Checksum(identifier, chk_sum.get('checksumValue'))) elif isinstance(chk_sum, Checksum): self.file(doc).set_checksum(chk_sum) elif isinstance(chk_sum, str): # kept for backwards compatibility - self.file(doc).set_checksum(Checksum("SHA1", chk_sum)) + self.file(doc).set_checksum(Checksum(ChecksumAlgorithm.SHA1, chk_sum)) return True def set_file_license_comment(self, doc, text): diff --git a/spdx/parsers/tagvaluebuilders.py b/spdx/parsers/tagvaluebuilders.py index 5ec1970c3..5f03816c4 100644 --- a/spdx/parsers/tagvaluebuilders.py +++ b/spdx/parsers/tagvaluebuilders.py @@ -22,7 +22,7 @@ from spdx import snippet from spdx import utils from spdx import version -from spdx.checksum import ChecksumAlgorithm +from spdx.checksum import Checksum from spdx.document import ExternalDocumentRef, Document from spdx.package import PackagePurpose @@ -187,7 +187,7 @@ def set_chksum(self, doc, chksum): """ Set the `check_sum` attribute of the `ExternalDocumentRef` object. """ - doc.ext_document_references[-1].checksum = ChecksumAlgorithm.checksum_algorithm_from_string(chksum) + doc.ext_document_references[-1].checksum = Checksum.checksum_from_string(chksum) def add_ext_doc_refs(self, doc, ext_doc_id, spdx_doc_uri, chksum): self.set_ext_doc_id(doc, ext_doc_id) @@ -773,7 +773,7 @@ def set_pkg_checksum(self, doc, checksum): """ self.assert_package_exists() self.package_chk_sum_set = True - doc.packages[-1].set_checksum(ChecksumAlgorithm.checksum_algorithm_from_string(checksum)) + doc.packages[-1].set_checksum(Checksum.checksum_from_string(checksum)) return True def set_pkg_source_info(self, doc, text): @@ -1183,7 +1183,7 @@ def set_file_checksum(self, doc: Document, checksum: str): Raise OrderError if no file defined. """ if self.has_file(doc): - new_checksum = ChecksumAlgorithm.checksum_algorithm_from_string(checksum) + new_checksum = Checksum.checksum_from_string(checksum) self.file(doc).set_checksum(new_checksum) else: raise OrderError("File::CheckSum") diff --git a/tests/test_checksum.py b/tests/test_checksum.py index d37faf7a2..841294804 100644 --- a/tests/test_checksum.py +++ b/tests/test_checksum.py @@ -25,21 +25,24 @@ def test_checksum_to_rdf(algorithm, expected): @pytest.mark.parametrize("expected,rdf_algorithm", - [("SHA1", "checksumAlgorithm_sha1"), ("SHA224", "checksumAlgorithm_sha224"), - ("SHA3_256", "checksumAlgorithm_sha3_256"), ("BLAKE2B_256", "checksumAlgorithm_blake2b256"), - ("MD5", "checksumAlgorithm_md5")]) + [(ChecksumAlgorithm.SHA1, "checksumAlgorithm_sha1"), + (ChecksumAlgorithm.SHA224, "checksumAlgorithm_sha224"), + (ChecksumAlgorithm.SHA3_256, "checksumAlgorithm_sha3_256"), + (ChecksumAlgorithm.BLAKE2B_256, "checksumAlgorithm_blake2b256"), + (ChecksumAlgorithm.MD5, "checksumAlgorithm_md5")]) def test_checksum_from_rdf(rdf_algorithm, expected): algorithm = ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm) assert algorithm == expected -@pytest.mark.parametrize("expected,rdf_algorithm", - [("SHA1", "_checksumAlgorithm_sha1"), ("SHA224", "checksumAlgorithm_sha_224"), - ("SHA3_256", "checksumAlgorithm_sha3256"), ("BLAKE2B_256", "checksumAlgorithm_blake2b 256"), - ("BLAKE2B_256", "checksumAlgorithm_blake2b-256"), - ("BLAKE2B_256", "checksumAlgorithm_bblake2b 256")]) -def test_checksum_from_wrong_rdf(rdf_algorithm, expected): - algorithm = ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm) +@pytest.mark.parametrize("rdf_algorithm", + ["_checksumAlgorithm_sha1", "checksumAlgorithm_sha_224", "checksumAlgorithm_sha3256", + "checksumAlgorithm_blake2b 256", "checksumAlgorithm_blake2b-256", + "checksumAlgorithm_bblake2b 256"]) +def test_checksum_from_wrong_rdf(rdf_algorithm): + with pytest.raises(ValueError) as error: + ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm) + + assert str(error.value).startswith("Invalid algorithm for checksum") - assert algorithm != expected diff --git a/tests/test_document.py b/tests/test_document.py index ca80d90d4..7f9a1d677 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -16,7 +16,7 @@ from datetime import datetime from unittest import TestCase -from spdx.checksum import Checksum +from spdx.checksum import Checksum, ChecksumAlgorithm from spdx.config import LICENSE_MAP, EXCEPTION_MAP from spdx.creationinfo import Tool from spdx.document import Document, ExternalDocumentRef @@ -65,7 +65,7 @@ def test_creation(self): document.add_ext_document_reference( ExternalDocumentRef('DocumentRef-spdx-tool-2.1', 'https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301', - Checksum('SHA1', 'SOME-SHA1')) + Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) ) assert document.comment is None assert document.version == Version(2, 1) @@ -80,11 +80,11 @@ def test_document_validate_failures_returns_informative_messages(self): 'Sample_Document-V2.1', spdx_id='SPDXRef-DOCUMENT', namespace='https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301') pack = doc.package = Package('some/path', NoAssert()) - pack.set_checksum(Checksum('SHA256', 'SOME-SHA256')) + pack.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256')) file1 = File('./some/path/tofile') file1.name = './some/path/tofile' file1.spdx_id = 'SPDXRef-File' - file1.set_checksum(Checksum('SHA1', 'SOME-SHA1')) + file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) lic1 = License.from_identifier('LGPL-2.1-only') file1.add_lics(lic1) pack.add_lics_from_file(lic1) @@ -105,7 +105,7 @@ def test_document_is_valid_when_using_or_later_licenses(self): package = doc.package = Package(name='some/path', download_location=NoAssert()) package.spdx_id = 'SPDXRef-Package' package.cr_text = 'Some copyright' - package.set_checksum(Checksum('SHA1', 'SOME-SHA1')) + package.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) package.verif_code = 'SOME code' package.license_declared = NoAssert() package.conc_lics = NoAssert() @@ -114,7 +114,7 @@ def test_document_is_valid_when_using_or_later_licenses(self): file1.name = './some/path/tofile' file1.spdx_id = 'SPDXRef-File' file1.file_types = [FileType.OTHER] - file1.set_checksum(Checksum('SHA1', 'SOME-SHA1')) + file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) file1.conc_lics = NoAssert() file1.copyright = NoAssert() @@ -178,8 +178,8 @@ def _get_lgpl_doc(self, or_later=False): package.spdx_id = 'SPDXRef-Package' package.cr_text = 'Some copyright' package.verif_code = 'SOME code' - package.set_checksum(Checksum('SHA1', 'SOME-SHA1')) - package.set_checksum(Checksum('SHA256', 'SOME-SHA256')) + package.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) + package.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256')) package.license_declared = NoAssert() package.conc_lics = NoAssert() package.primary_package_purpose = PackagePurpose.FILE @@ -191,8 +191,8 @@ def _get_lgpl_doc(self, or_later=False): file1 = File('./some/path/tofile') file1.name = './some/path/tofile' file1.spdx_id = 'SPDXRef-File' - file1.set_checksum(Checksum('SHA1', 'SOME-SHA1')) - file1.set_checksum(Checksum('SHA256', 'SOME-SHA256')) + file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) + file1.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256')) file1.conc_lics = NoAssert() file1.copyright = NoAssert() file1.file_types = [FileType.OTHER, FileType.SOURCE] @@ -246,7 +246,7 @@ def _get_lgpl_multi_package_doc(self, or_later=False): file1 = File('./some/path/tofile') file1.name = './some/path/tofile' file1.spdx_id = 'SPDXRef-File' - file1.set_checksum(Checksum('SHA1', 'SOME-SHA1')) + file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1')) file1.conc_lics = NoAssert() file1.copyright = NoAssert() diff --git a/tests/test_jsonyamlxml_writer.py b/tests/test_jsonyamlxml_writer.py index fd45b3503..0ed226e36 100644 --- a/tests/test_jsonyamlxml_writer.py +++ b/tests/test_jsonyamlxml_writer.py @@ -5,7 +5,7 @@ import pytest -from spdx.checksum import Checksum +from spdx.checksum import Checksum, ChecksumAlgorithm from spdx.document import Document from spdx.file import File from spdx.license import License @@ -148,7 +148,7 @@ def minimal_document(): def minimal_file(): file = File(name="Example File", spdx_id="SPDXRef-File") - file.set_checksum(Checksum('SHA1', 'some-sha1-value')) + file.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'some-sha1-value')) return file