Skip to content

Commit

Permalink
refactor Checksum identifier
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Nov 28, 2022
1 parent 78b6ca5 commit f830eba
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 54 deletions.
35 changes: 20 additions & 15 deletions spdx/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,34 +39,39 @@ def checksum_to_rdf(self):
return "checksumAlgorithm_" + self.name.lower()

@classmethod
def checksum_from_rdf(cls, identifier: str) -> str:
def checksum_from_rdf(cls, identifier: str) -> 'ChecksumAlgorithm':
identifier = identifier.split('_', 1)[-1].upper()
blake_checksum = re.compile(r"^(BLAKE2B)(256|384|512)$", re.UNICODE)
match = blake_checksum.match(identifier)
if match:
identifier = match[1] + '_' + match[2]
return identifier
if identifier not in ChecksumAlgorithm.__members__:
raise ValueError(f"Invalid algorithm for checksum: {identifier}")
return ChecksumAlgorithm[identifier]

@classmethod
def checksum_algorithm_from_string(cls, value: str) -> Optional['Checksum']:
CHECKSUM_RE = re.compile("(ADLER32|BLAKE2b-256|BLAKE2b-384|BLAKE2b-512|BLAKE3|MD2|MD4|MD5|MD6|" \
"SHA1|SHA224|SHA256|SHA384|SHA512|SHA3-256|SHA3-384|SHA3-512):\\s*([a-fA-F0-9]*)")
match = CHECKSUM_RE.match(value)
if match:
return Checksum(identifier=match.group(1), value=match.group(2))
else:
return None
def checksum_algorithm_from_string(cls, identifier: str) -> 'ChecksumAlgorithm':
    """
    Return the ChecksumAlgorithm member named by `identifier`.

    The lookup ignores case and accepts "-" in place of "_", so spellings
    such as "sha1", "SHA3-256" or "BLAKE2b-256" resolve to the members
    SHA1, SHA3_256 and BLAKE2B_256.

    Raise ValueError if the normalised name is not a member of
    ChecksumAlgorithm. The message quotes the identifier as it was passed in.
    """
    # str.replace()/str.upper() return new strings; the result has to be
    # bound to a name, otherwise the lookup below sees the raw input.
    normalized = identifier.replace("-", "_").upper()
    if normalized not in ChecksumAlgorithm.__members__:
        raise ValueError(f"Invalid algorithm for checksum: {identifier}")
    return ChecksumAlgorithm[normalized]


class Checksum(object):
"""Generic checksum algorithm."""

def __init__(self, identifier: str, value: str):
reformated_identifier = identifier
if reformated_identifier not in ChecksumAlgorithm.__members__:
raise ValueError('Invalid algorithm for Checksum: {}'.format(identifier))
self.identifier = ChecksumAlgorithm[reformated_identifier]
def __init__(self, identifier: ChecksumAlgorithm, value: str):
self.identifier = identifier
self.value = value

@classmethod
def checksum_from_string(cls, value: str) -> 'Checksum':
    """
    Build a Checksum from a tag/value string of the form
    "<ALGORITHM>: <hex digits>", e.g. "SHA1: 2fd4e1c6...".

    The algorithm part must be one of the spellings listed in the pattern
    below; it is converted to a ChecksumAlgorithm member via
    ChecksumAlgorithm.checksum_algorithm_from_string.

    Raise ValueError if `value` does not start with a recognised
    "<ALGORITHM>:" prefix.
    """
    CHECKSUM_RE = re.compile("(ADLER32|BLAKE2b-256|BLAKE2b-384|BLAKE2b-512|BLAKE3|MD2|MD4|MD5|MD6|"
                             "SHA1|SHA224|SHA256|SHA384|SHA512|SHA3-256|SHA3-384|SHA3-512):\\s*([a-fA-F0-9]*)")
    match = CHECKSUM_RE.match(value)
    # Pattern.match() returns None when the input does not fit; fail with a
    # clear message instead of an AttributeError on `match.group`.
    if match is None:
        raise ValueError(f"Invalid checksum: {value}")
    identifier = ChecksumAlgorithm.checksum_algorithm_from_string(match.group(1))
    return Checksum(identifier=identifier, value=match.group(2))


def to_tv(self):
    """
    Return the checksum in tag/value form: "<ALGORITHM>: <value>".

    Enum member names are upper case with "_" (SHA3_256, BLAKE2B_256),
    whereas the tag/value spelling accepted by checksum_from_string uses
    "-" and a lower-case "b" in BLAKE2b (SHA3-256, BLAKE2b-256). The name
    is converted so that the output can be parsed back.
    """
    algorithm = self.identifier.name.replace("_", "-")
    if algorithm.startswith("BLAKE2B"):
        # Only the trailing "B" of the family name changes case.
        algorithm = "BLAKE2b" + algorithm[len("BLAKE2B"):]
    return "{0}: {1}".format(algorithm, self.value)
7 changes: 4 additions & 3 deletions spdx/parsers/jsonyamlxmlbuilders.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from spdx.parsers import rdfbuilders
from spdx.parsers import tagvaluebuilders
from spdx.parsers import validations
from spdx.checksum import Checksum
from spdx.checksum import Checksum, ChecksumAlgorithm
from spdx.parsers.builderexceptions import SPDXValueError
from spdx.parsers.builderexceptions import CardinalityError
from spdx.parsers.builderexceptions import OrderError
Expand Down Expand Up @@ -173,11 +173,12 @@ def set_file_checksum(self, doc: Document, checksum: Union[Dict, Checksum, str])

if isinstance(checksum, dict):
algo = checksum.get('algorithm') or 'SHA1'
self.file(doc).set_checksum(Checksum(algo, checksum.get('checksumValue')))
identifier = ChecksumAlgorithm.checksum_algorithm_from_string(algo)
self.file(doc).set_checksum(Checksum(identifier, checksum.get('checksumValue')))
elif isinstance(checksum, Checksum):
self.file(doc).set_checksum(checksum)
elif isinstance(checksum, str):
self.file(doc).set_checksum(Checksum("SHA1", checksum))
self.file(doc).set_checksum(Checksum(ChecksumAlgorithm.SHA1, checksum))
return True

def set_file_notice(self, doc, text):
Expand Down
2 changes: 1 addition & 1 deletion spdx/parsers/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
}


def convert_rdf_checksum_algorithm(rdf_checksum_algorithm: str) -> str:
def convert_rdf_checksum_algorithm(rdf_checksum_algorithm: str) -> ChecksumAlgorithm:
split_string = rdf_checksum_algorithm.split('#')
if len(split_string) != 2:
raise SPDXValueError('Unknown checksum algorithm {}'.format(rdf_checksum_algorithm))
Expand Down
17 changes: 10 additions & 7 deletions spdx/parsers/rdfbuilders.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from spdx import license
from spdx import package
from spdx import version
from spdx.checksum import Checksum
from spdx.checksum import Checksum, ChecksumAlgorithm
from spdx.document import Document
from spdx.parsers.builderexceptions import CardinalityError
from spdx.parsers.builderexceptions import OrderError
Expand Down Expand Up @@ -146,7 +146,7 @@ def set_chksum(self, doc, chk_sum):
"""
if chk_sum:
doc.ext_document_references[-1].checksum = Checksum(
"SHA1", chk_sum
ChecksumAlgorithm.SHA1, chk_sum
)
else:
raise SPDXValueError("ExternalDocumentRef::Checksum")
Expand Down Expand Up @@ -197,15 +197,17 @@ def set_pkg_checksum(self, doc, checksum: Union[Checksum, Dict]):
"""
self.assert_package_exists()
if isinstance(checksum, dict):
algo = checksum.get('algorithm') or 'SHA1'
algo = checksum.get('algorithm') or ChecksumAlgorithm.SHA1
if algo.startswith('checksumAlgorithm_'):
algo = convert_rdf_checksum_algorithm(algo) or 'SHA1'
algo = convert_rdf_checksum_algorithm(algo) or ChecksumAlgorithm.SHA1
else:
algo = ChecksumAlgorithm.checksum_algorithm_from_string(algo)
doc.packages[-1].set_checksum(Checksum(identifier=algo, value=checksum.get('checksumValue')))
elif isinstance(checksum, Checksum):
doc.packages[-1].set_checksum(checksum)
elif isinstance(checksum, str):
# kept for backwards compatibility
doc.packages[-1].set_checksum(Checksum(identifier="SHA1", value=checksum))
doc.packages[-1].set_checksum(Checksum(identifier=ChecksumAlgorithm.SHA1, value=checksum))
else:
raise SPDXValueError("Invalid value for package checksum.")

Expand Down Expand Up @@ -398,13 +400,14 @@ def set_file_checksum(self, doc: Document, chk_sum: Union[Checksum, Dict, str]):
"""
if self.has_file(doc):
if isinstance(chk_sum, dict):
self.file(doc).set_checksum(Checksum(chk_sum.get('algorithm'),
identifier = ChecksumAlgorithm.checksum_algorithm_from_string(chk_sum.get('algorithm'))
self.file(doc).set_checksum(Checksum(identifier,
chk_sum.get('checksumValue')))
elif isinstance(chk_sum, Checksum):
self.file(doc).set_checksum(chk_sum)
elif isinstance(chk_sum, str):
# kept for backwards compatibility
self.file(doc).set_checksum(Checksum("SHA1", chk_sum))
self.file(doc).set_checksum(Checksum(ChecksumAlgorithm.SHA1, chk_sum))
return True

def set_file_license_comment(self, doc, text):
Expand Down
8 changes: 4 additions & 4 deletions spdx/parsers/tagvaluebuilders.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from spdx import snippet
from spdx import utils
from spdx import version
from spdx.checksum import ChecksumAlgorithm
from spdx.checksum import Checksum

from spdx.document import ExternalDocumentRef, Document
from spdx.package import PackagePurpose
Expand Down Expand Up @@ -187,7 +187,7 @@ def set_chksum(self, doc, chksum):
"""
Set the `check_sum` attribute of the `ExternalDocumentRef` object.
"""
doc.ext_document_references[-1].checksum = ChecksumAlgorithm.checksum_algorithm_from_string(chksum)
doc.ext_document_references[-1].checksum = Checksum.checksum_from_string(chksum)

def add_ext_doc_refs(self, doc, ext_doc_id, spdx_doc_uri, chksum):
self.set_ext_doc_id(doc, ext_doc_id)
Expand Down Expand Up @@ -773,7 +773,7 @@ def set_pkg_checksum(self, doc, checksum):
"""
self.assert_package_exists()
self.package_chk_sum_set = True
doc.packages[-1].set_checksum(ChecksumAlgorithm.checksum_algorithm_from_string(checksum))
doc.packages[-1].set_checksum(Checksum.checksum_from_string(checksum))
return True

def set_pkg_source_info(self, doc, text):
Expand Down Expand Up @@ -1183,7 +1183,7 @@ def set_file_checksum(self, doc: Document, checksum: str):
Raise OrderError if no file defined.
"""
if self.has_file(doc):
new_checksum = ChecksumAlgorithm.checksum_algorithm_from_string(checksum)
new_checksum = Checksum.checksum_from_string(checksum)
self.file(doc).set_checksum(new_checksum)
else:
raise OrderError("File::CheckSum")
Expand Down
25 changes: 14 additions & 11 deletions tests/test_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,24 @@ def test_checksum_to_rdf(algorithm, expected):


@pytest.mark.parametrize("expected,rdf_algorithm",
[("SHA1", "checksumAlgorithm_sha1"), ("SHA224", "checksumAlgorithm_sha224"),
("SHA3_256", "checksumAlgorithm_sha3_256"), ("BLAKE2B_256", "checksumAlgorithm_blake2b256"),
("MD5", "checksumAlgorithm_md5")])
[(ChecksumAlgorithm.SHA1, "checksumAlgorithm_sha1"),
(ChecksumAlgorithm.SHA224, "checksumAlgorithm_sha224"),
(ChecksumAlgorithm.SHA3_256, "checksumAlgorithm_sha3_256"),
(ChecksumAlgorithm.BLAKE2B_256, "checksumAlgorithm_blake2b256"),
(ChecksumAlgorithm.MD5, "checksumAlgorithm_md5")])
def test_checksum_from_rdf(rdf_algorithm, expected):
algorithm = ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm)

assert algorithm == expected


@pytest.mark.parametrize("expected,rdf_algorithm",
[("SHA1", "_checksumAlgorithm_sha1"), ("SHA224", "checksumAlgorithm_sha_224"),
("SHA3_256", "checksumAlgorithm_sha3256"), ("BLAKE2B_256", "checksumAlgorithm_blake2b 256"),
("BLAKE2B_256", "checksumAlgorithm_blake2b-256"),
("BLAKE2B_256", "checksumAlgorithm_bblake2b 256")])
def test_checksum_from_wrong_rdf(rdf_algorithm, expected):
algorithm = ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm)
@pytest.mark.parametrize("rdf_algorithm",
["_checksumAlgorithm_sha1", "checksumAlgorithm_sha_224", "checksumAlgorithm_sha3256",
"checksumAlgorithm_blake2b 256", "checksumAlgorithm_blake2b-256",
"checksumAlgorithm_bblake2b 256"])
def test_checksum_from_wrong_rdf(rdf_algorithm):
with pytest.raises(ValueError) as error:
ChecksumAlgorithm.checksum_from_rdf(rdf_algorithm)

assert str(error.value).startswith("Invalid algorithm for checksum")

assert algorithm != expected
22 changes: 11 additions & 11 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from datetime import datetime
from unittest import TestCase

from spdx.checksum import Checksum
from spdx.checksum import Checksum, ChecksumAlgorithm
from spdx.config import LICENSE_MAP, EXCEPTION_MAP
from spdx.creationinfo import Tool
from spdx.document import Document, ExternalDocumentRef
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_creation(self):
document.add_ext_document_reference(
ExternalDocumentRef('DocumentRef-spdx-tool-2.1',
'https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301',
Checksum('SHA1', 'SOME-SHA1'))
Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
)
assert document.comment is None
assert document.version == Version(2, 1)
Expand All @@ -80,11 +80,11 @@ def test_document_validate_failures_returns_informative_messages(self):
'Sample_Document-V2.1', spdx_id='SPDXRef-DOCUMENT',
namespace='https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301')
pack = doc.package = Package('some/path', NoAssert())
pack.set_checksum(Checksum('SHA256', 'SOME-SHA256'))
pack.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256'))
file1 = File('./some/path/tofile')
file1.name = './some/path/tofile'
file1.spdx_id = 'SPDXRef-File'
file1.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
lic1 = License.from_identifier('LGPL-2.1-only')
file1.add_lics(lic1)
pack.add_lics_from_file(lic1)
Expand All @@ -105,7 +105,7 @@ def test_document_is_valid_when_using_or_later_licenses(self):
package = doc.package = Package(name='some/path', download_location=NoAssert())
package.spdx_id = 'SPDXRef-Package'
package.cr_text = 'Some copyright'
package.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
package.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
package.verif_code = 'SOME code'
package.license_declared = NoAssert()
package.conc_lics = NoAssert()
Expand All @@ -114,7 +114,7 @@ def test_document_is_valid_when_using_or_later_licenses(self):
file1.name = './some/path/tofile'
file1.spdx_id = 'SPDXRef-File'
file1.file_types = [FileType.OTHER]
file1.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
file1.conc_lics = NoAssert()
file1.copyright = NoAssert()

Expand Down Expand Up @@ -178,8 +178,8 @@ def _get_lgpl_doc(self, or_later=False):
package.spdx_id = 'SPDXRef-Package'
package.cr_text = 'Some copyright'
package.verif_code = 'SOME code'
package.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
package.set_checksum(Checksum('SHA256', 'SOME-SHA256'))
package.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
package.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256'))
package.license_declared = NoAssert()
package.conc_lics = NoAssert()
package.primary_package_purpose = PackagePurpose.FILE
Expand All @@ -191,8 +191,8 @@ def _get_lgpl_doc(self, or_later=False):
file1 = File('./some/path/tofile')
file1.name = './some/path/tofile'
file1.spdx_id = 'SPDXRef-File'
file1.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
file1.set_checksum(Checksum('SHA256', 'SOME-SHA256'))
file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
file1.set_checksum(Checksum(ChecksumAlgorithm.SHA256, 'SOME-SHA256'))
file1.conc_lics = NoAssert()
file1.copyright = NoAssert()
file1.file_types = [FileType.OTHER, FileType.SOURCE]
Expand Down Expand Up @@ -246,7 +246,7 @@ def _get_lgpl_multi_package_doc(self, or_later=False):
file1 = File('./some/path/tofile')
file1.name = './some/path/tofile'
file1.spdx_id = 'SPDXRef-File'
file1.set_checksum(Checksum('SHA1', 'SOME-SHA1'))
file1.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'SOME-SHA1'))
file1.conc_lics = NoAssert()
file1.copyright = NoAssert()

Expand Down
4 changes: 2 additions & 2 deletions tests/test_jsonyamlxml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from spdx.checksum import Checksum
from spdx.checksum import Checksum, ChecksumAlgorithm
from spdx.document import Document
from spdx.file import File
from spdx.license import License
Expand Down Expand Up @@ -148,7 +148,7 @@ def minimal_document():

def minimal_file():
file = File(name="Example File", spdx_id="SPDXRef-File")
file.set_checksum(Checksum('SHA1', 'some-sha1-value'))
file.set_checksum(Checksum(ChecksumAlgorithm.SHA1, 'some-sha1-value'))
return file


Expand Down

0 comments on commit f830eba

Please sign in to comment.