Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[issue-722] add calculate_package_verification_code() #723

Merged
merged 1 commit into from Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 40 additions & 1 deletion src/spdx_tools/spdx/spdx_element_utils.py
@@ -1,9 +1,11 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
import hashlib

from beartype.typing import List, Union

from spdx_tools.spdx.model import ExternalDocumentRef, File, Package, Snippet
from spdx_tools.spdx.model import ChecksumAlgorithm, ExternalDocumentRef, File, Package, Snippet


def get_full_element_spdx_id(
Expand All @@ -29,3 +31,40 @@ def get_full_element_spdx_id(
raise ValueError(f"external id {external_id} not found in external document references")

return external_uri + "#" + local_id


def calculate_package_verification_code(files: List[File]) -> str:
    """Compute a package verification code from the SHA1 checksums of ``files``.

    For each file the SHA1 value listed in ``file.checksums`` is used. If a file
    lists no SHA1 checksum (or an empty one), the checksum is calculated from the
    content found at ``file.name``. The collected values are sorted, concatenated
    and hashed with SHA1.

    Every entry of ``files`` contributes to the result; callers that want to
    exclude files must filter the list before calling.

    Args:
        files: The files whose checksums make up the verification code.

    Returns:
        The hexadecimal SHA1 digest of the sorted, concatenated file checksums.

    Raises:
        FileNotFoundError: If a file provides no SHA1 checksum and cannot be
            opened at ``file.name`` to calculate one.
    """
    sha1_values = []
    for file in files:
        sha1_value = None
        # No early exit: if several SHA1 checksums are listed, the last one wins.
        for checksum in file.checksums:
            if checksum.algorithm == ChecksumAlgorithm.SHA1:
                sha1_value = checksum.value
        if not sha1_value:
            try:
                sha1_value = calculate_file_checksum(file.name, ChecksumAlgorithm.SHA1)
            except FileNotFoundError as err:
                # Chain explicitly so the original OS-level error stays attached as the cause.
                raise FileNotFoundError(
                    f"Cannot calculate package verification code because the file '{file.name}' "
                    f"provides no SHA1 checksum and can't be found at the specified location."
                ) from err
        sha1_values.append(sha1_value)

    # Sorting makes the result independent of the order in which files are passed.
    sha1_values.sort()
    hasher = hashlib.new("sha1")
    hasher.update("".join(sha1_values).encode("utf-8"))
    return hasher.hexdigest()


def calculate_file_checksum(file_name: str, hash_algorithm=ChecksumAlgorithm.SHA1) -> str:
    """Return the hexadecimal digest of the content of the file at ``file_name``.

    The hashlib algorithm is looked up by the lower-cased name of
    ``hash_algorithm``. The file is read in binary mode in fixed-size chunks,
    so it is never held in memory as a whole.
    """
    chunk_size = 65536

    digest = hashlib.new(hash_algorithm.name.lower())
    with open(file_name, "rb") as stream:
        # iter() with a sentinel keeps reading until read() returns empty bytes (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

    return digest.hexdigest()
76 changes: 76 additions & 0 deletions tests/spdx/test_checksum_calculation.py
@@ -0,0 +1,76 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
import pytest

from spdx_tools.spdx.model import Checksum, ChecksumAlgorithm, File
from spdx_tools.spdx.spdx_element_utils import calculate_file_checksum, calculate_package_verification_code


@pytest.fixture
def generate_test_files(tmp_path):
    """Create two binary files under ``tmp_path`` and yield their paths as strings.

    ``file1`` holds 111 zero bytes and ``file2`` holds 222 zero bytes.
    """
    first_file = tmp_path / "file1"
    second_file = tmp_path / "file2"

    first_file.write_bytes(bytes(111))
    second_file.write_bytes(bytes(222))

    yield str(first_file), str(second_file)


def test_file_checksum_calculation(generate_test_files):
    """Check the SHA1 digests calculated for the two generated files."""
    first_path, second_path = generate_test_files

    first_digest = calculate_file_checksum(first_path, ChecksumAlgorithm.SHA1)
    assert first_digest == "dd90903d2f566a3922979dd5e18378a075c7ed33"

    second_digest = calculate_file_checksum(second_path, ChecksumAlgorithm.SHA1)
    assert second_digest == "140dc52658e2eeee3fdc4d471cce84fec7253fe3"


def test_verification_code_calculation_with_predefined_checksums(generate_test_files):
    """Check the verification code when both files already list a SHA1 checksum."""
    first_path, second_path = generate_test_files
    hello_checksum = Checksum(ChecksumAlgorithm.SHA1, "20862a6d08391d07d09344029533ec644fac6b21")
    makefile_checksum = Checksum(ChecksumAlgorithm.SHA1, "69a2e85696fff1865c3f0686d6c3824b59915c80")
    package_files = [
        File(first_path, "SPDXRef-hello", [hello_checksum]),
        File(second_path, "SPDXRef-Makefile", [makefile_checksum]),
    ]

    result = calculate_package_verification_code(package_files)

    assert result == "c6cb0949d7cd7439fce8690262a0946374824639"


def test_verification_code_calculation_with_calculated_checksums(generate_test_files):
    """Check the verification code when the files list only MD4 checksums, no SHA1."""
    first_path, second_path = generate_test_files
    hello_checksum = Checksum(ChecksumAlgorithm.MD4, "20862a6d08391d07d09344029533ec644fac6b21")
    makefile_checksum = Checksum(ChecksumAlgorithm.MD4, "69a2e85696fff1865c3f0686d6c3824b59915c80")
    package_files = [
        File(first_path, "SPDXRef-hello", [hello_checksum]),
        File(second_path, "SPDXRef-Makefile", [makefile_checksum]),
    ]

    result = calculate_package_verification_code(package_files)

    assert result == "6f29d813abb63ee52a47dbcb691ea2e70f956328"


def test_verification_code_calculation_with_wrong_file_location():
    """Expect FileNotFoundError naming the file when it has no SHA1 and does not exist."""
    unknown_file_name = "./unknown_file_name"
    md4_checksum = Checksum(ChecksumAlgorithm.MD4, "20862a6d08391d07d09344029533ec644fac6b21")
    missing_file = File(unknown_file_name, "SPDXRef-unknown", [md4_checksum])

    with pytest.raises(FileNotFoundError) as err:
        calculate_package_verification_code([missing_file])

    assert unknown_file_name in str(err.value)