Skip to content

Commit

Permalink
works with spdx tools-python v0.7.1
Browse files Browse the repository at this point in the history
no longer requires forked tools-python
support sbom json format automatically based on file type
  • Loading branch information
jotterson committed Jun 11, 2023
1 parent 7b35cbf commit 5bf8f11
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 212 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
.idea/
venv/
data/
sample/
*.spdx
__pycache__/
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Changes to sbom_validator

## v1.0.0 2021-12-09 Initial release.
Released as part of my CS6727 Cybersecurity Practicum.

I took a problem that made me nervous at work and designed and implemented software to help mitigate this problem.
See the paper at [jotterson6_cs6727_project_report_20211212.docx-compressed.pdf](jotterson6_cs6727_project_report_20211212.docx-compressed.pdf)

## v1.1.0 2023-xx-xx Support SPDX tools-python v0.7.x

Support for latest SPDX tools-python, and, in theory, SPDX version 2.3.

This code no longer needs my fork of tools-python; it now works with the 'official' library's support for
more than one 'checksum' and more than one 'file type'.

The storage format is selected based on the extension of the file name given on the command line. If you specify
a file name that ends with '.json' then JSON format will be used; otherwise SPDX tagged-value format will be used.
25 changes: 6 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@ The intended use cases are as follows:

1. A 'bootstrap' SBOM is initially created using the
`create-sbom.py` from the build output.
1. `create-sbom.py` is also used to create a list of 'approved'
2. `create-sbom.py` is also used to create a list of 'approved'
third-party components.
1. The `edit-sbom.py` script is used to set license information on
3. The `edit-sbom.py` script is used to set license information on
the third-party components.
1. `merge-by-sha256` is used to merge the third-party component data
4. `merge-by-sha256` is used to merge the third-party component data
into the 'bootstrap' SBOM to create the 'ideal' SBOM
3. `merge-and-test.py` is used at the end
5. `merge-and-test.py` is used at the end
of the build process to compare the build output to the
ideal SBOM. If files that were not identified as build
outputs in the edit phase have a different hash then a
warning is raised. Build outputs get their hashes
calculated, and the build output SBOM is produced. This step
will detect mis-matched third-party components.
4. the `validate-sbom.py` script is used to validate the
6. the `validate-sbom.py` script is used to validate the
integrity of the application release's file once
installed on a runtime environment.

Expand All @@ -48,17 +48,4 @@ The intended use cases are as follows:
hash values. Missing/extra files are detected and
reported, as are hash mis-matches.

## Caveats

The tools-python 'spdx' library on which this depends does
not fully comply with the SPDX-2.1 standard, there is no support
for multiple file types, and no support for multiple file
checksums. Consequently, this currently uses a patched
library. The patched library is currently at
https://github.com/jotterson/tools-python -- you can install it like this:

```shell
pip install git+https://github.com/jotterson/tools-python.git@checksums_final#egg=spdx-tools
```

J.B. Otterson 20211206
J.B. Otterson 20230611
101 changes: 43 additions & 58 deletions create-sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from zipfile import ZipFile

import signature_utilities
from spdx.checksum import Algorithm
from spdx.checksum import Checksum, ChecksumAlgorithm
from spdx_utilities import \
add_checksum_to_spdx_file, \
add_signature_to_spdx_document, \
Expand All @@ -46,46 +46,46 @@
new_spdx_doc, \
new_spdx_file, \
new_spdx_pkg, \
read_sbom_file, \
read_spdx_file, \
set_spdx_file_type, \
serialize_spdx_doc, \
write_sbom_file
write_spdx_file
from validation_utilities import files_in_dir

HASH_NAMES = ['sha1', 'sha256']
HASH_NAMES = ['SHA1', 'SHA256']
spdx_id_counter = 0


def new_spdx_id():
global spdx_id_counter
spdx_id_counter += 1
return 'SPDXRef-{:06d}'.format(spdx_id_counter)
return f'SPDXRef-{spdx_id_counter:06d}'


def package_path_to_spdx_doc(args):
package_path = args.package_path
package_hashers = {}
for hash_name in HASH_NAMES:
package_hashers[hash_name] = hashlib.new(hash_name)

spdx_doc = new_spdx_doc(toolname='create-sbom.py')
package_name = package_path
if package_name[-1] == '/':
package_name = package_name[0:-1]
_, package_name = os.path.split(package_name)
spdx_pkg = new_spdx_pkg(spdx_id=new_spdx_id(), name='Example', version='0.0.0', file_name=package_name)

logging.info('Enumerating files at {}...'.format(package_path))
spdx_doc = new_spdx_doc(name=package_name, toolname='create-sbom.py')
# spdx_pkg = new_spdx_pkg(spdx_id=new_spdx_id(), name='Example', version='0.0.0', file_name=package_name)
# spdx_doc.add_package(spdx_pkg)

logging.info(f'Enumerating files at {package_path}')
files = files_in_dir(package_path)
logging.info('Directory enumeration found {} files'.format(len(files)))
# add all the discovered files to the package.
logging.info(f'Directory enumeration found {len(files)} files')
# add all the discovered files to the SPDX DOCUMENT
for file in files:
full_path = '{}/{}'.format(package_path, file)
full_path = f'{package_path}/{file}'
if args.flat:
_, file = os.path.split(file)
spdx_file = new_spdx_file(filename=file, spdx_id=new_spdx_id())
if args.file_comment is not None:
spdx_file.comment = args.file_comment
else:
spdx_file.comment = f'found during scan of {package_name}'
for hash_name in HASH_NAMES:
hasher = hashlib.new(hash_name)
with open(full_path, 'rb') as fh:
Expand All @@ -94,36 +94,24 @@ def package_path_to_spdx_doc(args):
if not block:
break
hasher.update(block)
package_hashers[hash_name].update(block)
add_checksum_to_spdx_file(spdx_file, hash_name.upper(), hasher.hexdigest())
set_spdx_file_type(spdx_file, full_path)
spdx_pkg.add_file(spdx_file)

# update package hashes
for hash_name in HASH_NAMES:
spdx_pkg.set_checksum(Algorithm(hash_name.upper(), package_hashers[hash_name].hexdigest()))

# update pkg verification code.
spdx_pkg.verif_code = spdx_pkg.calc_verif_code()
spdx_doc.add_package(spdx_pkg)
spdx_doc.add_file(spdx_file)
return spdx_doc


def package_zip_to_spdx_doc(args):
package_hashers = {}
for hash_name in HASH_NAMES:
package_hashers[hash_name] = hashlib.new(hash_name)

package_zip = args.package_zip
spdx_doc = new_spdx_doc(toolname='create-sbom.py')
_, package_name = os.path.split(package_zip)
spdx_pkg = new_spdx_pkg(spdx_id=new_spdx_id(), name=package_name, version='0.0.0', file_name=package_name)
spdx_doc = new_spdx_doc(name=package_name, toolname='create-sbom.py')
# spdx_pkg = new_spdx_pkg(spdx_id=new_spdx_id(), name=package_name, version='0.0.0', file_name=package_name)
# spdx_doc.add_package(spdx_pkg)

logging.info('Enumerating files in {}...'.format(package_zip))
logging.info(f'Enumerating files in {package_zip}')
with ZipFile(package_zip, 'r') as zipfile:
namelist = zipfile.namelist()
files = list(filter(lambda name: not name.endswith('/'), namelist))
logging.info('Zipfile contains {} files'.format(len(files)))
logging.info(f'Zipfile contains {len(files)} files.')
for file in files:
if args.flat:
_, filename = os.path.split(file)
Expand All @@ -133,27 +121,20 @@ def package_zip_to_spdx_doc(args):
spdx_file = new_spdx_file(filename=filename, spdx_id=new_spdx_id())
if args.file_comment is not None:
spdx_file.comment = args.comment
else:
spdx_file.comment = f'found during scan of {package_name}'
data = zipfile.read(file)
for hash_name in HASH_NAMES:
hasher = hashlib.new(hash_name)
hasher.update(data)
package_hashers[hash_name].update(data)
add_checksum_to_spdx_file(spdx_file, hash_name.upper(), hasher.hexdigest())
spdx_file_types = guess_spdx_file_type_from_extension(file)
if spdx_file_types is None:
spdx_file_types = guess_spdx_file_type_from_data(data)
if spdx_file_types is None or len(spdx_file_types) == 0:
logging.error('bad... {}'.format(file))
logging.error(f'bad... {file}')
spdx_file.file_types = spdx_file_types
spdx_pkg.add_file(spdx_file)

# update package hashes
for hash_name in HASH_NAMES:
spdx_pkg.set_checksum(Algorithm(hash_name.upper(), package_hashers[hash_name].hexdigest()))

# update pkg verification code.
spdx_pkg.verif_code = spdx_pkg.calc_verif_code()
spdx_doc.add_package(spdx_pkg)
spdx_doc.add_file(spdx_file)
return spdx_doc


Expand All @@ -169,13 +150,15 @@ def main():
parser.add_argument('--private-key', type=str, help='private key for signing SBOM')
args = parser.parse_args()

log_format = '%(asctime)s %(levelname)s %(message)s'
log_date_format = '%Y-%m-%d %H:%M:%S'
if args.debug:
logging.basicConfig(format='%(message)s', level=logging.DEBUG)
logging.basicConfig(format=log_format, datefmt=log_date_format, level=logging.DEBUG)
else:
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
logging.basicConfig(format=log_format, datefmt=log_date_format, level=logging.INFO)

if args.package_path is None and args.package_zip is None:
logging.error('--package-path or --package-zip must be supplied')
logging.error('one of --package-path or --package-zip must be supplied')
exit(1)

if args.package_path is not None and args.package_zip is not None:
Expand All @@ -195,46 +178,48 @@ def main():

if args.package_path is not None:
if not os.path.isdir(args.package_path):
logging.error('package-path "{}" is not a directory.'.format(args.package_path))
logging.error(f'package-path "{args.package_path}" is not a directory.')
exit(1)
spdx_doc = package_path_to_spdx_doc(args)

if args.package_zip is not None:
if not os.path.exists(args.package_zip):
logging.error('package-zip {} not found'.format(args.package_zip))
logging.error(f'package-zip {args.package_zip} not found')
exit(1)
spdx_doc = package_zip_to_spdx_doc(args)

# sign the spdx file if the private key was specified
if private_key:
logging.info(f'Signing file {args.sbom_file}')
written_file_serialized_data = serialize_spdx_doc(spdx_doc)
signature = signature_utilities.create_signature(private_key,
serialize_spdx_doc(spdx_doc))
written_file_serialized_data)
add_signature_to_spdx_document(spdx_doc, signature)

# write the spdx file.
logging.info('Writing file {}'.format(args.sbom_file))
write_sbom_file(spdx_doc, args.sbom_file)
logging.info(f'Writing file {args.sbom_file}')
write_spdx_file(spdx_doc, args.sbom_file)

# read the spdx file for basic verification
logging.info('Reading file {}'.format(args.sbom_file))
new_doc = read_sbom_file(args.sbom_file)
logging.info(f'Reading file {args.sbom_file}')
new_doc = read_spdx_file(args.sbom_file)

if args.debug:
if args.private_key:
public_key = signature_utilities.read_ssh_public_key(args.private_key + '.pub')
else:
public_key = None
if public_key:
logging.info('Validating digital signature')
# validate digital signature on sbom document data
new_doc_data = serialize_spdx_doc(new_doc)
read_doc_serialized_data = serialize_spdx_doc(new_doc)
signature = get_digital_signature_from_spdx_document(new_doc)
if not signature_utilities.validate_signature(public_key, signature, new_doc_data):
if not signature_utilities.validate_signature(public_key, signature, read_doc_serialized_data):
logging.error('Digital signature mismatch')
exit(13)
else:
logging.info('Digital signature on SBOM file is good.')
logging.info('SBOM file contains {} file entries'.format(len(new_doc.packages[0].files)))

logging.info(f'SBOM file contains {len(new_doc.files)} file entries')
exit(0)


Expand Down
26 changes: 13 additions & 13 deletions edit-sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@

import signature_utilities
import spdx_utilities
from spdx_utilities import add_signature_to_spdx_document, read_sbom_file, serialize_spdx_doc, write_sbom_file
from spdx.document import License
from spdx_utilities import add_signature_to_spdx_document, read_spdx_file, serialize_spdx_doc, write_spdx_file
from spdx.checksum import ChecksumAlgorithm
from spdx.license import License
from spdx.utils import NoAssert, SPDXNone


Expand All @@ -49,18 +50,15 @@ def __init__(self, filename, public_key=None, private_key=None):
self.filename = filename
self.public_key = public_key
self.private_key = private_key
self.spdx_doc = read_sbom_file(filename)
self.spdx_doc = read_spdx_file(filename)
if self.public_key is not None:
# validate signature
data = spdx_utilities.serialize_spdx_doc(self.spdx_doc)
signature = spdx_utilities.get_digital_signature_from_spdx_document(self.spdx_doc)
if signature is not None:
if not signature_utilities.validate_signature(public_key, signature, data):
raise RuntimeError('Digital signature mismatch')
files = []
for package in self.spdx_doc.packages:
files.extend(package.files)
self.files = files
self.files = self.spdx_doc.files
self.files_by_spdxid = {}
if len(self.files) > 0:
self.current_file = self.files[0]
Expand Down Expand Up @@ -118,7 +116,8 @@ def get_current_file_form_data(self):
'notice': self.current_file.notice,
}
for hash_name in ['SHA1', 'SHA256']:
hash_value = self.current_file.get_checksum(hash_name)
algo = ChecksumAlgorithm.checksum_algorithm_from_string(hash_name)
hash_value = self.current_file.get_checksum(algo)
if hash_value is not None:
value = hash_value.value
else:
Expand Down Expand Up @@ -171,10 +170,8 @@ def delete_file(self, file):
:param file: the spdx file to delete
:return: None
"""
for package in self.spdx_doc.packages:
if file in package.files:
package.files.remove(file)
break
if file in self.spdx_doc.files:
self.spdx_doc.files.remove(file)
if file in self.files:
self.files.remove(file)

Expand All @@ -189,7 +186,8 @@ def save_spdx_file(self):
serialize_spdx_doc(self.spdx_doc))
add_signature_to_spdx_document(self.spdx_doc, signature)
# write the spdx file.
write_sbom_file(self.spdx_doc, self.filename)
logging.info(f'saving file {self.filename}')
write_spdx_file(self.spdx_doc, self.filename)


class ListView(Frame):
Expand Down Expand Up @@ -376,9 +374,11 @@ def main():

# noinspection PyGlobalUndefined
global spdx_files_list_model
logging.info('starting...')
spdx_files_list_model = SpdxFileFilesAsListModel(filename=args.sbom_file,
public_key=public_key,
private_key=private_key)
logging.info('prepared model')
last_scene = None
while True:
try:
Expand Down
Loading

0 comments on commit 5bf8f11

Please sign in to comment.