From f11ac77c205e8d2ae81e9ebaad01f9cd92b85c13 Mon Sep 17 00:00:00 2001 From: Christoph Reiter Date: Tue, 19 Mar 2024 08:51:03 +0100 Subject: [PATCH] Add a hacky tool to write out a SBOM only supports packages with a pypi reference or a CPE atm. --- msys2-sbom | 5 ++ msys2_devtools/sbom.py | 102 ++++++++++++++++++++++++++++++++++++++ poetry.lock | 108 ++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 3 ++ 4 files changed, 216 insertions(+), 2 deletions(-) create mode 100755 msys2-sbom create mode 100644 msys2_devtools/sbom.py diff --git a/msys2-sbom b/msys2-sbom new file mode 100755 index 0000000..54c6385 --- /dev/null +++ b/msys2-sbom @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +from msys2_devtools import sbom + +sbom.run() diff --git a/msys2_devtools/sbom.py b/msys2_devtools/sbom.py new file mode 100644 index 0000000..7164756 --- /dev/null +++ b/msys2_devtools/sbom.py @@ -0,0 +1,102 @@ +import sys +import os +import argparse +import logging +import json +import gzip +from typing import Collection, Sequence, List + +from packageurl import PackageURL +from cyclonedx.model.bom import Bom +from cyclonedx.model.component import Component, ComponentType +from cyclonedx.output.json import JsonV1Dot5, Json as JsonOutputter + + +def convert_mapping(array: Sequence[str]) -> dict[str, str | None]: + converted: dict[str, str | None] = {} + for item in array: + if ":" in item: + key, value = item.split(":", 1) + value = value.strip() + else: + key = item + value = None + converted[key] = value + return converted + + +def extra_to_pkgextra_entry(data: dict[str, str | Collection[str]]) -> dict: + mappings = ["references"] + + data = dict(data) + for key in mappings: + if key in data: + value = data[key] + assert isinstance(value, list) + data[key] = convert_mapping(value) + + return data + + +def write_sbom(srcinfo_cache: str, sbom: str) -> None: + bom = Bom() + bom.metadata.component = root_component = Component( + name='MSYS2', + type=ComponentType.OPERATING_SYSTEM + ) + + srcinfo_cache = os.path.abspath(srcinfo_cache) + with open(srcinfo_cache, "rb") as h: + cache = json.loads(gzip.decompress(h.read())) + + for value in cache.values(): + pkgver = "" + pkgbase = "" + for srcinfo in value["srcinfo"].values(): + pkgver = [line for line in srcinfo.splitlines() + if line.strip().startswith("pkgver = ")][0].split(" = ")[1].strip() + pkgbase = [line for line in srcinfo.splitlines() + if line.strip().startswith("pkgbase = ")][0].split(" = ")[1].strip() + break + if "extra" in value and "references" in value["extra"]: + pkgextra = extra_to_pkgextra_entry(value["extra"]) + for extra_key, extra_value in pkgextra["references"].items(): + if extra_key == "pypi": + component1 = Component( + name=pkgbase, + version=pkgver, + purl=PackageURL('pypi', None, extra_value, pkgver) + ) + bom.components.add(component1) + bom.register_dependency(root_component, [component1]) + elif extra_key == "cpe": + if extra_value.startswith("2.3:"): + cpe = f"cpe:{extra_value}:{pkgver}:*:*:*:*:*:*:*" + else: + cpe = f"cpe:{extra_value}:{pkgver}" + component2 = Component( + name=pkgbase, + cpe=cpe, + version=pkgver + ) + bom.components.add(component2) + bom.register_dependency(root_component, [component2]) + + my_json_outputter: 'JsonOutputter' = JsonV1Dot5(bom) + serialized_json = my_json_outputter.output_as_string(indent=2) + with open(sbom, 'w') as file: + file.write(serialized_json) + + +def main(argv: List[str]) -> None: + parser = argparse.ArgumentParser(description="Create an SBOM for all packages in the repo", allow_abbrev=False) + parser.add_argument("srcinfo_cache", help="The path to the srcinfo.json.gz file") + parser.add_argument("sbom", help="The path to the SBOM json file used to store the results") + args = parser.parse_args(argv[1:]) + + logging.basicConfig(level="INFO") + write_sbom(args.srcinfo_cache, args.sbom) + + +def run() -> None: + return main(sys.argv) diff --git a/poetry.lock b/poetry.lock index 9b8e4cf..9b74216 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "annotated-types" @@ -11,6 +11,17 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] +[[package]] +name = "boolean-py" +version = "4.0" +description = "Define boolean algebras, create and parse boolean expressions and create custom boolean DSL." +optional = false +python-versions = "*" +files = [ + {file = "boolean.py-4.0-py3-none-any.whl", hash = "sha256:2876f2051d7d6394a531d82dc6eb407faa0b01a0a0b3083817ccd7323b8d96bd"}, + {file = "boolean.py-4.0.tar.gz", hash = "sha256:17b9a181630e43dde1851d42bef546d616d5d9b4480357514597e78b203d06e4"}, +] + [[package]] name = "certifi" version = "2024.2.2" @@ -196,6 +207,39 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "cyclonedx-python-lib" +version = "6.4.4" +description = "Python library for CycloneDX" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "cyclonedx_python_lib-6.4.4-py3-none-any.whl", hash = "sha256:c366619cc4effd528675f1f7a7a00be30b6695ff03f49c64880ad15acbebc341"}, + {file = "cyclonedx_python_lib-6.4.4.tar.gz", hash = "sha256:1b6f9109b6b9e91636dff822c2de90a05c0c8af120317713c1b879dbfdebdff8"}, +] + +[package.dependencies] +license-expression = ">=30,<31" +packageurl-python = ">=0.11,<2" +py-serializable = ">=0.16,<2" +sortedcontainers = ">=2.4.0,<3.0.0" + +[package.extras] +json-validation = ["jsonschema[format] (>=4.18,<5.0)"] +validation = ["jsonschema[format] (>=4.18,<5.0)", "lxml (>=4,<6)"] +xml-validation = ["lxml (>=4,<6)"] + +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python stdlib modules" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + [[package]] name = "exceptiongroup" version = "1.2.0" @@ -259,6 +303,24 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "license-expression" +version = "30.2.0" +description = "license-expression is a comprehensive utility library to parse, compare, simplify and normalize license expressions (such as SPDX license expressions) using boolean logic." +optional = false +python-versions = ">=3.7" +files = [ + {file = "license-expression-30.2.0.tar.gz", hash = "sha256:599928edd995c43fc335e0af342076144dc71cb858afa1ed9c1c30c4e81794f5"}, + {file = "license_expression-30.2.0-py3-none-any.whl", hash = "sha256:1a7dc2bb2d09cdc983d072e4f9adc787e107e09def84cbb3919baaaf4f8e6fa1"}, +] + +[package.dependencies] +"boolean.py" = ">=4.0" + +[package.extras] +docs = ["Sphinx (>=5.0.2)", "doc8 (>=0.11.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-reredirects (>=0.1.2)", "sphinx-rtd-dark-mode (>=1.3.0)", "sphinx-rtd-theme (>=1.0.0)", "sphinxcontrib-apidoc (>=0.4.0)"] +testing = ["black", "isort", "pytest (>=6,!=7.0.0)", "pytest-xdist (>=2)", "twine"] + [[package]] name = "mccabe" version = "0.7.0" @@ -284,6 +346,23 @@ files = [ [package.extras] nicer-shell = ["ipython"] +[[package]] +name = "packageurl-python" +version = "0.15.0" +description = "A purl aka. Package URL parser and builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packageurl-python-0.15.0.tar.gz", hash = "sha256:f219b2ce6348185a27bd6a72e6fdc9f984e6c9fa157effa7cb93e341c49cdcc2"}, + {file = "packageurl_python-0.15.0-py3-none-any.whl", hash = "sha256:cdc6bd42dc30c4fc7f8f0ccb721fc31f8c33985dbffccb6e6be4c72874de48ca"}, +] + +[package.extras] +build = ["setuptools", "wheel"] +lint = ["black", "isort", "mypy"] +sqlalchemy = ["sqlalchemy (>=2.0.0)"] +test = ["pytest"] + [[package]] name = "packaging" version = "23.2" @@ -320,6 +399,20 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "py-serializable" +version = "1.0.2" +description = "Library for serializing and deserializing Python Objects to and from JSON and XML." +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "py_serializable-1.0.2-py3-none-any.whl", hash = "sha256:f09dee8595a583117ba446c50be183eff9699b7d54529e0506d4f0f2e093e4a3"}, + {file = "py_serializable-1.0.2.tar.gz", hash = "sha256:158a98a7ffda067d21f844594ce571d97f36172ba538aee1a93196f8b5888bd8"}, +] + +[package.dependencies] +defusedxml = ">=0.7.1,<0.8.0" + [[package]] name = "pycodestyle" version = "2.11.1" @@ -555,6 +648,17 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -683,4 +787,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "ca88f7b59420ed2b2fd30004aaecd97bd1c9d5bb4b11d78e745c354c9454686b" +content-hash = "b012ca7b197239dd00ee718746d09fa2d2f251b3525c5f01b1dddd73f29cd87d" diff --git a/pyproject.toml b/pyproject.toml index 1412bc3..cb1e7ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,10 +16,13 @@ pyyaml = "^6.0" pydantic = "^2.0" pgpdump = "^1.5" fastprogress = "^1.0.3" +cyclonedx-python-lib = "^6.4.4" +packageurl-python = "^0.15.0" [tool.poetry.scripts] msys2-srcinfo-cache = 'msys2_devtools.srcinfo_cache:run' msys2-pypi-cache = 'msys2_devtools.pypi_cache:run' +msys2-sbom = 'msys2_devtools.sbom:run' [tool.poetry.group.dev.dependencies] pytest = "^8.0.0"