Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/macaron/build_spec_generator/common_spec/base_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class BaseBuildSpecDict(TypedDict, total=False):
newline: NotRequired[str]

#: The version of the programming language or runtime, e.g., '11' for JDK, '3.11' for Python.
language_version: Required[str]
language_version: Required[list[str]]

#: List of release dependencies.
dependencies: NotRequired[list[str]]
Expand All @@ -73,6 +73,11 @@ class BaseBuildSpecDict(TypedDict, total=False):
#: Entry point script, class, or binary for running the project.
entry_point: NotRequired[str | None]

#: A "back end" is a tool that a "front end" (such as pip/build) calls to
#: package the source distribution into the wheel format. build_backends maps
#: each back end used in building the wheel to its version specifier.
build_backends: NotRequired[dict[str, str]]


class BaseBuildSpec(ABC):
"""Abstract base class for build specification behavior and field resolution."""
Expand Down
2 changes: 1 addition & 1 deletion src/macaron/build_spec_generator/common_spec/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def gen_generic_build_spec(
"git_repo": latest_component_repository.remote_path,
"git_tag": latest_component_repository.commit_sha,
"newline": "lf",
"language_version": lang_version or "",
"language_version": [lang_version] if lang_version else [],
"ecosystem": purl.type,
"purl": str(purl),
"language": target_language,
Expand Down
6 changes: 4 additions & 2 deletions src/macaron/build_spec_generator/common_spec/maven_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ def resolve_fields(self, purl: PackageURL) -> None:
jdk_from_jar or "Cannot find any.",
)

existing = self.data["language_version"][0] if self.data["language_version"] else None

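# Select JDK from jar or another source, with a default of version 8.
# NOTE(review): a conditional expression binds more loosely than `or`, so the selection below
# evaluates as `(jdk_from_jar or <existing>) if <existing> else "8"`: when no version is already
# recorded, "8" is chosen even if a JDK version was found in the jar. Confirm this is intended.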
selected_jdk_version = jdk_from_jar or self.data["language_version"] if self.data["language_version"] else "8"
selected_jdk_version = jdk_from_jar or existing if existing else "8"

major_jdk_version = normalize_jdk_version(selected_jdk_version)
if not major_jdk_version:
logger.error("Failed to obtain the major version of %s", selected_jdk_version)
return

self.data["language_version"] = major_jdk_version
self.data["language_version"] = [major_jdk_version]
170 changes: 170 additions & 0 deletions src/macaron/build_spec_generator/common_spec/pypi_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,22 @@

"""This module includes build specification and helper classes for PyPI packages."""

import logging
import os
import re

import tomli
from packageurl import PackageURL
from packaging.requirements import InvalidRequirement, Requirement
from packaging.utils import InvalidWheelFilename, parse_wheel_filename

from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict
from macaron.config.defaults import defaults
from macaron.errors import SourceCodeError
from macaron.slsa_analyzer.package_registry import pypi_registry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo

logger: logging.Logger = logging.getLogger(__name__)


class PyPIBuildSpec(
Expand Down Expand Up @@ -34,3 +46,161 @@ def resolve_fields(self, purl: PackageURL) -> None:
purl: str
The target software component Package URL.
"""
if purl.type != "pypi":
return

registry = pypi_registry.PyPIRegistry()
registry.load_defaults()

registry_info = PackageRegistryInfo(
build_tool_name="pip",
build_tool_purl_type="pypi",
package_registry=registry,
metadata=[],
)

pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info)

if pypi_package_json is not None:
if pypi_package_json.package_json or pypi_package_json.download(dest=""):
requires_array: list[str] = []
build_backends: dict[str, str] = {}
python_version_list: list[str] = []
try:
with pypi_package_json.wheel():
logger.debug("Wheel at %s", pypi_package_json.wheel_path)
# Should only have .dist-info directory
logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path)))
wheel_contents, metadata_contents = self.read_directory(pypi_package_json.wheel_path, purl)
generator, version = self.read_generator_line(wheel_contents)
if generator != "":
build_backends[generator] = "==" + version
if generator != "setuptools":
# Apply METADATA heuristics to determine setuptools version
if "License-File" in metadata_contents:
build_backends["setuptools"] = "==" + defaults.get(
"heuristic.pypi", "setuptools_version_emitting_license"
)
elif "Platform: UNKNOWN" in metadata_contents:
build_backends["setuptools"] = "==" + defaults.get(
"heuristic.pypi", "setuptools_version_emitting_platform_unknown"
)
else:
build_backends["setuptools"] = "==" + defaults.get(
"heuristic.pypi", "default_setuptools"
)
except SourceCodeError:
logger.debug("Could not find pure wheel matching this PURL")

logger.debug("From .dist_info:")
logger.debug(build_backends)

try:
with pypi_package_json.sourcecode():
try:
pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml")
content = tomli.loads(pyproject_content.decode("utf-8"))
build_system: dict[str, list[str]] = content.get("build-system", {})
requires_array = build_system.get("requires", [])
python_version_constraint = content.get("project", {}).get("requires-python")
if python_version_constraint:
python_version_list.append(python_version_constraint)
logger.debug("From pyproject.toml:")
logger.debug(requires_array)
except SourceCodeError:
logger.debug("No pyproject.toml found")
except SourceCodeError:
logger.debug("No source distribution found")

# Merge in pyproject.toml build requirements only for back ends that the wheel's .dist-info
# did not already report. Hatch is an interesting example of this merge being required.
for requirement in requires_array:
try:
parsed_requirement = Requirement(requirement)
if parsed_requirement.name not in build_backends:
build_backends[parsed_requirement.name] = str(parsed_requirement.specifier)
except InvalidRequirement:
logger.debug("Malformed requirement encountered:")
logger.debug(requirement)

logger.debug("Combined:")
logger.debug(build_backends)
self.data["build_backends"] = build_backends

if not python_version_list:
try:
# Get python version specified in the wheel file name
logger.debug(pypi_package_json.wheel_filename)
_, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename)
for tag in tags:
python_version_list.append(tag.interpreter)
logger.debug(python_version_list)
except InvalidWheelFilename:
logger.debug("Could not parse wheel file name to extract version")

self.data["language_version"] = python_version_list

def read_directory(self, wheel_path: str, purl: PackageURL) -> tuple[str, str]:
    """
    Read in the WHEEL and METADATA files from the .dist-info directory.

    Parameters
    ----------
    wheel_path : str
        Path to the temporary directory where the wheel was
        downloaded into.
    purl: PackageURL
        PURL corresponding to the package being analyzed.

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is a string of the .dist-info/WHEEL
        contents and the second element is a string of the .dist-info/METADATA
        contents. An element is an empty string when the corresponding file,
        or the .dist-info directory itself, does not exist.
    """
    # From https://peps.python.org/pep-0427/#escaping-and-unicode
    # The flag has to be passed by keyword: the fourth positional parameter of
    # re.sub is ``count``, so a positional re.UNICODE would cap the number of
    # replacements instead of setting a flag.
    normalized_name = re.sub(r"[^\w\d.]+", "_", purl.name, flags=re.UNICODE)
    dist_info = f"{normalized_name}-{purl.version}.dist-info"
    logger.debug(dist_info)

    dist_info_path = os.path.join(wheel_path, dist_info)
    if not os.path.isdir(dist_info_path):
        return "", ""

    # Read both files the same way; a missing file yields an empty string
    # rather than an error, since either may be absent from the directory.
    contents: list[str] = []
    for file_name in ("WHEEL", "METADATA"):
        try:
            with open(os.path.join(dist_info_path, file_name), encoding="utf-8") as dist_info_file:
                contents.append(dist_info_file.read())
        except FileNotFoundError:
            contents.append("")

    return contents[0], contents[1]

def read_generator_line(self, wheel_contents: str) -> tuple[str, str]:
    """
    Parse through the "Generator: {build backend} {version}" line of .dist-info/WHEEL.

    The version may be written bare (``Generator: hatchling 1.25.0``) or wrapped
    in parentheses (``Generator: setuptools (70.0.0)``). Surrounding parentheses
    are removed so that the returned version is always a plain version string.

    Parameters
    ----------
    wheel_contents : str
        String of the contents of the .dist-info/WHEEL file

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is the generating build backend and
        the second element is its version. Both elements are empty strings
        when there is no Generator line, or when the line does not carry both
        a backend name and a version.
    """
    for line in wheel_contents.splitlines():
        if not line.startswith("Generator:"):
            continue
        # split() without an argument tolerates repeated spaces and tabs.
        fields = line.removeprefix("Generator:").split()
        if len(fields) < 2:
            # Malformed line (no version): treat it as absent instead of
            # raising IndexError, and keep scanning the remaining lines.
            continue
        backend, version = fields[0], fields[1].strip("()")
        return backend, version
    return "", ""
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str |
"tool": ReproducibleCentralBuildTool[build_spec["build_tool"].upper()].value,
"newline": build_spec["newline"],
"buildinfo": f"target/{build_spec['artifact_id']}-{build_spec['version']}.buildinfo",
"jdk": build_spec["language_version"],
"jdk": build_spec["language_version"][0],
"command": compose_shell_commands(build_spec["build_commands"]),
}

Expand Down
6 changes: 6 additions & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -644,3 +644,9 @@ custom_semgrep_rules_path =
# .yaml prefix. Note, this will be ignored if a path to custom semgrep rules is not provided. This list may not contain
# duplicated elements, meaning that ruleset names must be unique.
disabled_custom_rulesets =
# As per https://peps.python.org/pep-0639/appendix-examples/, presumably most versions < 59.1.1 will work here
setuptools_version_emitting_license = 56.2.0
# TODO: Investigate if other versions would be suitable
setuptools_version_emitting_platform_unknown = 57.5.0
# TODO: Investigate if other versions would be suitable
default_setuptools = 67.7.2
2 changes: 1 addition & 1 deletion src/macaron/repo_finder/repo_finder_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def find_repo(
pypi_registry = next((registry for registry in PACKAGE_REGISTRIES if isinstance(registry, PyPIRegistry)), None)
if not pypi_registry:
return "", RepoFinderInfo.PYPI_NO_REGISTRY
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "")
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "", "", "")

if not pypi_asset:
# This should be unreachable, as the pypi_registry has already been confirmed to be of type PyPIRegistry.
Expand Down
Loading
Loading