From 5b4b07f16fe84b0148b4acdfb305035ba0810af6 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 2 Sep 2025 15:57:15 +1000 Subject: [PATCH 1/2] fix: improve build tool detection Signed-off-by: behnazh-w --- src/macaron/config/defaults.ini | 11 +++ .../repo_verifier/repo_verifier_base.py | 52 ------------ .../repo_verifier/repo_verifier_gradle.py | 8 +- .../repo_verifier/repo_verifier_maven.py | 5 +- .../build_tool/base_build_tool.py | 79 ++++++++++++++++--- .../slsa_analyzer/build_tool/docker.py | 3 +- src/macaron/slsa_analyzer/build_tool/go.py | 3 +- .../slsa_analyzer/build_tool/gradle.py | 3 +- src/macaron/slsa_analyzer/build_tool/maven.py | 3 +- src/macaron/slsa_analyzer/build_tool/npm.py | 3 +- src/macaron/slsa_analyzer/build_tool/pip.py | 3 +- .../slsa_analyzer/build_tool/poetry.py | 3 +- src/macaron/slsa_analyzer/build_tool/yarn.py | 3 +- .../cases/fnproject_fdk-java/policy.dl | 21 +++++ .../cases/fnproject_fdk-java/test.yaml | 22 ++++++ .../configuration.ini | 6 ++ .../timyarkov_multibuild_test_maven/test.yaml | 1 + 17 files changed, 149 insertions(+), 80 deletions(-) create mode 100644 tests/integration/cases/fnproject_fdk-java/policy.dl create mode 100644 tests/integration/cases/fnproject_fdk-java/test.yaml create mode 100644 tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 8aa5e7a11..fa87f8553 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -92,6 +92,17 @@ hostname = gitlab.com # [git_service.local_repo] # hostname = example.org +[builder] +# Skip detecting build tool configuration files in paths containing the following keywords. +build_tool_path_filters = + test + example + sample + doc + demo + spec + mock + # This is the spec for trusted Maven build tools. 
[builder.maven] entry_conf = settings.xml diff --git a/src/macaron/repo_verifier/repo_verifier_base.py b/src/macaron/repo_verifier/repo_verifier_base.py index 0e8d87d9b..5762e6819 100644 --- a/src/macaron/repo_verifier/repo_verifier_base.py +++ b/src/macaron/repo_verifier/repo_verifier_base.py @@ -4,66 +4,14 @@ """This module contains the base class and core data models for repository verification.""" import abc import logging -import os -from collections import deque from dataclasses import dataclass from enum import Enum -from pathlib import Path from macaron.slsa_analyzer.build_tool import BaseBuildTool logger = logging.getLogger(__name__) -def find_file_in_repo(root_dir: Path, filename: str) -> Path | None: - """Find the highest level file with a given name in a local repository. - - This function ignores certain paths that are not under the main source code directories. - - Parameters - ---------- - root_dir : Path - The root directory of the repository. - filename : str - The name of the file to search for. - - Returns - ------- - Path | None - The path to the file if it exists, otherwise - """ - # TODO: Consider using BaseBuildTool.get_build_dirs. - # + Refactor 'get_build_dirs' to skip certain directories - # that are most likely not part of the main codebase (e.g., sample). - # + Need to find a way to look for other - # files (e.g., gradle.properties) for the purpose of repo verification - # without breaking the current logic of finding build directories. - # + Add the capability to return the content/path of the file. - if not os.path.isdir(root_dir): - return None - - queue: deque[Path] = deque() - queue.append(Path(root_dir)) - while queue: - current_dir = queue.popleft() - - # Don't look through non-main directories. 
- if any( - keyword in current_dir.name.lower() - for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"] - ): - continue - - if Path(current_dir, filename).exists(): - return Path(current_dir, filename) - - # Ignore symlinks to prevent potential infinite loop. - sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()] - queue.extend(sub_dirs) - - return None - - class RepositoryVerificationStatus(str, Enum): """A class to store the status of the repo verification.""" diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py index a93e71d23..90f35b2c4 100644 --- a/src/macaron/repo_verifier/repo_verifier_gradle.py +++ b/src/macaron/repo_verifier/repo_verifier_gradle.py @@ -10,10 +10,10 @@ RepositoryVerificationResult, RepositoryVerificationStatus, RepoVerifierToolSpecific, - find_file_in_repo, ) from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven from macaron.slsa_analyzer.build_tool import Gradle +from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization logger = logging.getLogger(__name__) @@ -151,15 +151,15 @@ def _extract_group_id_from_gradle_manifest( def _extract_group_id_from_properties(self) -> str | None: """Extract the group id from the gradle.properties file.""" - gradle_properties = find_file_in_repo(Path(self.reported_repo_fs), "gradle.properties") + gradle_properties = file_exists(self.reported_repo_fs, "gradle.properties") return self._extract_group_id_from_gradle_manifest(gradle_properties) def _extract_group_id_from_build_groovy(self) -> str | None: """Extract the group id from the build.gradle file.""" - build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle") + build_gradle = file_exists(self.reported_repo_fs, "build.gradle") return self._extract_group_id_from_gradle_manifest(build_gradle, 
quote_chars={"'", '"'}, delimiter=" ") def _extract_group_id_from_build_kotlin(self) -> str | None: """Extract the group id from the build.gradle.kts file.""" - build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle.kts") + build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts") return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=") diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index 4b6965408..880721385 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -3,7 +3,6 @@ """This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact.""" import logging -from pathlib import Path from urllib.parse import urlparse from macaron.parsers.pomparser import parse_pom_string @@ -11,9 +10,9 @@ RepositoryVerificationResult, RepositoryVerificationStatus, RepoVerifierToolSpecific, - find_file_in_repo, ) from macaron.slsa_analyzer.build_tool import Maven +from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists from macaron.slsa_analyzer.package_registry.maven_central_registry import ( RECOGNIZED_CODE_HOSTING_SERVICES, same_organization, @@ -47,7 +46,7 @@ def verify_by_tool(self) -> RepositoryVerificationResult: # TODO: check other pom files. Think about how to decide in case of contradicting evidence. # Check if repo contains pom.xml. 
- pom_file = find_file_in_repo(Path(self.reported_repo_fs), "pom.xml") + pom_file = file_exists(self.reported_repo_fs, "pom.xml") if not pom_file: logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url) return RepositoryVerificationResult( diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index 353596125..57d1eb855 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -9,12 +9,14 @@ import logging import os from abc import ABC, abstractmethod +from collections import deque from collections.abc import Iterable from dataclasses import dataclass from pathlib import Path from typing import TypedDict from macaron.code_analyzer.call_graph import BaseNode +from macaron.config.defaults import defaults from macaron.dependency_analyzer.cyclonedx import DependencyAnalyzer, NoneDependencyAnalyzer from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.checks.check_result import Confidence, Evidence, EvidenceWeightMap @@ -53,30 +55,77 @@ class BuildToolCommand(TypedDict): events: list[str] | None -def file_exists(path: str, file_name: str) -> bool: - """Return True if a file exists in a directory. +def find_first_matching_file(directory: Path, pattern: str) -> Path | None: + """ + Return the first file that matches the given glob pattern in the specified directory. + + Parameters + ---------- + directory : Path + Directory to search in. + pattern : str + Glob pattern to match. + + Returns + ------- + Path | None + The first matching file's path, or None if no match is found. 
+ """ + for match in directory.glob(pattern): + return match + return None + + +def file_exists(path: str, file_name: str, filters: list[str] | None = None) -> Path | None: + """Search recursively for the first matching file in a directory, skipping directories containing filter keywords. - This method searches in the directory recursively. + To disable filtering, pass an empty list to the `filters` parameter. Parameters ---------- path : str The path to search for the file. file_name : str - The name of the file to search. + The name of the file to search or a glob pattern (e.g., "Dockerfile.*"). + filters: list[str] | None + The list of keywords that should be filtered. Returns ------- - bool - True if file_name exists else False. + Path | None + The path to the file if it exists, otherwise """ - pattern = os.path.join(path, "**", file_name) - files_detected = glob.iglob(pattern, recursive=True) - try: - next(files_detected) - return True - except StopIteration: - return False + if not os.path.isdir(path): + return None + + # Check for file directly at root. + root_dir = Path(path) + if target_path := find_first_matching_file(root_dir, file_name): + return target_path + + def _enqueue_subdirs(directory: Path, queue: deque[Path]) -> None: + """Add non-symlink subdirectories to the search queue.""" + for entry in directory.iterdir(): + if entry.is_dir() and not entry.is_symlink(): + queue.append(entry) + + search_queue: deque[Path] = deque() + _enqueue_subdirs(root_dir, search_queue) + + while search_queue: + + current_dir = search_queue.popleft() + + # Skip filtered directories. 
+ if filters and any(keyword in current_dir.name.lower() for keyword in filters): + continue + + if candidate_path := find_first_matching_file(current_dir, file_name): + return candidate_path + + _enqueue_subdirs(current_dir, search_queue) + + return None @dataclass @@ -135,6 +184,7 @@ def __init__(self, name: str, language: BuildLanguage, purl_type: str) -> None: self.build_log: list[str] = [] self.wrapper_files: list[str] = [] self.runtime_options = RuntimeOptions() + self.path_filters: list[str] = [] def __str__(self) -> str: return self.name @@ -157,6 +207,9 @@ def is_detected(self, repo_path: str) -> bool: @abstractmethod def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + # A list of keywords that can be used as filters while detecting build tools. + if "builder" in defaults: + self.path_filters = defaults.get_list("builder", "build_tool_path_filters", fallback=[]) def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/docker.py b/src/macaron/slsa_analyzer/build_tool/docker.py index 6672ec6c8..fc9c909d8 100644 --- a/src/macaron/slsa_analyzer/build_tool/docker.py +++ b/src/macaron/slsa_analyzer/build_tool/docker.py @@ -20,6 +20,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.docker" in defaults: for item in defaults["builder.docker"]: if hasattr(self, item): @@ -43,4 +44,4 @@ def is_detected(self, repo_path: str) -> bool: bool True if this build tool is detected, else False. 
""" - return any(file_exists(repo_path, file) for file in self.build_configs) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) diff --git a/src/macaron/slsa_analyzer/build_tool/go.py b/src/macaron/slsa_analyzer/build_tool/go.py index 4d409ed9d..5610a3f81 100644 --- a/src/macaron/slsa_analyzer/build_tool/go.py +++ b/src/macaron/slsa_analyzer/build_tool/go.py @@ -19,6 +19,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.go" in defaults: for item in defaults["builder.go"]: if hasattr(self, item): @@ -43,4 +44,4 @@ def is_detected(self, repo_path: str) -> bool: True if this build tool is detected, else False. """ go_config_files = self.build_configs + self.entry_conf - return any(file_exists(repo_path, file) for file in go_config_files) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in go_config_files) diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index bd316dd30..c1e4d991f 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -25,6 +25,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.gradle" in defaults: for item in defaults["builder.gradle"]: if hasattr(self, item): @@ -67,7 +68,7 @@ def is_detected(self, repo_path: str) -> bool: True if this build tool is detected, else False. """ gradle_config_files = self.build_configs + self.entry_conf - return any(file_exists(repo_path, file) for file in gradle_config_files) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in gradle_config_files) def get_group_id(self, gradle_exec: str, project_path: str) -> str | None: """Get the group id of a Gradle project. 
diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index 0e89849af..d6fcd1c51 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -26,6 +26,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.maven" in defaults: for item in defaults["builder.maven"]: if hasattr(self, item): @@ -63,4 +64,4 @@ def is_detected(self, repo_path: str) -> bool: ) return False maven_config_files = self.build_configs - return any(file_exists(repo_path, file) for file in maven_config_files) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in maven_config_files) diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py index 2bc725974..cae93d257 100644 --- a/src/macaron/slsa_analyzer/build_tool/npm.py +++ b/src/macaron/slsa_analyzer/build_tool/npm.py @@ -29,6 +29,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.npm" in defaults: for item in defaults["builder.npm"]: if hasattr(self, item): @@ -56,7 +57,7 @@ def is_detected(self, repo_path: str) -> bool: # cases like .npmrc existing but not package-lock.json and whether # they would still count as "detected" npm_config_files = self.build_configs + self.package_lock + self.entry_conf - return any(file_exists(repo_path, file) for file in npm_config_files) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in npm_config_files) def is_deploy_command( self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 1926ca33b..5e1bb68a5 100644 --- 
a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -31,6 +31,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.pip" in defaults: for item in defaults["builder.pip"]: if hasattr(self, item): @@ -54,7 +55,7 @@ def is_detected(self, repo_path: str) -> bool: bool True if this build tool is detected, else False. """ - return any(file_exists(repo_path, file) for file in self.build_configs) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index a1d5a4a0d..0363f3cbb 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -34,6 +34,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.poetry" in defaults: for item in defaults["builder.poetry"]: if hasattr(self, item): @@ -59,7 +60,7 @@ def is_detected(self, repo_path: str) -> bool: """ package_lock_exists = "" for file in self.package_lock: - if file_exists(repo_path, file): + if file_exists(repo_path, file, filters=self.path_filters): package_lock_exists = file break diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py index 30914f27e..4660faf12 100644 --- a/src/macaron/slsa_analyzer/build_tool/yarn.py +++ b/src/macaron/slsa_analyzer/build_tool/yarn.py @@ -26,6 +26,7 @@ def __init__(self) -> None: def load_defaults(self) -> None: """Load the default values from defaults.ini.""" + super().load_defaults() if "builder.yarn" in defaults: for item in defaults["builder.yarn"]: if hasattr(self, item): @@ -54,7 +55,7 
@@ def is_detected(self, repo_path: str) -> bool: # cases like .yarnrc existing but not package-lock.json and whether # they would still count as "detected" yarn_config_files = self.build_configs + self.package_lock + self.entry_conf - return any(file_exists(repo_path, file) for file in yarn_config_files) + return any(file_exists(repo_path, file, filters=self.path_filters) for file in yarn_config_files) def is_deploy_command( self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None diff --git a/tests/integration/cases/fnproject_fdk-java/policy.dl b/tests/integration/cases/fnproject_fdk-java/policy.dl new file mode 100644 index 000000000..f4d71f66e --- /dev/null +++ b/tests/integration/cases/fnproject_fdk-java/policy.dl @@ -0,0 +1,21 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("test_policy", component_id, "") :- + check_passed(component_id, "mcn_build_tool_1"), + build_tool_exists(component_id, "maven"), + !build_tool_exists(component_id, "gradle"). + +.decl build_tool_exists(component_id: number, name: symbol) +build_tool_exists(component_id, name) :- + build_tool_check( + check_id, + name, + _ + ), + check_facts(check_id, _, component_id,_,_). + +apply_policy_to("test_policy", component_id) :- + is_repo_url(component_id, "https://github.com/fnproject/fdk-java"). diff --git a/tests/integration/cases/fnproject_fdk-java/test.yaml b/tests/integration/cases/fnproject_fdk-java/test.yaml new file mode 100644 index 000000000..5544a7068 --- /dev/null +++ b/tests/integration/cases/fnproject_fdk-java/test.yaml @@ -0,0 +1,22 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +description: | + Analyzing with repository. + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -rp + - https://github.com/fnproject/fdk-java.git + - --digest + - fdac7f9417156df1936dda1c38125a7f7bf9b21d +- name: Run macaron verify-policy to verify passed/failed checks + kind: verify + options: + policy: policy.dl diff --git a/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini b/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini new file mode 100644 index 000000000..f49ec5bfa --- /dev/null +++ b/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini @@ -0,0 +1,6 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +[builder] +# Disable the default path filtering for detecting build tools. +build_tool_path_filters = diff --git a/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml b/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml index 56343f72d..c6b58016a 100644 --- a/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml +++ b/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml @@ -13,6 +13,7 @@ steps: - name: Run macaron analyze kind: analyze options: + ini: configuration.ini command_args: - -purl - pkg:maven/org.example/mock_maven_proj@1.0-SNAPSHOT?type=jar From ff7d81be9be80c5f93700565bec26bed6f0faba7 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 9 Sep 2025 12:04:14 +1000 Subject: [PATCH 2/2] fix: fix bug in repo verifier and add tests Signed-off-by: behnazh-w --- src/macaron/repo_verifier/repo_verifier.py | 1 + .../repo_verifier/repo_verifier_base.py | 12 +- .../repo_verifier/repo_verifier_gradle.py | 57 ++++--- .../repo_verifier/repo_verifier_maven.py | 54 ++++--- .../build_tool/base_build_tool.py | 2 +- .../policy.dl | 17 +++ 
.../test.yaml | 20 +++ .../fail_groovy/tests/build.gradle | 9 ++ .../fail_kotlin/tests/build.gradle.kts | 9 ++ .../fail_properties/tests/gradle.properties | 5 + .../gradle_repos/pass_groovy/build.gradle | 9 ++ .../gradle_repos/pass_kotlin/build.gradle.kts | 9 ++ .../pass_properties/gradle.properties | 5 + .../maven_repos/fail_pom/tests/pom.xml | 19 +++ .../mock_repos/maven_repos/pass_pom/pom.xml | 19 +++ tests/repo_verifier/test_repo_verifier.py | 141 ++++++++++++++++++ 16 files changed, 337 insertions(+), 51 deletions(-) create mode 100644 tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl create mode 100644 tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts create mode 100644 tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties create mode 100644 tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml create mode 100644 tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml create mode 100644 tests/repo_verifier/test_repo_verifier.py diff --git a/src/macaron/repo_verifier/repo_verifier.py b/src/macaron/repo_verifier/repo_verifier.py index d9752e7f6..a99538fb7 100644 --- a/src/macaron/repo_verifier/repo_verifier.py +++ b/src/macaron/repo_verifier/repo_verifier.py @@ -82,6 +82,7 @@ def verify_repo( version=version, reported_repo_url=reported_repo_url, reported_repo_fs=reported_repo_fs, + build_tool=build_tool, provenance_repo_url=provenance_repo_url, ) diff --git a/src/macaron/repo_verifier/repo_verifier_base.py 
b/src/macaron/repo_verifier/repo_verifier_base.py index 5762e6819..b056eac86 100644 --- a/src/macaron/repo_verifier/repo_verifier_base.py +++ b/src/macaron/repo_verifier/repo_verifier_base.py @@ -115,11 +115,6 @@ class RepoVerifierToolSpecific(RepoVerifierFromProvenance, abc.ABC): From-provenance verification is inherited from the parent class. """ - @property - @abc.abstractmethod - def specific_tool(self) -> BaseBuildTool: - """Define the build tool used to build the package.""" - def __init__( self, namespace: str | None, @@ -127,6 +122,7 @@ def __init__( version: str, reported_repo_url: str, reported_repo_fs: str, + build_tool: BaseBuildTool, provenance_repo_url: str | None, ): """Instantiate the class. @@ -143,12 +139,12 @@ def __init__( The URL of the repository reported by the publisher. reported_repo_fs : str The file system path of the reported repository. + build_tool : BaseBuildTool + The build tool used to build the package. provenance_repo_url : str | None The URL of the repository from a provenance file, or None if it, or the provenance, is not present. 
""" - super().__init__( - namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, self.specific_tool - ) + super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, build_tool) def verify_repo(self) -> RepositoryVerificationResult: """Verify the repository as per the base class method.""" diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py index 90f35b2c4..6b4960958 100644 --- a/src/macaron/repo_verifier/repo_verifier_gradle.py +++ b/src/macaron/repo_verifier/repo_verifier_gradle.py @@ -12,8 +12,7 @@ RepoVerifierToolSpecific, ) from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven -from macaron.slsa_analyzer.build_tool import Gradle -from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists +from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization logger = logging.getLogger(__name__) @@ -22,8 +21,6 @@ class RepoVerifierGradle(RepoVerifierToolSpecific): """A class to verify whether a repository with Gradle build tool links back to the artifact.""" - specific_tool = Gradle() - def __init__( self, namespace: str, @@ -31,6 +28,7 @@ def __init__( version: str, reported_repo_url: str, reported_repo_fs: str, + build_tool: BaseBuildTool, provenance_repo_url: str | None, ): """Initialize a RepoVerifierGradle instance. @@ -47,10 +45,12 @@ def __init__( The URL of the repository reported by the publisher. reported_repo_fs : str The file system path of the reported repository. + build_tool : BaseBuildTool + The build tool used to build the package. provenance_repo_url : str | None The URL of the repository from a provenance file, or None if it, or the provenance, is not present. 
""" - super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url) + super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, build_tool, provenance_repo_url) self.maven_verifier = RepoVerifierMaven( namespace=namespace, @@ -58,6 +58,7 @@ def __init__( version=version, reported_repo_url=reported_repo_url, reported_repo_fs=reported_repo_fs, + build_tool=build_tool, provenance_repo_url=provenance_repo_url, ) @@ -81,11 +82,11 @@ def verify_by_tool(self) -> RepositoryVerificationResult: if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED: return recognized_services_verification_result - gradle_group_id = self._extract_group_id_from_properties() + gradle_group_id = self.extract_group_id_from_properties() if not gradle_group_id: - gradle_group_id = self._extract_group_id_from_build_groovy() + gradle_group_id = self.extract_group_id_from_build_groovy() if not gradle_group_id: - gradle_group_id = self._extract_group_id_from_build_kotlin() + gradle_group_id = self.extract_group_id_from_build_kotlin() if not gradle_group_id: logger.debug("Could not find group from gradle manifests for %s", self.reported_repo_url) return RepositoryVerificationResult( @@ -149,17 +150,37 @@ def _extract_group_id_from_gradle_manifest( return None - def _extract_group_id_from_properties(self) -> str | None: - """Extract the group id from the gradle.properties file.""" - gradle_properties = file_exists(self.reported_repo_fs, "gradle.properties") + def extract_group_id_from_properties(self) -> str | None: + """Extract the group id from the gradle.properties file. + + Returns + ------- + str | None + The extracted group id if found, otherwise None. 
+ """ + gradle_properties = file_exists( + self.reported_repo_fs, "gradle.properties", filters=self.build_tool.path_filters + ) return self._extract_group_id_from_gradle_manifest(gradle_properties) - def _extract_group_id_from_build_groovy(self) -> str | None: - """Extract the group id from the build.gradle file.""" - build_gradle = file_exists(self.reported_repo_fs, "build.gradle") - return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter=" ") + def extract_group_id_from_build_groovy(self) -> str | None: + """Extract the group id from the build.gradle file. - def _extract_group_id_from_build_kotlin(self) -> str | None: - """Extract the group id from the build.gradle.kts file.""" - build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts") + Returns + ------- + str | None + The extracted group id if found, otherwise None. + """ + build_gradle = file_exists(self.reported_repo_fs, "build.gradle", filters=self.build_tool.path_filters) + return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter="=") + + def extract_group_id_from_build_kotlin(self) -> str | None: + """Extract the group id from the build.gradle.kts file. + + Returns + ------- + str | None + The extracted group id if found, otherwise None. 
+ """ + build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts", filters=self.build_tool.path_filters) return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=") diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index 880721385..f09d9ad3b 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -11,7 +11,6 @@ RepositoryVerificationStatus, RepoVerifierToolSpecific, ) -from macaron.slsa_analyzer.build_tool import Maven from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists from macaron.slsa_analyzer.package_registry.maven_central_registry import ( RECOGNIZED_CODE_HOSTING_SERVICES, @@ -24,8 +23,6 @@ class RepoVerifierMaven(RepoVerifierToolSpecific): """A class to verify whether a repository with Maven build tool links back to the artifact.""" - specific_tool = Maven() - def verify_by_tool(self) -> RepositoryVerificationResult: """Verify whether the reported repository links back to the Maven artifact. 
@@ -44,43 +41,52 @@ def verify_by_tool(self) -> RepositoryVerificationResult: if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED: return recognized_services_verification_result + pom_group_id = self.extract_group_id_from_pom() + if pom_group_id is None: + logger.debug("Could not find groupId from the pom.xml in %s", self.reported_repo_url) + return RepositoryVerificationResult( + status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool + ) + if not same_organization(pom_group_id, self.namespace): + logger.debug("Group id in pom.xml does not match the provided group id for: %s", self.reported_repo_url) + return RepositoryVerificationResult( + status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool + ) + + return RepositoryVerificationResult( + status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool + ) + + def extract_group_id_from_pom(self) -> str | None: + """Extract the group id from the pom.xml file. + + Returns + ------- + str | None + The extracted group id if found, otherwise None. + """ # TODO: check other pom files. Think about how to decide in case of contradicting evidence. # Check if repo contains pom.xml. 
- pom_file = file_exists(self.reported_repo_fs, "pom.xml") + pom_file = file_exists(self.reported_repo_fs, "pom.xml", filters=self.build_tool.path_filters) if not pom_file: logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_pom", build_tool=self.build_tool - ) + return None pom_content = pom_file.read_text(encoding="utf-8") pom_root = parse_pom_string(pom_content) - if not pom_root: + if pom_root is None: logger.debug("Could not parse pom.xml: %s", pom_file.as_posix()) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="not_parsed_pom", build_tool=self.build_tool - ) + return None # Find the group id in the pom (project/groupId). # The closing curly brace represents the end of the XML namespace. pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None) if pom_group_id_elem is None or not pom_group_id_elem.text: logger.debug("Could not find groupId in pom.xml: %s", pom_file) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool - ) + return None - pom_group_id = pom_group_id_elem.text.strip() - if not same_organization(pom_group_id, self.namespace): - logger.debug("Group id in pom.xml does not match the provided group id: %s", pom_file) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool - ) - - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool - ) + return pom_group_id_elem.text.strip() def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult: """Verify repository link by comparing the maven domain name and the account on code hosting services. 
diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index 57d1eb855..bf0e025ac 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -79,7 +79,7 @@ def find_first_matching_file(directory: Path, pattern: str) -> Path | None: def file_exists(path: str, file_name: str, filters: list[str] | None = None) -> Path | None: """Search recursively for the first matching file in a directory, skipping directories containing filter keywords. - To disable filtering, pass an empty list to the `filters` parameter. + To disable filtering, pass an empty list or `None` to the `filters` parameter. Parameters ---------- diff --git a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl new file mode 100644 index 000000000..3faa06d37 --- /dev/null +++ b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl @@ -0,0 +1,17 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("test_policy", component_id, "") :- + check_passed(component_id, "mcn_scm_authenticity_1"), + is_repo_url(component_id, "https://github.com/IntellectualSites/Arkitektonika-Client"), + build_tool_check( + check_id, + "gradle", + _ + ), + check_facts(check_id, _, component_id,_,_). + +apply_policy_to("test_policy", component_id) :- + is_component(component_id, "pkg:maven/com.intellectualsites.arkitektonika/Arkitektonika-Client@2.1.3"). 
diff --git a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml new file mode 100644 index 000000000..283c12c3f --- /dev/null +++ b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml @@ -0,0 +1,20 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing repository authenticity of an artifact that uses Gradle as build tool. + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:maven/com.intellectualsites.arkitektonika/Arkitektonika-Client@2.1.3 +- name: Run macaron verify-policy to verify passed/failed checks + kind: verify + options: + policy: policy.dl diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle b/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle new file mode 100644 index 000000000..02b5ebcac --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle @@ -0,0 +1,9 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +group = 'com.example' +version = '1.0.0' + +repositories { + mavenCentral() +} diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts b/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts new file mode 100644 index 000000000..d0a47870b --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts @@ -0,0 +1,9 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
*/ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +group = "com.example" +version = "1.0.0" + +repositories { + mavenCentral() +} diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties b/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties new file mode 100644 index 000000000..039b858f8 --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties @@ -0,0 +1,5 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +group=com.example +version=1.0.0 diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle b/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle new file mode 100644 index 000000000..02b5ebcac --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle @@ -0,0 +1,9 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +group = 'com.example' +version = '1.0.0' + +repositories { + mavenCentral() +} diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts b/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts new file mode 100644 index 000000000..d0a47870b --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts @@ -0,0 +1,9 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ + +group = "com.example" +version = "1.0.0" + +repositories { + mavenCentral() +} diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties b/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties new file mode 100644 index 000000000..039b858f8 --- /dev/null +++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties @@ -0,0 +1,5 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +group=com.example +version=1.0.0 diff --git a/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml b/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml new file mode 100644 index 000000000..243d4f9b1 --- /dev/null +++ b/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml @@ -0,0 +1,19 @@ + + 4.0.0 + + com.example + artifact + 1.0.0 + jar + + artifact + A simple Maven project for com.example:artifact:1.0.0 + + + 1.8 + 1.8 + + + diff --git a/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml b/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml new file mode 100644 index 000000000..243d4f9b1 --- /dev/null +++ b/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml @@ -0,0 +1,19 @@ + + 4.0.0 + + com.example + artifact + 1.0.0 + jar + + artifact + A simple Maven project for com.example:artifact:1.0.0 + + + 1.8 + 1.8 + + + diff --git a/tests/repo_verifier/test_repo_verifier.py b/tests/repo_verifier/test_repo_verifier.py new file mode 100644 index 000000000..0c01a8bff --- /dev/null +++ b/tests/repo_verifier/test_repo_verifier.py @@ -0,0 +1,141 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the repo verifier."""
from pathlib import Path

import pytest

from macaron.repo_verifier.repo_verifier_gradle import RepoVerifierGradle
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool

# Mock repositories live next to this test module, grouped by build tool.
MOCK_REPOS = Path(__file__).parent / "mock_repos"
GRADLE_REPOS = MOCK_REPOS / "gradle_repos"
MAVEN_REPOS = MOCK_REPOS / "maven_repos"


def gradle_repo_verifier(build_tool: BaseBuildTool, mock_repo: str) -> RepoVerifierGradle:
    """Build a ``RepoVerifierGradle`` for a mock repository using fixed test coordinates.

    Parameters
    ----------
    build_tool : BaseBuildTool
        The build tool instance handed to the verifier (expected to be Gradle).
    mock_repo : str
        File system path to the mock Gradle repository.

    Returns
    -------
    RepoVerifierGradle
        A verifier for ``com.example:artifact:1.0.0`` whose reported repository
        file system is ``mock_repo``.
    """
    verifier = RepoVerifierGradle(
        build_tool=build_tool,
        reported_repo_fs=mock_repo,
        reported_repo_url="https://github.com/example/example",
        provenance_repo_url=None,
        namespace="com.example",
        name="artifact",
        version="1.0.0",
    )
    return verifier


def maven_repo_verifier(build_tool: BaseBuildTool, mock_repo: str) -> RepoVerifierMaven:
    """Build a ``RepoVerifierMaven`` for a mock repository using fixed test coordinates.

    Parameters
    ----------
    build_tool : BaseBuildTool
        The build tool instance handed to the verifier (expected to be Maven).
    mock_repo : str
        File system path to the mock Maven repository.

    Returns
    -------
    RepoVerifierMaven
        A verifier for ``com.example:artifact:1.0.0`` whose reported repository
        file system is ``mock_repo``.
    """
    verifier = RepoVerifierMaven(
        build_tool=build_tool,
        reported_repo_fs=mock_repo,
        reported_repo_url="https://github.com/example/example",
        provenance_repo_url=None,
        namespace="com.example",
        name="artifact",
        version="1.0.0",
    )
    return verifier


@pytest.mark.parametrize(
    ("mock_repo", "expected_result"),
    [
        (GRADLE_REPOS / "fail_groovy", False),
        (GRADLE_REPOS / "pass_groovy", True),
    ],
)
def test_extract_group_id_from_build_groovy(
    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
) -> None:
    """Check group ID extraction from a Groovy ``build.gradle`` file.

    Each case pairs a mock repository path with the expected outcome: True when a
    group ID should be found, False when none should be found.
    """
    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
    group_id = verifier.extract_group_id_from_build_groovy()
    assert (group_id is not None) == expected_result


@pytest.mark.parametrize(
    ("mock_repo", "expected_result"),
    [
        (GRADLE_REPOS / "fail_properties", False),
        (GRADLE_REPOS / "pass_properties", True),
    ],
)
def test_extract_group_id_from_build_properties(
    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
) -> None:
    """Check group ID extraction from a ``gradle.properties`` file.

    Each case pairs a mock repository path with the expected outcome: True when a
    group ID should be found, False when none should be found.
    """
    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
    group_id = verifier.extract_group_id_from_properties()
    assert (group_id is not None) == expected_result


@pytest.mark.parametrize(
    ("mock_repo", "expected_result"),
    [
        (GRADLE_REPOS / "fail_kotlin", False),
        (GRADLE_REPOS / "pass_kotlin", True),
    ],
)
def test_extract_group_id_from_build_kotlin(
    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
) -> None:
    """Check group ID extraction from a Kotlin ``build.gradle.kts`` file.

    Each case pairs a mock repository path with the expected outcome: True when a
    group ID should be found, False when none should be found.
    """
    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
    group_id = verifier.extract_group_id_from_build_kotlin()
    assert (group_id is not None) == expected_result


@pytest.mark.parametrize(
    ("mock_repo", "expected_result"),
    [
        (MAVEN_REPOS / "fail_pom", False),
        (MAVEN_REPOS / "pass_pom", True),
    ],
)
def test_extract_group_id_from_pom(
    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
) -> None:
    """Check group ID extraction from a Maven ``pom.xml`` file.

    Each case pairs a mock repository path with the expected outcome: True when a
    group ID should be found, False when none should be found.
    """
    verifier = maven_repo_verifier(build_tools["maven"], str(mock_repo))
    group_id = verifier.extract_group_id_from_pom()
    assert (group_id is not None) == expected_result