diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini
index 8aa5e7a11..fa87f8553 100644
--- a/src/macaron/config/defaults.ini
+++ b/src/macaron/config/defaults.ini
@@ -92,6 +92,17 @@ hostname = gitlab.com
# [git_service.local_repo]
# hostname = example.org
+[builder]
+# Skip detecting build tool configuration files in paths containing the following keywords.
+build_tool_path_filters =
+ test
+ example
+ sample
+ doc
+ demo
+ spec
+ mock
+
# This is the spec for trusted Maven build tools.
[builder.maven]
entry_conf = settings.xml
diff --git a/src/macaron/repo_verifier/repo_verifier.py b/src/macaron/repo_verifier/repo_verifier.py
index d9752e7f6..a99538fb7 100644
--- a/src/macaron/repo_verifier/repo_verifier.py
+++ b/src/macaron/repo_verifier/repo_verifier.py
@@ -82,6 +82,7 @@ def verify_repo(
version=version,
reported_repo_url=reported_repo_url,
reported_repo_fs=reported_repo_fs,
+ build_tool=build_tool,
provenance_repo_url=provenance_repo_url,
)
diff --git a/src/macaron/repo_verifier/repo_verifier_base.py b/src/macaron/repo_verifier/repo_verifier_base.py
index 0e8d87d9b..b056eac86 100644
--- a/src/macaron/repo_verifier/repo_verifier_base.py
+++ b/src/macaron/repo_verifier/repo_verifier_base.py
@@ -4,66 +4,14 @@
"""This module contains the base class and core data models for repository verification."""
import abc
import logging
-import os
-from collections import deque
from dataclasses import dataclass
from enum import Enum
-from pathlib import Path
from macaron.slsa_analyzer.build_tool import BaseBuildTool
logger = logging.getLogger(__name__)
-def find_file_in_repo(root_dir: Path, filename: str) -> Path | None:
- """Find the highest level file with a given name in a local repository.
-
- This function ignores certain paths that are not under the main source code directories.
-
- Parameters
- ----------
- root_dir : Path
- The root directory of the repository.
- filename : str
- The name of the file to search for.
-
- Returns
- -------
- Path | None
- The path to the file if it exists, otherwise
- """
- # TODO: Consider using BaseBuildTool.get_build_dirs.
- # + Refactor 'get_build_dirs' to skip certain directories
- # that are most likely not part of the main codebase (e.g., sample).
- # + Need to find a way to look for other
- # files (e.g., gradle.properties) for the purpose of repo verification
- # without breaking the current logic of finding build directories.
- # + Add the capability to return the content/path of the file.
- if not os.path.isdir(root_dir):
- return None
-
- queue: deque[Path] = deque()
- queue.append(Path(root_dir))
- while queue:
- current_dir = queue.popleft()
-
- # Don't look through non-main directories.
- if any(
- keyword in current_dir.name.lower()
- for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"]
- ):
- continue
-
- if Path(current_dir, filename).exists():
- return Path(current_dir, filename)
-
- # Ignore symlinks to prevent potential infinite loop.
- sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()]
- queue.extend(sub_dirs)
-
- return None
-
-
class RepositoryVerificationStatus(str, Enum):
"""A class to store the status of the repo verification."""
@@ -167,11 +115,6 @@ class RepoVerifierToolSpecific(RepoVerifierFromProvenance, abc.ABC):
From-provenance verification is inherited from the parent class.
"""
- @property
- @abc.abstractmethod
- def specific_tool(self) -> BaseBuildTool:
- """Define the build tool used to build the package."""
-
def __init__(
self,
namespace: str | None,
@@ -179,6 +122,7 @@ def __init__(
version: str,
reported_repo_url: str,
reported_repo_fs: str,
+ build_tool: BaseBuildTool,
provenance_repo_url: str | None,
):
"""Instantiate the class.
@@ -195,12 +139,12 @@ def __init__(
The URL of the repository reported by the publisher.
reported_repo_fs : str
The file system path of the reported repository.
+ build_tool : BaseBuildTool
+ The build tool used to build the package.
provenance_repo_url : str | None
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
"""
- super().__init__(
- namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, self.specific_tool
- )
+ super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, build_tool)
def verify_repo(self) -> RepositoryVerificationResult:
"""Verify the repository as per the base class method."""
diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py
index a93e71d23..6b4960958 100644
--- a/src/macaron/repo_verifier/repo_verifier_gradle.py
+++ b/src/macaron/repo_verifier/repo_verifier_gradle.py
@@ -10,10 +10,9 @@
RepositoryVerificationResult,
RepositoryVerificationStatus,
RepoVerifierToolSpecific,
- find_file_in_repo,
)
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
-from macaron.slsa_analyzer.build_tool import Gradle
+from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists
from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization
logger = logging.getLogger(__name__)
@@ -22,8 +21,6 @@
class RepoVerifierGradle(RepoVerifierToolSpecific):
"""A class to verify whether a repository with Gradle build tool links back to the artifact."""
- specific_tool = Gradle()
-
def __init__(
self,
namespace: str,
@@ -31,6 +28,7 @@ def __init__(
version: str,
reported_repo_url: str,
reported_repo_fs: str,
+ build_tool: BaseBuildTool,
provenance_repo_url: str | None,
):
"""Initialize a RepoVerifierGradle instance.
@@ -47,10 +45,12 @@ def __init__(
The URL of the repository reported by the publisher.
reported_repo_fs : str
The file system path of the reported repository.
+ build_tool : BaseBuildTool
+ The build tool used to build the package.
provenance_repo_url : str | None
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
"""
- super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url)
+ super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, build_tool, provenance_repo_url)
self.maven_verifier = RepoVerifierMaven(
namespace=namespace,
@@ -58,6 +58,7 @@ def __init__(
version=version,
reported_repo_url=reported_repo_url,
reported_repo_fs=reported_repo_fs,
+ build_tool=build_tool,
provenance_repo_url=provenance_repo_url,
)
@@ -81,11 +82,11 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
return recognized_services_verification_result
- gradle_group_id = self._extract_group_id_from_properties()
+ gradle_group_id = self.extract_group_id_from_properties()
if not gradle_group_id:
- gradle_group_id = self._extract_group_id_from_build_groovy()
+ gradle_group_id = self.extract_group_id_from_build_groovy()
if not gradle_group_id:
- gradle_group_id = self._extract_group_id_from_build_kotlin()
+ gradle_group_id = self.extract_group_id_from_build_kotlin()
if not gradle_group_id:
logger.debug("Could not find group from gradle manifests for %s", self.reported_repo_url)
return RepositoryVerificationResult(
@@ -149,17 +150,37 @@ def _extract_group_id_from_gradle_manifest(
return None
- def _extract_group_id_from_properties(self) -> str | None:
- """Extract the group id from the gradle.properties file."""
- gradle_properties = find_file_in_repo(Path(self.reported_repo_fs), "gradle.properties")
+ def extract_group_id_from_properties(self) -> str | None:
+ """Extract the group id from the gradle.properties file.
+
+ Returns
+ -------
+ str | None
+ The extracted group id if found, otherwise None.
+ """
+ gradle_properties = file_exists(
+ self.reported_repo_fs, "gradle.properties", filters=self.build_tool.path_filters
+ )
return self._extract_group_id_from_gradle_manifest(gradle_properties)
- def _extract_group_id_from_build_groovy(self) -> str | None:
- """Extract the group id from the build.gradle file."""
- build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle")
- return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter=" ")
+ def extract_group_id_from_build_groovy(self) -> str | None:
+ """Extract the group id from the build.gradle file.
- def _extract_group_id_from_build_kotlin(self) -> str | None:
- """Extract the group id from the build.gradle.kts file."""
- build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle.kts")
+ Returns
+ -------
+ str | None
+ The extracted group id if found, otherwise None.
+ """
+ build_gradle = file_exists(self.reported_repo_fs, "build.gradle", filters=self.build_tool.path_filters)
+ return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter="=")
+
+ def extract_group_id_from_build_kotlin(self) -> str | None:
+ """Extract the group id from the build.gradle.kts file.
+
+ Returns
+ -------
+ str | None
+ The extracted group id if found, otherwise None.
+ """
+ build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts", filters=self.build_tool.path_filters)
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=")
diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py
index 4b6965408..f09d9ad3b 100644
--- a/src/macaron/repo_verifier/repo_verifier_maven.py
+++ b/src/macaron/repo_verifier/repo_verifier_maven.py
@@ -3,7 +3,6 @@
"""This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact."""
import logging
-from pathlib import Path
from urllib.parse import urlparse
from macaron.parsers.pomparser import parse_pom_string
@@ -11,9 +10,8 @@
RepositoryVerificationResult,
RepositoryVerificationStatus,
RepoVerifierToolSpecific,
- find_file_in_repo,
)
-from macaron.slsa_analyzer.build_tool import Maven
+from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists
from macaron.slsa_analyzer.package_registry.maven_central_registry import (
RECOGNIZED_CODE_HOSTING_SERVICES,
same_organization,
@@ -25,8 +23,6 @@
class RepoVerifierMaven(RepoVerifierToolSpecific):
"""A class to verify whether a repository with Maven build tool links back to the artifact."""
- specific_tool = Maven()
-
def verify_by_tool(self) -> RepositoryVerificationResult:
"""Verify whether the reported repository links back to the Maven artifact.
@@ -45,43 +41,52 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
return recognized_services_verification_result
+ pom_group_id = self.extract_group_id_from_pom()
+ if pom_group_id is None:
+ logger.debug("Could not find groupId from the pom.xml in %s", self.reported_repo_url)
+ return RepositoryVerificationResult(
+ status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
+ )
+ if not same_organization(pom_group_id, self.namespace):
+ logger.debug("Group id in pom.xml does not match the provided group id for: %s", self.reported_repo_url)
+ return RepositoryVerificationResult(
+ status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
+ )
+
+ return RepositoryVerificationResult(
+ status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
+ )
+
+ def extract_group_id_from_pom(self) -> str | None:
+ """Extract the group id from the pom.xml file.
+
+ Returns
+ -------
+ str | None
+ The extracted group id if found, otherwise None.
+ """
# TODO: check other pom files. Think about how to decide in case of contradicting evidence.
# Check if repo contains pom.xml.
- pom_file = find_file_in_repo(Path(self.reported_repo_fs), "pom.xml")
+ pom_file = file_exists(self.reported_repo_fs, "pom.xml", filters=self.build_tool.path_filters)
if not pom_file:
logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url)
- return RepositoryVerificationResult(
- status=RepositoryVerificationStatus.UNKNOWN, reason="no_pom", build_tool=self.build_tool
- )
+ return None
pom_content = pom_file.read_text(encoding="utf-8")
pom_root = parse_pom_string(pom_content)
- if not pom_root:
+ if pom_root is None:
logger.debug("Could not parse pom.xml: %s", pom_file.as_posix())
- return RepositoryVerificationResult(
- status=RepositoryVerificationStatus.UNKNOWN, reason="not_parsed_pom", build_tool=self.build_tool
- )
+ return None
# Find the group id in the pom (project/groupId).
# The closing curly brace represents the end of the XML namespace.
pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None)
if pom_group_id_elem is None or not pom_group_id_elem.text:
logger.debug("Could not find groupId in pom.xml: %s", pom_file)
- return RepositoryVerificationResult(
- status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
- )
+ return None
- pom_group_id = pom_group_id_elem.text.strip()
- if not same_organization(pom_group_id, self.namespace):
- logger.debug("Group id in pom.xml does not match the provided group id: %s", pom_file)
- return RepositoryVerificationResult(
- status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
- )
-
- return RepositoryVerificationResult(
- status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
- )
+ return pom_group_id_elem.text.strip()
def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult:
"""Verify repository link by comparing the maven domain name and the account on code hosting services.
diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
index 353596125..bf0e025ac 100644
--- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
+++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
@@ -9,12 +9,14 @@
import logging
import os
from abc import ABC, abstractmethod
+from collections import deque
from collections.abc import Iterable
from dataclasses import dataclass
from pathlib import Path
from typing import TypedDict
from macaron.code_analyzer.call_graph import BaseNode
+from macaron.config.defaults import defaults
from macaron.dependency_analyzer.cyclonedx import DependencyAnalyzer, NoneDependencyAnalyzer
from macaron.slsa_analyzer.build_tool.language import BuildLanguage
from macaron.slsa_analyzer.checks.check_result import Confidence, Evidence, EvidenceWeightMap
@@ -53,30 +55,77 @@ class BuildToolCommand(TypedDict):
events: list[str] | None
-def file_exists(path: str, file_name: str) -> bool:
- """Return True if a file exists in a directory.
+def find_first_matching_file(directory: Path, pattern: str) -> Path | None:
+ """
+ Return the first file that matches the given glob pattern in the specified directory.
+
+ Parameters
+ ----------
+ directory : Path
+ Directory to search in.
+ pattern : str
+ Glob pattern to match.
+
+ Returns
+ -------
+ Path | None
+ The first matching file's path, or None if no match is found.
+ """
+ for match in directory.glob(pattern):
+ return match
+ return None
+
+
+def file_exists(path: str, file_name: str, filters: list[str] | None = None) -> Path | None:
+ """Search recursively for the first matching file in a directory, skipping directories containing filter keywords.
- This method searches in the directory recursively.
+ To disable filtering, pass an empty list or `None` to the `filters` parameter.
Parameters
----------
path : str
The path to search for the file.
file_name : str
- The name of the file to search.
+ The name of the file to search for, or a glob pattern (e.g., "Dockerfile.*").
+ filters : list[str] | None
+ The keywords used for filtering; a subdirectory whose lowercased name contains any of them is skipped.
Returns
-------
- bool
- True if file_name exists else False.
+ Path | None
+ The path to the first matching file if it exists, otherwise None.
"""
- pattern = os.path.join(path, "**", file_name)
- files_detected = glob.iglob(pattern, recursive=True)
- try:
- next(files_detected)
- return True
- except StopIteration:
- return False
+ if not os.path.isdir(path):
+ return None
+
+ # Check for file directly at root.
+ root_dir = Path(path)
+ if target_path := find_first_matching_file(root_dir, file_name):
+ return target_path
+
+ def _enqueue_subdirs(directory: Path, queue: deque[Path]) -> None:
+ """Add non-symlink subdirectories to the search queue."""
+ for entry in directory.iterdir():
+ if entry.is_dir() and not entry.is_symlink():
+ queue.append(entry)
+
+ search_queue: deque[Path] = deque()
+ _enqueue_subdirs(root_dir, search_queue)
+
+ while search_queue:
+
+ current_dir = search_queue.popleft()
+
+ # Skip filtered directories.
+ if filters and any(keyword in current_dir.name.lower() for keyword in filters):
+ continue
+
+ if candidate_path := find_first_matching_file(current_dir, file_name):
+ return candidate_path
+
+ _enqueue_subdirs(current_dir, search_queue)
+
+ return None
@dataclass
@@ -135,6 +184,7 @@ def __init__(self, name: str, language: BuildLanguage, purl_type: str) -> None:
self.build_log: list[str] = []
self.wrapper_files: list[str] = []
self.runtime_options = RuntimeOptions()
+ self.path_filters: list[str] = []
def __str__(self) -> str:
return self.name
@@ -157,6 +207,9 @@ def is_detected(self, repo_path: str) -> bool:
@abstractmethod
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ # A list of keywords that can be used as filters while detecting build tools.
+ if "builder" in defaults:
+ self.path_filters = defaults.get_list("builder", "build_tool_path_filters", fallback=[])
def get_dep_analyzer(self) -> DependencyAnalyzer:
"""Create a DependencyAnalyzer for the build tool.
diff --git a/src/macaron/slsa_analyzer/build_tool/docker.py b/src/macaron/slsa_analyzer/build_tool/docker.py
index 6672ec6c8..fc9c909d8 100644
--- a/src/macaron/slsa_analyzer/build_tool/docker.py
+++ b/src/macaron/slsa_analyzer/build_tool/docker.py
@@ -20,6 +20,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.docker" in defaults:
for item in defaults["builder.docker"]:
if hasattr(self, item):
@@ -43,4 +44,4 @@ def is_detected(self, repo_path: str) -> bool:
bool
True if this build tool is detected, else False.
"""
- return any(file_exists(repo_path, file) for file in self.build_configs)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs)
diff --git a/src/macaron/slsa_analyzer/build_tool/go.py b/src/macaron/slsa_analyzer/build_tool/go.py
index 4d409ed9d..5610a3f81 100644
--- a/src/macaron/slsa_analyzer/build_tool/go.py
+++ b/src/macaron/slsa_analyzer/build_tool/go.py
@@ -19,6 +19,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.go" in defaults:
for item in defaults["builder.go"]:
if hasattr(self, item):
@@ -43,4 +44,4 @@ def is_detected(self, repo_path: str) -> bool:
True if this build tool is detected, else False.
"""
go_config_files = self.build_configs + self.entry_conf
- return any(file_exists(repo_path, file) for file in go_config_files)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in go_config_files)
diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py
index bd316dd30..c1e4d991f 100644
--- a/src/macaron/slsa_analyzer/build_tool/gradle.py
+++ b/src/macaron/slsa_analyzer/build_tool/gradle.py
@@ -25,6 +25,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.gradle" in defaults:
for item in defaults["builder.gradle"]:
if hasattr(self, item):
@@ -67,7 +68,7 @@ def is_detected(self, repo_path: str) -> bool:
True if this build tool is detected, else False.
"""
gradle_config_files = self.build_configs + self.entry_conf
- return any(file_exists(repo_path, file) for file in gradle_config_files)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in gradle_config_files)
def get_group_id(self, gradle_exec: str, project_path: str) -> str | None:
"""Get the group id of a Gradle project.
diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py
index 0e89849af..d6fcd1c51 100644
--- a/src/macaron/slsa_analyzer/build_tool/maven.py
+++ b/src/macaron/slsa_analyzer/build_tool/maven.py
@@ -26,6 +26,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.maven" in defaults:
for item in defaults["builder.maven"]:
if hasattr(self, item):
@@ -63,4 +64,4 @@ def is_detected(self, repo_path: str) -> bool:
)
return False
maven_config_files = self.build_configs
- return any(file_exists(repo_path, file) for file in maven_config_files)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in maven_config_files)
diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py
index 2bc725974..cae93d257 100644
--- a/src/macaron/slsa_analyzer/build_tool/npm.py
+++ b/src/macaron/slsa_analyzer/build_tool/npm.py
@@ -29,6 +29,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.npm" in defaults:
for item in defaults["builder.npm"]:
if hasattr(self, item):
@@ -56,7 +57,7 @@ def is_detected(self, repo_path: str) -> bool:
# cases like .npmrc existing but not package-lock.json and whether
# they would still count as "detected"
npm_config_files = self.build_configs + self.package_lock + self.entry_conf
- return any(file_exists(repo_path, file) for file in npm_config_files)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in npm_config_files)
def is_deploy_command(
self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None
diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py
index 1926ca33b..5e1bb68a5 100644
--- a/src/macaron/slsa_analyzer/build_tool/pip.py
+++ b/src/macaron/slsa_analyzer/build_tool/pip.py
@@ -31,6 +31,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.pip" in defaults:
for item in defaults["builder.pip"]:
if hasattr(self, item):
@@ -54,7 +55,7 @@ def is_detected(self, repo_path: str) -> bool:
bool
True if this build tool is detected, else False.
"""
- return any(file_exists(repo_path, file) for file in self.build_configs)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs)
def get_dep_analyzer(self) -> DependencyAnalyzer:
"""Create a DependencyAnalyzer for the build tool.
diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py
index a1d5a4a0d..0363f3cbb 100644
--- a/src/macaron/slsa_analyzer/build_tool/poetry.py
+++ b/src/macaron/slsa_analyzer/build_tool/poetry.py
@@ -34,6 +34,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.poetry" in defaults:
for item in defaults["builder.poetry"]:
if hasattr(self, item):
@@ -59,7 +60,7 @@ def is_detected(self, repo_path: str) -> bool:
"""
package_lock_exists = ""
for file in self.package_lock:
- if file_exists(repo_path, file):
+ if file_exists(repo_path, file, filters=self.path_filters):
package_lock_exists = file
break
diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py
index 30914f27e..4660faf12 100644
--- a/src/macaron/slsa_analyzer/build_tool/yarn.py
+++ b/src/macaron/slsa_analyzer/build_tool/yarn.py
@@ -26,6 +26,7 @@ def __init__(self) -> None:
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
+ super().load_defaults()
if "builder.yarn" in defaults:
for item in defaults["builder.yarn"]:
if hasattr(self, item):
@@ -54,7 +55,7 @@ def is_detected(self, repo_path: str) -> bool:
# cases like .yarnrc existing but not package-lock.json and whether
# they would still count as "detected"
yarn_config_files = self.build_configs + self.package_lock + self.entry_conf
- return any(file_exists(repo_path, file) for file in yarn_config_files)
+ return any(file_exists(repo_path, file, filters=self.path_filters) for file in yarn_config_files)
def is_deploy_command(
self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None
diff --git a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl
new file mode 100644
index 000000000..3faa06d37
--- /dev/null
+++ b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl
@@ -0,0 +1,17 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+#include "prelude.dl"
+
+Policy("test_policy", component_id, "") :-
+ check_passed(component_id, "mcn_scm_authenticity_1"),
+ is_repo_url(component_id, "https://github.com/IntellectualSites/Arkitektonika-Client"),
+ build_tool_check(
+ check_id,
+ "gradle",
+ _
+ ),
+ check_facts(check_id, _, component_id,_,_).
+
+apply_policy_to("test_policy", component_id) :-
+ is_component(component_id, "pkg:maven/com.intellectualsites.arkitektonika/Arkitektonika-Client@2.1.3").
diff --git a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml
new file mode 100644
index 000000000..283c12c3f
--- /dev/null
+++ b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/test.yaml
@@ -0,0 +1,20 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+description: |
+ Analyzing repository authenticity of an artifact that uses Gradle as build tool.
+
+tags:
+- macaron-python-package
+
+steps:
+- name: Run macaron analyze
+ kind: analyze
+ options:
+ command_args:
+ - -purl
+ - pkg:maven/com.intellectualsites.arkitektonika/Arkitektonika-Client@2.1.3
+- name: Run macaron verify-policy to verify passed/failed checks
+ kind: verify
+ options:
+ policy: policy.dl
diff --git a/tests/integration/cases/fnproject_fdk-java/policy.dl b/tests/integration/cases/fnproject_fdk-java/policy.dl
new file mode 100644
index 000000000..f4d71f66e
--- /dev/null
+++ b/tests/integration/cases/fnproject_fdk-java/policy.dl
@@ -0,0 +1,21 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+#include "prelude.dl"
+
+Policy("test_policy", component_id, "") :-
+ check_passed(component_id, "mcn_build_tool_1"),
+ build_tool_exists(component_id, "maven"),
+ !build_tool_exists(component_id, "gradle").
+
+.decl build_tool_exists(component_id: number, name: symbol)
+build_tool_exists(component_id, name) :-
+ build_tool_check(
+ check_id,
+ name,
+ _
+ ),
+ check_facts(check_id, _, component_id,_,_).
+
+apply_policy_to("test_policy", component_id) :-
+ is_repo_url(component_id, "https://github.com/fnproject/fdk-java").
diff --git a/tests/integration/cases/fnproject_fdk-java/test.yaml b/tests/integration/cases/fnproject_fdk-java/test.yaml
new file mode 100644
index 000000000..5544a7068
--- /dev/null
+++ b/tests/integration/cases/fnproject_fdk-java/test.yaml
@@ -0,0 +1,22 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+description: |
+ Analyzing a repository to verify that Maven is detected as a build tool and Gradle is not.
+
+tags:
+- macaron-python-package
+
+steps:
+- name: Run macaron analyze
+ kind: analyze
+ options:
+ command_args:
+ - -rp
+ - https://github.com/fnproject/fdk-java.git
+ - --digest
+ - fdac7f9417156df1936dda1c38125a7f7bf9b21d
+- name: Run macaron verify-policy to verify passed/failed checks
+ kind: verify
+ options:
+ policy: policy.dl
diff --git a/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini b/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini
new file mode 100644
index 000000000..f49ec5bfa
--- /dev/null
+++ b/tests/integration/cases/timyarkov_multibuild_test_maven/configuration.ini
@@ -0,0 +1,6 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+[builder]
+# Disable the default path filtering for detecting build tools.
+build_tool_path_filters =
diff --git a/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml b/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml
index 56343f72d..c6b58016a 100644
--- a/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml
+++ b/tests/integration/cases/timyarkov_multibuild_test_maven/test.yaml
@@ -13,6 +13,7 @@ steps:
- name: Run macaron analyze
kind: analyze
options:
+ ini: configuration.ini
command_args:
- -purl
- pkg:maven/org.example/mock_maven_proj@1.0-SNAPSHOT?type=jar
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle b/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle
new file mode 100644
index 000000000..02b5ebcac
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_groovy/tests/build.gradle
@@ -0,0 +1,9 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+group = 'com.example'
+version = '1.0.0'
+
+repositories {
+ mavenCentral()
+}
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts b/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts
new file mode 100644
index 000000000..d0a47870b
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_kotlin/tests/build.gradle.kts
@@ -0,0 +1,9 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+group = "com.example"
+version = "1.0.0"
+
+repositories {
+ mavenCentral()
+}
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties b/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties
new file mode 100644
index 000000000..039b858f8
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/fail_properties/tests/gradle.properties
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+group=com.example
+version=1.0.0
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle b/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle
new file mode 100644
index 000000000..02b5ebcac
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_groovy/build.gradle
@@ -0,0 +1,9 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+group = 'com.example'
+version = '1.0.0'
+
+repositories {
+ mavenCentral()
+}
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts b/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts
new file mode 100644
index 000000000..d0a47870b
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_kotlin/build.gradle.kts
@@ -0,0 +1,9 @@
+/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+group = "com.example"
+version = "1.0.0"
+
+repositories {
+ mavenCentral()
+}
diff --git a/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties b/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties
new file mode 100644
index 000000000..039b858f8
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/gradle_repos/pass_properties/gradle.properties
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+group=com.example
+version=1.0.0
diff --git a/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml b/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml
new file mode 100644
index 000000000..243d4f9b1
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/maven_repos/fail_pom/tests/pom.xml
@@ -0,0 +1,19 @@
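+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.example</groupId>
+    <artifactId>artifact</artifactId>
+    <version>1.0.0</version>
+    <packaging>jar</packaging>
+
+    <name>artifact</name>
+    <description>A simple Maven project for com.example:artifact:1.0.0</description>
+
+    <properties>
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+    </properties>
+
+</project>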
diff --git a/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml b/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml
new file mode 100644
index 000000000..243d4f9b1
--- /dev/null
+++ b/tests/repo_verifier/mock_repos/maven_repos/pass_pom/pom.xml
@@ -0,0 +1,19 @@
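+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.example</groupId>
+    <artifactId>artifact</artifactId>
+    <version>1.0.0</version>
+    <packaging>jar</packaging>
+
+    <name>artifact</name>
+    <description>A simple Maven project for com.example:artifact:1.0.0</description>
+
+    <properties>
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+    </properties>
+
+</project>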
diff --git a/tests/repo_verifier/test_repo_verifier.py b/tests/repo_verifier/test_repo_verifier.py
new file mode 100644
index 000000000..0c01a8bff
--- /dev/null
+++ b/tests/repo_verifier/test_repo_verifier.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module tests the repo verifier."""
+from pathlib import Path
+
+import pytest
+
+from macaron.repo_verifier.repo_verifier_gradle import RepoVerifierGradle
+from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
+from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
+
+
+def gradle_repo_verifier(build_tool: BaseBuildTool, mock_repo: str) -> RepoVerifierGradle:
+    """Build a ``RepoVerifierGradle`` for the fixed test coordinates ``com.example:artifact:1.0.0``.
+
+    Parameters
+    ----------
+    build_tool : BaseBuildTool
+        The build tool instance handed to the verifier (expected to be Gradle).
+    mock_repo : str
+        File system path to the mock Gradle repository.
+
+    Returns
+    -------
+    RepoVerifierGradle
+        A verifier initialized with the test parameters for the given mock repository.
+    """
+    verifier = RepoVerifierGradle(
+        namespace="com.example",
+        name="artifact",
+        version="1.0.0",
+        build_tool=build_tool,
+        reported_repo_fs=mock_repo,
+        reported_repo_url="https://github.com/example/example",
+        provenance_repo_url=None,
+    )
+    return verifier
+
+
+def maven_repo_verifier(build_tool: BaseBuildTool, mock_repo: str) -> RepoVerifierMaven:
+    """Build a ``RepoVerifierMaven`` for the fixed test coordinates ``com.example:artifact:1.0.0``.
+
+    Parameters
+    ----------
+    build_tool : BaseBuildTool
+        The build tool instance handed to the verifier (expected to be Maven).
+    mock_repo : str
+        File system path to the mock Maven repository.
+
+    Returns
+    -------
+    RepoVerifierMaven
+        A verifier initialized with the test parameters for the given mock repository.
+    """
+    verifier = RepoVerifierMaven(
+        namespace="com.example",
+        name="artifact",
+        version="1.0.0",
+        build_tool=build_tool,
+        reported_repo_fs=mock_repo,
+        reported_repo_url="https://github.com/example/example",
+        provenance_repo_url=None,
+    )
+    return verifier
+
+
+@pytest.mark.parametrize(
+    ("mock_repo", "expected_result"),
+    [
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "fail_groovy", False),
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "pass_groovy", True),
+    ],
+)
+def test_extract_group_id_from_build_groovy(
+    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
+) -> None:
+    """Check group ID extraction from a Groovy ``build.gradle`` file in a mock repository.
+
+    ``expected_result`` is True when a group ID should be found in ``mock_repo`` and False otherwise.
+    """
+    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
+    group_id = verifier.extract_group_id_from_build_groovy()
+    assert (group_id is not None) == expected_result
+
+
+@pytest.mark.parametrize(
+    ("mock_repo", "expected_result"),
+    [
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "fail_properties", False),
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "pass_properties", True),
+    ],
+)
+def test_extract_group_id_from_build_properties(
+    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
+) -> None:
+    """Check group ID extraction from a ``gradle.properties`` file in a mock repository.
+
+    ``expected_result`` is True when a group ID should be found in ``mock_repo`` and False otherwise.
+    """
+    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
+    group_id = verifier.extract_group_id_from_properties()
+    assert (group_id is not None) == expected_result
+
+
+@pytest.mark.parametrize(
+    ("mock_repo", "expected_result"),
+    [
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "fail_kotlin", False),
+        (Path(__file__).parent / "mock_repos" / "gradle_repos" / "pass_kotlin", True),
+    ],
+)
+def test_extract_group_id_from_build_kotlin(
+    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
+) -> None:
+    """Check group ID extraction from a Kotlin ``build.gradle.kts`` file in a mock repository.
+
+    ``expected_result`` is True when a group ID should be found in ``mock_repo`` and False otherwise.
+    """
+    verifier = gradle_repo_verifier(build_tools["gradle"], str(mock_repo))
+    group_id = verifier.extract_group_id_from_build_kotlin()
+    assert (group_id is not None) == expected_result
+
+
+@pytest.mark.parametrize(
+    ("mock_repo", "expected_result"),
+    [
+        (Path(__file__).parent / "mock_repos" / "maven_repos" / "fail_pom", False),
+        (Path(__file__).parent / "mock_repos" / "maven_repos" / "pass_pom", True),
+    ],
+)
+def test_extract_group_id_from_pom(
+    build_tools: dict[str, BaseBuildTool], mock_repo: Path, expected_result: bool
+) -> None:
+    """Check group ID extraction from a Maven ``pom.xml`` file in a mock repository.
+
+    ``expected_result`` is True when a group ID should be found in ``mock_repo`` and False otherwise.
+    """
+    verifier = maven_repo_verifier(build_tools["maven"], str(mock_repo))
+    group_id = verifier.extract_group_id_from_pom()
+    assert (group_id is not None) == expected_result