Treat repo_pom_paths as a priority list: keep the configured order and return
the first valid repository URL instead of the last entry of a sorted set.

NOTE(review): the XML element names in the pom_text fixture were lost in the
copy this patch was restored from. <scm>/<connection>/<url> follow from the
configured paths and expected values; <project> and the <properties>/<url>
wrapper around 1.9.15 are a reconstruction - confirm against upstream.
NOTE(review): leading indentation inside hunks was also reconstructed.

diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini
index 52b0814a6..a6507231f 100644
--- a/src/macaron/config/defaults.ini
+++ b/src/macaron/config/defaults.ini
@@ -59,6 +59,7 @@ redirect_urls =
 [repofinder.java]
 # The list of maven-like repositories to attempt to retrieve artifact POMs from.
 artifact_repositories = https://repo.maven.apache.org/maven2
+# The repo_pom_paths list is a priority list. The first path that produces a valid URL will be returned as the result.
 repo_pom_paths =
     scm.url
     scm.connection
diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py
index 3c9a89daf..ee80130de 100644
--- a/src/macaron/repo_finder/repo_finder_java.py
+++ b/src/macaron/repo_finder/repo_finder_java.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module contains the JavaRepoFinder class to be used for finding Java repositories."""
@@ -116,7 +116,10 @@ def _create_urls(self, group: str, artifact: str, version: str) -> list[str]:
             The list of created URLs.
         """
         repositories = defaults.get_list(
-            "repofinder.java", "artifact_repositories", fallback=["https://repo.maven.apache.org/maven2"]
+            "repofinder.java",
+            "artifact_repositories",
+            fallback=["https://repo.maven.apache.org/maven2"],
+            duplicated_ok=True,
         )
         urls = []
         for repo in repositories:
@@ -160,7 +163,7 @@ def _read_pom(self, pom: str) -> list[str]:
             The extracted contents as a list of strings.
         """
         # Retrieve tags
-        tags = defaults.get_list("repofinder.java", "repo_pom_paths")
+        tags = defaults.get_list("repofinder.java", "repo_pom_paths", duplicated_ok=True)
         if not any(tags):
             logger.debug("No POM tags found for URL discovery.")
             return []
diff --git a/src/macaron/repo_finder/repo_validator.py b/src/macaron/repo_finder/repo_validator.py
index 74c37aab8..dd78ec10f 100644
--- a/src/macaron/repo_finder/repo_validator.py
+++ b/src/macaron/repo_finder/repo_validator.py
@@ -21,28 +21,19 @@ def find_valid_repository_url(urls: Iterable[str]) -> str:
     Returns
     -------
     str
-        A valid URL, or an empty string if none can be found.
+        The first valid URL from the iterable, or an empty string if none can be found.
     """
-    pruned_list = []
     for url in urls:
         parsed_url = clean_url(url)
         if not parsed_url:
-            # URLs that failed to parse can be rejected here.
+            # URLs that fail to parse can be rejected here.
             continue
         redirect_url = resolve_redirects(parsed_url)
-        # If a redirect URL is found add it, otherwise add the parsed url.
-        pruned_list.append(redirect_url if redirect_url else parsed_url.geturl())
+        checked_url = get_remote_vcs_url(redirect_url if redirect_url else parsed_url.geturl())
+        if checked_url:
+            return checked_url
 
-    vcs_set = {get_remote_vcs_url(value) for value in pruned_list if get_remote_vcs_url(value) != ""}
-
-    # To avoid non-deterministic results we sort the URLs.
-    vcs_list = sorted(vcs_set)
-
-    if len(vcs_list) < 1:
-        return ""
-
-    # Report the first valid URL from the end of the list.
-    return vcs_list.pop()
+    return ""
 
 
 def resolve_redirects(parsed_url: urllib.parse.ParseResult) -> str | None:
diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py
index 4aafe308d..6b724d2e2 100644
--- a/tests/repo_finder/test_repo_finder.py
+++ b/tests/repo_finder/test_repo_finder.py
@@ -1,12 +1,16 @@
-# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module tests the repo finder."""
+import os
+from pathlib import Path
 
 import pytest
 from packageurl import PackageURL
 
+from macaron.config.defaults import load_defaults
 from macaron.config.target_config import Configuration
+from macaron.repo_finder.repo_finder_java import JavaRepoFinder
 from macaron.slsa_analyzer.analyzer import Analyzer
 
 
@@ -69,3 +73,51 @@ def test_resolve_analysis_target(
 ) -> None:
     """Test the resolve analysis target method with valid inputs."""
     assert Analyzer.to_analysis_target(config, available_domains) == expect
+
+
+@pytest.mark.parametrize(
+    ("user_config_input", "expected"),
+    [
+        (
+            """
+            [repofinder.java]
+            repo_pom_paths =
+                scm.connection
+                scm.url
+            """,
+            ["scm:git:git@github.com:oracle-samples/macaron.git", "https://github.com/oracle/macaron"],
+        ),
+        (
+            """
+            [repofinder.java]
+            repo_pom_paths =
+                scm.url
+                scm.connection
+            """,
+            ["https://github.com/oracle/macaron", "scm:git:git@github.com:oracle-samples/macaron.git"],
+        ),
+    ],
+)
+def test_pom_extraction_ordering(tmp_path: Path, user_config_input: str, expected: list[str]) -> None:
+    """Test the ordering of elements extracted from the POM is correct and maintained."""
+    pom_text = """
+    <project>
+        <url>https://example.org</url>
+        <scm>
+            <connection>scm:git:git@github.com:oracle-samples/macaron.git</connection>
+            <url>https://github.com/oracle/macaron</url>
+        </scm>
+        <properties>
+            <url>1.9.15</url>
+        </properties>
+    </project>
+    """
+    user_config_path = os.path.join(tmp_path, "config.ini")
+    with open(user_config_path, "w", encoding="utf-8") as user_config_file:
+        user_config_file.write(user_config_input)
+    load_defaults(user_config_path)
+
+    repo_finder = JavaRepoFinder()
+
+    # Retrieve SCM from POM.
+    assert expected == repo_finder._read_pom(pom_text)  # pylint: disable=W0212