From 95968e298cc4a9202f917ab0d94901bd6b4ca910 Mon Sep 17 00:00:00 2001
From: Marco Castelluccio
Date: Fri, 8 May 2026 14:00:22 +0200
Subject: [PATCH 1/2] Bug 2037300 - Add heuristics for selecting gtest, cppunit, rusttests

---
Review notes (this section is ignored when the patch is applied):
- Re-wrapped from a copy whose line breaks and indentation were lost; the
  indentation of context lines is reconstructed.
- find_tasks_for_paths was amended in review: the "close to a gtest folder"
  search only looks at the names of the folders it finds, skips the repository
  root and visits each folder once; gtest folders at the root of the repository
  are detected; a task matching several heuristics is listed once.
- The blob ids on the "index" lines of this series were not regenerated.

 bugbug/test_scheduling.py          |  95 ++++++++++++++++++++
 http_service/bugbug_http/models.py |  13 ++-
 tests/test_test_scheduling.py      | 153 ++++++++++++++++++++++++++++++++
 3 files changed, 257 insertions(+), 4 deletions(-)

diff --git a/bugbug/test_scheduling.py b/bugbug/test_scheduling.py
index 6b71dea6de..074886d8c5 100644
--- a/bugbug/test_scheduling.py
+++ b/bugbug/test_scheduling.py
@@ -1054,3 +1054,98 @@ def has_wpt_files(d: Path) -> bool:
                 break
 
     return manifests
+
+
+_GTEST_RE = re.compile(rb"^[ \t]*(?:TEST|TEST_F)\(", re.MULTILINE)
+_GTEST_FOLDERS = ("gtest", "gtests", "googletest")
+_CPPUNIT_INFRA_FILES = {
+    "testing/remotecppunittests.py",
+    "testing/runcppunittests.py",
+    "testing/cppunittest.toml",
+}
+
+
+def _get_cppunit_test_names(repo_dir: Path) -> set[str]:
+    with open(repo_dir / "testing" / "cppunittest.toml", "rb") as f:
+        data = tomllib.load(f)
+    return {f"{key}.cpp" for key in data if key != "DEFAULT"}
+
+
+def find_tasks_for_paths(
+    repo_dir_str: str, known_tasks: tuple[str, ...], paths: list[str]
+) -> list[str]:
+    """Select the gtest, cppunit and rusttests tasks relevant to modified paths.
+
+    Args:
+        repo_dir_str: Path to the root of the repository checkout.
+        known_tasks: Names of the tasks that can be selected.
+        paths: Modified files, as paths relative to the repository root.
+
+    Returns:
+        The tasks from known_tasks whose name contains "gtest", "cppunit" or
+        "rusttests" and whose heuristic matched one of the paths, in the same
+        order as in known_tasks and without duplicates.
+    """
+    repo_dir = Path(repo_dir_str)
+
+    # Any Rust file is modified.
+    select_rusttests = any(repository.get_type(path) == "Rust" for path in paths)
+
+    # Any file in a gtest folder is modified (checking the path components also
+    # covers gtest folders located at the root of the repository).
+    select_gtest = any(
+        part in _GTEST_FOLDERS for path in paths for part in Path(path).parent.parts
+    )
+
+    # Any file in a folder close to a gtest folder is modified (e.g.
+    # dom/media/CubebUtils.cpp is modified and dom/media/gtest/ exists).
+    if not select_gtest:
+        # Search each folder only once. The repository root is skipped: every
+        # gtest folder is below it, so it would always match, and only after
+        # walking the whole repository.
+        folders = {(repo_dir / path).parent for path in paths} - {repo_dir}
+        # Only the names of the folders found by the search are checked, so that
+        # the location of the repository itself can't trigger the selection.
+        select_gtest = any(
+            found.name in _GTEST_FOLDERS and found.is_dir()
+            for folder in folders
+            for found in folder.rglob("*")
+        )
+
+    # Any C/C++ file containing gtests is modified.
+    if not select_gtest:
+        for path in paths:
+            if repository.get_type(path) not in ("C/C++", "Objective-C/C++"):
+                continue
+            try:
+                content = (repo_dir / path).read_bytes()
+            except OSError:
+                # Best effort: the file can't be read (e.g. it doesn't exist in
+                # the checkout), so there is nothing to scan.
+                continue
+            if _GTEST_RE.search(content) is not None:
+                select_gtest = True
+                break
+
+    # Cppunit: infrastructure files are modified, or any .cpp file whose name
+    # matches a test listed in testing/cppunittest.toml is modified.
+    cppunit_test_names = _get_cppunit_test_names(repo_dir)
+    select_cppunit = any(
+        path in _CPPUNIT_INFRA_FILES or Path(path).name in cppunit_test_names
+        for path in paths
+    )
+
+    selected_kinds = [
+        kind
+        for kind, selected in (
+            ("gtest", select_gtest),
+            ("cppunit", select_cppunit),
+            ("rusttests", select_rusttests),
+        )
+        if selected
+    ]
+
+    # A task is listed once, even when its name matches several selected kinds.
+    return [
+        task for task in known_tasks if any(kind in task for kind in selected_kinds)
+    ]
diff --git a/http_service/bugbug_http/models.py b/http_service/bugbug_http/models.py
index e43e7e8724..0d756e658b 100644
--- a/http_service/bugbug_http/models.py
+++ b/http_service/bugbug_http/models.py
@@ -352,7 +352,14 @@ def _analyze_patch(revs: list[bytes], branch: str | None) -> dict:
     testlabelselect_model = MODEL_CACHE.get("testlabelselect")
     testgroupselect_model = MODEL_CACHE.get("testgroupselect")
 
+    known_tasks = get_known_tasks()
+    modified_paths = list(set(path for commit in commits for path in commit["files"]))
+
     tasks = testlabelselect_model.select_tests(commits, test_selection_threshold)
+    for task in test_scheduling.find_tasks_for_paths(
+        REPO_DIR, known_tasks, modified_paths
+    ):
+        tasks[task] = 1.0
     reduced = testselect.reduce_configs(
         set(t for t, c in tasks.items() if c >= 0.8), 1.0
     )
@@ -363,9 +370,7 @@ def _analyze_patch(revs: list[bytes], branch: str | None) -> dict:
     )
 
     groups = testgroupselect_model.select_tests(commits, test_selection_threshold)
-    for group in test_scheduling.find_manifests_for_paths(
-        REPO_DIR, list(set(path for commit in commits for path in commit["files"]))
-    ):
+    for group in test_scheduling.find_manifests_for_paths(REPO_DIR, modified_paths):
         groups[group] = 1.0
 
     config_groups = testselect.select_configs(groups.keys(), 0.9)
@@ -376,7 +381,7 @@ def _analyze_patch(revs: list[bytes], branch: str | None) -> dict:
         "config_groups": config_groups,
         "reduced_tasks": {t: c for t, c in tasks.items() if t in reduced},
         "reduced_tasks_higher": {t: c for t, c in tasks.items() if t in reduced_higher},
-        "known_tasks": get_known_tasks(),
+        "known_tasks": known_tasks,
     }
 
     return data
diff --git a/tests/test_test_scheduling.py b/tests/test_test_scheduling.py
index 5683f0bac3..a22143fcb3 100644
--- a/tests/test_test_scheduling.py
+++ b/tests/test_test_scheduling.py
@@ -1189,3 +1189,156 @@ def test_find_manifests_for_paths(tmp_path) -> None:
         str(tmp_path),
         ["testing/web-platform/tests/encrypted-media/content/content-metadata.js"],
     ) == {"testing/web-platform/tests/encrypted-media"}
+
+
+def test_find_tasks_for_paths(tmp_path) -> None:
+    known_tasks = (
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-mochitest-browser-chrome-1proc",
+        "test-linux64/opt-gtest-e10s",
+        "test-linux64/opt-cppunit",
+        "test-linux64/opt-rusttests",
+    )
+
+    # Set up a minimal cppunittest.toml listing two test names.
+    (tmp_path / "testing").mkdir(parents=True)
+    (tmp_path / "testing" / "cppunittest.toml").write_text(
+        '[DEFAULT]\n\n["TestArray"]\n\n["TestArrayUtils"]\n'
+    )
+
+    # Non-C/C++ file containing GTest patterns should not trigger gtest selection.
+    (tmp_path / "script.py").write_bytes(b"TEST(Foo, Bar) {}")
+    assert (
+        test_scheduling.find_tasks_for_paths(str(tmp_path), known_tasks, ["script.py"])
+        == []
+    )
+
+    # C/C++ file without GTest patterns should not trigger gtest selection.
+    (tmp_path / "source.cpp").write_bytes(b"int main() { return 0; }")
+    assert (
+        test_scheduling.find_tasks_for_paths(str(tmp_path), known_tasks, ["source.cpp"])
+        == []
+    )
+
+    # C/C++ file with TEST macro selects tasks containing "gtest".
+    (tmp_path / "test_foo.cpp").write_bytes(b"TEST(FooTest, Bar) {}\n")
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["test_foo.cpp"]
+    ) == [
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-gtest-e10s",
+    ]
+
+    # C/C++ file with TEST_F macro also triggers gtest selection.
+    (tmp_path / "test_fixture.cpp").write_bytes(b"TEST_F(FooFixture, Bar) {}\n")
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["test_fixture.cpp"]
+    ) == [
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-gtest-e10s",
+    ]
+
+    # Non-existent C/C++ file raises OSError which is silently skipped.
+    assert (
+        test_scheduling.find_tasks_for_paths(
+            str(tmp_path), known_tasks, ["nonexistent.cpp"]
+        )
+        == []
+    )
+
+    # No paths -> no tasks selected.
+    assert test_scheduling.find_tasks_for_paths(str(tmp_path), known_tasks, []) == []
+
+    # known_tasks without any "gtest" task -> empty even when GTest file present.
+    assert (
+        test_scheduling.find_tasks_for_paths(
+            str(tmp_path),
+            ("test-linux64/opt-mochitest-browser-chrome-1proc",),
+            ["test_foo.cpp"],
+        )
+        == []
+    )
+
+    # Two paths: one C/C++ with GTest patterns and one without -> gtest tasks selected.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["source.cpp", "test_foo.cpp"]
+    ) == [
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-gtest-e10s",
+    ]
+
+    # File whose path contains a gtest folder triggers gtest selection regardless of content.
+    (tmp_path / "dom" / "media" / "gtest").mkdir(parents=True)
+    (tmp_path / "dom" / "media" / "gtest" / "TestCubeb.cpp").write_bytes(
+        b"// no test macros\n"
+    )
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["dom/media/gtest/TestCubeb.cpp"]
+    ) == [
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-gtest-e10s",
+    ]
+
+    # File in a folder adjacent to a gtest subfolder triggers gtest selection.
+    (tmp_path / "dom" / "media" / "CubebUtils.cpp").write_bytes(b"int foo() {}\n")
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["dom/media/CubebUtils.cpp"]
+    ) == [
+        "test-linux64/opt-gtest-1proc",
+        "test-linux64/opt-gtest-e10s",
+    ]
+
+    # File at the root of the repository is not considered close to a gtest folder.
+    assert (
+        test_scheduling.find_tasks_for_paths(str(tmp_path), known_tasks, ["source.cpp"])
+        == []
+    )
+
+    # Rust file triggers rusttests selection.
+    (tmp_path / "servo").mkdir()
+    (tmp_path / "servo" / "lib.rs").write_bytes(b"pub fn foo() {}\n")
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["servo/lib.rs"]
+    ) == ["test-linux64/opt-rusttests"]
+
+    # Non-Rust file does not trigger rusttests selection.
+    assert "test-linux64/opt-rusttests" not in test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["servo/foo.cpp"]
+    )
+
+    # Modifying testing/cppunittest.toml itself triggers cppunit selection.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["testing/cppunittest.toml"]
+    ) == ["test-linux64/opt-cppunit"]
+
+    # Modifying testing/remotecppunittests.py triggers cppunit selection.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["testing/remotecppunittests.py"]
+    ) == ["test-linux64/opt-cppunit"]
+
+    # Modifying testing/runcppunittests.py triggers cppunit selection.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["testing/runcppunittests.py"]
+    ) == ["test-linux64/opt-cppunit"]
+
+    # A .cpp file whose stem is listed in cppunittest.toml triggers cppunit selection.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["mfbt/TestArrayUtils.cpp"]
+    ) == ["test-linux64/opt-cppunit"]
+
+    # A .cpp file whose stem is not listed in cppunittest.toml does not trigger cppunit.
+    assert "test-linux64/opt-cppunit" not in test_scheduling.find_tasks_for_paths(
+        str(tmp_path), known_tasks, ["mfbt/TestUnknown.cpp"]
+    )
+
+    # A task whose name matches several selected kinds is only listed once.
+    assert test_scheduling.find_tasks_for_paths(
+        str(tmp_path),
+        ("test-linux64/opt-gtest-cppunit",),
+        ["test_foo.cpp", "testing/cppunittest.toml"],
+    ) == ["test-linux64/opt-gtest-cppunit"]
+
+    # Empty known_tasks -> always empty.
+    assert (
+        test_scheduling.find_tasks_for_paths(str(tmp_path), (), ["test_foo.cpp"]) == []
+    )

From f69faaab997d2eb2bc75ca56f33a42548f7b7d7c Mon Sep 17 00:00:00 2001
From: Marco Castelluccio
Date: Fri, 8 May 2026 14:26:24 +0200
Subject: [PATCH 2/2] Don't fail if testing/cppunittest.toml doesn't exist in the repo

---
 bugbug/test_scheduling.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/bugbug/test_scheduling.py b/bugbug/test_scheduling.py
index 074886d8c5..5e40f9c367 100644
--- a/bugbug/test_scheduling.py
+++ b/bugbug/test_scheduling.py
@@ -1066,9 +1066,19 @@ def has_wpt_files(d: Path) -> bool:
 
 
 def _get_cppunit_test_names(repo_dir: Path) -> set[str]:
-    with open(repo_dir / "testing" / "cppunittest.toml", "rb") as f:
-        data = tomllib.load(f)
-    return {f"{key}.cpp" for key in data if key != "DEFAULT"}
+    """Return "<name>.cpp" for every test listed in testing/cppunittest.toml.
+
+    If the manifest doesn't exist, an error is logged and an empty set is returned.
+    """
+    try:
+        with open(repo_dir / "testing" / "cppunittest.toml", "rb") as f:
+            data = tomllib.load(f)
+        return {f"{key}.cpp" for key in data if key != "DEFAULT"}
+    except FileNotFoundError:
+        logger.error(
+            "testing/cppunittest.toml wasn't found, cppunit heuristic won't work"
+        )
+        return set()
 
 
 def find_tasks_for_paths(