Skip to content

Commit

Permalink
[ci][microcheck] improve determine new tests (#45377)
Browse files Browse the repository at this point in the history
Improve the logic to determine new tests for microcheck. The new logic
get test targets from the list of changed files.

Previously, we determine new tests by computing the differences between
local test targets and test in DB. However, there are a lot of cases a
few test function is added into an existing test target, so that logic
doesn't work too broadly.

Test:
- CI
- microcheck runs the test_tester test that is changed in this PR

Signed-off-by: can <can@anyscale.com>
  • Loading branch information
can-anyscale committed May 17, 2024
1 parent 01c6964 commit 467b092
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 25 deletions.
37 changes: 23 additions & 14 deletions ci/ray_ci/test_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
_get_test_targets,
_get_high_impact_test_targets,
_get_flaky_test_targets,
_get_new_tests,
_get_tag_matcher,
_get_changed_files,
_get_changed_tests,
)
from ray_release.test import Test, TestState

Expand Down Expand Up @@ -125,7 +126,7 @@ def test_get_test_targets() -> None:
"ray_release.test.Test.gen_high_impact_tests",
return_value={"step": test_objects},
), mock.patch(
"ci.ray_ci.tester._get_new_tests",
"ci.ray_ci.tester._get_changed_tests",
return_value=set(),
):
assert set(
Expand Down Expand Up @@ -242,7 +243,7 @@ def test_get_high_impact_test_targets() -> None:
"ray_release.test.Test.gen_high_impact_tests",
return_value={"step": test["input"]},
), mock.patch(
"ci.ray_ci.tester._get_new_tests",
"ci.ray_ci.tester._get_changed_tests",
return_value=test["new_tests"],
):
assert (
Expand All @@ -255,17 +256,25 @@ def test_get_high_impact_test_targets() -> None:
)


@mock.patch("ci.ray_ci.tester_container.TesterContainer.run_script_with_output")
@mock.patch("ray_release.test.Test.gen_from_s3")
def test_get_new_tests(mock_gen_from_s3, mock_run_script_with_output) -> None:
mock_gen_from_s3.return_value = [
_stub_test({"name": "linux://old_test_01"}),
_stub_test({"name": "linux://old_test_02"}),
]
mock_run_script_with_output.return_value = "//old_test_01\n//new_test"
assert _get_new_tests(
"linux", LinuxTesterContainer("test", skip_ray_installation=True)
) == {"//new_test"}
@mock.patch.dict(os.environ, {"BUILDKITE_PULL_REQUEST_BASE_BRANCH": "base"})
@mock.patch("subprocess.check_call")
@mock.patch("subprocess.check_output")
def test_get_changed_files(mock_check_output, mock_check_call) -> None:
mock_check_output.return_value = b"file1\nfile2\n"
assert _get_changed_files() == {"file1", "file2"}


@mock.patch("ci.ray_ci.tester._get_test_targets_per_file")
@mock.patch("ci.ray_ci.tester._get_changed_files")
def test_get_changed_tests(
mock_get_changed_files, mock_get_test_targets_per_file
) -> None:
mock_get_changed_files.return_value = {"test_src", "build_src"}
mock_get_test_targets_per_file.side_effect = (
lambda x: {"//t1", "//t2"} if x == "test_src" else {}
)

assert _get_changed_tests() == {"//t1", "//t2"}


def test_get_flaky_test_targets() -> None:
Expand Down
69 changes: 58 additions & 11 deletions ci/ray_ci/tester.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import itertools
import os
import subprocess
import sys
from typing import List, Set, Tuple, Optional

Expand All @@ -17,7 +18,7 @@
from ci.ray_ci.linux_tester_container import LinuxTesterContainer
from ci.ray_ci.windows_tester_container import WindowsTesterContainer
from ci.ray_ci.tester_container import TesterContainer
from ci.ray_ci.utils import docker_login, ci_init
from ci.ray_ci.utils import docker_login, ci_init, logger
from ray_release.test import Test, TestState

CUDA_COPYRIGHT = """
Expand Down Expand Up @@ -414,23 +415,69 @@ def _get_high_impact_test_targets(
for test in itertools.chain.from_iterable(step_id_to_tests.values())
if test.get_oncall() == team
}
new_tests = _get_new_tests(os_prefix, container)
changed_tests = _get_changed_tests()

return high_impact_tests.union(new_tests)
return high_impact_tests.union(changed_tests)


def _get_new_tests(prefix: str, container: TesterContainer) -> Set[str]:
def _get_changed_tests() -> Set[str]:
"""
Get all local test targets that are not in database
Get all changed tests in the current PR
"""
local_test_targets = set(
container.run_script_with_output(['bazel query "tests(//...)"'])
.strip()
.split(os.linesep)
changed_files = _get_changed_files()
logger.info(f"Changed files: {changed_files}")
return set(
itertools.chain.from_iterable(
[_get_test_targets_per_file(file) for file in _get_changed_files()]
)
)
db_test_targets = {test.get_target() for test in Test.gen_from_s3(prefix=prefix)}

return local_test_targets.difference(db_test_targets)

def _get_test_targets_per_file(file: str) -> Set[str]:
"""
Get the test target from a file path
"""
try:
package = (
subprocess.check_output(["bazel", "query", file], cwd=bazel_workspace_dir)
.decode()
.strip()
)
if not package:
return set()
targets = subprocess.check_output(
["bazel", "query", f"tests(attr('srcs', {package}, //...))"],
cwd=bazel_workspace_dir,
)
targets = {
target.strip()
for target in targets.decode().splitlines()
if target is not None
}
logger.info(f"Found test targets for file {file}: {targets}")

return targets
except subprocess.CalledProcessError:
logger.info(f"File {file} is not a test target")
return set()


def _get_changed_files() -> Set[str]:
"""
Get all changed files in the current PR
"""
base = os.environ.get("BUILDKITE_PULL_REQUEST_BASE_BRANCH")
head = os.environ.get("BUILDKITE_COMMIT")
if not base or not head:
# if not in a PR, return an empty set
return set()

subprocess.check_call(["git", "fetch", "origin", base], cwd=bazel_workspace_dir)
changes = subprocess.check_output(
["git", "diff", "--name-only", f"origin/{base}...{head}"],
cwd=bazel_workspace_dir,
)
return {file.strip() for file in changes.decode().splitlines() if file is not None}


def _get_flaky_test_targets(
Expand Down

0 comments on commit 467b092

Please sign in to comment.