From 23a5deb4ac41a3549a969140b74a113e9f542150 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 26 Mar 2024 09:04:28 +0100 Subject: [PATCH] [3.12] gh-83434: Sync libregrtest and test_regrtest with the main branch (GH-117250) * gh-115122: Add --bisect option to regrtest (GH-115123) * test.bisect_cmd now exit with code 0 on success, and code 1 on failure. Before, it was the opposite. * test.bisect_cmd now runs the test worker process with -X faulthandler. * regrtest RunTests: Add create_python_cmd() and bisect_cmd() methods. (cherry picked from commit 1e5719a663d5b1703ad588dda4fccd763c7d3e99) * gh-115720: Show number of leaks in huntrleaks progress reports (GH-115726) Instead of showing a dot for each iteration, show: - '.' for zero (on negative) leaks - number of leaks for 1-9 - 'X' if there are more leaks This allows more rapid iteration: when bisecting, I don't need to wait for the final report to see if the test still leaks. Also, show the full result if there are any non-zero entries. This shows negative entries, for the unfortunate cases where a reference is created and cleaned up in different runs. Test *failure* is still determined by the existing heuristic. (cherry picked from commit af5f9d682c20c951b90e3c020eeccac386c9bbb0) * gh-83434: Disable XML in regrtest when -R option is used (GH-117232) (cherry picked from commit d52bdfb19fadd7614a0e5abaf68525fc7300e841) --------- (cherry picked from commit 477ef9015c312725734c4613b5ba12e80d920a22) Co-authored-by: Victor Stinner Co-authored-by: Petr Viktorin --- Lib/test/bisect_cmd.py | 13 +++- Lib/test/libregrtest/cmdline.py | 16 +++- Lib/test/libregrtest/main.py | 58 ++++++++++++++- Lib/test/libregrtest/refleak.py | 47 +++++++++--- Lib/test/libregrtest/results.py | 13 ++-- Lib/test/libregrtest/runtests.py | 48 ++++++++++++ Lib/test/libregrtest/worker.py | 16 +--- Lib/test/test_regrtest.py | 74 ++++++++++++++++++- ...-02-18-14-20-52.gh-issue-115122.3rGNo9.rst | 2 + ...-02-20-15-47-41.gh-issue-115720.w8i8UG.rst | 2 + ...4-03-25-21-31-49.gh-issue-83434.U7Z8cY.rst | 3 + 11 files changed, 249 insertions(+), 43 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst create mode 100644 Misc/NEWS.d/next/Tests/2024-02-20-15-47-41.gh-issue-115720.w8i8UG.rst create mode 100644 Misc/NEWS.d/next/Tests/2024-03-25-21-31-49.gh-issue-83434.U7Z8cY.rst diff --git a/Lib/test/bisect_cmd.py b/Lib/test/bisect_cmd.py index 0bdd7a43c03f7b..c36334ecfbd66d 100755 --- a/Lib/test/bisect_cmd.py +++ b/Lib/test/bisect_cmd.py @@ -51,6 +51,7 @@ def python_cmd(): cmd = [sys.executable] cmd.extend(subprocess._args_from_interpreter_flags()) cmd.extend(subprocess._optim_args_from_interpreter_flags()) + cmd.extend(('-X', 'faulthandler')) return cmd @@ -77,9 +78,13 @@ def run_tests(args, tests, huntrleaks=None): write_tests(tmp, tests) cmd = python_cmd() - cmd.extend(['-m', 'test', '--matchfile', tmp]) + cmd.extend(['-u', '-m', 'test', '--matchfile', tmp]) cmd.extend(args.test_args) print("+ %s" % format_shell_args(cmd)) + + sys.stdout.flush() + sys.stderr.flush() + proc = subprocess.run(cmd) return proc.returncode finally: @@ -136,8 +141,8 @@ def main(): ntest = max(ntest // 2, 1) subtests = random.sample(tests, ntest) - print("[+] Iteration %s: run %s tests/%s" - % (iteration, len(subtests), len(tests))) + print(f"[+] Iteration {iteration}/{args.max_iter}: " + f"run {len(subtests)} tests/{len(tests)}") print() exitcode = run_tests(args, subtests) @@ -169,10 +174,10 @@ def main(): if len(tests) <= args.max_tests: print("Bisection completed in %s iterations and %s" % (iteration, datetime.timedelta(seconds=dt))) - sys.exit(1) else: print("Bisection failed after %s iterations and %s" % (iteration, datetime.timedelta(seconds=dt))) + sys.exit(1) if __name__ == "__main__": diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py index 23ca3565662146..aa81e6388aa2ee 100644 --- a/Lib/test/libregrtest/cmdline.py +++ b/Lib/test/libregrtest/cmdline.py @@ -172,6 +172,7 @@ def __init__(self, **kwargs) -> None: self.fail_rerun = False self.tempdir = None self._add_python_opts = True + self.xmlpath = None super().__init__(**kwargs) @@ -347,6 +348,8 @@ def _create_parser(): help='override the working directory for the test run') group.add_argument('--cleanup', action='store_true', help='remove old test_python_* directories') + group.add_argument('--bisect', action='store_true', + help='if some tests fail, run test.bisect_cmd on them') group.add_argument('--dont-add-python-opts', dest='_add_python_opts', action='store_false', help="internal option, don't use it") @@ -494,17 +497,28 @@ def _parse_args(args, **kwargs): ns.randomize = True if ns.verbose: ns.header = True + # When -jN option is used, a worker process does not use --verbose3 # and so -R 3:3 -jN --verbose3 just works as expected: there is no false # alarm about memory leak. if ns.huntrleaks and ns.verbose3 and ns.use_mp is None: - ns.verbose3 = False # run_single_test() replaces sys.stdout with io.StringIO if verbose3 # is true. In this case, huntrleaks sees an write into StringIO as # a memory leak, whereas it is not (gh-71290). + ns.verbose3 = False print("WARNING: Disable --verbose3 because it's incompatible with " "--huntrleaks without -jN option", file=sys.stderr) + + if ns.huntrleaks and ns.xmlpath: + # The XML data is written into a file outside runtest_refleak(), so + # it looks like a leak but it's not. Simply disable XML output when + # hunting for reference leaks (gh-83434). + ns.xmlpath = None + print("WARNING: Disable --junit-xml because it's incompatible " + "with --huntrleaks", + file=sys.stderr) + if ns.forever: # --forever implies --failfast ns.failfast = True diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index a9725fa967370a..87bec656ec52b7 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -6,8 +6,7 @@ import sysconfig import time -from test import support -from test.support import os_helper, MS_WINDOWS +from test.support import os_helper, MS_WINDOWS, flush_std_streams from .cmdline import _parse_args, Namespace from .findtests import findtests, split_test_packages, list_cases @@ -72,6 +71,7 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False): self.want_cleanup: bool = ns.cleanup self.want_rerun: bool = ns.rerun self.want_run_leaks: bool = ns.runleaks + self.want_bisect: bool = ns.bisect self.ci_mode: bool = (ns.fast_ci or ns.slow_ci) self.want_add_python_opts: bool = (_add_python_opts @@ -272,6 +272,55 @@ def rerun_failed_tests(self, runtests: RunTests): self.display_result(rerun_runtests) + def _run_bisect(self, runtests: RunTests, test: str, progress: str) -> bool: + print() + title = f"Bisect {test}" + if progress: + title = f"{title} ({progress})" + print(title) + print("#" * len(title)) + print() + + cmd = runtests.create_python_cmd() + cmd.extend([ + "-u", "-m", "test.bisect_cmd", + # Limit to 25 iterations (instead of 100) to not abuse CI resources + "--max-iter", "25", + "-v", + # runtests.match_tests is not used (yet) for bisect_cmd -i arg + ]) + cmd.extend(runtests.bisect_cmd_args()) + cmd.append(test) + print("+", shlex.join(cmd), flush=True) + + flush_std_streams() + + import subprocess + proc = subprocess.run(cmd, timeout=runtests.timeout) + exitcode = proc.returncode + + title = f"{title}: exit code {exitcode}" + print(title) + print("#" * len(title)) + print(flush=True) + + if exitcode: + print(f"Bisect failed with exit code {exitcode}") + return False + + return True + + def run_bisect(self, runtests: RunTests) -> None: + tests, _ = self.results.prepare_rerun(clear=False) + + for index, name in enumerate(tests, 1): + if len(tests) > 1: + progress = f"{index}/{len(tests)}" + else: + progress = "" + if not self._run_bisect(runtests, name, progress): + return + def display_result(self, runtests): # If running the test suite for PGO then no one cares about results. if runtests.pgo: @@ -453,7 +502,7 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int: setup_process() - if self.hunt_refleak and not self.num_workers: + if (runtests.hunt_refleak is not None) and (not self.num_workers): # gh-109739: WindowsLoadTracker thread interfers with refleak check use_load_tracker = False else: @@ -473,6 +522,9 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int: if self.want_rerun and self.results.need_rerun(): self.rerun_failed_tests(runtests) + + if self.want_bisect and self.results.need_rerun(): + self.run_bisect(runtests) finally: if use_load_tracker: self.logger.stop_load_tracker() diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index 8d18ff3ae96db5..a3686d85265f58 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -86,9 +86,12 @@ def get_pooled_int(value): rc_before = alloc_before = fd_before = 0 if not quiet: - print("beginning", repcount, "repetitions", file=sys.stderr) - print(("1234567890"*(repcount//10 + 1))[:repcount], file=sys.stderr, - flush=True) + print("beginning", repcount, "repetitions. Showing number of leaks " + "(. for 0 or less, X for 10 or more)", + file=sys.stderr) + numbers = ("1234567890"*(repcount//10 + 1))[:repcount] + numbers = numbers[:warmups] + ':' + numbers[warmups:] + print(numbers, file=sys.stderr, flush=True) results = None dash_R_cleanup(fs, ps, pic, zdc, abcs) @@ -105,13 +108,27 @@ def get_pooled_int(value): rc_after = gettotalrefcount() fd_after = fd_count() - if not quiet: - print('.', end='', file=sys.stderr, flush=True) - rc_deltas[i] = get_pooled_int(rc_after - rc_before) alloc_deltas[i] = get_pooled_int(alloc_after - alloc_before) fd_deltas[i] = get_pooled_int(fd_after - fd_before) + if not quiet: + # use max, not sum, so total_leaks is one of the pooled ints + total_leaks = max(rc_deltas[i], alloc_deltas[i], fd_deltas[i]) + if total_leaks <= 0: + symbol = '.' + elif total_leaks < 10: + symbol = ( + '.', '1', '2', '3', '4', '5', '6', '7', '8', '9', + )[total_leaks] + else: + symbol = 'X' + if i == warmups: + print(' ', end='', file=sys.stderr, flush=True) + print(symbol, end='', file=sys.stderr, flush=True) + del total_leaks + del symbol + alloc_before = alloc_after rc_before = rc_after fd_before = fd_after @@ -146,14 +163,20 @@ def check_fd_deltas(deltas): ]: # ignore warmup runs deltas = deltas[warmups:] - if checker(deltas): + failing = checker(deltas) + suspicious = any(deltas) + if failing or suspicious: msg = '%s leaked %s %s, sum=%s' % ( test_name, deltas, item_name, sum(deltas)) - print(msg, file=sys.stderr, flush=True) - with open(filename, "a", encoding="utf-8") as refrep: - print(msg, file=refrep) - refrep.flush() - failed = True + print(msg, end='', file=sys.stderr) + if failing: + print(file=sys.stderr, flush=True) + with open(filename, "a", encoding="utf-8") as refrep: + print(msg, file=refrep) + refrep.flush() + failed = True + else: + print(' (this is fine)', file=sys.stderr, flush=True) return (failed, results) diff --git a/Lib/test/libregrtest/results.py b/Lib/test/libregrtest/results.py index 477de777f2a9ad..a1abe89f4cd176 100644 --- a/Lib/test/libregrtest/results.py +++ b/Lib/test/libregrtest/results.py @@ -129,7 +129,7 @@ def accumulate_result(self, result: TestResult, runtests: RunTests): def need_rerun(self): return bool(self.rerun_results) - def prepare_rerun(self) -> tuple[TestTuple, FilterDict]: + def prepare_rerun(self, *, clear: bool = True) -> tuple[TestTuple, FilterDict]: tests: TestList = [] match_tests_dict = {} for result in self.rerun_results: @@ -140,11 +140,12 @@ def prepare_rerun(self) -> tuple[TestTuple, FilterDict]: if match_tests: match_tests_dict[result.test_name] = match_tests - # Clear previously failed tests - self.rerun_bad.extend(self.bad) - self.bad.clear() - self.env_changed.clear() - self.rerun_results.clear() + if clear: + # Clear previously failed tests + self.rerun_bad.extend(self.bad) + self.bad.clear() + self.env_changed.clear() + self.rerun_results.clear() return (tuple(tests), match_tests_dict) diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py index 4bf2b1fb576e04..2116dec83e8aa0 100644 --- a/Lib/test/libregrtest/runtests.py +++ b/Lib/test/libregrtest/runtests.py @@ -2,7 +2,9 @@ import dataclasses import json import os +import shlex import subprocess +import sys from typing import Any from test import support @@ -67,6 +69,11 @@ class HuntRefleak: runs: int filename: StrPath + def bisect_cmd_args(self) -> list[str]: + # Ignore filename since it can contain colon (":"), + # and usually it's not used. Use the default filename. + return ["-R", f"{self.warmups}:{self.runs}:"] + @dataclasses.dataclass(slots=True, frozen=True) class RunTests: @@ -136,6 +143,47 @@ def json_file_use_stdout(self) -> bool: or support.is_wasi ) + def create_python_cmd(self) -> list[str]: + python_opts = support.args_from_interpreter_flags() + if self.python_cmd is not None: + executable = self.python_cmd + # Remove -E option, since --python=COMMAND can set PYTHON + # environment variables, such as PYTHONPATH, in the worker + # process. + python_opts = [opt for opt in python_opts if opt != "-E"] + else: + executable = (sys.executable,) + cmd = [*executable, *python_opts] + if '-u' not in python_opts: + cmd.append('-u') # Unbuffered stdout and stderr + return cmd + + def bisect_cmd_args(self) -> list[str]: + args = [] + if self.fail_fast: + args.append("--failfast") + if self.fail_env_changed: + args.append("--fail-env-changed") + if self.timeout: + args.append(f"--timeout={self.timeout}") + if self.hunt_refleak is not None: + args.extend(self.hunt_refleak.bisect_cmd_args()) + if self.test_dir: + args.extend(("--testdir", self.test_dir)) + if self.memory_limit: + args.extend(("--memlimit", self.memory_limit)) + if self.gc_threshold: + args.append(f"--threshold={self.gc_threshold}") + if self.use_resources: + args.extend(("-u", ','.join(self.use_resources))) + if self.python_cmd: + cmd = shlex.join(self.python_cmd) + args.extend(("--python", cmd)) + if self.randomize: + args.append(f"--randomize") + args.append(f"--randseed={self.random_seed}") + return args + @dataclasses.dataclass(slots=True, frozen=True) class WorkerRunTests(RunTests): diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py index 581741f2b1e92f..556706ee3a23ea 100644 --- a/Lib/test/libregrtest/worker.py +++ b/Lib/test/libregrtest/worker.py @@ -3,7 +3,6 @@ import os from typing import Any, NoReturn -from test import support from test.support import os_helper from .setup import setup_process, setup_test_dir @@ -19,21 +18,10 @@ def create_worker_process(runtests: WorkerRunTests, output_fd: int, tmp_dir: StrPath | None = None) -> subprocess.Popen: - python_cmd = runtests.python_cmd worker_json = runtests.as_json() - python_opts = support.args_from_interpreter_flags() - if python_cmd is not None: - executable = python_cmd - # Remove -E option, since --python=COMMAND can set PYTHON environment - # variables, such as PYTHONPATH, in the worker process. - python_opts = [opt for opt in python_opts if opt != "-E"] - else: - executable = (sys.executable,) - cmd = [*executable, *python_opts, - '-u', # Unbuffered stdout and stderr - '-m', 'test.libregrtest.worker', - worker_json] + cmd = runtests.create_python_cmd() + cmd.extend(['-m', 'test.libregrtest.worker', worker_json]) env = dict(os.environ) if tmp_dir is not None: diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 08b3f144380bf3..0a0c371836dc7a 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -389,7 +389,7 @@ def test_unknown_option(self): self.checkError(['--unknown-option'], 'unrecognized arguments: --unknown-option') - def check_ci_mode(self, args, use_resources, rerun=True): + def create_regrtest(self, args): ns = cmdline._parse_args(args) # Check Regrtest attributes which are more reliable than Namespace @@ -401,6 +401,10 @@ def check_ci_mode(self, args, use_resources, rerun=True): regrtest = main.Regrtest(ns) + return regrtest + + def check_ci_mode(self, args, use_resources, rerun=True): + regrtest = self.create_regrtest(args) self.assertEqual(regrtest.num_workers, -1) self.assertEqual(regrtest.want_rerun, rerun) self.assertTrue(regrtest.randomize) @@ -446,6 +450,29 @@ def test_dont_add_python_opts(self): ns = cmdline._parse_args(args) self.assertFalse(ns._add_python_opts) + def test_bisect(self): + args = ['--bisect'] + regrtest = self.create_regrtest(args) + self.assertTrue(regrtest.want_bisect) + + def test_verbose3_huntrleaks(self): + args = ['-R', '3:10', '--verbose3'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertIsNotNone(regrtest.hunt_refleak) + self.assertEqual(regrtest.hunt_refleak.warmups, 3) + self.assertEqual(regrtest.hunt_refleak.runs, 10) + self.assertFalse(regrtest.output_on_failure) + + def test_xml_huntrleaks(self): + args = ['-R', '3:12', '--junit-xml', 'output.xml'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertIsNotNone(regrtest.hunt_refleak) + self.assertEqual(regrtest.hunt_refleak.warmups, 3) + self.assertEqual(regrtest.hunt_refleak.runs, 12) + self.assertIsNone(regrtest.junit_filename) + @dataclasses.dataclass(slots=True) class Rerun: @@ -1148,8 +1175,8 @@ def check_leak(self, code, what, *, run_workers=False): stderr=subprocess.STDOUT) self.check_executed_tests(output, [test], failed=test, stats=1) - line = 'beginning 6 repetitions\n123456\n......\n' - self.check_line(output, re.escape(line)) + line = r'beginning 6 repetitions. .*\n123:456\n[.0-9X]{3} 111\n' + self.check_line(output, line) line2 = '%s leaked [1, 1, 1] %s, sum=3\n' % (test, what) self.assertIn(line2, output) @@ -1178,6 +1205,47 @@ def test_huntrleaks(self): def test_huntrleaks_mp(self): self.check_huntrleaks(run_workers=True) + @unittest.skipUnless(support.Py_DEBUG, 'need a debug build') + def test_huntrleaks_bisect(self): + # test --huntrleaks --bisect + code = textwrap.dedent(""" + import unittest + + GLOBAL_LIST = [] + + class RefLeakTest(unittest.TestCase): + def test1(self): + pass + + def test2(self): + pass + + def test3(self): + GLOBAL_LIST.append(object()) + + def test4(self): + pass + """) + + test = self.create_test('huntrleaks', code=code) + + filename = 'reflog.txt' + self.addCleanup(os_helper.unlink, filename) + cmd = ['--huntrleaks', '3:3:', '--bisect', test] + output = self.run_tests(*cmd, + exitcode=EXITCODE_BAD_TEST, + stderr=subprocess.STDOUT) + + self.assertIn(f"Bisect {test}", output) + self.assertIn(f"Bisect {test}: exit code 0", output) + + # test3 is the one which leaks + self.assertIn("Bisection completed in", output) + self.assertIn( + "Tests (1):\n" + f"* {test}.RefLeakTest.test3\n", + output) + @unittest.skipUnless(support.Py_DEBUG, 'need a debug build') def test_huntrleaks_fd_leak(self): # test --huntrleaks for file descriptor leak diff --git a/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst b/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst new file mode 100644 index 00000000000000..e187a40a40516b --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst @@ -0,0 +1,2 @@ +Add ``--bisect`` option to regrtest test runner: run failed tests with +``test.bisect_cmd`` to identify failing tests. Patch by Victor Stinner. diff --git a/Misc/NEWS.d/next/Tests/2024-02-20-15-47-41.gh-issue-115720.w8i8UG.rst b/Misc/NEWS.d/next/Tests/2024-02-20-15-47-41.gh-issue-115720.w8i8UG.rst new file mode 100644 index 00000000000000..a03ee11d974251 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-02-20-15-47-41.gh-issue-115720.w8i8UG.rst @@ -0,0 +1,2 @@ +Leak tests (``-R``, ``--huntrleaks``) now show a summary of the number of +leaks found in each iteration. diff --git a/Misc/NEWS.d/next/Tests/2024-03-25-21-31-49.gh-issue-83434.U7Z8cY.rst b/Misc/NEWS.d/next/Tests/2024-03-25-21-31-49.gh-issue-83434.U7Z8cY.rst new file mode 100644 index 00000000000000..7b7a8fcf53bb3c --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-03-25-21-31-49.gh-issue-83434.U7Z8cY.rst @@ -0,0 +1,3 @@ +Disable JUnit XML output (``--junit-xml=FILE`` command line option) in +regrtest when hunting for reference leaks (``-R`` option). Patch by Victor +Stinner.