Skip to content

Commit

Permalink
Introduce flaky tests statistics
Browse files Browse the repository at this point in the history
* Consider a test 'flaky' if it was restarted at least once and
  eventually passed. Print the number of flaky tests and their details
  in the statistics.

* Collect logs of such tests into the 'artifacts' directory, along with
  logs of failed tests.

Example:

    $ ./test-run.py --force replication-luatest/
    Started ./test-run.py --force replication-luatest/
    Running in parallel with 16 workers

    ... <other tests>

    [016] replication-luatest/gh_6033_box_promote_demote>                 [ pass ]
    [014] replication-luatest/gh_5418_qsync_with_anon_te>                 [ pass ]
    [012] replication-luatest/quorum_misc_test.lua                        [ pass ]
    [005] replication-luatest/linearizable_test.lua                       [ fail ]

    ... <traceback>

    [005] Test "replication-luatest/linearizable_test.lua", conf: "None"
    [005] 	failed, rerunning ...
    [005] replication-luatest/linearizable_test.lua                       [ pass ]

    ---------------------------------------------------------------------------------
    Top 10 longest tests (seconds):
    *  67.86 replication-luatest/quorum_orphan_test.lua
    *  26.00 replication-luatest/quorum_misc_test.lua
    *  15.59 replication-luatest/gh_5418_qsync_with_anon_test.lua
    *  13.39 replication-luatest/election_fencing_test.lua
    *   9.77 replication-luatest/gh_5295_split_brain_test.lua
    *   6.95 replication-luatest/no_quorum_test.lua
    *   6.29 replication-luatest/bootstrap_strategy_auto_test.lua
    *   6.26 replication-luatest/gh_5568_read_only_reason_test.lua
    *   5.31 replication-luatest/gh_7253_election_long_wal_write_test.lua
    *   5.13 replication-luatest/gh_7086_box_issue_promote_assert_test.lua
    ---------------------------------------------------------------------------------
    Statistics:
    * pass: 36
    * flaky: 1
    Flaked tasks:
    - [replication-luatest/linearizable_test.lua, null]
    # logfile:        /private/tmp/t/log/005_replication-luatest.log
    # reproduce file: /private/tmp/t/reproduce/005_replication-luatest.list.yaml
    ---
    - [replication-luatest/linearizable_test.lua, null]
    - [replication-luatest/linearizable_test.lua, null]
    ...

    $ ls /tmp/t/artifacts/
    005_replication-luatest/ log/                     reproduce/               statistics/
  • Loading branch information
ylobankov committed Mar 21, 2023
1 parent cc3c38e commit 5e11b25
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
10 changes: 9 additions & 1 deletion lib/worker.py
Expand Up @@ -341,6 +341,7 @@ def run_loop(self, task_queue, result_queue):
break

short_status = None
fails_count = 0
duration = 0.0
result_queue.put(self.current_task(task_id))
testname = os.path.basename(task_id[0])
Expand All @@ -358,6 +359,7 @@ def run_loop(self, task_queue, result_queue):
self.restart_server()
# print message only after some fails occurred
if short_status == 'fail':
fails_count += 1
if testname not in self.suite.fragile['tests']:
color_stdout(
'Test "%s", conf: "%s"\n\tfailed, rerunning ...\n'
Expand All @@ -371,7 +373,13 @@ def run_loop(self, task_queue, result_queue):
short_status, duration = self.run_task(task_id)
retries_left = retries_left - 1

result_queue.put(self.wrap_result(task_id, short_status, duration))
if fails_count > 0 and short_status == 'pass':
result_queue.put(self.wrap_result(task_id, 'flaky', duration))
else:
result_queue.put(
self.wrap_result(task_id, short_status, duration)
)

if short_status == 'fail':
if Options().args.is_force:
self.restart_server()
Expand Down
41 changes: 27 additions & 14 deletions listeners.py
Expand Up @@ -39,6 +39,7 @@ def __init__(self, get_logfile):
self._sampler = sampler
self.duration_stats = dict()
self.failed_tasks = []
self.flaked_tasks = []
self.get_logfile = get_logfile
self.long_tasks = set()

Expand All @@ -58,6 +59,11 @@ def process_result(self, obj):
obj.worker_name,
obj.show_reproduce_content))

if obj.short_status == 'flaky':
self.flaked_tasks.append((obj.task_id,
obj.worker_name,
obj.show_reproduce_content))

self.duration_stats[obj.task_id] = obj.duration

def get_long_mark(self, task):
Expand Down Expand Up @@ -149,6 +155,19 @@ def print_duration(self, stats_dir):
self.duration_stats[task_id]))
fd.close()

def print_tasks_info(self, tasks):
    """Report a list of tasks (failed or flaky ones).

    For each (task_id, worker_name, show_reproduce_content) triple,
    print the task id as a one-line YAML flow sequence, the worker's
    logfile path, the reproduce file path and, when requested, the
    tail of the reproduce file wrapped in YAML document separators.
    """
    for task_id, worker_name, show_reproduce_content in tasks:
        logfile_path = self.get_logfile(worker_name)
        # yaml flow style renders the id as e.g. "[suite/test.lua, null]".
        rendered_id = yaml.safe_dump(task_id, default_flow_style=True)
        final_report('- %s' % rendered_id, schema='test_var')
        color_stdout('# logfile: %s\n' % logfile_path)
        reproduce_path = get_reproduce_file(worker_name)
        color_stdout('# reproduce file: %s\n' % reproduce_path)
        if not show_reproduce_content:
            continue
        color_stdout("---\n", schema='separator')
        print_tail_n(reproduce_path)
        color_stdout("...\n", schema='separator')

def print_statistics(self):
"""Print statistics and results of testing."""
# Prepare standalone subpath '<vardir>/statistics' for statistics files.
Expand All @@ -163,23 +182,17 @@ def print_statistics(self):
for short_status, cnt in self.stats.items():
final_report('* %s: %d\n' % (short_status, cnt), schema='test_var')

if self.flaked_tasks:
final_report('Flaked tasks:\n', schema='test_var')
self.print_tasks_info(self.flaked_tasks)

if not self.failed_tasks:
return False
return False, bool(self.flaked_tasks)

final_report('Failed tasks:\n', schema='test_var')
for task_id, worker_name, show_reproduce_content in self.failed_tasks:
logfile = self.get_logfile(worker_name)
task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
final_report('- %s' % task_id_str, schema='test_var')
color_stdout('# logfile: %s\n' % logfile)
reproduce_file_path = get_reproduce_file(worker_name)
color_stdout('# reproduce file: %s\n' % reproduce_file_path)
if show_reproduce_content:
color_stdout("---\n", schema='separator')
print_tail_n(reproduce_file_path)
color_stdout("...\n", schema='separator')
self.print_tasks_info(self.failed_tasks)

return True
return True, bool(self.flaked_tasks)


class ArtifactsWatcher(BaseWatcher):
Expand All @@ -196,7 +209,7 @@ def process_result(self, obj):
if not isinstance(obj, WorkerTaskResult):
return

if obj.short_status == 'fail' and \
if obj.short_status in ('fail', 'flaky') and \
obj.worker_name not in self.failed_workers:
self.failed_workers.append(obj.worker_name)

Expand Down
7 changes: 4 additions & 3 deletions test-run.py
Expand Up @@ -126,12 +126,13 @@ def main_loop_parallel():
dispatcher.wait()
dispatcher.wait_processes()
color_stdout('-' * 81, "\n", schema='separator')
has_failed = dispatcher.statistics.print_statistics()
has_failed, has_flaked = dispatcher.statistics.print_statistics()
has_undone = dispatcher.report_undone(
verbose=bool(is_force or not has_failed))
if has_failed:
if any([has_failed, has_flaked]):
dispatcher.artifacts.save_artifacts()
return EXIT_FAILED_TEST
if has_failed:
return EXIT_FAILED_TEST
if has_undone:
return EXIT_NOTDONE_TEST
except KeyboardInterrupt:
Expand Down

0 comments on commit 5e11b25

Please sign in to comment.