Skip to content

Commit

Permalink
Introduce flaky tests statistics
Browse files Browse the repository at this point in the history
* Consider a test 'flaky' if it was restarted at least once and
  eventually passed. Print the number of flaky tests and their details
  in the statistics.

* Collect logs of such tests into the 'artifacts' directory, along with
  logs of failed tests.

Example:

    $ ./test-run.py --force replication-luatest/
    Started ./test-run.py --force replication-luatest/
    Running in parallel with 16 workers

    ... <other tests>

    [016] replication-luatest/gh_6033_box_promote_demote>                 [ pass ]
    [014] replication-luatest/gh_5418_qsync_with_anon_te>                 [ pass ]
    [012] replication-luatest/quorum_misc_test.lua                        [ pass ]
    [005] replication-luatest/linearizable_test.lua                       [ fail ]

    ... <traceback>

    [005] Test "replication-luatest/linearizable_test.lua", conf: "None"
    [005] 	failed, rerunning ...
    [005] replication-luatest/linearizable_test.lua                       [ pass ]

    ---------------------------------------------------------------------------------
    Top 10 longest tests (seconds):
    *  67.86 replication-luatest/quorum_orphan_test.lua
    *  26.00 replication-luatest/quorum_misc_test.lua
    *  15.59 replication-luatest/gh_5418_qsync_with_anon_test.lua
    *  13.39 replication-luatest/election_fencing_test.lua
    *   9.77 replication-luatest/gh_5295_split_brain_test.lua
    *   6.95 replication-luatest/no_quorum_test.lua
    *   6.29 replication-luatest/bootstrap_strategy_auto_test.lua
    *   6.26 replication-luatest/gh_5568_read_only_reason_test.lua
    *   5.31 replication-luatest/gh_7253_election_long_wal_write_test.lua
    *   5.13 replication-luatest/gh_7086_box_issue_promote_assert_test.lua
    ---------------------------------------------------------------------------------
    Statistics:
    * pass: 36
    * flaky: 1
    Flaked tasks:
    - [replication-luatest/linearizable_test.lua, null]
    # logfile:        /private/tmp/t/log/005_replication-luatest.log
    # reproduce file: /private/tmp/t/reproduce/005_replication-luatest.list.yaml
    ---
    - [replication-luatest/linearizable_test.lua, null]
    - [replication-luatest/linearizable_test.lua, null]
    ...

    $ ls /tmp/t/artifacts/
    005_replication-luatest/ log/                     reproduce/               statistics/
  • Loading branch information
ylobankov committed Mar 21, 2023
1 parent cc3c38e commit 5e11b25
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
10 changes: 9 additions & 1 deletion lib/worker.py
Expand Up @@ -341,6 +341,7 @@ def run_loop(self, task_queue, result_queue):
break

short_status = None
fails_count = 0
duration = 0.0
result_queue.put(self.current_task(task_id))
testname = os.path.basename(task_id[0])
Expand All @@ -358,6 +359,7 @@ def run_loop(self, task_queue, result_queue):
self.restart_server()
# print message only after some fails occurred
if short_status == 'fail':
fails_count += 1
if testname not in self.suite.fragile['tests']:
color_stdout(
'Test "%s", conf: "%s"\n\tfailed, rerunning ...\n'
Expand All @@ -371,7 +373,13 @@ def run_loop(self, task_queue, result_queue):
short_status, duration = self.run_task(task_id)
retries_left = retries_left - 1

result_queue.put(self.wrap_result(task_id, short_status, duration))
if fails_count > 0 and short_status == 'pass':
result_queue.put(self.wrap_result(task_id, 'flaky', duration))
else:
result_queue.put(
self.wrap_result(task_id, short_status, duration)
)

if short_status == 'fail':
if Options().args.is_force:
self.restart_server()
Expand Down
41 changes: 27 additions & 14 deletions listeners.py
Expand Up @@ -39,6 +39,7 @@ def __init__(self, get_logfile):
self._sampler = sampler
self.duration_stats = dict()
self.failed_tasks = []
self.flaked_tasks = []
self.get_logfile = get_logfile
self.long_tasks = set()

Expand All @@ -58,6 +59,11 @@ def process_result(self, obj):
obj.worker_name,
obj.show_reproduce_content))

if obj.short_status == 'flaky':
self.flaked_tasks.append((obj.task_id,
obj.worker_name,
obj.show_reproduce_content))

self.duration_stats[obj.task_id] = obj.duration

def get_long_mark(self, task):
Expand Down Expand Up @@ -149,6 +155,19 @@ def print_duration(self, stats_dir):
self.duration_stats[task_id]))
fd.close()

def print_tasks_info(self, tasks):
    """Report a list of tasks (failed or flaky ones).

    For each (task_id, worker_name, show_reproduce_content) triple,
    print the task id as a one-line YAML flow sequence, the worker's
    logfile path, the reproduce file path and, when requested, the
    tail of the reproduce file wrapped in YAML document separators.
    """
    for task_id, worker_name, show_reproduce_content in tasks:
        logfile_path = self.get_logfile(worker_name)
        # yaml flow style renders the id as e.g. "[suite/test.lua, null]".
        rendered_id = yaml.safe_dump(task_id, default_flow_style=True)
        final_report('- %s' % rendered_id, schema='test_var')
        color_stdout('# logfile: %s\n' % logfile_path)
        reproduce_path = get_reproduce_file(worker_name)
        color_stdout('# reproduce file: %s\n' % reproduce_path)
        if not show_reproduce_content:
            continue
        color_stdout("---\n", schema='separator')
        print_tail_n(reproduce_path)
        color_stdout("...\n", schema='separator')

def print_statistics(self):
"""Print statistics and results of testing."""
# Prepare standalone subpath '<vardir>/statistics' for statistics files.
Expand All @@ -163,23 +182,17 @@ def print_statistics(self):
for short_status, cnt in self.stats.items():
final_report('* %s: %d\n' % (short_status, cnt), schema='test_var')

if self.flaked_tasks:
final_report('Flaked tasks:\n', schema='test_var')
self.print_tasks_info(self.flaked_tasks)

if not self.failed_tasks:
return False
return False, bool(self.flaked_tasks)

final_report('Failed tasks:\n', schema='test_var')
for task_id, worker_name, show_reproduce_content in self.failed_tasks:
logfile = self.get_logfile(worker_name)
task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
final_report('- %s' % task_id_str, schema='test_var')
color_stdout('# logfile: %s\n' % logfile)
reproduce_file_path = get_reproduce_file(worker_name)
color_stdout('# reproduce file: %s\n' % reproduce_file_path)
if show_reproduce_content:
color_stdout("---\n", schema='separator')
print_tail_n(reproduce_file_path)
color_stdout("...\n", schema='separator')
self.print_tasks_info(self.failed_tasks)

return True
return True, bool(self.flaked_tasks)


class ArtifactsWatcher(BaseWatcher):
Expand All @@ -196,7 +209,7 @@ def process_result(self, obj):
if not isinstance(obj, WorkerTaskResult):
return

if obj.short_status == 'fail' and \
if obj.short_status in ('fail', 'flaky') and \
obj.worker_name not in self.failed_workers:
self.failed_workers.append(obj.worker_name)

Expand Down
7 changes: 4 additions & 3 deletions test-run.py
Expand Up @@ -126,12 +126,13 @@ def main_loop_parallel():
dispatcher.wait()
dispatcher.wait_processes()
color_stdout('-' * 81, "\n", schema='separator')
has_failed = dispatcher.statistics.print_statistics()
has_failed, has_flaked = dispatcher.statistics.print_statistics()
has_undone = dispatcher.report_undone(
verbose=bool(is_force or not has_failed))
if has_failed:
if any([has_failed, has_flaked]):
dispatcher.artifacts.save_artifacts()
return EXIT_FAILED_TEST
if has_failed:
return EXIT_FAILED_TEST
if has_undone:
return EXIT_NOTDONE_TEST
except KeyboardInterrupt:
Expand Down

0 comments on commit 5e11b25

Please sign in to comment.