Introduce flaky tests statistics
* Consider a test 'flaky' if it was restarted at least once and
  eventually passed (see the sketch below the bullets). Print the
  number of flaked tests and the corresponding info in the statistics.

* Collect logs of such tests into the 'artifacts' directory, along
  with logs of failed tests.
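
Roughly, the classification rule can be sketched as follows (an
illustrative sketch only; run_test() and max_retries are hypothetical
stand-ins for the worker's retry loop, not actual test-run.py code):

    def classify(run_test, max_retries):
        # Run the test once, then rerun it while it keeps failing.
        fails = 0
        status = run_test()          # returns 'pass' or 'fail'
        while status == 'fail' and fails < max_retries:
            fails += 1
            status = run_test()
        if status == 'pass' and fails > 0:
            return 'flaky'           # restarted at least once, then passed
        return status                # plain 'pass' or a final 'fail'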

Example:

    $ ./test-run.py --force replication-luatest/
    Started ./test-run.py --force replication-luatest/
    Running in parallel with 16 workers

    ... <other tests>

    [016] replication-luatest/gh_6033_box_promote_demote>                 [ pass ]
    [014] replication-luatest/gh_5418_qsync_with_anon_te>                 [ pass ]
    [012] replication-luatest/quorum_misc_test.lua                        [ pass ]
    [005] replication-luatest/linearizable_test.lua                       [ fail ]

    ... <traceback>

    [005] Test "replication-luatest/linearizable_test.lua", conf: "None"
    [005] 	failed, rerunning ...
    [005] replication-luatest/linearizable_test.lua                       [ pass ]

    ---------------------------------------------------------------------------------
    Top 10 longest tests (seconds):
    *  67.86 replication-luatest/quorum_orphan_test.lua
    *  26.00 replication-luatest/quorum_misc_test.lua
    *  15.59 replication-luatest/gh_5418_qsync_with_anon_test.lua
    *  13.39 replication-luatest/election_fencing_test.lua
    *   9.77 replication-luatest/gh_5295_split_brain_test.lua
    *   6.95 replication-luatest/no_quorum_test.lua
    *   6.29 replication-luatest/bootstrap_strategy_auto_test.lua
    *   6.26 replication-luatest/gh_5568_read_only_reason_test.lua
    *   5.31 replication-luatest/gh_7253_election_long_wal_write_test.lua
    *   5.13 replication-luatest/gh_7086_box_issue_promote_assert_test.lua
    ---------------------------------------------------------------------------------
    Statistics:
    * pass: 37 (flaky: 1)
    Flaked tasks:
    - [replication-luatest/linearizable_test.lua, null]
    # logfile:        /private/tmp/t/log/005_replication-luatest.log
    # reproduce file: /private/tmp/t/reproduce/005_replication-luatest.list.yaml

    $ ls /tmp/t/artifacts/
    005_replication-luatest/ log/                     reproduce/               statistics/
ylobankov committed Mar 21, 2023
1 parent cc3c38e commit 596fd3b
Showing 3 changed files with 69 additions and 32 deletions.
18 changes: 18 additions & 0 deletions lib/worker.py
@@ -160,6 +160,15 @@ def __init__(self, worker_id, worker_name, task_id,
         self.show_reproduce_content = show_reproduce_content
 
 
+class WorkerFlakedTask(BaseWorkerMessage):
+    """Passed into the result queue when a task flaked.
+    The task_id (any hashable object) field holds ID of the flaked task.
+    """
+    def __init__(self, worker_id, worker_name, task_id):
+        super(WorkerFlakedTask, self).__init__(worker_id, worker_name)
+        self.task_id = task_id
+
+
 class WorkerOutput(BaseWorkerMessage):
     """The output passed by worker processes via color_stdout/color_log
     functions. The output wrapped into objects of this class by setting queue
@@ -227,6 +236,9 @@ def wrap_result(self, task_id, short_status, duration):
                                 self.suite.test_is_long(task_id), duration,
                                 self.suite.show_reproduce_content())
 
+    def wrap_flaked_task(self, task_id):
+        return WorkerFlakedTask(self.id, self.name, task_id)
+
     def sigterm_handler(self, signum, frame):
         self.sigterm_received = True
 
@@ -341,6 +353,7 @@ def run_loop(self, task_queue, result_queue):
                 break
 
             short_status = None
+            fails_count = 0
             duration = 0.0
             result_queue.put(self.current_task(task_id))
             testname = os.path.basename(task_id[0])
@@ -358,6 +371,7 @@
                 self.restart_server()
                 # print message only after some fails occurred
                 if short_status == 'fail':
+                    fails_count += 1
                     if testname not in self.suite.fragile['tests']:
                         color_stdout(
                             'Test "%s", conf: "%s"\n\tfailed, rerunning ...\n'
@@ -372,6 +386,10 @@
                 retries_left = retries_left - 1
 
             result_queue.put(self.wrap_result(task_id, short_status, duration))
+
+            if fails_count > 0 and short_status == 'pass':
+                result_queue.put(self.wrap_flaked_task(task_id))
+
             if short_status == 'fail':
                 if Options().args.is_force:
                     self.restart_server()
76 changes: 47 additions & 29 deletions listeners.py
@@ -10,6 +10,7 @@
 from lib.sampler import sampler
 from lib.worker import WorkerCurrentTask
 from lib.worker import WorkerDone
+from lib.worker import WorkerFlakedTask
 from lib.worker import WorkerOutput
 from lib.worker import WorkerTaskResult
 from lib.worker import get_reproduce_file
@@ -39,26 +40,28 @@ def __init__(self, get_logfile):
         self._sampler = sampler
         self.duration_stats = dict()
         self.failed_tasks = []
+        self.flaked_tasks = []
         self.get_logfile = get_logfile
         self.long_tasks = set()
 
     def process_result(self, obj):
-        if not isinstance(obj, WorkerTaskResult):
-            return
+        if isinstance(obj, WorkerTaskResult):
+            if obj.is_long:
+                self.long_tasks.add(obj.task_id)
 
-        if obj.is_long:
-            self.long_tasks.add(obj.task_id)
+            if obj.short_status not in self.stats:
+                self.stats[obj.short_status] = 0
+            self.stats[obj.short_status] += 1
 
-        if obj.short_status not in self.stats:
-            self.stats[obj.short_status] = 0
-        self.stats[obj.short_status] += 1
+            if obj.short_status == 'fail':
+                self.failed_tasks.append((obj.task_id,
+                                          obj.worker_name,
+                                          obj.show_reproduce_content))
 
-        if obj.short_status == 'fail':
-            self.failed_tasks.append((obj.task_id,
-                                      obj.worker_name,
-                                      obj.show_reproduce_content))
+            self.duration_stats[obj.task_id] = obj.duration
 
-        self.duration_stats[obj.task_id] = obj.duration
+        if isinstance(obj, WorkerFlakedTask):
+            self.flaked_tasks.append((obj.task_id, obj.worker_name, False))
 
     def get_long_mark(self, task):
         return '(long)' if task in self.long_tasks else ''
@@ -149,6 +152,19 @@ def print_duration(self, stats_dir):
                                       self.duration_stats[task_id]))
         fd.close()
 
+    def print_tasks_info(self, tasks):
+        for task_id, worker_name, show_reproduce_content in tasks:
+            logfile = self.get_logfile(worker_name)
+            task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
+            final_report('- %s' % task_id_str, schema='test_var')
+            color_stdout('# logfile: %s\n' % logfile)
+            reproduce_file_path = get_reproduce_file(worker_name)
+            color_stdout('# reproduce file: %s\n' % reproduce_file_path)
+            if show_reproduce_content:
+                color_stdout("---\n", schema='separator')
+                print_tail_n(reproduce_file_path)
+                color_stdout("...\n", schema='separator')
+
     def print_statistics(self):
         """Print statistics and results of testing."""
         # Prepare standalone subpath '<vardir>/statistics' for statistics files.
@@ -161,25 +177,25 @@ def print_statistics(self):
         if self.stats:
             final_report('Statistics:\n', schema='test_var')
             for short_status, cnt in self.stats.items():
-                final_report('* %s: %d\n' % (short_status, cnt), schema='test_var')
+                if short_status == 'pass' and self.flaked_tasks:
+                    final_report('* %s: %d (flaky: %d)\n' %
+                                 (short_status, cnt, len(self.flaked_tasks)),
+                                 schema='test_var')
+                else:
+                    final_report('* %s: %d\n' % (short_status, cnt),
+                                 schema='test_var')
 
+        if self.flaked_tasks:
+            final_report('Flaked tasks:\n', schema='test_var')
+            self.print_tasks_info(self.flaked_tasks)
+
         if not self.failed_tasks:
-            return False
+            return False, bool(self.flaked_tasks)
 
         final_report('Failed tasks:\n', schema='test_var')
-        for task_id, worker_name, show_reproduce_content in self.failed_tasks:
-            logfile = self.get_logfile(worker_name)
-            task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
-            final_report('- %s' % task_id_str, schema='test_var')
-            color_stdout('# logfile: %s\n' % logfile)
-            reproduce_file_path = get_reproduce_file(worker_name)
-            color_stdout('# reproduce file: %s\n' % reproduce_file_path)
-            if show_reproduce_content:
-                color_stdout("---\n", schema='separator')
-                print_tail_n(reproduce_file_path)
-                color_stdout("...\n", schema='separator')
+        self.print_tasks_info(self.failed_tasks)
 
-        return True
+        return True, bool(self.flaked_tasks)
 
 
 class ArtifactsWatcher(BaseWatcher):
@@ -193,10 +209,12 @@ def __init__(self, get_logfile):
         self.get_logfile = get_logfile
 
     def process_result(self, obj):
-        if not isinstance(obj, WorkerTaskResult):
-            return
+        if isinstance(obj, WorkerTaskResult):
+            if obj.short_status == 'fail' and \
+                    obj.worker_name not in self.failed_workers:
+                self.failed_workers.append(obj.worker_name)
 
-        if obj.short_status == 'fail' and \
+        if isinstance(obj, WorkerFlakedTask) and \
                 obj.worker_name not in self.failed_workers:
             self.failed_workers.append(obj.worker_name)
 
7 changes: 4 additions & 3 deletions test-run.py
@@ -126,12 +126,13 @@ def main_loop_parallel():
         dispatcher.wait()
         dispatcher.wait_processes()
         color_stdout('-' * 81, "\n", schema='separator')
-        has_failed = dispatcher.statistics.print_statistics()
+        has_failed, has_flaked = dispatcher.statistics.print_statistics()
         has_undone = dispatcher.report_undone(
             verbose=bool(is_force or not has_failed))
-        if has_failed:
+        if any([has_failed, has_flaked]):
             dispatcher.artifacts.save_artifacts()
-            return EXIT_FAILED_TEST
+        if has_failed:
+            return EXIT_FAILED_TEST
         if has_undone:
             return EXIT_NOTDONE_TEST
     except KeyboardInterrupt:
