Skip to content

Commit

Permalink
Rerun all failed tests, not only those marked as fragile
Browse files Browse the repository at this point in the history
test-run supports functionality to rerun failed tests in place, but
these tests have to be on the so-called fragile list. To add a test to the
fragile list we need to add a special configuration to the suite.ini
file of a test suite. Configuration example:

    fragile = {
        "retries": 5,
        "tests": {
            "tarantoolctl.test.lua": {
                "issues": [ "gh-5059", "gh-5346" ]
            },
            "debug.test.lua": {
                "issues": [ "gh-5346" ]
            },
            ...
        }
    }

Rerunning failed tests in place is quite convenient because it allows us
to avoid rerunning all tests again and thus save time.

But to make it work as expected we should keep the list of fragile tests
always up-to-date. Flaky tests may be introduced every day and keeping
the list of fragile tests always up-to-date becomes extremely difficult
to do.

So our solution is quite simple: just rerun all failed tests.
By default, the number of retries for regular and fragile tests is 3.
But for fragile tests this number can be overridden in the suite.ini
file.

Closes #328
  • Loading branch information
ylobankov committed Mar 16, 2022
1 parent f246567 commit 36cee89
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
9 changes: 6 additions & 3 deletions lib/test_suite.py
Expand Up @@ -45,6 +45,9 @@ class TestSuite:
server for this suite, the client program to execute individual
tests and other suite properties. The server is started once per
suite."""

RETRIES_COUNT = 3

def get_multirun_conf(self, suite_path):
conf_name = self.ini.get('config', None)
if conf_name is None:
Expand Down Expand Up @@ -91,7 +94,7 @@ def __init__(self, suite_path, args):
self.args = args
self.tests = []
self.ini = {}
self.fragile = {'retries': 0, 'tests': {}}
self.fragile = {'retries': self.RETRIES_COUNT, 'tests': {}}
self.suite_path = suite_path
self.ini["core"] = "tarantool"

Expand Down Expand Up @@ -128,7 +131,7 @@ def __init__(self, suite_path, args):
if config.has_option("default", "fragile"):
fragiles = config.get("default", "fragile")
try:
self.fragile = json.loads(fragiles)
self.fragile.update(json.loads(fragiles))
if 'tests' not in self.fragile:
raise RuntimeError(
"Key 'tests' absent in 'fragile' json: {}"
Expand Down Expand Up @@ -288,7 +291,7 @@ def is_parallel(self):
return self.ini['is_parallel']

def fragile_retries(self):
return self.fragile.get('retries', 0)
return self.fragile['retries']

def show_reproduce_content(self):
return self.ini['show_reproduce_content']
Expand Down
19 changes: 12 additions & 7 deletions lib/worker.py
Expand Up @@ -350,20 +350,25 @@ def run_loop(self, task_queue, result_queue):
'defined in suite.ini but this functionality '
'is dropped' % testname)
)
retries_left = self.suite.fragile_retries()
retries_left = self.suite.RETRIES_COUNT
if testname in self.suite.fragile['tests']:
retries_left = self.suite.fragile_retries()
# let's run till short_status became 'pass'
while short_status in (None, 'fail') and retries_left >= 0:
self.restart_server()
# print message only after some fails occurred
if short_status == 'fail':
color_stdout(
'Test "%s", conf: "%s"\n'
'\tfrom "fragile" list failed, rerunning ...\n'
% (task_id[0], task_id[1]), schema='error')
if testname not in self.suite.fragile['tests']:
color_stdout(
'Test "%s", conf: "%s"\n\tfailed, rerunning ...\n'
% (task_id[0], task_id[1]), schema='error')
else:
color_stdout(
'Test "%s", conf: "%s"\n'
'\tfrom "fragile" list failed, rerunning ...\n'
% (task_id[0], task_id[1]), schema='error')
# run task and save the result to short_status
short_status, duration = self.run_task(task_id)
if testname not in self.suite.fragile['tests']:
break
retries_left = retries_left - 1

result_queue.put(self.wrap_result(task_id, short_status, duration))
Expand Down

0 comments on commit 36cee89

Please sign in to comment.