Add ability to check results file checksum on fail
Added the ability to compute the results file checksum when a test
fails and to compare it with the checksums of known issues mentioned
in the fragile list. The fragile list should consist of the results
file checksums together with their issues, in the format:

  fragile = {
    "retries": 10,
    "tests": {
        "bitset.test.lua": {
            "issues": [ "gh-4095" ],
            "checksums": [ "050af3a99561a724013995668a4bc71c", "f34be60193cfe9221d3fe50df657e9d3" ]
        }
    }}
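
The check itself amounts to hashing the failed test's results file and
looking the digest up in this structure. A minimal sketch, assuming the
fragile option is already parsed into a Python dict of the shape shown
above (the is_known_fragile_failure helper is illustrative, not part of
the change):

  from hashlib import md5

  def is_known_fragile_failure(fragile, test_name, result_path):
      # Hash the whole results file, as the fragile list expects.
      with open(result_path, mode='rb') as result_file:
          checksum = md5(result_file.read()).hexdigest()
      # An unknown digest means the failure is new and should not be retried.
      known = fragile.get('tests', {}).get(test_name, {}).get('checksums', [])
      return checksum in known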

Closes #189
avtikhon committed Sep 24, 2020
1 parent f25fef3 commit cde0988
Showing 4 changed files with 43 additions and 19 deletions.
12 changes: 8 additions & 4 deletions lib/test.py
@@ -8,6 +8,7 @@
import sys
import traceback
from functools import partial
from hashlib import md5

try:
from cStringIO import StringIO
@@ -152,9 +153,9 @@ def run(self, server):
it to stdout.
Returns short status of the test as a string: 'skip', 'pass',
'new', 'updated' or 'fail'. There is also one possible value for
short_status, 'disabled', but it returned in the caller,
TestSuite.run_test().
'new', 'updated' or 'fail' and results file checksum on fail.
There is also one possible value for short_status, 'disabled',
but it returned in the caller, TestSuite.run_test().
"""

# Note: test was created before certain worker become known, so we need
@@ -219,6 +220,7 @@ def run(self, server):
self.is_valgrind_clean = not bool(non_empty_logs)

short_status = None
result_checksum = None

if self.skip:
short_status = 'skip'
@@ -252,6 +254,8 @@ def run(self, server):
has_result = os.path.exists(self.tmp_result)
if has_result:
shutil.copy(self.tmp_result, self.reject)
with open(self.tmp_result, mode='rb') as result_file:
result_checksum = md5(result_file.read()).hexdigest()
short_status = 'fail'
color_stdout("[ fail ]\n", schema='test_fail')

@@ -277,7 +281,7 @@
"Test failed! Output from log file "
"{0}:\n".format(log_file))
where = ": there were warnings in the valgrind log file(s)"
return short_status
return short_status, result_checksum

def print_diagnostics(self, log_file, message):
"""Print whole lines of client program output leading to test
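With this change Test.run() returns a pair rather than a bare status
string, and the checksum is only set when the test fails. A hedged
sketch of the new calling convention (the test and server objects are
assumed to be set up as elsewhere in test-run):

  short_status, result_checksum = test.run(server)
  if short_status == 'fail':
      # md5 hex digest of the failed results file; None for any other status
      print('failed, results file checksum:', result_checksum)
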
13 changes: 10 additions & 3 deletions lib/test_suite.py
@@ -185,6 +185,12 @@ def fragile_tests(self):
res.append(test)
return res

def get_test_fragile_checksums(self, test):
try:
return self.fragile['tests'][test]['checksums']
except Exception:
return []

def gen_server(self):
try:
return Server(self.ini, test_suite=self)
@@ -237,7 +243,7 @@ def stop_server(self, server, inspector, silent=False, cleanup=True):

def run_test(self, test, server, inspector):
""" Returns short status of the test as a string: 'skip', 'pass',
'new', 'fail', or 'disabled'.
'new', 'fail', or 'disabled' and results file checksum on fail.
"""
test.inspector = inspector
test_name = os.path.basename(test.name)
@@ -251,16 +257,17 @@ def run_test(self, test, server, inspector):
color_stdout(conf.ljust(16), schema='test_var')

if self.is_test_enabled(test, conf, server):
short_status = test.run(server)
short_status, result_checksum = test.run(server)
else:
color_stdout("[ disabled ]\n", schema='t_name')
short_status = 'disabled'
result_checksum = None

# cleanup only if test passed or if --force mode enabled
if lib.Options().args.is_force or short_status == 'pass':
inspector.cleanup_nondefault()

return short_status
return short_status, result_checksum

def is_parallel(self):
return self.ini['is_parallel']
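The new get_test_fragile_checksums() helper deliberately swallows lookup
errors, so a test that is absent from the fragile list (or an entry
without a "checksums" key) simply yields an empty list. An illustrative
call, assuming a TestSuite instance named suite:

  # [] when the test is not in the fragile list or has no recorded checksums
  fragile_checksums = suite.get_test_fragile_checksums('bitset.test.lua')
  if fragile_checksums:
      print('known flaky results file checksums:', fragile_checksums)
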
33 changes: 22 additions & 11 deletions lib/worker.py
@@ -144,13 +144,15 @@ class WorkerTaskResult(BaseWorkerMessage):
""" Passed into the result queue when a task processed (done) by the
worker. The short_status (string) field intended to give short note whether
the task processed successfully or not, but with little more flexibility
than binary True/False. The task_id (any hashable object) field hold ID of
than binary True/False. The result_checksum (string) field saves the results
file checksum on test fail. The task_id (any hashable object) field hold ID of
the processed task. The show_reproduce_content configuration form suite.ini
"""
def __init__(self, worker_id, worker_name, task_id,
short_status, show_reproduce_content):
short_status, result_checksum, show_reproduce_content):
super(WorkerTaskResult, self).__init__(worker_id, worker_name)
self.short_status = short_status
self.result_checksum = result_checksum
self.task_id = task_id
self.show_reproduce_content = show_reproduce_content

@@ -216,8 +218,9 @@ def current_task(self, task_id):
return WorkerCurrentTask(self.id, self.name, task_name, task_param,
task_result, task_tmp_result)

def wrap_result(self, task_id, short_status):
def wrap_result(self, task_id, short_status, result_checksum):
return WorkerTaskResult(self.id, self.name, task_id, short_status,
result_checksum,
self.suite.show_reproduce_content())

def sigterm_handler(self, signum, frame):
@@ -304,7 +307,7 @@ def run_task(self, task_id):
with open(self.reproduce_file, 'a') as f:
task_id_str = yaml.safe_dump(task.id, default_flow_style=True)
f.write('- ' + task_id_str)
short_status = self.suite.run_test(
short_status, result_checksum = self.suite.run_test(
task, self.server, self.inspector)
except KeyboardInterrupt:
self.report_keyboard_interrupt()
@@ -314,7 +317,7 @@ def run_task(self, task_id):
'\nWorker "%s" received the following error; stopping...\n'
% self.name + traceback.format_exc() + '\n', schema='error')
raise
return short_status
return short_status, result_checksum

def run_loop(self, task_queue, result_queue, is_fragile):
""" called from 'run_all' """
@@ -329,24 +332,32 @@
break

short_status = None
result_checksum = None
result_queue.put(self.current_task(task_id))
if is_fragile:
testname = os.path.basename(task_id[0])
fragile_checksums = self.suite.get_test_fragile_checksums(testname)
if is_fragile and fragile_checksums:
retries_left = self.suite.fragile_retries()
# let's run till short_status became 'pass'
while short_status != 'pass' and retries_left >= 0:
# print message only after some fails occurred
if short_status == 'fail':
color_stdout(
'Test "%s", conf: "%s"\n'
'\tfrom "fragile" list failed, rerunning ...\n'
% (task_id[0], task_id[1]), schema='error')
'\tfrom "fragile" list failed with results'
' file checksum: "%s", rerunning ...\n'
% (task_id[0], task_id[1], result_checksum), schema='error')
# run task and save the result to short_status
short_status = self.run_task(task_id)
short_status, result_checksum = self.run_task(task_id)
# check if the results file checksum set on fail and if
# the newly created results file is known by checksum
if not result_checksum or (result_checksum not in fragile_checksums):
break
retries_left = retries_left - 1
else:
short_status = self.run_task(task_id)
short_status, result_checksum = self.run_task(task_id)

result_queue.put(self.wrap_result(task_id, short_status))
result_queue.put(self.wrap_result(task_id, short_status, result_checksum))
if not lib.Options().args.is_force and short_status == 'fail':
color_stdout(
'Worker "%s" got failed test; stopping the server...\n'
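The retry policy added to run_loop() boils down to: keep rerunning a
fragile test while it fails with a results file checksum already
recorded for it, and stop as soon as it passes, produces no checksum,
or fails with an unknown checksum. A simplified stand-alone sketch of
that loop (the function and parameter names are illustrative):

  def rerun_fragile(run_task, task_id, fragile_checksums, retries_left):
      short_status, result_checksum = None, None
      while short_status != 'pass' and retries_left >= 0:
          short_status, result_checksum = run_task(task_id)
          # A pass, a missing checksum, or an unknown checksum ends the retries.
          if not result_checksum or result_checksum not in fragile_checksums:
              break
          retries_left -= 1
      return short_status, result_checksum
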
4 changes: 3 additions & 1 deletion listeners.py
@@ -45,6 +45,7 @@ def process_result(self, obj):
if obj.short_status == 'fail':
self.failed_tasks.append((obj.task_id,
obj.worker_name,
obj.result_checksum,
obj.show_reproduce_content))

def print_statistics(self):
@@ -58,10 +59,11 @@
return False

color_stdout('Failed tasks:\n', schema='test_var')
for task_id, worker_name, show_reproduce_content in self.failed_tasks:
for task_id, worker_name, result_checksum, show_reproduce_content in self.failed_tasks:
logfile = self.get_logfile(worker_name)
task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
color_stdout('- %s' % task_id_str, schema='test_var')
color_stdout('# results file checksum: %s\n' % result_checksum)
color_stdout('# logfile: %s\n' % logfile)
reproduce_file_path = get_reproduce_file(worker_name)
color_stdout('# reproduce file: %s\n' % reproduce_file_path)
