diff --git a/lib/test.py b/lib/test.py
index c607f225..69c7a6be 100644
--- a/lib/test.py
+++ b/lib/test.py
@@ -8,6 +8,7 @@
 import sys
 import traceback
 from functools import partial
+from hashlib import md5
 
 try:
     from cStringIO import StringIO
@@ -152,9 +153,9 @@ def run(self, server):
         it to stdout.
 
         Returns short status of the test as a string: 'skip', 'pass',
-        'new', 'updated' or 'fail'. There is also one possible value for
-        short_status, 'disabled', but it returned in the caller,
-        TestSuite.run_test().
+        'new', 'updated' or 'fail', plus the results file checksum on 'fail'.
+        There is also one possible value for short_status, 'disabled',
+        but it is returned in the caller, TestSuite.run_test().
         """
 
         # Note: test was created before certain worker become known, so we need
@@ -219,6 +220,7 @@ def run(self, server):
         self.is_valgrind_clean = not bool(non_empty_logs)
 
         short_status = None
+        result_checksum = None
 
         if self.skip:
             short_status = 'skip'
@@ -252,6 +254,8 @@ def run(self, server):
             has_result = os.path.exists(self.tmp_result)
             if has_result:
                 shutil.copy(self.tmp_result, self.reject)
+                with open(self.tmp_result, mode='rb') as result_file:
+                    result_checksum = md5(result_file.read()).hexdigest()
 
             short_status = 'fail'
             color_stdout("[ fail ]\n", schema='test_fail')
@@ -277,7 +281,7 @@ def run(self, server):
                     "Test failed! Output from log file "
                     "{0}:\n".format(log_file))
                 where = ": there were warnings in the valgrind log file(s)"
-        return short_status
+        return short_status, result_checksum
 
     def print_diagnostics(self, log_file, message):
         """Print whole lines of client program output leading to test
diff --git a/lib/test_suite.py b/lib/test_suite.py
index b8e1454c..d1faa46b 100644
--- a/lib/test_suite.py
+++ b/lib/test_suite.py
@@ -185,6 +185,12 @@ def fragile_tests(self):
                 res.append(test)
         return res
 
+    def get_test_fragile_checksums(self, test):
+        try:
+            return self.fragile['tests'][test]['checksums']
+        except Exception:
+            return []
+
     def gen_server(self):
         try:
             return Server(self.ini, test_suite=self)
@@ -237,7 +243,7 @@ def stop_server(self, server, inspector, silent=False, cleanup=True):
 
     def run_test(self, test, server, inspector):
         """ Returns short status of the test as a string: 'skip', 'pass',
-        'new', 'fail', or 'disabled'.
+        'new', 'fail', or 'disabled', plus the results file checksum on 'fail'.
         """
         test.inspector = inspector
         test_name = os.path.basename(test.name)
@@ -251,16 +257,17 @@
             color_stdout(conf.ljust(16), schema='test_var')
 
         if self.is_test_enabled(test, conf, server):
-            short_status = test.run(server)
+            short_status, result_checksum = test.run(server)
         else:
            color_stdout("[ disabled ]\n", schema='t_name')
            short_status = 'disabled'
+           result_checksum = None
 
         # cleanup only if test passed or if --force mode enabled
         if lib.Options().args.is_force or short_status == 'pass':
            inspector.cleanup_nondefault()
 
-        return short_status
+        return short_status, result_checksum
 
     def is_parallel(self):
         return self.ini['is_parallel']
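Note for reviewers: get_test_fragile_checksums() above expects the suite's fragile metadata to carry a per-test list of results file checksums. A minimal sketch of the shape it consumes, assuming the 'retries' key read by fragile_retries() sits alongside 'tests' (the test name and checksum values are illustrative):

    fragile = {
        'retries': 10,  # assumed counterpart of fragile_retries()
        'tests': {
            'net.box.test.lua': {  # illustrative test name
                'checksums': ['97f53b9eaff848060d47bd8b01304452'],
            },
        },
    }

With this shape, get_test_fragile_checksums('net.box.test.lua') yields the checksum list, and any missing key is swallowed by the except clause and yields [].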
diff --git a/lib/worker.py b/lib/worker.py
index 526dd3a0..1e1bfdd4 100644
--- a/lib/worker.py
+++ b/lib/worker.py
@@ -144,13 +144,15 @@ class WorkerTaskResult(BaseWorkerMessage):
     """ Passed into the result queue when a task processed (done) by the
     worker. The short_status (string) field intended to give short note
     whether the task processed successfully or not, but with little more flexibility
-    than binary True/False. The task_id (any hashable object) field hold ID of
+    than binary True/False. The result_checksum (string) field saves the results
+    file checksum on test fail. The task_id (any hashable object) field holds the ID of
     the processed task. The show_reproduce_content configuration form suite.ini
     """
     def __init__(self, worker_id, worker_name, task_id,
-                 short_status, show_reproduce_content):
+                 short_status, result_checksum, show_reproduce_content):
         super(WorkerTaskResult, self).__init__(worker_id, worker_name)
         self.short_status = short_status
+        self.result_checksum = result_checksum
         self.task_id = task_id
         self.show_reproduce_content = show_reproduce_content
 
@@ -216,8 +218,9 @@ def current_task(self, task_id):
         return WorkerCurrentTask(self.id, self.name, task_name, task_param,
                                  task_result, task_tmp_result)
 
-    def wrap_result(self, task_id, short_status):
+    def wrap_result(self, task_id, short_status, result_checksum):
         return WorkerTaskResult(self.id, self.name, task_id, short_status,
+                                result_checksum,
                                 self.suite.show_reproduce_content())
 
     def sigterm_handler(self, signum, frame):
@@ -304,7 +307,7 @@ def run_task(self, task_id):
             with open(self.reproduce_file, 'a') as f:
                 task_id_str = yaml.safe_dump(task.id, default_flow_style=True)
                 f.write('- ' + task_id_str)
-            short_status = self.suite.run_test(
+            short_status, result_checksum = self.suite.run_test(
                 task, self.server, self.inspector)
         except KeyboardInterrupt:
             self.report_keyboard_interrupt()
@@ -314,7 +317,7 @@
                 '\nWorker "%s" received the following error; stopping...\n'
                 % self.name + traceback.format_exc() + '\n', schema='error')
             raise
-        return short_status
+        return short_status, result_checksum
 
     def run_loop(self, task_queue, result_queue, is_fragile):
         """ called from 'run_all' """
@@ -329,8 +332,11 @@
                 break
 
             short_status = None
+            result_checksum = None
             result_queue.put(self.current_task(task_id))
-            if is_fragile:
+            testname = os.path.basename(task_id[0])
+            fragile_checksums = self.suite.get_test_fragile_checksums(testname)
+            if is_fragile and fragile_checksums:
                 retries_left = self.suite.fragile_retries()
                 # let's run till short_status became 'pass'
                 while short_status != 'pass' and retries_left >= 0:
@@ -338,15 +344,20 @@
                    if short_status == 'fail':
                        color_stdout(
                            'Test "%s", conf: "%s"\n'
-                           '\tfrom "fragile" list failed, rerunning ...\n'
-                           % (task_id[0], task_id[1]), schema='error')
+                           '\tfrom "fragile" list failed with results'
+                           ' file checksum: "%s", rerunning ...\n'
+                           % (task_id[0], task_id[1], result_checksum), schema='error')
                    # run task and save the result to short_status
-                   short_status = self.run_task(task_id)
+                   short_status, result_checksum = self.run_task(task_id)
+                   # stop retrying when the test passed (no checksum) or when
+                   # the new results file checksum is not in the fragile list
+                   if not result_checksum or (result_checksum not in fragile_checksums):
+                       break
                    retries_left = retries_left - 1
             else:
-                short_status = self.run_task(task_id)
+                short_status, result_checksum = self.run_task(task_id)
 
-            result_queue.put(self.wrap_result(task_id, short_status))
+            result_queue.put(self.wrap_result(task_id, short_status, result_checksum))
             if not lib.Options().args.is_force and short_status == 'fail':
                 color_stdout(
                     'Worker "%s" got failed test; stopping the server...\n'
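Note for reviewers: the retry policy added to run_loop() distills to the sketch below; run_task(), fragile_retries and fragile_checksums stand in for the worker method and the suite data shown above, and the reporting calls are omitted:

    short_status, result_checksum = None, None
    retries_left = fragile_retries
    while short_status != 'pass' and retries_left >= 0:
        short_status, result_checksum = run_task()
        # rerun only failures whose results file checksum is listed
        if not result_checksum or result_checksum not in fragile_checksums:
            break
        retries_left -= 1

A pass leaves result_checksum as None and exits through the break, a failure with an unlisted checksum is reported right away, and only failures already known by checksum consume retries.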
diff --git a/listeners.py b/listeners.py
index c4d6fcc9..b3a5c90b 100644
--- a/listeners.py
+++ b/listeners.py
@@ -45,6 +45,7 @@ def process_result(self, obj):
         if obj.short_status == 'fail':
             self.failed_tasks.append((obj.task_id,
                                       obj.worker_name,
+                                      obj.result_checksum,
                                       obj.show_reproduce_content))
 
     def print_statistics(self):
@@ -58,10 +59,11 @@ def print_statistics(self):
             return False
 
         color_stdout('Failed tasks:\n', schema='test_var')
-        for task_id, worker_name, show_reproduce_content in self.failed_tasks:
+        for task_id, worker_name, result_checksum, show_reproduce_content in self.failed_tasks:
             logfile = self.get_logfile(worker_name)
             task_id_str = yaml.safe_dump(task_id, default_flow_style=True)
             color_stdout('- %s' % task_id_str, schema='test_var')
+            color_stdout('# results file checksum: %s\n' % result_checksum)
             color_stdout('# logfile: %s\n' % logfile)
             reproduce_file_path = get_reproduce_file(worker_name)
             color_stdout('# reproduce file: %s\n' % reproduce_file_path)
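Note for reviewers: the checksum printed here is what a maintainer would add to the suite's fragile checksums to make a newly observed flaky failure eligible for reruns. It can be reproduced from the reject file, which run() copies from the same tmp_result that was hashed; a minimal sketch (the reject path is illustrative):

    from hashlib import md5

    # same digest as Test.run() stores on failure
    with open('box/net.box.reject', mode='rb') as f:
        print(md5(f.read()).hexdigest())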