From 9aa879a1bda4617e45193afe29892570b48d22c5 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 18 Sep 2018 09:59:52 +0200 Subject: [PATCH 1/4] [feat]keep retry info --- reframe/core/runtime.py | 28 ++++++++++++++++++++++---- reframe/frontend/executors/__init__.py | 14 ++++--------- reframe/frontend/statistics.py | 25 +++++++++-------------- unittests/test_policies.py | 6 +++--- 4 files changed, 41 insertions(+), 32 deletions(-) diff --git a/reframe/core/runtime.py b/reframe/core/runtime.py index b6093fbc6c..0ecc401f5a 100644 --- a/reframe/core/runtime.py +++ b/reframe/core/runtime.py @@ -113,6 +113,10 @@ def _makedir(self, *dirs, wipeout=False): os.makedirs(ret, exist_ok=True) return ret + def _run_suffix(self): + current_run = runtime().current_run + return '_%s' % current_run if current_run > 0 else '' + @property def timestamp(self): return self._timestamp.strftime(self.timefmt) if self.timefmt else '' @@ -121,17 +125,21 @@ def timestamp(self): def output_prefix(self): """The output prefix directory of ReFrame.""" if self.outputdir is None: - return os.path.join(self.prefix, 'output', self.timestamp) + return os.path.join(self.prefix, 'output' + self._run_suffix(), + self.timestamp) else: - return os.path.join(self.outputdir, self.timestamp) + return os.path.join(os.path.normpath(self.outputdir) + + self._run_suffix(), self.timestamp) @property def stage_prefix(self): """The stage prefix directory of ReFrame.""" if self.stagedir is None: - return os.path.join(self.prefix, 'stage', self.timestamp) + return os.path.join(self.prefix, 'stage' + self._run_suffix(), + self.timestamp) else: - return os.path.join(self.stagedir, self.timestamp) + return os.path.join(os.path.normpath(self.stagedir) + + self._run_suffix(), self.timestamp) @property def perflog_prefix(self): @@ -177,6 +185,7 @@ def __init__(self, dict_config, sysdescr=None): self._system.outputdir, self._system.perflogdir) self._modules_system = ModulesSystem.create( self._system.modules_system) + self._current_run = 0 def _autodetect_system(self): """Auto-detect system.""" @@ -204,6 +213,17 @@ def mode(self, name): except KeyError: raise ConfigError('unknown execution mode: %s' % name) from None + def next_run(self): + self._current_run += 1 + + @property + def current_run(self): + """The current run. + + :type: `integer` + """ + return self._current_run + @property def system(self): """The current host system. diff --git a/reframe/frontend/executors/__init__.py b/reframe/frontend/executors/__init__.py index 87629078b6..20a7f156a5 100644 --- a/reframe/frontend/executors/__init__.py +++ b/reframe/frontend/executors/__init__.py @@ -150,7 +150,6 @@ def __init__(self, policy, printer=None, max_retries=0): self._policy = policy self._printer = printer or PrettyPrinter() self._max_retries = max_retries - self._current_run = 0 self._stats = TestStats() self._policy.stats = self._stats self._policy.printer = self._printer @@ -207,24 +206,19 @@ def _environ_supported(self, check, environ): return ret and check.supports_environ(environ.name) def _retry_failed(self, checks): + rt = runtime.runtime() while (self._stats.num_failures() and - self._current_run < self._max_retries): + rt.current_run < self._max_retries): failed_checks = [ c for c in checks if c.name in set([t.check.name for t in self._stats.tasks_failed()]) ] - self._current_run += 1 - self._stats.next_run() - if self._stats.current_run != self._current_run: - raise AssertionError('current_run variable out of sync' - '(Runner: %d; TestStats: %d)' % - self._current_run, - self._stats.current_run) + rt.next_run() self._printer.separator( 'short double line', 'Retrying %d failed check(s) (retry %d/%d)' % - (len(failed_checks), self._current_run, self._max_retries) + (len(failed_checks), rt.current_run, self._max_retries) ) self._runall(failed_checks) diff --git a/reframe/frontend/statistics.py b/reframe/frontend/statistics.py index 5b611e062b..4667646752 100644 --- a/reframe/frontend/statistics.py +++ b/reframe/frontend/statistics.py @@ -1,5 +1,5 @@ import reframe.core.debug as debug - +import reframe.core.runtime as rt from reframe.core.exceptions import StatisticsError @@ -9,21 +9,15 @@ class TestStats: def __init__(self): # Tasks per run stored as follows: [[run0_tasks], [run1_tasks], ...] self._tasks = [[]] - self._current_run = 0 def __repr__(self): return debug.repr(self) - @property - def current_run(self): - return self._current_run - - def next_run(self): - self._current_run += 1 - self._tasks.append([]) - def add_task(self, task): - self._tasks[self._current_run].append(task) + current_run = rt.runtime().current_run + if current_run == len(self._tasks): + self._tasks.append([]) + self._tasks[current_run].append(task) def get_tasks(self, run=-1): try: @@ -42,7 +36,7 @@ def tasks_failed(self, run=-1): def retry_report(self): # Return an empty report if no retries were done. - if not self._current_run: + if not rt.runtime().current_run: return '' line_width = 78 @@ -71,14 +65,15 @@ def failure_report(self): line_width = 78 report = [line_width * '='] report.append('SUMMARY OF FAILURES') - for tf in (t for t in self.get_tasks(self._current_run) if t.failed): + current_run = rt.runtime().current_run + for tf in (t for t in self.get_tasks(current_run) if t.failed): check = tf.check partition = check.current_partition partname = partition.fullname if partition else 'None' environ_name = (check.current_environ.name if check.current_environ else 'None') - retry_info = ('(for the last of %s retries)' % self._current_run - if self._current_run > 0 else '') + retry_info = ('(for the last of %s retries)' % current_run + if current_run > 0 else '') report.append(line_width * '-') report.append('FAILURE INFO for %s %s' % (check.name, retry_info)) diff --git a/unittests/test_policies.py b/unittests/test_policies.py index 49d41b9ac7..c03562131b 100644 --- a/unittests/test_policies.py +++ b/unittests/test_policies.py @@ -140,7 +140,7 @@ def test_retries_bad_check(self): # Ensure that the test was retried #max_retries times and failed. self.assertEqual(1, self.runner.stats.num_cases()) - self.assertEqual(max_retries, self.runner.stats.current_run) + self.assertEqual(max_retries, rt.runtime().current_run) self.assertEqual(1, self.runner.stats.num_failures()) def test_retries_good_check(self): @@ -151,7 +151,7 @@ def test_retries_good_check(self): # Ensure that the test passed without retries. self.assertEqual(1, self.runner.stats.num_cases()) - self.assertEqual(0, self.runner.stats.current_run) + self.assertEqual(0, rt.runtime().current_run) self.assertEqual(0, self.runner.stats.num_failures()) def test_pass_in_retries(self): @@ -169,7 +169,7 @@ def test_pass_in_retries(self): # Ensure that the test passed after retries in run #run_to_pass. self.assertEqual(1, self.runner.stats.num_cases()) self.assertEqual(1, self.runner.stats.num_failures(run=0)) - self.assertEqual(run_to_pass, self.runner.stats.current_run) + self.assertEqual(run_to_pass, rt.runtime().current_run) self.assertEqual(0, self.runner.stats.num_failures()) os.remove(fp.name) From cba4d9d756bece3f4ad5f6626d287cceb7fb2cdb Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 18 Sep 2018 18:51:50 +0200 Subject: [PATCH 2/4] Fix of retries unittests --- unittests/test_policies.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unittests/test_policies.py b/unittests/test_policies.py index c03562131b..58f774bac3 100644 --- a/unittests/test_policies.py +++ b/unittests/test_policies.py @@ -8,6 +8,7 @@ import reframe.utility.os_ext as os_ext from reframe.core.exceptions import JobNotStartedError from reframe.frontend.loader import RegressionCheckLoader +import unittests.fixtures as fixtures from unittests.resources.checks.hellocheck import HelloTest from unittests.resources.checks.frontend_checks import ( KeyboardInterruptCheck, SleepCheck, @@ -132,6 +133,9 @@ def test_system_exit_within_test(self): stats = self.runner.stats self.assertEqual(1, stats.num_failures()) + # Retries tests are executed in a different runtime as they modify + # the global rt.runtime().current_run what makes subsequent tests fail + @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_retries_bad_check(self): max_retries = 2 checks = [BadSetupCheck()] @@ -143,6 +147,7 @@ def test_retries_bad_check(self): self.assertEqual(max_retries, rt.runtime().current_run) self.assertEqual(1, self.runner.stats.num_failures()) + @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_retries_good_check(self): max_retries = 2 checks = [HelloTest()] @@ -154,6 +159,7 @@ def test_retries_good_check(self): self.assertEqual(0, rt.runtime().current_run) self.assertEqual(0, self.runner.stats.num_failures()) + @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_pass_in_retries(self): max_retries = 3 run_to_pass = 2 From 1e93c536e6e98cb58f714635295d22f9be0a02ff Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Thu, 20 Sep 2018 16:41:39 +0200 Subject: [PATCH 3/4] Minor revisions following review --- reframe/core/runtime.py | 14 ++++++-------- reframe/frontend/statistics.py | 1 + unittests/test_policies.py | 8 +++----- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/reframe/core/runtime.py b/reframe/core/runtime.py index 0ecc401f5a..447ece19dd 100644 --- a/reframe/core/runtime.py +++ b/reframe/core/runtime.py @@ -128,8 +128,9 @@ def output_prefix(self): return os.path.join(self.prefix, 'output' + self._run_suffix(), self.timestamp) else: - return os.path.join(os.path.normpath(self.outputdir) + - self._run_suffix(), self.timestamp) + return os.path.join(self.outputdir + self._run_suffix(), + self.timestamp) + @property def stage_prefix(self): @@ -138,8 +139,8 @@ def stage_prefix(self): return os.path.join(self.prefix, 'stage' + self._run_suffix(), self.timestamp) else: - return os.path.join(os.path.normpath(self.stagedir) + - self._run_suffix(), self.timestamp) + return os.path.join(self.stagedir + self._run_suffix(), + self.timestamp) @property def perflog_prefix(self): @@ -218,10 +219,7 @@ def next_run(self): @property def current_run(self): - """The current run. - - :type: `integer` - """ + # Not publicly documented return self._current_run @property diff --git a/reframe/frontend/statistics.py b/reframe/frontend/statistics.py index 4667646752..19353d24a5 100644 --- a/reframe/frontend/statistics.py +++ b/reframe/frontend/statistics.py @@ -17,6 +17,7 @@ def add_task(self, task): current_run = rt.runtime().current_run if current_run == len(self._tasks): self._tasks.append([]) + self._tasks[current_run].append(task) def get_tasks(self, run=-1): diff --git a/unittests/test_policies.py b/unittests/test_policies.py index 58f774bac3..fd3e216c61 100644 --- a/unittests/test_policies.py +++ b/unittests/test_policies.py @@ -27,6 +27,9 @@ def setUp(self): # Set runtime prefix rt.runtime().resources.prefix = tempfile.mkdtemp(dir='unittests') + # Reset current_run + rt.runtime()._current_run = 0 + def tearDown(self): os_ext.rmtree(rt.runtime().resources.prefix) @@ -133,9 +136,6 @@ def test_system_exit_within_test(self): stats = self.runner.stats self.assertEqual(1, stats.num_failures()) - # Retries tests are executed in a different runtime as they modify - # the global rt.runtime().current_run what makes subsequent tests fail - @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_retries_bad_check(self): max_retries = 2 checks = [BadSetupCheck()] @@ -147,7 +147,6 @@ def test_retries_bad_check(self): self.assertEqual(max_retries, rt.runtime().current_run) self.assertEqual(1, self.runner.stats.num_failures()) - @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_retries_good_check(self): max_retries = 2 checks = [HelloTest()] @@ -159,7 +158,6 @@ def test_retries_good_check(self): self.assertEqual(0, rt.runtime().current_run) self.assertEqual(0, self.runner.stats.num_failures()) - @rt.switch_runtime(fixtures.TEST_SITE_CONFIG, 'generic') def test_pass_in_retries(self): max_retries = 3 run_to_pass = 2 From f3bc3282e8ab509752581e43cee832033b358b4e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 20 Sep 2018 17:33:04 +0200 Subject: [PATCH 4/4] Remove unnecessary comment --- reframe/core/runtime.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reframe/core/runtime.py b/reframe/core/runtime.py index 447ece19dd..8910e64c16 100644 --- a/reframe/core/runtime.py +++ b/reframe/core/runtime.py @@ -219,7 +219,6 @@ def next_run(self): @property def current_run(self): - # Not publicly documented return self._current_run @property