From 0b8efe6c1562e835449af89d811ef929aac1afcf Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 29 Sep 2022 11:15:27 +0200 Subject: [PATCH 1/3] Submit jobs using correct module path when testing schedulers --- unittests/test_schedulers.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 3565d2a513..b02675fc52 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -115,7 +115,7 @@ def prepare_job(job, command='hostname', pre_run = pre_run or ['echo prerun'] post_run = post_run or ['echo postrun'] prepare_cmds = prepare_cmds or ['echo prepare'] - with rt.module_use('unittests/modules'): + with rt.module_use(test_util.TEST_MODULES): job.prepare( [ *pre_run, @@ -127,6 +127,11 @@ def prepare_job(job, command='hostname', ) +def submit_job(job): + with rt.module_use(test_util.TEST_MODULES): + job.submit() + + def assert_job_script_sanity(job): '''Assert the sanity of the produced script file.''' with open(job.script_filename) as fp: @@ -386,7 +391,7 @@ def test_submit(make_job, exec_ctx): minimal_job = make_job(sched_access=exec_ctx.access) prepare_job(minimal_job) assert minimal_job.nodelist is None - minimal_job.submit() + submit_job(minimal_job) assert minimal_job.jobid is not None minimal_job.wait() @@ -403,12 +408,15 @@ def test_submit(make_job, exec_ctx): assert num_nodes == len(minimal_job.nodelist) assert 0 == minimal_job.exitcode + with open(minimal_job.stderr) as stderr: + assert not stderr.read().strip() + def test_submit_timelimit(minimal_job, local_only): minimal_job.time_limit = '2s' prepare_job(minimal_job, 'sleep 10') t_job = time.time() - minimal_job.submit() + submit_job(minimal_job) assert minimal_job.jobid is not None minimal_job.wait() t_job = time.time() - t_job @@ -430,7 +438,7 @@ def test_submit_job_array(make_job, slurm_only, exec_ctx): job = make_job(sched_access=exec_ctx.access) job.options = 
['--array=0-1'] prepare_job(job, command='echo "Task id: ${SLURM_ARRAY_TASK_ID}"') - job.submit() + submit_job(job) job.wait() if job.scheduler.registered_name == 'slurm': assert job.exitcode == 0 @@ -445,7 +453,7 @@ def test_cancel(make_job, exec_ctx): prepare_job(minimal_job, 'sleep 30') t_job = time.time() - minimal_job.submit() + submit_job(job) minimal_job.cancel() # We give some time to the local scheduler for the TERM signal to be @@ -484,7 +492,7 @@ def test_wait_before_submit(minimal_job): def test_finished(make_job, exec_ctx): minimal_job = make_job(sched_access=exec_ctx.access) prepare_job(minimal_job, 'sleep 2') - minimal_job.submit() + submit_job(minimal_job) assert not minimal_job.finished() minimal_job.wait() @@ -498,7 +506,7 @@ def test_finished_before_submit(minimal_job): def test_finished_raises_error(make_job, exec_ctx): minimal_job = make_job(sched_access=exec_ctx.access) prepare_job(minimal_job, 'echo hello') - minimal_job.submit() + submit_job(minimal_job) minimal_job.wait() # Emulate an error during polling and verify that it is raised correctly @@ -555,7 +563,7 @@ def test_guess_num_tasks(minimal_job, scheduler): minimal_job.num_tasks = 0 minimal_job._sched_flex_alloc_nodes = 'idle' prepare_job(minimal_job) - minimal_job.submit() + submit_job(minimal_job) minimal_job.wait() assert minimal_job.num_tasks == 1 elif scheduler.registered_name in ('slurm', 'squeue'): @@ -596,7 +604,7 @@ def state(self): type(minimal_job).state = property(state) prepare_job(minimal_job, 'sleep 30') - minimal_job.submit() + submit_job(minimal_job) with pytest.raises(JobError, match='maximum pending time exceeded'): minimal_job.wait() @@ -649,7 +657,7 @@ def test_cancel_with_grace(minimal_job, scheduler, local_only): pre_run=['trap -- "" TERM'], post_run=['echo $!', 'wait'], prepare_cmds=['']) - minimal_job.submit() + submit_job(minimal_job) # Stall a bit here to let the the spawned process start and install its # signal handler for SIGTERM @@ -696,7 +704,7 @@ def 
test_cancel_term_ignore(minimal_job, scheduler, local_only): pre_run=[''], post_run=[''], prepare_cmds=['']) - minimal_job.submit() + submit_job(minimal_job) # Stall a bit here to let the the spawned process start and install its # signal handler for SIGTERM From 119dc2b19c074a3bf707663a19842f0ca6fe5d3f Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 29 Sep 2022 11:21:36 +0200 Subject: [PATCH 2/3] Fix wrong variable passed to submit_job in test_cancel --- unittests/test_schedulers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index b02675fc52..ebb06cc14b 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -453,7 +453,7 @@ def test_cancel(make_job, exec_ctx): prepare_job(minimal_job, 'sleep 30') t_job = time.time() - submit_job(job) + submit_job(minimal_job) minimal_job.cancel() # We give some time to the local scheduler for the TERM signal to be From 7ef640a35f6da7eddea22a7fca503b7ed7189a8e Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 30 Sep 2022 11:04:48 +0200 Subject: [PATCH 3/3] Handle the case where the scheduler does not retrieve exitcode --- unittests/test_schedulers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index ebb06cc14b..1f0f76acd3 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -406,7 +406,9 @@ def test_submit(make_job, exec_ctx): num_tasks_per_node = minimal_job.num_tasks_per_node or 1 num_nodes = minimal_job.num_tasks // num_tasks_per_node assert num_nodes == len(minimal_job.nodelist) - assert 0 == minimal_job.exitcode + + # Handle the case where the exitcode was not reported by the scheduler + assert minimal_job.exitcode is None or 0 == minimal_job.exitcode with open(minimal_job.stderr) as stderr: assert not stderr.read().strip()