From 1fecf6be7b1c86464d7d33718b98160f2665a249 Mon Sep 17 00:00:00 2001 From: Rafael Sarmiento Date: Tue, 2 Jun 2020 22:44:27 +0200 Subject: [PATCH 1/6] add option to emit -N --- docs/config_reference.rst | 9 +++++++++ docs/manpage.rst | 13 +++++++++++++ reframe/core/schedulers/slurm.py | 9 +++++++++ reframe/frontend/cli.py | 7 +++++++ reframe/schemas/config.json | 2 ++ 5 files changed, 40 insertions(+) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 9fdf9aa7fe..a0fb4fad3d 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -960,6 +960,15 @@ Common scheduler options A list of systems or system/partitions combinations that this scheduler configuration is valid for. For a detailed description of this property, you may refer `here <#.environments[].target_systems>`__. +.. js:attribute:: .schedulers[].emit_num_nodes + + :required: No + :default: ``false`` + + This option is relevant to the Slurm backend only. + + Force emitting the option ``-N`` in the preamble of the submission scripts. + Execution Mode Configuration diff --git a/docs/manpage.rst b/docs/manpage.rst index 9cdd059812..89804a66f2 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -608,6 +608,19 @@ Here is an alphabetical list of the environment variables recognized by ReFrame: ================================== ================== +.. envvar:: RFM_EMIT_NUM_NODES + + Force emitting the option ``-N`` in the preamble of the submission scripts. + + .. table:: + :align: left + + ================================== ================== + Associated command line option N/A + Associated configuration parameter js:attr:`emit_num_nodes` scheduler configuration parameter + ================================== ================== + + .. envvar:: RFM_GRAYLOG_SERVER The address of the Graylog server to send performance logs. diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 279774899b..72209b5efb 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -107,6 +107,9 @@ def __init__(self): self._job_submit_timeout = rt.runtime().get_option( f'schedulers/@{self.registered_name}/job_submit_timeout' ) + emit_num_nodes = rt.runtime().get_option( + f'schedulers/@{self.registered_name}/emit_num_nodes') + self._emit_num_nodes = emit_num_nodes def completion_time(self, job): if (self._completion_time or @@ -170,6 +173,12 @@ def emit_preamble(self, job): self._format_option(job.sched_exclusive_access, '--exclusive') ) + if self._emit_num_nodes: + preamble.append( + self._format_option(job.num_tasks // job.num_tasks_per_node, + '-N={0}') + ) + if job.use_smt is None: hint = None else: diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 1c48bec6bc..88f8fda69f 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -358,6 +358,13 @@ def main(): action='store_true', help='Use a login shell for job scripts' ) + argparser.add_argument( + dest='emit_num_nodes', + envvar='RFM_EMIT_NUM_NODES', + configvar='schedulers/emit_num_nodes', + action='store_true', + help='Emit -N in Slurm job script' + ) if len(sys.argv) == 1: argparser.print_help() diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index dc7c05b442..be33f8c113 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -274,6 +274,7 @@ "type": "object", "properties": { "name": {"type": "string"}, + "emit_num_nodes": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, "target_systems": {"$ref": "#/defs/system_ref"} }, @@ -419,6 +420,7 @@ "modes/target_systems": ["*"], "schedulers/job_submit_timeout": 60, "schedulers/target_systems": ["*"], + "schedulers/emit_num_nodes": false, "systems/descr": "", "systems/modules_system": "nomod", "systems/modules": [], From 57ca38ea4508512e8e7b9e2d14fb0d0904d8ff25 Mon Sep 17 00:00:00 2001 From: Rafael Sarmiento Date: Tue, 2 Jun 2020 22:49:50 +0200 Subject: [PATCH 2/6] fix pep --- reframe/core/schedulers/slurm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 72209b5efb..9f9170d89b 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -108,7 +108,7 @@ def __init__(self): f'schedulers/@{self.registered_name}/job_submit_timeout' ) emit_num_nodes = rt.runtime().get_option( - f'schedulers/@{self.registered_name}/emit_num_nodes') + f'schedulers/@{self.registered_name}/emit_num_nodes') self._emit_num_nodes = emit_num_nodes def completion_time(self, job): From 389fbd983c5482d775ce106fd325e31ec756eead Mon Sep 17 00:00:00 2001 From: Rafael Sarmiento Date: Wed, 3 Jun 2020 07:34:13 +0200 Subject: [PATCH 3/6] remove tabs --- reframe/schemas/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index be33f8c113..3c040eb7e1 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -274,7 +274,7 @@ "type": "object", "properties": { "name": {"type": "string"}, - "emit_num_nodes": {"type": "boolean"}, + "emit_num_nodes": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, "target_systems": {"$ref": "#/defs/system_ref"} }, @@ -420,7 +420,7 @@ "modes/target_systems": ["*"], "schedulers/job_submit_timeout": 60, "schedulers/target_systems": ["*"], - "schedulers/emit_num_nodes": false, + "schedulers/emit_num_nodes": false, "systems/descr": "", "systems/modules_system": "nomod", "systems/modules": [], From 6c956ec623b2e2f6af92fb4e52cb263193b0fe44 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 3 Jun 2020 17:51:56 +0200 Subject: [PATCH 4/6] WIP: Address PR comments --- docs/config_reference.rst | 7 +++---- docs/manpage.rst | 13 ------------- reframe/core/schedulers/slurm.py | 14 ++++++-------- reframe/frontend/cli.py | 7 ------- reframe/schemas/config.json | 4 ++-- unittests/test_schedulers.py | 9 +++++++++ 6 files changed, 20 insertions(+), 34 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index a0fb4fad3d..db19a7c29f 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -960,15 +960,14 @@ Common scheduler options A list of systems or system/partitions combinations that this scheduler configuration is valid for. For a detailed description of this property, you may refer `here <#.environments[].target_systems>`__. -.. js:attribute:: .schedulers[].emit_num_nodes +.. js:attribute:: .schedulers[].use_nodes_option :required: No :default: ``false`` - This option is relevant to the Slurm backend only. + Always emit the ``-N`` Slurm option in the preamble of the job script. + This option is relevant to Slurm backends only. - Force emitting the option ``-N`` in the preamble of the submission scripts. - Execution Mode Configuration diff --git a/docs/manpage.rst b/docs/manpage.rst index 89804a66f2..9cdd059812 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -608,19 +608,6 @@ Here is an alphabetical list of the environment variables recognized by ReFrame: ================================== ================== -.. envvar:: RFM_EMIT_NUM_NODES - - Force emitting the option ``-N`` in the preamble of the submission scripts. - - .. table:: - :align: left - - ================================== ================== - Associated command line option N/A - Associated configuration parameter js:attr:`emit_num_nodes` scheduler configuration parameter - ================================== ================== - - .. envvar:: RFM_GRAYLOG_SERVER The address of the Graylog server to send performance logs. diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 9f9170d89b..dbc8692f95 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -107,9 +107,9 @@ def __init__(self): self._job_submit_timeout = rt.runtime().get_option( f'schedulers/@{self.registered_name}/job_submit_timeout' ) - emit_num_nodes = rt.runtime().get_option( - f'schedulers/@{self.registered_name}/emit_num_nodes') - self._emit_num_nodes = emit_num_nodes + self._use_nodes_opt = rt.runtime().get_option( + f'schedulers/@{self.registered_name}/use_nodes_option' + ) def completion_time(self, job): if (self._completion_time or @@ -173,11 +173,9 @@ def emit_preamble(self, job): self._format_option(job.sched_exclusive_access, '--exclusive') ) - if self._emit_num_nodes: - preamble.append( - self._format_option(job.num_tasks // job.num_tasks_per_node, - '-N={0}') - ) + if self._use_nodes_opt: + num_nodes = job.num_tasks // job.num_tasks_per_node + preamble.append(self._format_option(num_nodes, '--nodes={0}')) if job.use_smt is None: hint = None diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 88f8fda69f..1c48bec6bc 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -358,13 +358,6 @@ def main(): action='store_true', help='Use a login shell for job scripts' ) - argparser.add_argument( - dest='emit_num_nodes', - envvar='RFM_EMIT_NUM_NODES', - configvar='schedulers/emit_num_nodes', - action='store_true', - help='Emit -N in Slurm job script' - ) if len(sys.argv) == 1: argparser.print_help() diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 3c040eb7e1..ed9c1f9ecd 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -274,7 +274,7 @@ "type": "object", "properties": { "name": {"type": "string"}, - "emit_num_nodes": {"type": "boolean"}, + "use_nodes_option": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, "target_systems": {"$ref": "#/defs/system_ref"} }, @@ -420,7 +420,7 @@ "modes/target_systems": ["*"], "schedulers/job_submit_timeout": 60, "schedulers/target_systems": ["*"], - "schedulers/emit_num_nodes": false, + "schedulers/use_nodes_option": false, "systems/descr": "", "systems/modules_system": "nomod", "systems/modules": [], diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 5a8599f4ac..e3e8b08898 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -261,6 +261,15 @@ def test_prepare_without_smt(fake_job, slurm_only): assert re.search(r'--hint=nomultithread', fp.read()) is not None +def test_prepare_nodes_option(temp_runtime, fake_job, slurm_only): + rt = temp_runtime(fixtures.TEST_CONFIG_FILE, 'generic', + {'schedulers/use_nodes_option': True}) + next(rt) + prepare_job(fake_job) + with open(fake_job.script_filename) as fp: + assert re.search(r'--nodes=8', fp.read()) is not None + + def test_submit(make_job, exec_ctx): minimal_job = make_job(sched_access=exec_ctx.access) prepare_job(minimal_job) From c01465d7bfd93b323a9ed8f7a863249c6297105a Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 3 Jun 2020 17:59:52 +0200 Subject: [PATCH 5/6] Add unit test for use_nodes_option configuration parameter --- unittests/test_schedulers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index e3e8b08898..2d532c6490 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -261,12 +261,15 @@ def test_prepare_without_smt(fake_job, slurm_only): assert re.search(r'--hint=nomultithread', fp.read()) is not None -def test_prepare_nodes_option(temp_runtime, fake_job, slurm_only): +def test_prepare_nodes_option(temp_runtime, make_job, slurm_only): rt = temp_runtime(fixtures.TEST_CONFIG_FILE, 'generic', {'schedulers/use_nodes_option': True}) next(rt) - prepare_job(fake_job) - with open(fake_job.script_filename) as fp: + job = make_job() + job.num_tasks = 16 + job.num_tasks_per_node = 2 + prepare_job(job) + with open(job.script_filename) as fp: assert re.search(r'--nodes=8', fp.read()) is not None From 5af478172a12738468c79da6b7d73e6663ab8eea Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 3 Jun 2020 20:53:59 +0200 Subject: [PATCH 6/6] Address PR comments --- docs/config_reference.rst | 2 +- reframe/schemas/config.json | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 893baa62ce..eaa0f5ff47 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -965,7 +965,7 @@ Common scheduler options :required: No :default: ``false`` - Always emit the ``-N`` Slurm option in the preamble of the job script. + Always emit the ``--nodes`` Slurm option in the preamble of the job script. This option is relevant to Slurm backends only. diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 98a72d63e3..a84208f4f1 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -273,15 +273,14 @@ "items": { "type": "object", "properties": { - "name": {"type": "string"}, - "use_nodes_option": {"type": "boolean"}, "name": { "type": "string", "enum": ["local", "pbs", "slurm", "squeue", "torque"] }, "ignore_reqnodenotavail": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, - "target_systems": {"$ref": "#/defs/system_ref"} + "target_systems": {"$ref": "#/defs/system_ref"}, + "use_nodes_option": {"type": "boolean"} }, "required": ["name"], "additionalProperties": false