From 28f5cb4f6556197f7e3c5c2ce946d324fda707ed Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 18 Feb 2023 16:29:46 +0100 Subject: [PATCH 1/3] Fix `sched_options` use by backend schedulers --- reframe/core/schedulers/__init__.py | 2 +- reframe/schemas/config.json | 8 ++++---- unittests/resources/config/settings.py | 2 +- unittests/test_config.py | 2 +- unittests/test_schedulers.py | 10 +++++----- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index cf87ffd603..0424571e87 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -45,7 +45,7 @@ def __call__(cls, *args, **kwargs): obj = cls.__new__(cls, *args, **kwargs) if part_name: obj._config_prefix = ( - f'systems/0/paritions/@{part_name}/sched_options' + f'systems/0/partitions/@{part_name}/sched_options' ) else: obj._config_prefix = 'systems/0/sched_options' diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 2ca913db5b..9fb60f0e18 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -601,9 +601,9 @@ "systems/partitions/time_limit": null, "systems/partitions/devices": [], "systems/partitions/extras": {}, - "systems*/sched_options/ignore_reqnodenotavail": false, - "systems*/sched_options/job_submit_timeout": 60, - "systems*/sched_options/resubmit_on_errors": [], - "systems*/sched_options/use_nodes_option": false + "{systems,systems/partitions}/sched_options/ignore_reqnodenotavail": false, + "{systems,systems/partitions}/sched_options/job_submit_timeout": 60, + "{systems,systems/partitions}/sched_options/resubmit_on_errors": [], + "{systems,systems/partitions}/sched_options/use_nodes_option": false } } diff --git a/unittests/resources/config/settings.py b/unittests/resources/config/settings.py index a7746fb167..64b276cac4 100644 --- a/unittests/resources/config/settings.py +++ b/unittests/resources/config/settings.py @@ -63,7 +63,7 @@ 'environs': ['PrgEnv-gnu', 'builtin'], 'max_jobs': 10, 'sched_options': { - 'use_nodes_option': False + 'use_nodes_option': True }, 'processor': { 'arch': 'skylake', diff --git a/unittests/test_config.py b/unittests/test_config.py index aee8d64698..4f936539c8 100644 --- a/unittests/test_config.py +++ b/unittests/test_config.py @@ -296,7 +296,7 @@ def test_select_subconfig(site_config): [['FOO_GPU', 'yes']]) assert site_config.get('systems/0/partitions/0/max_jobs') == 10 assert site_config.get('systems/0/partitions/0/sched_options') == { - 'use_nodes_option': False + 'use_nodes_option': True } assert site_config.get('environments/@PrgEnv-gnu/cc') == 'cc' assert site_config.get('environments/1/cxx') == 'CC' diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 154bab27be..a28720c876 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -72,9 +72,10 @@ def exec_ctx(make_exec_ctx, scheduler): @pytest.fixture def make_job(scheduler, launcher, tmp_path): - def _make_job(**jobargs): + def _make_job(sched_opts=None, **jobargs): + sched = scheduler(**sched_opts) if sched_opts else scheduler() return Job.create( - scheduler(), launcher(), + sched, launcher(), name='testjob', workdir=tmp_path, script_filename=str(tmp_path / 'job.sh'), @@ -377,9 +378,8 @@ def test_prepare_without_smt(fake_job, slurm_only): def test_prepare_nodes_option(make_exec_ctx, make_job, slurm_only): - make_exec_ctx(test_util.TEST_CONFIG_FILE, 'generic', - {'systems*/sched_options/use_nodes_option': True}) - job = make_job() + make_exec_ctx(test_util.TEST_CONFIG_FILE, 'testsys') + job = make_job(sched_opts={'part_name': 'gpu'}) job.num_tasks = 16 job.num_tasks_per_node = 2 prepare_job(job) From cda96a631cbaaedf8629e1b66f52e10e833a06f3 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 18 Feb 2023 21:13:14 +0100 Subject: [PATCH 2/3] Fix `use_nodes_option` case when `num_tasks_per_node` is not set --- docs/config_reference.rst | 1 - reframe/core/schedulers/slurm.py | 3 ++- unittests/test_schedulers.py | 9 +++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index f471337fd6..f1d8c6877a 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -359,7 +359,6 @@ System Partition Configuration :default: ``false`` Always emit the ``--nodes`` Slurm option in the preamble of the job script. - This option is relevant to Slurm backends only. This option is relevant for the Slurm backends only. diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 1dceb80a7c..2ced2d5c45 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -192,7 +192,8 @@ def emit_preamble(self, job): ) if self._use_nodes_opt: - num_nodes = job.num_tasks // job.num_tasks_per_node + num_tasks_per_node = job.num_tasks_per_node or 1 + num_nodes = job.num_tasks // num_tasks_per_node preamble.append(self._format_option(num_nodes, '--nodes={0}')) if job.use_smt is None: diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index a28720c876..ab7726c0f0 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -387,6 +387,15 @@ def test_prepare_nodes_option(make_exec_ctx, make_job, slurm_only): assert re.search(r'--nodes=8', fp.read()) is not None +def test_prepare_nodes_option_minimal(make_exec_ctx, make_job, slurm_only): + make_exec_ctx(test_util.TEST_CONFIG_FILE, 'testsys') + job = make_job(sched_opts={'part_name': 'gpu'}) + job.num_tasks = 16 + prepare_job(job) + with open(job.script_filename) as fp: + assert re.search(r'--nodes=16', fp.read()) is not None + + def test_submit(make_job, exec_ctx): minimal_job = make_job(sched_access=exec_ctx.access) prepare_job(minimal_job) From b13d9c484ded1cf293f4e1a674a71161f2c53bca Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 18 Feb 2023 21:44:47 +0100 Subject: [PATCH 3/3] Allow `sched_options` to be defined inside `systems` --- reframe/schemas/config.json | 28 ++++++++++++++------------ unittests/resources/config/settings.py | 3 +++ unittests/test_config.py | 3 +++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 9fb60f0e18..1a9010b44f 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -102,6 +102,19 @@ } ] }, + "sched_options": { + "type": "object", + "properties": { + "ignore_reqnodenotavail": {"type": "boolean"}, + "job_submit_timeout": {"type": "number"}, + "resubmit_on_errors": { + "type": "array", + "items": {"type": "string"} + }, + "use_nodes_option": {"type": "boolean"} + }, + "additionalProperties": false + }, "stream_handler": { "allOf": [ {"$ref": "#/defs/handler_common"}, @@ -242,6 +255,7 @@ "stagedir": {"type": "string"}, "outputdir": {"type": "string"}, "resourcesdir": {"type": "string"}, + "sched_options": {"$ref": "#/defs/sched_options"}, "partitions": { "type": "array", "items": { @@ -256,19 +270,7 @@ "sge", "slurm", "squeue", "torque" ] }, - "sched_options": { - "type": "object", - "properties": { - "ignore_reqnodenotavail": {"type": "boolean"}, - "job_submit_timeout": {"type": "number"}, - "resubmit_on_errors": { - "type": "array", - "items": {"type": "string"} - }, - "use_nodes_option": {"type": "boolean"} - }, - "additionalProperties": false - }, + "sched_options": {"$ref": "#/defs/sched_options"}, "launcher": { "type": "string" }, diff --git a/unittests/resources/config/settings.py b/unittests/resources/config/settings.py index 64b276cac4..4e55956601 100644 --- a/unittests/resources/config/settings.py +++ b/unittests/resources/config/settings.py @@ -17,6 +17,9 @@ 'resourcesdir': '.rfm_testing/resources', 'modules': ['foo/1.0'], 'env_vars': [['FOO_CMD', 'foobar']], + 'sched_options': { + 'job_submit_timeout': 10 + }, 'partitions': [ { 'name': 'login', diff --git a/unittests/test_config.py b/unittests/test_config.py index 4f936539c8..442d512066 100644 --- a/unittests/test_config.py +++ b/unittests/test_config.py @@ -298,6 +298,9 @@ def test_select_subconfig(site_config): assert site_config.get('systems/0/partitions/0/sched_options') == { 'use_nodes_option': True } + assert site_config.get('systems/0/sched_options') == { + 'job_submit_timeout': 10 + } assert site_config.get('environments/@PrgEnv-gnu/cc') == 'cc' assert site_config.get('environments/1/cxx') == 'CC' assert site_config.get('general/0/check_search_path') == ['c:d']