From fdf7b6d97c3630f203648d60f981ad9732465cca Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 12 Apr 2024 17:58:43 +0200 Subject: [PATCH 1/3] Treat OR Slurm constraints in flexible node allocation --- docs/config_reference.rst | 11 ++++++++--- docs/manpage.rst | 5 +++++ reframe/core/launchers/__init__.py | 2 +- reframe/core/schedulers/slurm.py | 29 +++++++++++++++++++++++------ unittests/test_schedulers.py | 18 ++++++++++++++++-- 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 2632e76833..984d97205a 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -480,9 +480,14 @@ System Partition Configuration A list of job scheduler options that will be passed to the generated job script for gaining access to that logical partition. - .. note:: - For the ``pbs`` and ``torque`` backends, options accepted in the :attr:`~config.systems.partitions.access` and :attr:`~config.systems.partitions.resources` parameters may either refer to actual ``qsub`` options or may just be resources specifications to be passed to the ``-l`` option. - The backend assumes a ``qsub`` option, if the options passed in these attributes start with a ``-``. +.. note:: + For the ``pbs`` and ``torque`` backends, options accepted in the :attr:`~config.systems.partitions.access` and :attr:`~config.systems.partitions.resources` parameters may either refer to actual ``qsub`` options or may just be resources specifications to be passed to the ``-l`` option. + The backend assumes a ``qsub`` option, if the options passed in these attributes start with a ``-``. + +.. note:: + If constraints are specified in :attr:`~config.systems.partition.access` for the Slurm backends, + these will be AND'ed with any additional constraints passed either through the test job :attr:`~reframe.core.schedulers.Job.options` or the :option:`-J` command-line option. + In other words, any constraint passed in :attr:`~config.systems.partition.access` will always be present in the generated job script. .. py:attribute:: systems.partitions.environs diff --git a/docs/manpage.rst b/docs/manpage.rst index 86365b6a77..004618f4c7 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -728,6 +728,9 @@ Flexible node allocation ReFrame can automatically set the number of tasks of a test, if its :attr:`num_tasks ` attribute is set to a value less than or equal to zero. This scheme is conveniently called *flexible node allocation* and is valid only for the Slurm backend. When allocating nodes automatically, ReFrame will take into account all node limiting factors, such as partition :attr:`~config.systems.partitions.access` options, and any job submission control options described above. +Particularly for Slurm constraints, ReFrame will only recognize simple AND or OR constraints and any parenthesized expression of them. +The full syntax of `Slurm constraints `__ is not currently supported. + Nodes from this pool are allocated according to different policies. If no node can be selected, the test will be marked as a failure with an appropriate message. @@ -747,6 +750,8 @@ If no node can be selected, the test will be marked as a failure with an appropr Align the state selection with the :option:`--distribute` option. See the :option:`--distribute` for more details. + Slurm OR constraints and parenthesized expressions are supported in flexible node allocation. + --------------------------------------- Options controlling ReFrame environment --------------------------------------- diff --git a/reframe/core/launchers/__init__.py b/reframe/core/launchers/__init__.py index d1c10b2c9d..eef3eacd02 100644 --- a/reframe/core/launchers/__init__.py +++ b/reframe/core/launchers/__init__.py @@ -51,7 +51,7 @@ class JobLauncher(metaclass=_JobLauncherMeta): #: #: If the modifier is empty, these options will be ignored. #: - #: :type: :clas:`List[str]` + #: :type: :class:`List[str]` #: :default: ``[]`` #: #: :versionadded:: 4.6.0 diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 779e270bf0..ab29c64453 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -213,11 +213,15 @@ def emit_preamble(self, job): if not opt.strip().startswith(('-C', '--constraint')): preamble.append('%s %s' % (self._prefix, opt)) + # To avoid overriding a constraint that's passed into `sched_access`, + # we AND it with the `--constraint` option passed either in `options` + # or in `cli_options` constraints = [] constraint_parser = ArgumentParser() constraint_parser.add_argument('-C', '--constraint') parsed_options, _ = constraint_parser.parse_known_args( - job.sched_access) + job.sched_access + ) if parsed_options.constraint: constraints.append(parsed_options.constraint.strip()) @@ -230,9 +234,14 @@ def emit_preamble(self, job): constraints.append(parsed_options.constraint.strip()) if constraints: - preamble.append( - self._format_option('&'.join(constraints), '--constraint={0}') - ) + if len(constraints) == 1: + constr = constraints[0] + else: + # Parenthesize the constraints prior to joining them with `&` + # to make sure that precedence is respected. + constr = '&'.join(f'({c})' for c in constraints) + + preamble.append(self._format_option(constr, '--constraint={0}')) preamble.append(self._format_option(hint, '--hint={0}')) prefix_patt = re.compile(r'(#\w+)') @@ -350,8 +359,7 @@ def filternodes(self, job, nodes): self.log(f'[F] Filtering nodes by partition(s) {partitions}: ' f'available nodes now: {len(nodes)}') if constraints: - constraints = set(constraints.strip().split('&')) - nodes = {n for n in nodes if n.active_features >= constraints} + nodes = {n for n in nodes if n.satisfies(constraints)} self.log(f'[F] Filtering nodes by constraint(s) {constraints}: ' f'available nodes now: {len(nodes)}') @@ -669,6 +677,15 @@ def is_avail(self): def is_down(self): return not self.is_avail() + def satisfies(self, slurm_constraint): + # Convert the Slurm constraint to a Python expression and evaluate it + names = {grp[0] + for grp in re.finditer(r'(\w(\w|\d)*)', slurm_constraint)} + expr = slurm_constraint.replace('|', ' or ').replace('&', ' and ') + vars = {n: True for n in self.active_features} + vars.update({n: False for n in names - self.active_features}) + return eval(expr, {}, vars) + @property def active_features(self): return self._active_features diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 3553ba07e3..9e116f8187 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -617,7 +617,7 @@ def test_combined_access_constraint(make_job, slurm_only): with open(job.script_filename) as fp: script_content = fp.read() - assert re.search(r'(?m)--constraint=c1&c2&c3$', script_content) + assert re.search(r'(?m)--constraint=\(c1\)&\(c2&c3\)$', script_content) assert re.search(r'(?m)--constraint=(c1|c2&c3)$', script_content) is None @@ -628,7 +628,7 @@ def test_combined_access_multiple_constraints(make_job, slurm_only): with open(job.script_filename) as fp: script_content = fp.read() - assert re.search(r'(?m)--constraint=c1&c3$', script_content) + assert re.search(r'(?m)--constraint=\(c1\)&\(c3\)$', script_content) assert re.search(r'(?m)--constraint=(c1|c2|c3)$', script_content) is None @@ -1176,6 +1176,20 @@ def test_flex_alloc_enough_nodes_constraint_partition(make_flexible_job): assert job.num_tasks == 4 +def test_flex_alloc_enough_nodes_constraint_expr(make_flexible_job): + job = make_flexible_job('all') + job.options = ['-C "(f1|f2)&f3"'] + prepare_job(job) + assert job.num_tasks == 8 + + +def test_flex_alloc_not_enough_nodes_constraint_expr(make_flexible_job): + job = make_flexible_job('all') + job.options = ['-C "(f1|f2)&(f8|f9)"'] + with pytest.raises(JobError): + prepare_job(job) + + @pytest.fixture def slurm_node_allocated(): return _SlurmNode( From f29a3785f49334974dbce42c5de05a374a777a88 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 15 Apr 2024 18:53:43 +0200 Subject: [PATCH 2/3] Add unit tests to test against invalid and unsupported constraint expressions --- reframe/core/schedulers/slurm.py | 12 ++++++++++-- unittests/test_schedulers.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index ab29c64453..550f0d3a8a 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -678,13 +678,21 @@ def is_down(self): return not self.is_avail() def satisfies(self, slurm_constraint): - # Convert the Slurm constraint to a Python expression and evaluate it + # Convert the Slurm constraint to a Python expression and evaluate it, + # but restrict our syntax to accept only AND or OR constraints and + # their combinations + if not re.match(r'^[\w\d\(\)\|\&]*$', slurm_constraint): + return False + names = {grp[0] for grp in re.finditer(r'(\w(\w|\d)*)', slurm_constraint)} expr = slurm_constraint.replace('|', ' or ').replace('&', ' and ') vars = {n: True for n in self.active_features} vars.update({n: False for n in names - self.active_features}) - return eval(expr, {}, vars) + try: + return eval(expr, {}, vars) + except BaseException: + return False @property def active_features(self): diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 9e116f8187..05340bca96 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1183,6 +1183,19 @@ def test_flex_alloc_enough_nodes_constraint_expr(make_flexible_job): assert job.num_tasks == 8 +def test_flex_alloc_nodes_unsupported_constraint(make_flexible_job): + job = make_flexible_job('all') + job.options = ['-C "[f1*2&f2*4]"'] + with pytest.raises(JobError): + prepare_job(job) + +def test_flex_alloc_nodes_invalid_constraint(make_flexible_job): + job = make_flexible_job('all') + job.options = ['-C "(f1|f2)&"'] + with pytest.raises(JobError): + prepare_job(job) + + def test_flex_alloc_not_enough_nodes_constraint_expr(make_flexible_job): job = make_flexible_job('all') job.options = ['-C "(f1|f2)&(f8|f9)"'] From 2d1eca4439c1125cfc439d443a4d4736da93119e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 15 Apr 2024 18:55:44 +0200 Subject: [PATCH 3/3] Fix coding style issues --- unittests/test_schedulers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 05340bca96..8c4ebea5c9 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1189,6 +1189,7 @@ def test_flex_alloc_nodes_unsupported_constraint(make_flexible_job): with pytest.raises(JobError): prepare_job(job) + def test_flex_alloc_nodes_invalid_constraint(make_flexible_job): job = make_flexible_job('all') job.options = ['-C "(f1|f2)&"']