From bfbc02d6822247ec4ba438e6aa261c975db0749c Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 15 Jun 2020 17:06:06 +0200 Subject: [PATCH 1/5] Add 'maint' mode for '--flex-alloc-nodes' * Add unittests * Update documentation --- docs/manpage.rst | 6 ++ reframe/core/schedulers/__init__.py | 7 ++ reframe/core/schedulers/slurm.py | 30 +++++++- reframe/frontend/cli.py | 5 +- unittests/test_schedulers.py | 109 +++++++++++++++++++++++----- 5 files changed, 134 insertions(+), 23 deletions(-) diff --git a/docs/manpage.rst b/docs/manpage.rst index 9955d5ec56..94ff9cdc96 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -341,6 +341,10 @@ Options controlling job submission If ``key`` starts with ``-`` or ``#``, the option will be passed verbatim to the job script. Otherwise, ReFrame will add ``-`` or ``--`` as well as the directive corresponding to the current scheduler. This option will be emitted after any options specified in the :js:attr:`access` system partition configuration parameter. + Especially for the Slurm scheduler, constraint options, i.e ``-J constraint=value``, ``-J C=value``, ``-J --constraint=value``, ``-J -C=value` are going to be combined with the corresponding ones specified in the :js:attr:`access` system partition configuration parameter. + If multiple constraint options are specified with separate key-value pairs, only the last one is going to be taken into account. + For multiple combined constraints use the ``-J constraint=value1,value2`` syntax. + Note that the above is not valid if ``key`` starts with ``#`` in which case the option is going to be passed verbatim to the job script. ------------------------ @@ -364,6 +368,8 @@ If no node can be selected, the test will be marked as a failure with an appropr It is therefore possible that the number of tasks assigned does not correspond to the actual idle nodes. This is the default policy. + + - ``maint``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes of the node pool which are currently under maintenance. - Any positive integer: Flexible tests will be assigned as many tasks as needed in order to span over the specified number of nodes from the node pool. --------------------------------------- diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 312d1f6267..31d56ad5a8 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -361,6 +361,13 @@ def guess_num_tasks(self): 'flex_alloc_nodes: selecting idle nodes: ' 'available nodes now: %s' % len(available_nodes) ) + elif self.sched_flex_alloc_nodes == 'maint': + available_nodes = {n for n in available_nodes + if n.under_maintenance()} + getlogger().debug( + 'flex_alloc_nodes: selecting nodes under maintenance: ' + 'available nodes now: %s' % len(available_nodes) + ) return len(available_nodes) * num_tasks_per_node diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 55a910d3c0..3fe818584c 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -195,11 +195,35 @@ def emit_preamble(self, job): hint = 'multithread' if job.use_smt else 'nomultithread' for opt in job.sched_access: - preamble.append('%s %s' % (self._prefix, opt)) + if not opt.strip().startswith(('-C', '--constraint')): + preamble.append('%s %s' % (self._prefix, opt)) + + constraints = [] + constraint_parser = ArgumentParser() + constraint_parser.add_argument('-C', '--constraint') + parsed_options, _ = constraint_parser.parse_known_args( + job.sched_access) + if parsed_options.constraint: + constraints.append(parsed_options.constraint.strip()) + + # NOTE: Here last of the passed --constraint job options is taken + # into account in order to respect the behavior of slurm. + parsed_options, _ = constraint_parser.parse_known_args(job.options) + if parsed_options.constraint: + constraints.append(parsed_options.constraint.strip()) + + if constraints: + preamble.append( + self._format_option(','.join(constraints), '--constraint={0}') + ) preamble.append(self._format_option(hint, '--hint={0}')) prefix_patt = re.compile(r'(#\w+)') for opt in job.options: + if opt.strip().startswith(('-C', '--constraint')): + # Constraints are already processed + continue + if not prefix_patt.match(opt): preamble.append('%s %s' % (self._prefix, opt)) else: @@ -591,6 +615,10 @@ def is_available(self): return all([self._states == {'IDLE'}, self._partitions, self._active_features, self._states]) + def under_maintenance(self): + return all([self._states == {'MAINT'}, self._partitions, + self._active_features, self._states]) + def is_down(self): return bool({'DOWN', 'DRAIN', 'MAINT', 'NO_RESPOND'} & self._states) diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index f84615c93f..5c476ee49d 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -260,7 +260,7 @@ def main(): ) run_options.add_argument( '--flex-alloc-nodes', action='store', - dest='flex_alloc_nodes', metavar='{all|idle|NUM}', default=None, + dest='flex_alloc_nodes', metavar='{all|idle|maint|NUM}', default=None, help='Set strategy for the flexible node allocation (default: "idle").' ) env_options.add_argument( @@ -656,7 +656,8 @@ def print_infoline(param, value): if sched_flex_alloc_nodes <= 0: raise ConfigError(errmsg.format(options.flex_alloc_nodes)) except ValueError: - if not options.flex_alloc_nodes.casefold() in {'idle', 'all'}: + if (not options.flex_alloc_nodes.casefold() in + {'idle', 'all', 'maint'}): raise ConfigError( errmsg.format(options.flex_alloc_nodes)) from None diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index ecf02555f6..61452fcb4b 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -67,7 +67,7 @@ def exec_ctx(temp_runtime, scheduler): next(rt) if scheduler.registered_name == 'squeue': # slurm backend fulfills the functionality of the squeue backend, so - # if squeue is not configured, use slurrm instead + # if squeue is not configured, use slurm instead partition = (fixtures.partition_by_scheduler('squeue') or fixtures.partition_by_scheduler('slurm')) else: @@ -370,6 +370,40 @@ def test_no_empty_lines_in_preamble(minimal_job): assert line != '' +def test_combined_access_constraint(make_job, slurm_only): + job = make_job(sched_access=['--constraint=c1']) + job.options = ['-C c2,c3'] + prepare_job(job) + with open(job.script_filename) as fp: + script_content = fp.read() + + assert re.search(r'(?m)--constraint=c1,c2,c3$', script_content) + assert re.search(r'(?m)--constraint=(c1|c2,c3)$', script_content) is None + + +def test_combined_access_multiple_constraints(make_job, slurm_only): + job = make_job(sched_access=['--constraint=c1']) + job.options = ['--constraint=c2', '-C c3'] + prepare_job(job) + with open(job.script_filename) as fp: + script_content = fp.read() + + assert re.search(r'(?m)--constraint=c1,c3$', script_content) + assert re.search(r'(?m)--constraint=(c1|c2|c3)$', script_content) is None + + +def test_combined_access_verbatim_constraint(make_job, slurm_only): + job = make_job(sched_access=['--constraint=c1']) + job.options = ['#SBATCH --constraint=c2', '#SBATCH -C c3'] + prepare_job(job) + with open(job.script_filename) as fp: + script_content = fp.read() + + assert re.search(r'(?m)--constraint=c1$', script_content) + assert re.search(r'(?m)^#SBATCH --constraint=c2$', script_content) + assert re.search(r'(?m)^#SBATCH -C c3$', script_content) + + def test_guess_num_tasks(minimal_job, scheduler): minimal_job.num_tasks = 0 if scheduler.registered_name == 'local': @@ -613,6 +647,24 @@ def slurm_nodes(): 'ExtSensorsTemp=n/s Reason=Foo/ ' 'failed [reframe_user@01 Jan 2018]', + 'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 ' + 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' + 'AvailableFeatures=f6 ActiveFeatures=f6 ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006' + 'NodeHostName=nid00006 Version=10.00 OS=Linux ' + 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' + 'Sockets=1 Boards=1 State=MAINT ' + 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' + 'MCS_label=N/A Partitions=p4 ' + 'BootTime=01 Jan 2018 ' + 'SlurmdStartTime=01 Jan 2018 ' + 'CfgTRES=cpu=24,mem=32220M ' + 'AllocTRES= CapWatts=n/a CurrentWatts=100 ' + 'LowestJoules=100000000 ConsumedJoules=0 ' + 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' + 'ExtSensorsTemp=n/s Reason=Foo/ ' + 'failed [reframe_user@01 Jan 2018]', + 'Node invalid_node2 not found'] @@ -827,6 +879,13 @@ def test_flex_alloc_not_enough_idle_nodes(make_flexible_job): prepare_job(job) +def test_flex_alloc_maintenance_nodes(make_flexible_job): + job = make_flexible_job('maint') + job.options = ['--partition=p4'] + prepare_job(job) + assert job.num_tasks == 4 + + def test_flex_alloc_not_enough_nodes_constraint_partition(make_flexible_job): job = make_flexible_job('all') job.options = ['-C f1,f2', '--partition=p1,p2'] @@ -934,6 +993,29 @@ def slurm_node_nopart(): ) +@pytest.fixture +def slurm_node_maintenance(): + return _SlurmNode( + 'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 ' + 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' + 'AvailableFeatures=f6 ActiveFeatures=f6 ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006' + 'NodeHostName=nid00006 Version=10.00 OS=Linux ' + 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' + 'Sockets=1 Boards=1 State=MAINT ' + 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' + 'MCS_label=N/A Partitions=p4 ' + 'BootTime=01 Jan 2018 ' + 'SlurmdStartTime=01 Jan 2018 ' + 'CfgTRES=cpu=24,mem=32220M ' + 'AllocTRES= CapWatts=n/a CurrentWatts=100 ' + 'LowestJoules=100000000 ConsumedJoules=0 ' + 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' + 'ExtSensorsTemp=n/s Reason=Foo/ ' + 'failed [reframe_user@01 Jan 2018]' + ) + + def test_slurm_node_noname(): with pytest.raises(JobError): _SlurmNode( @@ -1006,22 +1088,9 @@ def test_slurm_node_is_down(slurm_node_allocated, assert slurm_node_nopart.is_down() -class TestSlurmNode: - def setUp(self): - idle_node_description = ( - ) - - idle_drained_node_description = ( - ) - - no_partition_node_description = ( - ) - - self.no_name_node_description = ( - ) - - self.allocated_node = _SlurmNode(allocated_node_description) - self.allocated_node_copy = _SlurmNode(allocated_node_description) - self.idle_node = _SlurmNode(idle_node_description) - self.idle_drained = _SlurmNode(idle_drained_node_description) - self.no_partition_node = _SlurmNode(no_partition_node_description) +def test_slurm_node_under_maintenance(slurm_node_allocated, + slurm_node_idle, + slurm_node_maintenance): + assert not slurm_node_allocated.under_maintenance() + assert not slurm_node_idle.under_maintenance() + assert slurm_node_maintenance.under_maintenance() From 787aab58ba8585b9f0fb7ff8611e67d5e485ba2b Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Wed, 17 Jun 2020 15:59:51 +0200 Subject: [PATCH 2/5] Allow arbitrary strings for '--flex-alloc-nodes' --- docs/manpage.rst | 16 +++++++--------- reframe/core/schedulers/__init__.py | 22 ++++++++-------------- reframe/core/schedulers/local.py | 4 ++-- reframe/core/schedulers/slurm.py | 10 +++------- reframe/frontend/cli.py | 11 +++-------- unittests/test_schedulers.py | 24 ++++++++---------------- 6 files changed, 31 insertions(+), 56 deletions(-) diff --git a/docs/manpage.rst b/docs/manpage.rst index 94ff9cdc96..e6c5eefc59 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -341,10 +341,6 @@ Options controlling job submission If ``key`` starts with ``-`` or ``#``, the option will be passed verbatim to the job script. Otherwise, ReFrame will add ``-`` or ``--`` as well as the directive corresponding to the current scheduler. This option will be emitted after any options specified in the :js:attr:`access` system partition configuration parameter. - Especially for the Slurm scheduler, constraint options, i.e ``-J constraint=value``, ``-J C=value``, ``-J --constraint=value``, ``-J -C=value` are going to be combined with the corresponding ones specified in the :js:attr:`access` system partition configuration parameter. - If multiple constraint options are specified with separate key-value pairs, only the last one is going to be taken into account. - For multiple combined constraints use the ``-J constraint=value1,value2`` syntax. - Note that the above is not valid if ``key`` starts with ``#`` in which case the option is going to be passed verbatim to the job script. ------------------------ @@ -363,15 +359,17 @@ If no node can be selected, the test will be marked as a failure with an appropr Available values are the following: - ``all``: Flexible tests will be assigned as many tasks as needed in order to span over *all* the nodes of the node pool. - - ``idle``: Flexible tests will be assigned as many tasks as needed in order to span over the *idle* nodes of the node pool. + - ``STATE``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes in state ``STATE``. Querying of the node state and submission of the test job are two separate steps not executed atomically. - It is therefore possible that the number of tasks assigned does not correspond to the actual idle nodes. + It is therefore possible that the number of tasks assigned does not correspond to the actual nodes in the given ``STATE``. - This is the default policy. - - - ``maint``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes of the node pool which are currently under maintenance. + The default policy is to use the ``IDLE`` state which conforms to Slurm's convention for idle nodes. - Any positive integer: Flexible tests will be assigned as many tasks as needed in order to span over the specified number of nodes from the node pool. + .. versionchanged:: 3.1 + It is now possible to pass a string corresponding to the required state of the node to be considered for the flexible node allocation. + + --------------------------------------- Options controlling ReFrame environment --------------------------------------- diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 31d56ad5a8..31a35d575c 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -349,24 +349,18 @@ def guess_num_tasks(self): return self.sched_flex_alloc_nodes * num_tasks_per_node available_nodes = self.scheduler.allnodes() - getlogger().debug('flex_alloc_nodes: total available nodes %s ' % + getlogger().debug('flex_alloc_nodes: total available nodes: %s ' % len(available_nodes)) # Try to guess the number of tasks now available_nodes = self.scheduler.filternodes(self, available_nodes) - if self.sched_flex_alloc_nodes == 'idle': + if self.sched_flex_alloc_nodes.casefold() != 'all': available_nodes = {n for n in available_nodes - if n.is_available()} + if n.in_state(self.sched_flex_alloc_nodes)} getlogger().debug( - 'flex_alloc_nodes: selecting idle nodes: ' - 'available nodes now: %s' % len(available_nodes) - ) - elif self.sched_flex_alloc_nodes == 'maint': - available_nodes = {n for n in available_nodes - if n.under_maintenance()} - getlogger().debug( - 'flex_alloc_nodes: selecting nodes under maintenance: ' - 'available nodes now: %s' % len(available_nodes) + f'flex_alloc_nodes: selecting nodes in state ' + f'"{self.sched_flex_alloc_nodes}: " ' + f'available nodes now: {len(available_nodes)}' ) return len(available_nodes) * num_tasks_per_node @@ -405,5 +399,5 @@ class Node(abc.ABC): ''' @abc.abstractmethod - def is_available(self): - '''Return ``True`` if this node is available, ``False`` otherwise.''' + def in_state(self, state): + '''Return ``True`` if this node is in the given state, ``False`` otherwise.''' diff --git a/reframe/core/schedulers/local.py b/reframe/core/schedulers/local.py index 2f708f756f..1ec8f0eb6e 100644 --- a/reframe/core/schedulers/local.py +++ b/reframe/core/schedulers/local.py @@ -182,5 +182,5 @@ class _LocalNode(sched.Node): def __init__(self, name): self._name = name - def is_available(self): - return True + def in_state(self, state): + return NotImplemented diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 3fe818584c..0cfe23234c 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -611,13 +611,9 @@ def __eq__(self, other): def __hash__(self): return hash(self.name) - def is_available(self): - return all([self._states == {'IDLE'}, self._partitions, - self._active_features, self._states]) - - def under_maintenance(self): - return all([self._states == {'MAINT'}, self._partitions, - self._active_features, self._states]) + def in_state(self, state): + return all([self._states == set(state.upper().split('+')), + self._partitions, self._active_features, self._states]) def is_down(self): return bool({'DOWN', 'DRAIN', 'MAINT', 'NO_RESPOND'} & self._states) diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 5c476ee49d..4431094733 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -260,8 +260,8 @@ def main(): ) run_options.add_argument( '--flex-alloc-nodes', action='store', - dest='flex_alloc_nodes', metavar='{all|idle|maint|NUM}', default=None, - help='Set strategy for the flexible node allocation (default: "idle").' + dest='flex_alloc_nodes', metavar='{STATE|ALL||NUM}', default=None, + help='Set strategy for the flexible node allocation (default: "IDLE").' ) env_options.add_argument( '-M', '--map-module', action='append', metavar='MAPPING', @@ -593,7 +593,7 @@ def print_infoline(param, value): "Skipping..." % m) printer.debug(str(e)) - options.flex_alloc_nodes = options.flex_alloc_nodes or 'idle' + options.flex_alloc_nodes = options.flex_alloc_nodes or 'IDLE' if options.account: printer.warning(f"`--account' is deprecated and " f"will be removed in the future; you should " @@ -656,11 +656,6 @@ def print_infoline(param, value): if sched_flex_alloc_nodes <= 0: raise ConfigError(errmsg.format(options.flex_alloc_nodes)) except ValueError: - if (not options.flex_alloc_nodes.casefold() in - {'idle', 'all', 'maint'}): - raise ConfigError( - errmsg.format(options.flex_alloc_nodes)) from None - sched_flex_alloc_nodes = options.flex_alloc_nodes exec_policy.sched_flex_alloc_nodes = sched_flex_alloc_nodes diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 61452fcb4b..bae8a8aeef 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1070,14 +1070,14 @@ def test_str(slurm_node_allocated): assert 'nid00001' == str(slurm_node_allocated) -def test_slurm_node_is_available(slurm_node_allocated, - slurm_node_idle, - slurm_node_drained, - slurm_node_nopart): - assert not slurm_node_allocated.is_available() - assert slurm_node_idle.is_available() - assert not slurm_node_drained.is_available() - assert not slurm_node_nopart.is_available() +def test_slurm_node_in_state(slurm_node_allocated, + slurm_node_idle, + slurm_node_drained, + slurm_node_nopart): + assert slurm_node_allocated.in_state('allocated') + assert slurm_node_idle.in_state('Idle') + assert slurm_node_drained.in_state('IDLE+Drain') + assert not slurm_node_nopart.in_state('IDLE') def test_slurm_node_is_down(slurm_node_allocated, @@ -1086,11 +1086,3 @@ def test_slurm_node_is_down(slurm_node_allocated, assert not slurm_node_allocated.is_down() assert not slurm_node_idle.is_down() assert slurm_node_nopart.is_down() - - -def test_slurm_node_under_maintenance(slurm_node_allocated, - slurm_node_idle, - slurm_node_maintenance): - assert not slurm_node_allocated.under_maintenance() - assert not slurm_node_idle.under_maintenance() - assert slurm_node_maintenance.under_maintenance() From 8087b3769163b622cf68357a9300d822f3f59fea Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Wed, 17 Jun 2020 16:08:03 +0200 Subject: [PATCH 3/5] Fix Pep8 issues --- reframe/core/schedulers/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 31a35d575c..dd23e6812f 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -400,4 +400,9 @@ class Node(abc.ABC): @abc.abstractmethod def in_state(self, state): - '''Return ``True`` if this node is in the given state, ``False`` otherwise.''' + '''Returns whether the node is in the given state. + + :arg state: The node state. + :returns: :class:`True` if the nodes's state matches the given one, + :class:`False` otherwise. + ''' From 8ae5e682b58118c1006df4070b6949d620a693fa Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 18 Jun 2020 18:39:42 +0200 Subject: [PATCH 4/5] Address PR comments --- docs/manpage.rst | 6 +++--- reframe/core/schedulers/__init__.py | 2 +- reframe/core/schedulers/local.py | 2 +- reframe/core/schedulers/slurm.py | 2 +- reframe/frontend/cli.py | 4 ++-- unittests/test_schedulers.py | 3 +++ 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/manpage.rst b/docs/manpage.rst index e6c5eefc59..6d69fbd8ed 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -359,15 +359,15 @@ If no node can be selected, the test will be marked as a failure with an appropr Available values are the following: - ``all``: Flexible tests will be assigned as many tasks as needed in order to span over *all* the nodes of the node pool. - - ``STATE``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes in state ``STATE``. + - ``STATE``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes that are currently in state ``STATE``. Querying of the node state and submission of the test job are two separate steps not executed atomically. - It is therefore possible that the number of tasks assigned does not correspond to the actual nodes in the given ``STATE``. + It is therefore possible that the number of tasks assigned does not correspond to the actual nodes in the given state. The default policy is to use the ``IDLE`` state which conforms to Slurm's convention for idle nodes. - Any positive integer: Flexible tests will be assigned as many tasks as needed in order to span over the specified number of nodes from the node pool. .. versionchanged:: 3.1 - It is now possible to pass a string corresponding to the required state of the node to be considered for the flexible node allocation. + It is now possible to pass an arbitrary node state as a flexible node allocation parameter. --------------------------------------- diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index dd23e6812f..763b960509 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -359,7 +359,7 @@ def guess_num_tasks(self): if n.in_state(self.sched_flex_alloc_nodes)} getlogger().debug( f'flex_alloc_nodes: selecting nodes in state ' - f'"{self.sched_flex_alloc_nodes}: " ' + f'{self.sched_flex_alloc_nodes!r}: ' f'available nodes now: {len(available_nodes)}' ) diff --git a/reframe/core/schedulers/local.py b/reframe/core/schedulers/local.py index 1ec8f0eb6e..354755a6a3 100644 --- a/reframe/core/schedulers/local.py +++ b/reframe/core/schedulers/local.py @@ -183,4 +183,4 @@ def __init__(self, name): self._name = name def in_state(self, state): - return NotImplemented + return state.casefold() == 'idle' diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 0cfe23234c..2e6db6581a 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -612,7 +612,7 @@ def __hash__(self): return hash(self.name) def in_state(self, state): - return all([self._states == set(state.upper().split('+')), + return all([self._states >= set(state.upper().split('+')), self._partitions, self._active_features, self._states]) def is_down(self): diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 4431094733..7dc83267f2 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -260,8 +260,8 @@ def main(): ) run_options.add_argument( '--flex-alloc-nodes', action='store', - dest='flex_alloc_nodes', metavar='{STATE|ALL||NUM}', default=None, - help='Set strategy for the flexible node allocation (default: "IDLE").' + dest='flex_alloc_nodes', metavar='{all|STATE|NUM}', default=None, + help='Set strategy for the flexible node allocation (default: "idle").' ) env_options.add_argument( '-M', '--map-module', action='append', metavar='MAPPING', diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index bae8a8aeef..a52352bfd6 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1077,6 +1077,9 @@ def test_slurm_node_in_state(slurm_node_allocated, assert slurm_node_allocated.in_state('allocated') assert slurm_node_idle.in_state('Idle') assert slurm_node_drained.in_state('IDLE+Drain') + assert slurm_node_drained.in_state('IDLE') + assert slurm_node_drained.in_state('idle') + assert slurm_node_drained.in_state('DRAIN') assert not slurm_node_nopart.in_state('IDLE') From 9606710a6a73e12aec080f73fe485dc5e697fc96 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 22 Jun 2020 09:35:20 +0200 Subject: [PATCH 5/5] Address PR comments (version 2) --- docs/manpage.rst | 4 ++-- reframe/frontend/cli.py | 2 +- unittests/test_schedulers.py | 34 ---------------------------------- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/docs/manpage.rst b/docs/manpage.rst index 6d69fbd8ed..95492328cb 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -353,7 +353,7 @@ When allocating nodes automatically, ReFrame will take into account all node lim Nodes from this pool are allocated according to different policies. If no node can be selected, the test will be marked as a failure with an appropriate message. -.. option:: --flex-alloc-nodes[=POLICY] +.. option:: --flex-alloc-nodes=POLICY Set the flexible node allocation policy. Available values are the following: @@ -363,7 +363,7 @@ If no node can be selected, the test will be marked as a failure with an appropr Querying of the node state and submission of the test job are two separate steps not executed atomically. It is therefore possible that the number of tasks assigned does not correspond to the actual nodes in the given state. - The default policy is to use the ``IDLE`` state which conforms to Slurm's convention for idle nodes. + If this option is not specified, the default allocation policy for flexible tests is 'idle'. - Any positive integer: Flexible tests will be assigned as many tasks as needed in order to span over the specified number of nodes from the node pool. .. versionchanged:: 3.1 diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 7dc83267f2..d3123f72b6 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -593,7 +593,7 @@ def print_infoline(param, value): "Skipping..." % m) printer.debug(str(e)) - options.flex_alloc_nodes = options.flex_alloc_nodes or 'IDLE' + options.flex_alloc_nodes = options.flex_alloc_nodes or 'idle' if options.account: printer.warning(f"`--account' is deprecated and " f"will be removed in the future; you should " diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index a52352bfd6..2c20e2569a 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -370,40 +370,6 @@ def test_no_empty_lines_in_preamble(minimal_job): assert line != '' -def test_combined_access_constraint(make_job, slurm_only): - job = make_job(sched_access=['--constraint=c1']) - job.options = ['-C c2,c3'] - prepare_job(job) - with open(job.script_filename) as fp: - script_content = fp.read() - - assert re.search(r'(?m)--constraint=c1,c2,c3$', script_content) - assert re.search(r'(?m)--constraint=(c1|c2,c3)$', script_content) is None - - -def test_combined_access_multiple_constraints(make_job, slurm_only): - job = make_job(sched_access=['--constraint=c1']) - job.options = ['--constraint=c2', '-C c3'] - prepare_job(job) - with open(job.script_filename) as fp: - script_content = fp.read() - - assert re.search(r'(?m)--constraint=c1,c3$', script_content) - assert re.search(r'(?m)--constraint=(c1|c2|c3)$', script_content) is None - - -def test_combined_access_verbatim_constraint(make_job, slurm_only): - job = make_job(sched_access=['--constraint=c1']) - job.options = ['#SBATCH --constraint=c2', '#SBATCH -C c3'] - prepare_job(job) - with open(job.script_filename) as fp: - script_content = fp.read() - - assert re.search(r'(?m)--constraint=c1$', script_content) - assert re.search(r'(?m)^#SBATCH --constraint=c2$', script_content) - assert re.search(r'(?m)^#SBATCH -C c3$', script_content) - - def test_guess_num_tasks(minimal_job, scheduler): minimal_job.num_tasks = 0 if scheduler.registered_name == 'local':