Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions reframe/core/schedulers/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
JobBlockedError,
JobError,
JobSchedulerError)
from reframe.utility import nodelist_abbrev, seconds_to_hms
from reframe.utility import nodelist_abbrev, nodelist_expand, seconds_to_hms


def slurm_state_completed(state):
Expand Down Expand Up @@ -73,6 +73,18 @@ def __init__(self, *args, **kwargs):
self._is_array = False
self._is_cancelling = False

# The compacted nodelist as reported by Slurm. This must be updated in
# every poll as Slurm may be slow in reporting the exact nodelist
self._nodespec = None

@property
def nodelist(self):
    '''The list of nodes of this job, or ``None`` if it is not known yet.

    The list is generated lazily from ``_nodespec`` and cached in
    ``_nodelist``; once a list has been cached it is returned as is.

    :returns: The cached node list, or ``None`` as long as no usable
        nodespec has been recorded.
    '''
    if self._nodelist is None:
        nodespec = self._nodespec

        # An empty nodespec or the 'None assigned' placeholder means that
        # no nodes have been reported yet. Expanding it would cache a bogus
        # list and any nodespec recorded by a later poll would be ignored.
        if nodespec and nodespec != 'None assigned':
            self._nodelist = nodelist_expand(nodespec)

    return self._nodelist

@property
def is_array(self):
    '''Read-only view of the ``_is_array`` flag of this job.'''
    return self._is_array
Expand Down Expand Up @@ -380,13 +392,6 @@ def _get_nodes_by_name(self, nodespec):
node_descriptions = completed.stdout.splitlines()
return _create_nodes(node_descriptions)

def _update_nodelist(self, job, nodespec):
    '''Set the node list of ``job`` from ``nodespec``, at most once.

    Nothing happens if ``job.nodelist`` is already set, or if ``nodespec``
    is empty or equal to ``'None assigned'``.

    :arg job: The job whose ``_nodelist`` is to be filled in.
    :arg nodespec: The node specification to resolve through
        ``_get_nodes_by_name()``.
    '''
    already_resolved = job.nodelist is not None
    if already_resolved:
        return

    usable = bool(nodespec) and nodespec != 'None assigned'
    if not usable:
        return

    resolved = self._get_nodes_by_name(nodespec)
    job._nodelist = [node.name for node in resolved]

def _update_completion_time(self, job, timestamps):
if job._completion_time is not None:
return
Expand Down Expand Up @@ -460,9 +465,7 @@ def poll(self, *jobs):
)

# Use ',' to join nodes to be consistent with Slurm syntax
self._update_nodelist(
job, ','.join(m.group('nodespec') for m in jobarr_info)
)
job._nodespec = ','.join(m.group('nodespec') for m in jobarr_info)
self._update_completion_time(
job, (m.group('end') for m in jobarr_info)
)
Expand Down
37 changes: 37 additions & 0 deletions reframe/utility/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,43 @@ def nodelist_abbrev(nodes):
return ','.join(str(ng) for ng in node_groups)


def nodelist_expand(nodespec):
    '''Expand the nodes in ``nodespec`` to a list of nodes.

    Each comma-separated part of ``nodespec`` is either a plain node name,
    which is returned as is, or has the form ``<prefix>[<low>-<high>]``,
    which is expanded to one node name per number in the inclusive range.

    Numbers are zero-padded to the width of ``<low>`` as it is written in
    the nodespec; numbers needing more digits are never truncated. For
    example, ``nid[08-10]`` gives ``nid08``, ``nid09``, ``nid10``, whereas
    ``nid[8-10]`` gives ``nid8``, ``nid9``, ``nid10``.

    :arg nodespec: A node specification as the one returned by
        :func:`nodelist_abbrev`
    :returns: The list of nodes corresponding to the given node specification.
    :raises TypeError: If ``nodespec`` is not a string.
    :raises ValueError: If a part of ``nodespec`` contains brackets but is
        not a valid range expression.

    .. versionadded:: 4.0.0
    '''

    if not isinstance(nodespec, str):
        raise TypeError('nodespec argument must be a string')

    if nodespec == '':
        return []

    node_patt = re.compile(r'(?P<prefix>.+)\[(?P<l>\d+)-(?P<u>\d+)\]')
    nodes = []
    for ns in nodespec.split(','):
        if '[' not in ns and ']' not in ns:
            # Plain node name; nothing to expand
            nodes.append(ns)
            continue

        match = node_patt.match(ns)
        if not match:
            raise ValueError(f'invalid nodespec: {nodespec}')

        prefix = match.group('prefix')
        low_text = match.group('l')
        low, upper = int(low_text), int(match.group('u'))

        # Take the padding from the lower bound as written. Deriving it from
        # the numeric value of the upper bound loses leading zeros
        # ('[001-010]') and pads unpadded ranges ('[8-11]') by mistake.
        width = len(low_text)
        nodes.extend(
            f'{prefix}{nid:0{width}}' for nid in range(low, upper + 1)
        )

    return nodes


def cache_return_value(fn):
'''Decorator that caches the return value of the decorated function.

Expand Down
26 changes: 25 additions & 1 deletion unittests/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -1761,7 +1761,7 @@ def bar(x, y):
util.is_trivially_callable(1)


def test_nodelist_abbrev():
def test_nodelist_utilities():
nid_nodes = [f'nid{n:03}' for n in range(5, 20)]
cid_nodes = [f'cid{n:03}' for n in range(20)]

Expand All @@ -1776,12 +1776,20 @@ def test_nodelist_abbrev():
random.shuffle(all_nodes)

nodelist = util.nodelist_abbrev
expand = util.nodelist_expand
assert nodelist(nid_nodes) == 'nid00[1-2],nid0[05-19],nid125'
assert nodelist(cid_nodes) == 'cid0[00-19],cid05[5-6]'
assert nodelist(all_nodes) == (
'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
)

# Test the reverse operation
assert expand('nid00[1-2],nid0[05-19],nid125') == sorted(nid_nodes)
assert expand('cid0[00-19],cid05[5-6]') == sorted(cid_nodes)
assert expand(
'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
) == sorted(all_nodes)

# Test non-contiguous nodes
nid_nodes = []
for i in range(3):
Expand All @@ -1793,19 +1801,35 @@ def test_nodelist_abbrev():
assert nodelist([]) == ''
assert nodelist(['nid001']) == 'nid001'

# Test the reverse operation
assert expand('nid00[0-4],nid01[0-4],nid02[0-4]') == sorted(nid_nodes)
assert expand('nid01,nid10,nid20') == ['nid01', 'nid10', 'nid20']
assert expand('') == []
assert expand('nid001') == ['nid001']

# Test host names with numbers in their basename (see GH #2357)
nodes = [f'c2-01-{n:02}' for n in range(100)]
assert nodelist(nodes) == 'c2-01-[00-99]'

# Test the reverse operation
assert expand('c2-01-[00-99]') == nodes

# Test node duplicates
assert nodelist(['nid001', 'nid001', 'nid002']) == 'nid001,nid00[1-2]'
assert expand('nid001,nid00[1-2]') == ['nid001', 'nid001', 'nid002']

with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
nodelist(1)

with pytest.raises(TypeError, match='nodes argument cannot be a string'):
nodelist('foo')

with pytest.raises(TypeError, match='nodespec argument must be a string'):
expand(10)

with pytest.raises(ValueError, match='invalid nodespec'):
expand('nid00[1-3],nid3[3-43')


def test_cached_return_value():

Expand Down