def _delta_encode(seq):
    '''Delta-encode a sequence of numbers.

    Example of delta encoding:

    - Input list:
      1 2 5 6 7 8 9 125

    - Output list:
      1 1 3 1 1 1 1 106
      ^
      |
      First element
      of the original list.

    :arg seq: the sequence to encode; it must support slicing.
    :returns: the encoded list. The first element of the encoded sequence is
        the first element of the original sequence. An empty input yields an
        empty list.

    '''

    if not seq:
        return []

    return [seq[0]] + [curr - prev for prev, curr in zip(seq, seq[1:])]


def _rl_encode(seq):
    '''Run-length encode a delta-encoded sequence.

    Example of run-length encoding:

    - Original list:
      1 2 5 6 7 8 9 125

    - Delta-encoded list:
      1 1 3 1 1 1 1 106

    - Run-length-encoded list:

      (1,1,2), (5,1,5), (125,1,1)

    For convenience, in each RLE unit we use the first element of the original
    unit and not the delta value from the previous unit.

    :arg seq: a delta-encoded sequence as returned by :func:`_delta_encode`.
    :returns: the encoded list. Each element of the list is a three-tuple
        containing the first element of the unit, the delta value of the unit
        and its length. An empty input yields an empty list.

    '''

    if not seq:
        return []

    encoded = []

    # The delta of the very first unit is preset to 1, so that the first
    # element only starts a run if it is followed by its direct successor.
    start, step, length = seq[0], 1, 1
    for delta in seq[1:]:
        if step is None:
            # Second element of a freshly opened unit: it defines the step
            step = delta
            length += 1
        elif delta != step:
            # Close the current unit and open a new one. The step of the new
            # unit is not known yet, because `delta` is just the jump from
            # the previous unit. The element we jumped to *is* part of the
            # new unit, hence its length starts from one.
            encoded.append((start, step, length))
            start = start + step*(length - 1) + delta
            step, length = None, 1
        else:
            length += 1

    # A trailing single-element unit never got a step; conveniently use 1
    if step is None:
        step = 1

    encoded.append((start, step, length))
    return encoded


def _parse_node(nodename):
    '''Split a node name into its base name and its trailing numeric id.

    Only a run of digits at the very end of the name is treated as the node
    id, so that no part of the name is ever dropped. Names that do not end in
    a digit or that consist solely of digits are treated as opaque names.

    :arg nodename: the name of the node.
    :returns: a tuple ``(basename, width, nodeid)``, where ``width`` is the
        number of digits used for the id in the name (including any leading
        zeros). For opaque names, the tuple ``(nodename, 0, None)`` is
        returned.

    '''

    m = re.fullmatch(r'(.*\D)(\d+)', nodename)
    if m is None:
        return nodename, 0, None

    basename, digits = m.groups()

    # `int()` handles leading zeros, including all-zero ids such as `x000`
    return basename, len(digits), int(digits)


def _count_digits(n):
    '''Count digits of a non-negative decimal integer.'''

    num_digits = 1

    # Use `>=` and integer division, so that exact powers of ten are counted
    # correctly (10 has two digits, 100 has three etc.)
    while n >= 10:
        n //= 10
        num_digits += 1

    return num_digits


def _common_prefix(s1, s2):
    '''Split two strings at the end of their common prefix.

    :returns: a tuple ``(prefix, rest1, rest2)``, where ``rest1`` and
        ``rest2`` are the remainders of ``s1`` and ``s2`` after the prefix.

    '''

    pos = 0
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            break

        pos += 1

    return s1[:pos], s1[pos:], s2[pos:]


class _NodeGroup:
    '''A group of nodes sharing the same base name and the same id width.

    The string representation of the group is the abbreviated form of its
    nodes. Node ids must be added in ascending order. An id of :class:`None`
    denotes a node whose name carries no numeric id.

    '''

    def __init__(self, name, width):
        self.__name = name
        self.__width = width
        self.__nodes = []

    @property
    def name(self):
        return self.__name

    @property
    def width(self):
        return self.__width

    @property
    def nodes(self):
        return self.__nodes

    def add(self, nid):
        self.__nodes.append(nid)

    def __repr__(self):
        return (f'{type(self).__name__}({self.name!r}, {self.width!r}, '
                f'nodes={self.nodes!r})')

    def __str__(self):
        # Nodes without a numeric id are listed verbatim by their name
        abbrev = [self.name for nid in self.nodes if nid is None]
        node_ids = [nid for nid in self.nodes if nid is not None]
        for start, delta, size in _rl_encode(_delta_encode(node_ids)):
            if size == 1 or delta != 1:
                # Single nodes and runs with a step other than one are simply
                # listed one by one
                for i in range(size):
                    s_id = str(start + i*delta).zfill(self.width)
                    abbrev.append(f'{self.name}{s_id}')

                continue

            last = start + size - 1
            digits_last = _count_digits(last)

            # Leading zeros shared by all the ids in the range are moved out
            # of the brackets
            nd_range = self.name + '0' * max(self.width - digits_last, 0)
            s_first = str(start).zfill(digits_last)
            s_last = str(last)
            prefix, s_first, s_last = _common_prefix(s_first, s_last)
            abbrev.append(f'{nd_range}{prefix}[{s_first}-{s_last}]')

        return ','.join(abbrev)

    def __hash__(self):
        return hash((self.name, self.width))

    def __eq__(self, other):
        if not isinstance(other, _NodeGroup):
            return NotImplemented

        return self.name == other.name and self.width == other.width


def nodelist_abbrev(nodes):
    '''Create an abbreviated string representation of the node list.

    For example, the node list

    .. code-block:: python

        ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']

    will be abbreviated as follows:

    .. code-block:: none

        nid00[1-2],nid01[0-2],nid51[0-1]

    Node names that do not end in a numeric id are listed verbatim.

    .. versionadded:: 3.5.3

    :arg nodes: The node list to abbreviate.
    :returns: The abbreviated list representation.
    :raises TypeError: if ``nodes`` is a string or is not a sequence.

    '''

    # The algorithm used for abbreviating the list is a standard index
    # compression algorithm, the run-length encoding. We first delta encode
    # the nodes based on their id, which we retrieve from their name, and then
    # run-length encode the list of deltas. The resulting run-length-encoded
    # units are then used to generate the abbreviated representation using
    # some formatting sugar. The abbreviation is handled in the `__str__()`
    # function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
    # group nodes in the list that belong to the same family, namely have the
    # same prefix and the same id width. We then apply the run-length encoding
    # to each group independently.

    if isinstance(nodes, str):
        raise TypeError('nodes argument cannot be a string')

    if not isinstance(nodes, collections.abc.Sequence):
        raise TypeError('nodes argument must be a Sequence')

    # Within a group all ids have the same width, so the lexicographic order
    # of the names is also the numeric order of the ids
    node_groups = {}
    for n in sorted(nodes):
        basename, width, nid = _parse_node(n)
        key = (basename, width)
        if key not in node_groups:
            node_groups[key] = _NodeGroup(basename, width)

        node_groups[key].add(nid)

    return ','.join(str(ng) for ng in node_groups.values())
def test_nodelist_abbrev():
    '''Check the abbreviated representation produced for node lists.

    The node lists are shuffled before being abbreviated; the expected
    strings are the same regardless of the input order.
    '''

    abbrev = util.nodelist_abbrev

    # Two node families, each with a long contiguous range and some outliers
    nids = [f'nid{n:03}' for n in range(5, 20)]
    cids = [f'cid{n:03}' for n in range(20)]
    random.shuffle(nids)
    random.shuffle(cids)
    nids = ['nid001', 'nid002', *nids, 'nid125']
    cids.extend(['cid055', 'cid056'])

    mixed = nids + cids
    random.shuffle(mixed)

    assert abbrev(nids) == 'nid00[1-2],nid0[05-19],nid125'
    assert abbrev(cids) == 'cid0[00-19],cid05[5-6]'
    assert abbrev(mixed) == (
        'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
    )

    # Test non-contiguous nodes
    nids = [f'nid{n:03}' for i in range(3) for n in range(10*i, 10*i+5)]
    random.shuffle(nids)
    assert abbrev(nids) == 'nid00[0-4],nid01[0-4],nid02[0-4]'
    assert abbrev(['nid01', 'nid10', 'nid20']) == 'nid01,nid10,nid20'

    # Test degenerate inputs
    assert abbrev([]) == ''
    assert abbrev(['nid001']) == 'nid001'

    # Test node duplicates
    assert abbrev(['nid001', 'nid001', 'nid002']) == 'nid001,nid00[1-2]'

    # Test invalid argument types
    with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
        abbrev(1)

    with pytest.raises(TypeError, match='nodes argument cannot be a string'):
        abbrev('foo')