Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions reframe/frontend/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import traceback
import reframe.core.runtime as rt
import reframe.core.exceptions as errors
import reframe.utility as util


class TestStats:
Expand Down Expand Up @@ -216,8 +217,9 @@ def print_failure_report(self, printer):
printer.info(f" * System partition: {r['system']}")
printer.info(f" * Environment: {r['environment']}")
printer.info(f" * Stage directory: {r['stagedir']}")
nodelist = ','.join(r['nodelist']) if r['nodelist'] else None
printer.info(f" * Node list: {nodelist}")
printer.info(
f" * Node list: {util.nodelist_abbrev(r['nodelist'])}"
)
job_type = 'local' if r['scheduler'] == 'local' else 'batch job'
jobid = r['jobid']
printer.info(f" * Job type: {job_type} (id={r['jobid']})")
Expand Down
237 changes: 237 additions & 0 deletions reframe/utility/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,243 @@ def _is_valid_for_env(m, e):
yield (p.fullname, e.name, m)


def _delta_encode(seq):
'''Delta-encode sequence.

The input list must be at least of size 1.

Example of delta encoding:

- Input list:
1 2 5 6 7 8 9 125

- Output list:
1 1 3 1 1 1 1 106
^
|
First element
of the original list.

:returns: the encoded list. The first element of the encoded sequence is
the first element of the original sequence.

'''

assert len(seq) >= 1

ret = [seq[0]]
for i in range(1, len(seq)):
ret.append(seq[i] - seq[i-1])

return ret


def _rl_encode(seq):
'''Run-length encode a delta-encoded sequence.

The input list must be at least of size 1.

Example of run-length encoding:

- Original list:
1 2 5 6 7 8 9 125

- Delta-encoded list:
1 1 3 1 1 1 1 106

- Run-length-encoded list:

(1,1,2), (5,1,5), (125,1,1)

For convenience, in each RLE unit we use the first element of the original
unit and not the delta value from the previous unit.

:returns: the encoded list. Each element of the list is a three-tuple
containing the first element of the unit, the delta value of the unit
and its length.

'''
assert len(seq) >= 1

encoded = []
curr_unit = [seq[0], 1, 1] # current RLE unit
for delta in seq[1:]:
uelem, udelta, ulen = curr_unit
if udelta is None:
curr_unit[1] = delta
curr_unit[2] += 1
elif udelta != delta:
# New unit; we don't set the delta of the new unit here, because
# `delta` is just the jump for the previous unit. The length of
# the unit is initialized to one, because the last processed
# element *is* part of the new unit.
encoded.append(tuple(curr_unit))
curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
else:
# Increase unit
curr_unit[2] += 1

# Fix last unit and add it to the encoded list
if curr_unit[1] is None:
# Conveniently set delta to 1
curr_unit[1] = 1

encoded.append(tuple(curr_unit))
return encoded


def _parse_node(nodename):
m = re.search(r'(^\D+)(\d+)', nodename)
if m is None:
basename = nodename
width = 0
nodeid = None
else:
basename = m.group(1)
_id = m.group(2).lstrip('0')
if _id == '':
# This is to cover nodes with id=0, e.g., x000
_id = '0'

nodeid = int(_id)
width = len(m.group(2))

return basename, width, nodeid


def _count_digits(n):
'''Count digits of a decimal number.'''

num_digits = 1
while n > 10:
n /= 10
num_digits += 1

return num_digits


def _common_prefix(s1, s2):
pos = 0
for i in range(min(len(s1), len(s2))):
if s1[i] != s2[i]:
break

pos += 1

return s1[:pos], s1[pos:], s2[pos:]


class _NodeGroup:
def __init__(self, name, width):
self.__name = name
self.__width = width
self.__nodes = []

@property
def name(self):
return self.__name

@property
def width(self):
return self.__width

@property
def nodes(self):
return self.__nodes

def add(self, nid):
self.__nodes.append(nid)

def __str__(self):
abbrev = []
encoded = _rl_encode(_delta_encode(self.nodes))
for unit in encoded:
start, delta, size = unit
if size == 1:
s_start = str(start).zfill(self.width)
abbrev.append(f'{self.name}{s_start}')
elif delta != 1:
# We simply unpack node lists with delta != 1
for i in range(size):
s_start = str(start + i*delta).zfill(self.width)
abbrev.append(f'{self.name}{s_start}')
else:
last = start + delta*(size-1)
digits_last = _count_digits(last)
pad = self.width - digits_last
nd_range = self.name
if pad > 0:
for _ in range(pad):
nd_range += '0'

s_first = str(start).zfill(digits_last)
s_last = str(last)
prefix, s_first, s_last = _common_prefix(s_first, s_last)
nd_range += f'{prefix}[{s_first}-{s_last}]'
abbrev.append(nd_range)

return ','.join(abbrev)

def __hash__(self):
return hash(self.name) ^ hash(self.width)

def __eq__(self, other):
if not isinstance(other, _NodeGroup):
return NotImplemented

return self.name == other.name and self.width == other.width


def nodelist_abbrev(nodes):
    '''Create an abbreviated string representation of the node list.

    For example, the node list

    .. code-block:: python

        ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']

    will be abbreviated as follows:

    .. code-block:: none

        nid00[1-2],nid01[0-2],nid51[0-1]


    .. versionadded:: 3.5.3

    :arg nodes: The node list to abbreviate.
    :returns: The abbreviated list representation.
    :raises TypeError: if ``nodes`` is a string or is not a sequence.

    '''

    # The abbreviation relies on a standard index compression technique,
    # the run-length encoding. The node names are first sorted and split
    # in families, each represented by a `_NodeGroup`: nodes belong to the
    # same family if they have the same base name and their ids are
    # written with the same number of digits. The ids of each family are
    # then delta-encoded and the deltas are run-length encoded
    # independently of the other families. Turning the encoded units into
    # text, along with the formatting sugar, is the job of the `__str__()`
    # function of the `_NodeGroup`.

    if isinstance(nodes, str):
        raise TypeError('nodes argument cannot be a string')

    if not isinstance(nodes, collections.abc.Sequence):
        raise TypeError('nodes argument must be a Sequence')

    groups = {}
    for nodename in sorted(nodes):
        basename, width, nodeid = _parse_node(nodename)

        # A fresh group is only a lookup key here: groups compare equal on
        # name and width, so `setdefault()` hands back the group that was
        # registered first for this family
        key = _NodeGroup(basename, width)
        groups.setdefault(key, key).add(nodeid)

    return ','.join(map(str, groups))


class ScopedDict(UserDict):
'''This is a special dictionary that imposes scopes on its keys.

Expand Down
42 changes: 42 additions & 0 deletions unittests/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -1712,3 +1712,45 @@ def foo():
assert util.is_copyable(len)
assert util.is_copyable(int)
assert not util.is_copyable(foo())


def test_nodelist_abbrev():
    '''Check the abbreviation of contiguous, sparse and invalid lists.'''

    abbrev = util.nodelist_abbrev

    # Two node families, each with a contiguous range and a few outliers;
    # the input is shuffled, since the result must not depend on its order
    nid_nodes = [f'nid{n:03}' for n in range(5, 20)]
    cid_nodes = [f'cid{n:03}' for n in range(20)]
    random.shuffle(nid_nodes)
    random.shuffle(cid_nodes)
    nid_nodes = ['nid001', 'nid002', *nid_nodes, 'nid125']
    cid_nodes.extend(['cid055', 'cid056'])

    all_nodes = nid_nodes + cid_nodes
    random.shuffle(all_nodes)

    nid_abbrev = 'nid00[1-2],nid0[05-19],nid125'
    cid_abbrev = 'cid0[00-19],cid05[5-6]'
    assert abbrev(nid_nodes) == nid_abbrev
    assert abbrev(cid_nodes) == cid_abbrev
    assert abbrev(all_nodes) == (
        'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
    )

    # Test non-contiguous nodes
    nid_nodes = [f'nid{n:03}'
                 for i in range(3) for n in range(10*i, 10*i+5)]
    random.shuffle(nid_nodes)
    assert abbrev(nid_nodes) == 'nid00[0-4],nid01[0-4],nid02[0-4]'
    assert abbrev(['nid01', 'nid10', 'nid20']) == 'nid01,nid10,nid20'

    # Test the degenerate cases
    assert abbrev([]) == ''
    assert abbrev(['nid001']) == 'nid001'

    # Test node duplicates
    assert abbrev(['nid001', 'nid001', 'nid002']) == 'nid001,nid00[1-2]'

    # Test invalid arguments
    with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
        abbrev(1)

    with pytest.raises(TypeError, match='nodes argument cannot be a string'):
        abbrev('foo')