From 7a1dbb15f00f0c8dfdba651c30466a9ec777a803 Mon Sep 17 00:00:00 2001
From: jgp <jgp@cscs.ch>
Date: Tue, 6 Apr 2021 13:12:05 +0200
Subject: [PATCH 1/6] Abbreviate nodelist using hostlist format

---
 reframe/core/schedulers/__init__.py | 13 +++++++++++++
 reframe/core/schedulers/slurm.py    |  4 +++-
 reframe/frontend/cli.py             |  7 +++++++
 reframe/frontend/statistics.py      |  2 ++
 reframe/schemas/config.json         |  2 ++
 5 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py
index 287eb35d3b..b786cc62c1 100644
--- a/reframe/core/schedulers/__init__.py
+++ b/reframe/core/schedulers/__init__.py
@@ -201,6 +201,7 @@ def __init__(self,
         self._exitcode = None
         self._state = None
         self._nodelist = None
+        self._hostlist = None
         self._submit_time = None
         self._completion_time = None
 
@@ -326,6 +327,18 @@ def nodelist(self):
         '''
         return self._nodelist
 
+    @property
+    def hostlist(self):
+        '''The hostlist of this job.
+
+        The value of this field is scheduler-specific.
+
+        .. versionadded:: 3.6
+
+        :type: :class`str` or :class:`None`
+        '''
+        return self._hostlist
+
     @property
     def submit_time(self):
         return self._submit_time
diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py
index bd019c9df3..9ad0255abd 100644
--- a/reframe/core/schedulers/slurm.py
+++ b/reframe/core/schedulers/slurm.py
@@ -368,7 +368,9 @@ def _update_nodelist(self, job, nodespec):
 
         if nodespec and nodespec != 'None assigned':
             job._nodelist = [n.name for n in self._get_nodes_by_name(nodespec)]
-
+            if rt.runtime().get_option('general/0/report_hostlist'):
+                job._hostlist = nodespec
+    
     def _update_completion_time(self, job, timestamps):
         if job._completion_time is not None:
             return
diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py
index c6a14459bd..93e031aca3 100644
--- a/reframe/frontend/cli.py
+++ b/reframe/frontend/cli.py
@@ -450,6 +450,13 @@ def main():
         action='store_true',
         help='Use a login shell for job scripts'
     )
+    argparser.add_argument(
+        dest='report_hostlist',
+        envvar='RFM_REPORT_HOSTLIST',
+        configvar='general/report_hostlist',
+        action='store_true',
+        help='Use hostlist format to report nodelist'
+    )
 
     # Parse command line
     options = argparser.parse_args()
diff --git a/reframe/frontend/statistics.py b/reframe/frontend/statistics.py
index 16db93d2d5..95e8a26a6d 100644
--- a/reframe/frontend/statistics.py
+++ b/reframe/frontend/statistics.py
@@ -112,6 +112,7 @@ def json(self, force=False):
                     'maintainers': check.maintainers,
                     'name': check.name,
                     'nodelist': [],
+                    'hostlist': None,
                     'outputdir': None,
                     'perfvars': None,
                     'result': None,
@@ -140,6 +141,7 @@ def json(self, force=False):
                     entry['job_stderr'] = check.stderr.evaluate()
                     entry['job_stdout'] = check.stdout.evaluate()
                     entry['nodelist'] = check.job.nodelist or []
+                    entry['hostlist'] = check.job.hostlist or ''
 
                 if check.build_job:
                     entry['build_stderr'] = check.build_stderr.evaluate()
diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json
index 1d5b143b1c..11c86989d7 100644
--- a/reframe/schemas/config.json
+++ b/reframe/schemas/config.json
@@ -448,6 +448,7 @@
                     "non_default_craype": {"type": "boolean"},
                     "purge_environment": {"type": "boolean"},
                     "report_file": {"type": "string"},
+                    "report_hostlist": {"type": "boolean"},
                     "save_log_files": {"type": "boolean"},
                     "target_systems": {"$ref": "#/defs/system_ref"},
                     "timestamp_dirs": {"type": "string"},
@@ -486,6 +487,7 @@
         "general/non_default_craype": false,
         "general/purge_environment": false,
         "general/report_file": "${HOME}/.reframe/reports/run-report.json",
+        "general/report_hostlist": false,
         "general/save_log_files": false,
         "general/target_systems": ["*"],
         "general/timestamp_dirs": "",

From 3934a7d71f4483575af9c0b59dc5b2f474604cb7 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Tue, 13 Apr 2021 17:59:06 +0200
Subject: [PATCH 2/6] Add utility function for abbreviating node lists

---
 reframe/utility/__init__.py | 232 ++++++++++++++++++++++++++++++++++++
 unittests/test_utility.py   |  39 ++++++
 2 files changed, 271 insertions(+)

diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
index 0d6f7d615e..05d7cb3990 100644
--- a/reframe/utility/__init__.py
+++ b/reframe/utility/__init__.py
@@ -643,6 +643,238 @@ def _is_valid_for_env(m, e):
                     yield (p.fullname, e.name, m)
 
 
+def _delta_encode(seq):
+    '''Delta-encode sequence.
+
+    The input list must be at least of size 1.
+
+    :returns: the encoded list. The first element of the encoded sequence is
+        the first element of the original sequence.
+
+    '''
+
+    assert len(seq) >= 1
+
+    ret = [seq[0]]
+    for i in range(1, len(seq)):
+        ret.append(seq[i] - seq[i-1])
+
+    return ret
+
+
+def _rl_encode(seq):
+    '''Run-length encode a delta-encoded sequence.
+
+    The input list must be at least of size 1.
+
+    :returns: the encoded list. Each element of the list is a three-tuple
+        containing the first element of the unit, the delta value of the unit
+        and its length.
+
+    '''
+    assert len(seq) >= 1
+
+    encoded = []
+    curr_unit = [seq[0], 1, 1]     # current RLE unit
+    for delta in seq[1:]:
+        uelem, udelta, ulen = curr_unit
+        if udelta is None:
+            curr_unit[1] = delta
+            curr_unit[2] += 1
+        elif udelta != delta:
+            # New unit; we don't set the delta of the new unit here, because
+            # `delta` is just the jump for the previous unit. The length of
+            # the unit is initialized to one, because the last processed
+            # element *is* part of the new unit.
+            encoded.append(tuple(curr_unit))
+            curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
+        else:
+            # Increase unit
+            curr_unit[2] += 1
+
+    # Fix last unit and add it to the encoded list
+    if curr_unit[1] is None:
+        # Conveniently set delta to 1
+        curr_unit[1] = 1
+
+    encoded.append(tuple(curr_unit))
+    return encoded
+
+
+class _NodeElem:
+    def __init__(self, nodename):
+        m = re.search('(^\D+)(\d+)', nodename)
+        if m is None:
+            self._basename = nodename
+            self._nodeid = -1
+        else:
+            self._basename = m.group(1)
+            self._nodeid = int(m.group(2).lstrip('0'))
+
+    @property
+    def basename(self):
+        return self._basename
+
+    @property
+    def nodeid(self):
+        return self._nodeid
+
+    def __repr__(self):
+        return f'{type(self)}({self.basename}, {self.nodeid})'
+
+
+def _parse_node(nodename):
+    m = re.search('(^\D+)(\d+)', nodename)
+    if m is None:
+        basename = nodename
+        width = 0
+        nodeid = None
+    else:
+        basename = m.group(1)
+        _id = m.group(2).lstrip('0')
+        if _id == '':
+            # This is to cover nodes with id=0, e.g., x000
+            _id = '0'
+
+        nodeid = int(_id)
+        width = len(m.group(2))
+
+    return basename, width, nodeid
+
+
+def _count_digits(n):
+    '''Count digits of a decimal number.'''
+
+    num_digits = 1
+    while n > 10:
+        n /= 10
+        num_digits += 1
+
+    return num_digits
+
+
+def _common_prefix(s1, s2):
+    pos = 0
+    for i in range(min(len(s1), len(s2))):
+        if s1[i] != s2[i]:
+            break
+
+        pos += 1
+
+    return s1[:pos], s1[pos:], s2[pos:]
+
+
+class _NodeGroup:
+    def __init__(self, name, width):
+        self.__name = name
+        self.__width = width
+        self.__nodes = []
+
+    @property
+    def name(self):
+        return self.__name
+
+    @property
+    def width(self):
+        return self.__width
+
+    @property
+    def nodes(self):
+        return self.__nodes
+
+    def add(self, nid):
+        self.__nodes.append(nid)
+
+    def __str__(self):
+        abbrev = []
+        encoded = _rl_encode(_delta_encode(self.nodes))
+        for unit in encoded:
+            start, delta, size = unit
+            if size == 1:
+                s_start = str(start).zfill(self.width)
+                abbrev.append(f'{self.name}{s_start}')
+            elif delta != 1:
+                # We simply unpack node lists with delta != 1
+                for i in range(size):
+                    s_start = str(start + i*delta).zfill(self.width)
+                    abbrev.append(f'{self.name}{s_start}')
+            else:
+                last = start + delta*(size-1)
+                digits_last = _count_digits(last)
+                pad = self.width - digits_last
+                nd_range = self.name
+                if pad > 0:
+                    for _ in range(pad):
+                        nd_range += '0'
+
+                s_first = str(start).zfill(digits_last)
+                s_last  = str(last)
+                prefix, s_first, s_last = _common_prefix(s_first, s_last)
+                nd_range += f'{prefix}[{s_first}-{s_last}]'
+                abbrev.append(nd_range)
+
+        return ','.join(abbrev)
+
+    def __hash__(self):
+        return hash(self.name) ^ hash(self.width)
+
+    def __eq__(self, other):
+        if not isinstance(other, _NodeGroup):
+            return NotImplemented
+
+        return self.name == other.name and self.width == other.width
+
+
+def nodelist_abbrev(nodes):
+    '''Create an abbreviated string representation of the node list.
+
+    For example, the node list
+
+    .. code-block:: python
+
+       ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']
+
+    will be abbreviated as follows:
+
+    .. code-block:: none
+
+       nid00[1-2],nid0[10-12],nid51[0-1]
+
+
+    .. versionadded:: 3.5.3
+
+    :arg nodes: The node list to abbreviate.
+    :returns: The abbreviated list representation.
+
+    '''
+
+    # The algorithm used for abbreviating the list is a standard index
+    # compression algorithm, the run-length encoding. We first delta encode
+    # the nodes based on their id, which we retrieve from their name, and then
+    # run-length encode the list of deltas. The resulting run-length-encoded
+    # units are then used to generate the abbreviated representation using
+    # some formatting sugar. The abbreviation is handled in the `__str__()`
+    # function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
+    # group nodes in the list that belong to the same family, namely have the
+    # same prefix. We then apply the run-length encoding to each group
+    # independently.
+
+    if isinstance(nodes, str):
+        raise TypeError('nodes argument cannot be a string')
+
+    if not isinstance(nodes, collections.abc.Sequence):
+        raise TypeError('nodes argument must be a Sequence')
+
+    node_groups = {}
+    for n in sorted(nodes):
+        basename, width, nid = _parse_node(n)
+        ng = _NodeGroup(basename, width)
+        node_groups.setdefault(ng, ng)
+        node_groups[ng].add(nid)
+
+    return ','.join(str(ng) for ng in node_groups)
+
+
 class ScopedDict(UserDict):
     '''This is a special dictionary that imposes scopes on its keys.
 
diff --git a/unittests/test_utility.py b/unittests/test_utility.py
index f322707d15..17613e7a2a 100644
--- a/unittests/test_utility.py
+++ b/unittests/test_utility.py
@@ -1723,3 +1723,42 @@ def foo():
     assert util.is_copyable(len)
     assert util.is_copyable(int)
     assert not util.is_copyable(foo())
+
+
+def test_nodelist_abbrev():
+    nid_nodes = [f'nid{n:03}' for n in range(5, 20)]
+    cid_nodes = [f'cid{n:03}' for n in range(20)]
+
+    random.shuffle(nid_nodes)
+    random.shuffle(cid_nodes)
+    nid_nodes.insert(0, 'nid002')
+    nid_nodes.insert(0, 'nid001')
+    nid_nodes.append('nid125')
+    cid_nodes += ['cid055', 'cid056']
+
+    all_nodes = nid_nodes + cid_nodes
+    random.shuffle(all_nodes)
+
+    nodelist = util.nodelist_abbrev
+    assert nodelist(nid_nodes) == 'nid00[1-2],nid0[05-19],nid125'
+    assert nodelist(cid_nodes) == 'cid0[00-19],cid05[5-6]'
+    assert nodelist(all_nodes) == (
+        'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
+    )
+
+    # Test non-contiguous nodes
+    nid_nodes = []
+    for i in range(3):
+        nid_nodes += [f'nid{n:03}' for n in range(10*i, 10*i+5)]
+
+    random.shuffle(nid_nodes)
+    assert nodelist(nid_nodes) == 'nid00[0-4],nid01[0-4],nid02[0-4]'
+    assert nodelist(['nid01', 'nid10', 'nid20']) == 'nid01,nid10,nid20'
+    assert nodelist([]) == ''
+    assert nodelist(['nid001']) == 'nid001'
+
+    with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
+        nodelist(1)
+
+    with pytest.raises(TypeError, match='nodes argument cannot be a string'):
+        nodelist('foo')

From 146582d059103eca867ad6e9c4a3501e95db60b2 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Thu, 15 Apr 2021 13:41:55 +0200
Subject: [PATCH 3/6] Remove dead code and fix regex string

---
 reframe/utility/__init__.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
index 05d7cb3990..9b6d5dd5ce 100644
--- a/reframe/utility/__init__.py
+++ b/reframe/utility/__init__.py
@@ -701,28 +701,6 @@ def _rl_encode(seq):
     return encoded
 
 
-class _NodeElem:
-    def __init__(self, nodename):
-        m = re.search('(^\D+)(\d+)', nodename)
-        if m is None:
-            self._basename = nodename
-            self._nodeid = -1
-        else:
-            self._basename = m.group(1)
-            self._nodeid = int(m.group(2).lstrip('0'))
-
-    @property
-    def basename(self):
-        return self._basename
-
-    @property
-    def nodeid(self):
-        return self._nodeid
-
-    def __repr__(self):
-        return f'{type(self)}({self.basename}, {self.nodeid})'
-
-
 def _parse_node(nodename):
     m = re.search('(^\D+)(\d+)', nodename)
     if m is None:

From 21a6db9e321d88fa4b27dabefc90391a3e765841 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Thu, 15 Apr 2021 13:49:21 +0200
Subject: [PATCH 4/6] More comments and PEP8 fixes

---
 reframe/utility/__init__.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
index 9b6d5dd5ce..e2e88668dc 100644
--- a/reframe/utility/__init__.py
+++ b/reframe/utility/__init__.py
@@ -648,6 +648,18 @@ def _delta_encode(seq):
 
     The input list must be at least of size 1.
 
+    Example of delta encoding:
+
+    - Input list:
+       1 2 5 6 7 8 9 125
+
+    - Output list:
+       1 1 3 1 1 1 1 106
+       ^
+       |
+     First element
+     of the original list.
+
     :returns: the encoded list. The first element of the encoded sequence is
         the first element of the original sequence.
 
@@ -667,6 +679,21 @@ def _rl_encode(seq):
 
     The input list must be at least of size 1.
 
+    Example of run-length encoding:
+
+    - Original list:
+       1 2 5 6 7 8 9 125
+
+    - Delta-encoded list:
+       1 1 3 1 1 1 1 106
+
+    - Run-length-encoded list:
+
+       (1,1,2), (5,1,5), (125,1,1)
+
+    For convenience, in each RLE unit we use the first element of the original
+    unit and not the delta value from the previous unit.
+
     :returns: the encoded list. Each element of the list is a three-tuple
         containing the first element of the unit, the delta value of the unit
         and its length.
@@ -702,7 +729,7 @@ def _rl_encode(seq):
 
 
 def _parse_node(nodename):
-    m = re.search('(^\D+)(\d+)', nodename)
+    m = re.search(r'(^\D+)(\d+)', nodename)
     if m is None:
         basename = nodename
         width = 0

From f13e166d1d76ef1d5be9abe90c5abea4362db329 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Fri, 16 Apr 2021 15:26:39 +0200
Subject: [PATCH 5/6] Add unit test for node duplicates

---
 unittests/test_utility.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/unittests/test_utility.py b/unittests/test_utility.py
index caabc76220..11aa8c7bf6 100644
--- a/unittests/test_utility.py
+++ b/unittests/test_utility.py
@@ -1746,6 +1746,9 @@ def test_nodelist_abbrev():
     assert nodelist([]) == ''
     assert nodelist(['nid001']) == 'nid001'
 
+    # Test node duplicates
+    assert nodelist(['nid001', 'nid001', 'nid002']) == 'nid001,nid00[1-2]'
+
     with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
         nodelist(1)
 

From 5b69e4530dd008d46e968ed266cc90ea878fd5d9 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Fri, 16 Apr 2021 16:33:06 +0200
Subject: [PATCH 6/6] Revert changes related to configuration

- Node lists are always abbreviated in the `FAILURE INFO` output but not in the
  JSON report.
---
 reframe/core/schedulers/__init__.py | 13 -------------
 reframe/core/schedulers/slurm.py    |  4 +---
 reframe/frontend/cli.py             |  7 -------
 reframe/frontend/statistics.py      |  8 ++++----
 reframe/schemas/config.json         |  2 --
 5 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py
index b786cc62c1..287eb35d3b 100644
--- a/reframe/core/schedulers/__init__.py
+++ b/reframe/core/schedulers/__init__.py
@@ -201,7 +201,6 @@ def __init__(self,
         self._exitcode = None
         self._state = None
         self._nodelist = None
-        self._hostlist = None
         self._submit_time = None
         self._completion_time = None
 
@@ -327,18 +326,6 @@ def nodelist(self):
         '''
         return self._nodelist
 
-    @property
-    def hostlist(self):
-        '''The hostlist of this job.
-
-        The value of this field is scheduler-specific.
-
-        .. versionadded:: 3.6
-
-        :type: :class`str` or :class:`None`
-        '''
-        return self._hostlist
-
     @property
     def submit_time(self):
         return self._submit_time
diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py
index 9ad0255abd..bd019c9df3 100644
--- a/reframe/core/schedulers/slurm.py
+++ b/reframe/core/schedulers/slurm.py
@@ -368,9 +368,7 @@ def _update_nodelist(self, job, nodespec):
 
         if nodespec and nodespec != 'None assigned':
             job._nodelist = [n.name for n in self._get_nodes_by_name(nodespec)]
-            if rt.runtime().get_option('general/0/report_hostlist'):
-                job._hostlist = nodespec
-    
+
     def _update_completion_time(self, job, timestamps):
         if job._completion_time is not None:
             return
diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py
index 580dca139c..e107ec5c67 100644
--- a/reframe/frontend/cli.py
+++ b/reframe/frontend/cli.py
@@ -450,13 +450,6 @@ def main():
         action='store_true',
         help='Use a login shell for job scripts'
     )
-    argparser.add_argument(
-        dest='report_hostlist',
-        envvar='RFM_REPORT_HOSTLIST',
-        configvar='general/report_hostlist',
-        action='store_true',
-        help='Use hostlist format to report nodelist'
-    )
 
     # Parse command line
     options = argparser.parse_args()
diff --git a/reframe/frontend/statistics.py b/reframe/frontend/statistics.py
index 95e8a26a6d..963676d519 100644
--- a/reframe/frontend/statistics.py
+++ b/reframe/frontend/statistics.py
@@ -7,6 +7,7 @@
 import traceback
 import reframe.core.runtime as rt
 import reframe.core.exceptions as errors
+import reframe.utility as util
 
 
 class TestStats:
@@ -112,7 +113,6 @@ def json(self, force=False):
                     'maintainers': check.maintainers,
                     'name': check.name,
                     'nodelist': [],
-                    'hostlist': None,
                     'outputdir': None,
                     'perfvars': None,
                     'result': None,
@@ -141,7 +141,6 @@ def json(self, force=False):
                     entry['job_stderr'] = check.stderr.evaluate()
                     entry['job_stdout'] = check.stdout.evaluate()
                     entry['nodelist'] = check.job.nodelist or []
-                    entry['hostlist'] = check.job.hostlist or ''
 
                 if check.build_job:
                     entry['build_stderr'] = check.build_stderr.evaluate()
@@ -218,8 +217,9 @@ def print_failure_report(self, printer):
             printer.info(f"  * System partition: {r['system']}")
             printer.info(f"  * Environment: {r['environment']}")
             printer.info(f"  * Stage directory: {r['stagedir']}")
-            nodelist = ','.join(r['nodelist']) if r['nodelist'] else None
-            printer.info(f"  * Node list: {nodelist}")
+            printer.info(
+                f"  * Node list: {util.nodelist_abbrev(r['nodelist'])}"
+            )
             job_type = 'local' if r['scheduler'] == 'local' else 'batch job'
             jobid = r['jobid']
             printer.info(f"  * Job type: {job_type} (id={r['jobid']})")
diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json
index 11c86989d7..1d5b143b1c 100644
--- a/reframe/schemas/config.json
+++ b/reframe/schemas/config.json
@@ -448,7 +448,6 @@
                     "non_default_craype": {"type": "boolean"},
                     "purge_environment": {"type": "boolean"},
                     "report_file": {"type": "string"},
-                    "report_hostlist": {"type": "boolean"},
                     "save_log_files": {"type": "boolean"},
                     "target_systems": {"$ref": "#/defs/system_ref"},
                     "timestamp_dirs": {"type": "string"},
@@ -487,7 +486,6 @@
         "general/non_default_craype": false,
         "general/purge_environment": false,
         "general/report_file": "${HOME}/.reframe/reports/run-report.json",
-        "general/report_hostlist": false,
         "general/save_log_files": false,
         "general/target_systems": ["*"],
         "general/timestamp_dirs": "",