From 7745dba6a0a12e42802a6a7ff81ec1de523fb79d Mon Sep 17 00:00:00 2001
From: Theofilos Manitaras <manitaras@cscs.ch>
Date: Wed, 12 Jan 2022 11:37:36 +0100
Subject: [PATCH 1/3] Make Slurm polling aware of Heterogeneous Jobs

---
 reframe/core/schedulers/slurm.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py
index 6adacd47f6..9fb88589c0 100644
--- a/reframe/core/schedulers/slurm.py
+++ b/reframe/core/schedulers/slurm.py
@@ -90,12 +90,16 @@ class SlurmJobScheduler(sched.JobScheduler):
     # standard job state polling using sacct.
     SACCT_SQUEUE_RATIO = 10
 
-    # This matches the format for both normal jobs as well as job arrays.
+    # This matches the format for both normal and heterogeneous jobs,
+    # as well as job arrays.
+    # For heterogeneous jobs, the job_id has the following format:
+    # <het_job_id>+<het_job_offset>
+    # See (`Heterogeneous Job Support<https://slurm.schedmd.com/heterogeneous_jobs.html>`__)  # noqa: E501
     # For job arrays the job_id has one of the following formats:
     #   * <job_id>_<array_task_id>
     #   * <job_id>_[<array_task_id_start>-<array_task_id_end>]
     # See (`Job Array Support<https://slurm.schedmd.com/job_array.html`__)
-    _state_patt = r'\d+(?:_\d+|_\[\d+-\d+\])?'
+    _jobid_patt = r'\d+(?:\+\d+|_\d+|_\[\d+-\d+\])?'
 
     def __init__(self):
         self._prefix = '#SBATCH'
@@ -406,7 +410,7 @@ def poll(self, *jobs):
 
         # We need the match objects, so we have to use finditer()
         state_match = list(re.finditer(
-            fr'^(?P<jobid>{self._state_patt})\|(?P<state>\S+)([^\|]*)\|'
+            fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)([^\|]*)\|'
             fr'(?P<exitcode>\d+)\:(?P<signal>\d+)\|(?P<end>\S+)\|'
             fr'(?P<nodespec>.*)', completed.stdout, re.MULTILINE)
         )
@@ -574,7 +578,7 @@ def poll(self, *jobs):
 
         # We need the match objects, so we have to use finditer()
         state_match = list(re.finditer(
-            fr'^(?P<jobid>{self._state_patt})\|(?P<state>\S+)\|'
+            fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)\|'
             fr'(?P<nodespec>\S*)\|(?P<reason>.+)',
             completed.stdout, re.MULTILINE)
         )

From ed3177c68ec3ee92a6ec90439e1c1f722430be2a Mon Sep 17 00:00:00 2001
From: Theofilos Manitaras <manitaras@cscs.ch>
Date: Wed, 12 Jan 2022 13:14:54 +0100
Subject: [PATCH 2/3] Fix job status check for heterogeneous Slurm jobs

---
 reframe/core/schedulers/slurm.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py
index 9fb88589c0..c20336db83 100644
--- a/reframe/core/schedulers/slurm.py
+++ b/reframe/core/schedulers/slurm.py
@@ -422,7 +422,8 @@ def poll(self, *jobs):
 
         job_info = {}
         for s in state_match:
-            jobid = s.group('jobid').split('_')[0]
+            # Take into account both job arrays and heterogeneous jobs
+            jobid = re.split(r'_|\+', s.group('jobid'))[0]
             job_info.setdefault(jobid, []).append(s)
 
         for job in jobs:
@@ -431,7 +432,7 @@ def poll(self, *jobs):
             except KeyError:
                 continue
 
-            # Join the states with ',' in case of job arrays
+            # Join the states with ',' for job arrays or heterogeneous jobs
             job._state = ','.join(m.group('state') for m in jobarr_info)
 
             if not self._update_state_count % self.SACCT_SQUEUE_RATIO:

From efe3cc12f1e5b471ceaae495a87f9849ed424866 Mon Sep 17 00:00:00 2001
From: Theofilos Manitaras <manitaras@cscs.ch>
Date: Thu, 13 Jan 2022 15:43:34 +0100
Subject: [PATCH 3/3] Address PR comments

---
 reframe/core/schedulers/slurm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py
index c20336db83..5c3a29dce1 100644
--- a/reframe/core/schedulers/slurm.py
+++ b/reframe/core/schedulers/slurm.py
@@ -94,11 +94,11 @@ class SlurmJobScheduler(sched.JobScheduler):
     # as well as job arrays.
     # For heterogeneous jobs, the job_id has the following format:
     # <het_job_id>+<het_job_offset>
-    # See (`Heterogeneous Job Support<https://slurm.schedmd.com/heterogeneous_jobs.html>`__)  # noqa: E501
+    # (https://slurm.schedmd.com/heterogeneous_jobs.html)
     # For job arrays the job_id has one of the following formats:
     #   * <job_id>_<array_task_id>
     #   * <job_id>_[<array_task_id_start>-<array_task_id_end>]
-    # See (`Job Array Support<https://slurm.schedmd.com/job_array.html`__)
+    # (https://slurm.schedmd.com/job_array.html)
     _jobid_patt = r'\d+(?:\+\d+|_\d+|_\[\d+-\d+\])?'
 
     def __init__(self):