From 7745dba6a0a12e42802a6a7ff81ec1de523fb79d Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Wed, 12 Jan 2022 11:37:36 +0100 Subject: [PATCH 1/3] Make Slurm polling aware of Heterogeneous Jobs --- reframe/core/schedulers/slurm.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 6adacd47f6..9fb88589c0 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -90,12 +90,16 @@ class SlurmJobScheduler(sched.JobScheduler): # standard job state polling using sacct. SACCT_SQUEUE_RATIO = 10 - # This matches the format for both normal jobs as well as job arrays. + # This matches the format for both normal and heterogeneous jobs, + # as well as job arrays. + # For heterogeneous jobs, the job_id has the following format: + # <het_job_id>+<het_job_offset> + # See (`Heterogeneous Job Support<https://slurm.schedmd.com/heterogeneous_jobs.html>`__) # noqa: E501 # For job arrays the job_id has one of the following formats: # * <job_id>_<array_task_id> # * <job_id>_[<array_task_id_start>-<array_task_id_end>] # See (`Job Array Support [... text lost in extraction: link target, remainder of this hunk, and the next hunk header ...] - fr'^(?P<jobid>{self._state_patt})\|(?P<state>\S+)([^\|]*)\|' + fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)([^\|]*)\|' fr'(?P<exitcode>\d+)\:(?P<signal>\d+)\|(?P<end>\S+)\|' fr'(?P<nodespec>.*)', completed.stdout, re.MULTILINE) ) @@ -574,7 +578,7 @@ def poll(self, *jobs): # We need the match objects, so we have to use finditer() state_match = list(re.finditer( - fr'^(?P<jobid>{self._state_patt})\|(?P<state>\S+)\|' + fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)\|' fr'(?P<nodespec>\S*)\|(?P<reason>.+)', completed.stdout, re.MULTILINE) ) From ed3177c68ec3ee92a6ec90439e1c1f722430be2a Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Wed, 12 Jan 2022 13:14:54 +0100 Subject: [PATCH 2/3] Fix job status check for heterogeneous Slurm jobs --- reframe/core/schedulers/slurm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 9fb88589c0..c20336db83 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -422,7 +422,8 @@ def poll(self, 
*jobs): job_info = {} for s in state_match: - jobid = s.group('jobid').split('_')[0] + # Take into account both job arrays and heterogeneous jobs + jobid = re.split(r'_|\+', s.group('jobid'))[0] job_info.setdefault(jobid, []).append(s) for job in jobs: @@ -431,7 +432,7 @@ def poll(self, *jobs): except KeyError: continue - # Join the states with ',' in case of job arrays + # Join the states with ',' in case of job arrays|heterogeneous jobs job._state = ','.join(m.group('state') for m in jobarr_info) if not self._update_state_count % self.SACCT_SQUEUE_RATIO: From efe3cc12f1e5b471ceaae495a87f9849ed424866 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 13 Jan 2022 15:43:34 +0100 Subject: [PATCH 3/3] Address PR comments --- reframe/core/schedulers/slurm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index c20336db83..5c3a29dce1 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -94,11 +94,11 @@ class SlurmJobScheduler(sched.JobScheduler): # as well as job arrays. # For heterogeneous jobs, the job_id has the following format: # <het_job_id>+<het_job_offset> - # See (`Heterogeneous Job Support<https://slurm.schedmd.com/heterogeneous_jobs.html>`__) # noqa: E501 + # (https://slurm.schedmd.com/heterogeneous_jobs.html) # For job arrays the job_id has one of the following formats: # * <job_id>_<array_task_id> # * <job_id>_[<array_task_id_start>-<array_task_id_end>] - # See (`Job Array Support