Skip to content

Commit 4657802

Browse files
authored
Merge pull request #3156 from ekouts/feat/sched_option_sbatch
[feat] Allow ReFrame to pass the access options in command line instead of the script for Slurm
2 parents be710fa + e79279a commit 4657802

File tree

9 files changed

+135
-39
lines changed

9 files changed

+135
-39
lines changed

docs/config_reference.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,20 @@ System Partition Configuration
367367
.. warning::
368368
This option is broken in 4.0.
369369

370+
371+
.. py:attribute:: systems.partitions.sched_options.sched_access_in_submit
372+
373+
:required: No
374+
:default: ``false``
375+
376+
Normally, ReFrame will pass the :attr:`~config.systems.partitions.access` options to the job script only.
377+
When this attribute is ``true`` the options are passed in the submission command instead.
378+
379+
This option is relevant for the LSF, OAR, PBS and Slurm backends.
380+
381+
.. versionadded:: 4.7
382+
383+
370384
.. py:attribute:: systems.partitions.sched_options.ssh_hosts
371385
372386
:required: No

docs/manpage.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,21 @@ Here is an alphabetical list of the environment variables recognized by ReFrame.
12261226
Whenever an environment variable is associated with a configuration option, its default value is omitted as it is the same.
12271227

12281228

1229+
.. envvar:: RFM_SCHED_ACCESS_IN_SUBMIT
1230+
1231+
Pass access options in the submission command (relevant for LSF, OAR, PBS and Slurm).
1232+
1233+
.. table::
1234+
:align: left
1235+
1236+
================================== ==================
1237+
Associated command line option N/A
1238+
Associated configuration parameter :attr:`~config.systems.partitions.sched_options.sched_access_in_submit`
1239+
================================== ==================
1240+
1241+
.. versionadded:: 4.7
1242+
1243+
12291244
.. envvar:: RFM_AUTODETECT_FQDN
12301245

12311246
Use the fully qualified domain name as the hostname.

reframe/core/schedulers/lsf.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class LsfJobScheduler(PbsJobScheduler):
2727
def __init__(self):
2828
self._prefix = '#BSUB'
2929
self._submit_timeout = self.get_option('job_submit_timeout')
30+
self._sched_access_in_submit = self.get_option(
31+
'sched_access_in_submit'
32+
)
3033

3134
def _format_option(self, var, option):
3235
if var is not None:
@@ -57,8 +60,9 @@ def emit_preamble(self, job):
5760
f'{self._prefix} -W {int(job.time_limit // 60)}'
5861
)
5962

60-
for opt in job.sched_access:
61-
preamble.append(f'{self._prefix} {opt}')
63+
if not self._sched_access_in_submit:
64+
for opt in job.sched_access:
65+
preamble.append(f'{self._prefix} {opt}')
6266

6367
# emit the rest of the options
6468
options = job.options + job.cli_options
@@ -76,7 +80,13 @@ def emit_preamble(self, job):
7680

7781
def submit(self, job):
7882
with open(job.script_filename, 'r') as fp:
79-
completed = _run_strict('bsub', stdin=fp)
83+
cmd_parts = ['bsub']
84+
if self._sched_access_in_submit:
85+
cmd_parts += job.sched_access
86+
87+
cmd = ' '.join(cmd_parts)
88+
completed = _run_strict(cmd, stdin=fp)
89+
8090
jobid_match = re.search(r'^Job <(?P<jobid>\S+)> is submitted',
8191
completed.stdout)
8292
if not jobid_match:

reframe/core/schedulers/oar.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ class OarJobScheduler(PbsJobScheduler):
6060
def __init__(self):
6161
self._prefix = '#OAR'
6262
self._submit_timeout = self.get_option('job_submit_timeout')
63+
self._sched_access_in_submit = self.get_option(
64+
'sched_access_in_submit'
65+
)
6366

6467
def emit_preamble(self, job):
6568
# host is de-facto nodes and core is number of cores requested per node
@@ -88,8 +91,11 @@ def emit_preamble(self, job):
8891
num_nodes=num_nodes, num_tasks_per_node=num_tasks_per_node,
8992
)]
9093

94+
if not self._sched_access_in_submit:
95+
options += job.sched_access
96+
9197
# Emit the rest of the options
92-
options += job.sched_access + job.options + job.cli_options
98+
options += job.options + job.cli_options
9399
for opt in options:
94100
if opt.startswith('#'):
95101
preamble.append(opt)
@@ -101,9 +107,13 @@ def emit_preamble(self, job):
101107
def submit(self, job):
102108
# OAR batch submission mode needs full path to the job script
103109
job_script_fullpath = os.path.join(job.workdir, job.script_filename)
110+
cmd_parts = ['oarsub']
111+
if self._sched_access_in_submit:
112+
cmd_parts += job.sched_access
104113

105114
# OAR needs -S to submit job in batch mode
106-
cmd = f'oarsub -S {job_script_fullpath}'
115+
cmd_parts += ['-S', job_script_fullpath]
116+
cmd = ' '.join(cmd_parts)
107117
completed = _run_strict(cmd, timeout=self._submit_timeout)
108118
jobid_match = re.search(r'.*OAR_JOB_ID=(?P<jobid>\S+)',
109119
completed.stdout)

reframe/core/schedulers/pbs.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ class PbsJobScheduler(sched.JobScheduler):
7676
def __init__(self):
7777
self._prefix = '#PBS'
7878
self._submit_timeout = self.get_option('job_submit_timeout')
79+
self._sched_access_in_submit = self.get_option(
80+
'sched_access_in_submit'
81+
)
7982

8083
def _emit_lselect_option(self, job):
8184
num_tasks = job.num_tasks or 1
@@ -92,7 +95,12 @@ def _emit_lselect_option(self, job):
9295
# Options starting with `-` are emitted in separate lines
9396
rem_opts = []
9497
verb_opts = []
95-
for opt in (*job.sched_access, *job.options, *job.cli_options):
98+
if self._sched_access_in_submit:
99+
all_opts = (*job.options, *job.cli_options)
100+
else:
101+
all_opts = (*job.sched_access, *job.options, *job.cli_options)
102+
103+
for opt in all_opts:
96104
if opt.startswith('-'):
97105
rem_opts.append(opt)
98106
elif opt.startswith('#'):
@@ -139,9 +147,14 @@ def filternodes(self, job, nodes):
139147
'node filtering')
140148

141149
def submit(self, job):
150+
cmd_parts = ['qsub']
151+
if self._sched_access_in_submit:
152+
cmd_parts += job.sched_access
153+
142154
# `-o` and `-e` options are only recognized in command line by the PBS
143155
# Slurm wrappers.
144-
cmd = f'qsub -o {job.stdout} -e {job.stderr} {job.script_filename}'
156+
cmd_parts += ['-o', job.stdout, '-e', job.stderr, job.script_filename]
157+
cmd = ' '.join(cmd_parts)
145158
completed = _run_strict(cmd, timeout=self._submit_timeout)
146159
jobid_match = re.search(r'^(?P<jobid>\S+)', completed.stdout)
147160
if not jobid_match:

reframe/core/schedulers/slurm.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ def __init__(self):
140140
self._submit_timeout = self.get_option('job_submit_timeout')
141141
self._use_nodes_opt = self.get_option('use_nodes_option')
142142
self._resubmit_on_errors = self.get_option('resubmit_on_errors')
143+
self._sched_access_in_submit = self.get_option(
144+
'sched_access_in_submit'
145+
)
143146

144147
def make_job(self, *args, **kwargs):
145148
return _SlurmJob(*args, **kwargs)
@@ -209,39 +212,33 @@ def emit_preamble(self, job):
209212
)
210213
)
211214

212-
for opt in job.sched_access:
213-
if not opt.strip().startswith(('-C', '--constraint')):
214-
preamble.append('%s %s' % (self._prefix, opt))
215-
216-
# To avoid overriding a constraint that's passed into `sched_access`,
217-
# we AND it with the `--constraint` option passed either in `options`
218-
# or in `cli_options`
219-
constraints = []
220-
constraint_parser = ArgumentParser()
221-
constraint_parser.add_argument('-C', '--constraint')
222-
parsed_options, _ = constraint_parser.parse_known_args(
223-
job.sched_access
224-
)
225-
if parsed_options.constraint:
226-
constraints.append(parsed_options.constraint.strip())
227-
228-
# NOTE: Here last of the passed --constraint job options is taken
229-
# into account in order to respect the behavior of slurm.
230-
parsed_options, _ = constraint_parser.parse_known_args(
231-
job.options + job.cli_options
232-
)
233-
if parsed_options.constraint:
234-
constraints.append(parsed_options.constraint.strip())
235-
236-
if constraints:
237-
if len(constraints) == 1:
238-
constr = constraints[0]
215+
# Combine constraints in `sched_access`
216+
#
217+
# We AND the constraints defined in `sched_access` with those in
218+
# either the `job.options` or `job.cli_options`. We essentially "move"
219+
# the option from the source option list to `sched_access` as if the
220+
# user has specified all the constraints in `sched_access`. We can then
221+
# proceed with the preamble generation or the submission normally.
222+
c_parser = ArgumentParser()
223+
c_parser.add_argument('-C', '--constraint')
224+
access, access_other = c_parser.parse_known_args(job.sched_access)
225+
job_opts, other_job_opts = c_parser.parse_known_args(job.options)
226+
cli_opts, other_cli_opts = c_parser.parse_known_args(job.cli_options)
227+
if access.constraint and (job_opts.constraint or cli_opts.constraint):
228+
constraints = [access.constraint]
229+
if job_opts.constraint:
230+
constraints.append(job_opts.constraint)
231+
job.options = other_job_opts
239232
else:
240-
# Parenthesize the constraints prior to joining them with `&`
241-
# to make sure that precedence is respected.
242-
constr = '&'.join(f'({c})' for c in constraints)
233+
constraints.append(cli_opts.constraint)
234+
job._cli_options = other_cli_opts
243235

244-
preamble.append(self._format_option(constr, '--constraint={0}'))
236+
arg = '&'.join(f'({c.strip()})' for c in constraints)
237+
job._sched_access = [f'--constraint={arg}']
238+
239+
if not self._sched_access_in_submit:
240+
for opt in job.sched_access:
241+
preamble.append(f'{self._prefix} {opt}')
245242

246243
preamble.append(self._format_option(hint, '--hint={0}'))
247244
prefix_patt = re.compile(r'(#\w+)')
@@ -259,7 +256,12 @@ def emit_preamble(self, job):
259256
return list(filter(None, preamble))
260257

261258
def submit(self, job):
262-
cmd = f'sbatch {job.script_filename}'
259+
cmd_parts = ['sbatch']
260+
if self._sched_access_in_submit:
261+
cmd_parts += job.sched_access
262+
263+
cmd_parts += [job.script_filename]
264+
cmd = ' '.join(cmd_parts)
263265
intervals = itertools.cycle([1, 2, 3])
264266
while True:
265267
try:

reframe/frontend/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,13 @@ def main():
582582
)
583583

584584
# Options not associated with command-line arguments
585+
argparser.add_argument(
586+
dest='sched_access_in_submit',
587+
envvar='RFM_SCHED_ACCESS_IN_SUBMIT',
588+
configvar='systems*/sched_options/sched_access_in_submit',
589+
action='store_true',
590+
help='Pass access options in the submission command (only for Slurm)'
591+
)
585592
argparser.add_argument(
586593
dest='autodetect_fqdn',
587594
envvar='RFM_AUTODETECT_FQDN',

reframe/schemas/config.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
"sched_options": {
110110
"type": "object",
111111
"properties": {
112+
"sched_access_in_submit": {"type": "boolean"},
112113
"hosts": {
113114
"type": "array",
114115
"items": {"type": "string"}
@@ -635,6 +636,7 @@
635636
"systems/partitions/time_limit": null,
636637
"systems/partitions/devices": [],
637638
"systems/partitions/extras": {},
639+
"systems*/sched_options/sched_access_in_submit": false,
638640
"systems*/sched_options/ssh_hosts": [],
639641
"systems*/sched_options/ignore_reqnodenotavail": false,
640642
"systems*/sched_options/job_submit_timeout": 60,

unittests/test_schedulers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,7 @@ def test_combined_access_constraint(make_job, slurm_only):
618618
with open(job.script_filename) as fp:
619619
script_content = fp.read()
620620

621+
print(script_content)
621622
assert re.search(r'(?m)--constraint=\(c1\)&\(c2&c3\)$', script_content)
622623
assert re.search(r'(?m)--constraint=(c1|c2&c3)$', script_content) is None
623624

@@ -645,6 +646,28 @@ def test_combined_access_verbatim_constraint(make_job, slurm_only):
645646
assert re.search(r'(?m)^#SBATCH -C c3$', script_content)
646647

647648

649+
def test_sched_access_in_submit(make_job):
650+
job = make_job(sched_access=['--constraint=c1', '--foo=bar'])
651+
job.options = ['--constraint=c2', '--xyz']
652+
job.scheduler._sched_access_in_submit = True
653+
654+
if job.scheduler.registered_name in ('flux', 'local', 'ssh'):
655+
pytest.skip(f'not relevant for this scheduler backend')
656+
657+
prepare_job(job)
658+
with open(job.script_filename) as fp:
659+
script_content = fp.read()
660+
661+
print(script_content)
662+
assert '--xyz' in script_content
663+
assert '--foo=bar' not in script_content
664+
if job.scheduler.registered_name in ('slurm', 'squeue'):
665+
# Constraints are combined in `sched_access` for Slurm backends
666+
assert '--constraint' not in script_content
667+
else:
668+
assert '--constraint=c1' not in script_content
669+
670+
648671
def test_guess_num_tasks(minimal_job, scheduler):
649672
minimal_job.num_tasks = 0
650673
if scheduler.registered_name == 'local':

0 commit comments

Comments
 (0)