Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,11 @@ Flexible Regression Tests

.. versionadded:: 2.15

ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to ``0``.
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value ``<=0``.
In ReFrame's terminology, such tests are called `flexible`.
Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks).
A zero value indicates the default minimum number of tasks, which is equal to :attr:`num_tasks_per_node <reframe.core.pipeline.RegressionTest.num_tasks_per_node>`.

By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line.
Flexible tests are very useful for diagnostic tests, e.g., tests for checking the health of a whole set of nodes.
In this example, we demonstrate this feature through a simple test that runs ``hostname``.
Expand Down
2 changes: 1 addition & 1 deletion docs/running.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ Controlling the Flexible Task Allocation

.. versionadded:: 2.15

ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to ``0``.
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value ``<=0``.
By default, ReFrame will spawn such a test on all the idle nodes of the current system partition.
This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option.
This option accepts three values:
Expand Down
15 changes: 12 additions & 3 deletions reframe/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,15 @@ class RegressionTest:

#: Number of tasks required by this test.
#:
#: If the number of tasks is set to ``0``, ReFrame will try to flexibly
#: allocate the number of tasks, based on the command line option
#: ``--flex-alloc-tasks``.
#: If the number of tasks is set to a number ``<=0``, ReFrame will try
#: to flexibly allocate the number of tasks, based on the command line
#: option ``--flex-alloc-tasks``.
#: A negative number is used to indicate the minimum number of tasks
#: required for the test.
#: In this case the minimum number of tasks is the absolute value of
#: the number.
#: Setting ``num_tasks`` to ``0`` is equivalent to setting it to
#: ``-num_tasks_per_node``.
#:
#: :type: integral
#: :default: ``1``
Expand All @@ -269,6 +275,9 @@ class RegressionTest:
#: (see `Flexible task allocation
#: <running.html#flexible-task-allocation>`__)
#: if the number of tasks is set to ``0``.
#: .. versionchanged:: 2.16
#: Negative ``num_tasks`` is allowed for specifying the minimum
#: number of tasks required by the test.
num_tasks = fields.TypedField('num_tasks', int)

#: Number of tasks per node required by this test.
Expand Down
34 changes: 19 additions & 15 deletions reframe/core/schedulers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,27 @@ def sched_exclusive_access(self):

def prepare(self, commands, environs=None, **gen_opts):
environs = environs or []
if self.num_tasks == 0:
if self.num_tasks <= 0:
num_tasks_per_node = self.num_tasks_per_node or 1
min_num_tasks = (-self.num_tasks if self.num_tasks else
num_tasks_per_node)

try:
self._num_tasks = self.guess_num_tasks()
getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
self._num_tasks)
guessed_num_tasks = self.guess_num_tasks()
except NotImplementedError as e:
raise JobError('guessing number of tasks is not implemented '
'by the backend') from e
raise JobError('flexible task allocation is not supported by '
'this backend') from e

if guessed_num_tasks < min_num_tasks:
nodes_required = min_num_tasks // num_tasks_per_node
nodes_found = guessed_num_tasks // num_tasks_per_node
raise JobError('could not find enough nodes: '
'required %s, found %s' %
(nodes_required, nodes_found))

self._num_tasks = guessed_num_tasks
getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
self._num_tasks)

with shell.generate_script(self.script_filename,
**gen_opts) as builder:
Expand Down Expand Up @@ -257,25 +270,16 @@ def guess_num_tasks(self):

# Try to guess the number of tasks now
available_nodes = self.filter_nodes(available_nodes, self.options)
if not available_nodes:
options = ' '.join(self.sched_access + self.options)
raise JobError('could not find any node satisfying the '
'required criteria: %s' % options)

if self.sched_flex_alloc_tasks == 'idle':
available_nodes = {n for n in available_nodes
if n.is_available()}
if not available_nodes:
raise JobError('could not find any idle nodes')

getlogger().debug(
'flex_alloc_tasks: selecting idle nodes: '
'available nodes now: %s' % len(available_nodes))

num_tasks_per_node = self.num_tasks_per_node or 1
num_tasks = len(available_nodes) * num_tasks_per_node
getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' %
num_tasks)
return num_tasks

@abc.abstractmethod
Expand Down
27 changes: 25 additions & 2 deletions unittests/test_schedulers.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,7 @@ def test_guess_num_tasks(self):
# monkey patch `get_partition_nodes()` to simulate extraction of
# slurm nodes through the use of `scontrol show`
self.testjob.get_partition_nodes = lambda: set()
with self.assertRaises(JobError):
self.testjob.guess_num_tasks()
self.assertEqual(self.testjob.guess_num_tasks(), 0)


class TestSqueueJob(TestSlurmJob):
Expand Down Expand Up @@ -708,6 +707,30 @@ def test_exclude_nodes_opt(self):
self.prepare_job()
self.assertEqual(self.testjob.num_tasks, 8)

def test_no_num_tasks_per_node(self):
self.testjob._num_tasks_per_node = None
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
self.prepare_job()
self.assertEqual(self.testjob.num_tasks, 1)

def test_not_enough_idle_nodes(self):
self.testjob._sched_flex_alloc_tasks = 'idle'
self.testjob._num_tasks = -12
with self.assertRaises(JobError):
self.prepare_job()

def test_not_enough_nodes_constraint_partition(self):
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
self.testjob._num_tasks = -8
with self.assertRaises(JobError):
self.prepare_job()

def test_enough_nodes_constraint_partition(self):
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
self.testjob._num_tasks = -4
self.prepare_job()
self.assertEqual(self.testjob.num_tasks, 4)

def prepare_job(self):
self.testjob.prepare(['hostname'])

Expand Down