diff --git a/docs/advanced.rst b/docs/advanced.rst index b99d10601f..a76e90c2ca 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -404,8 +404,11 @@ Flexible Regression Tests .. versionadded:: 2.15 -ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``0``. +ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to a value ``<=0``. In ReFrame's terminology, such tests are called `flexible`. +Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks). +A zero value indicates the default minimum number of tasks, which is equal to :attr:`num_tasks_per_node `. + By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line. Flexible tests are very useful for diagnostics tests, e.g., tests for checking the health of a whole set nodes. In this example, we demonstrate this feature through a simple test that runs ``hostname``. diff --git a/docs/running.rst b/docs/running.rst index 6a66434a1d..1b9695c401 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -1006,7 +1006,7 @@ Controlling the Flexible Task Allocation .. versionadded:: 2.15 -ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``0``. +ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to a value ``<=0``. By default, ReFrame will spawn such a test on all the idle nodes of the current system partition. This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option. 
This option accepts three values: diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index c8a45806dd..0d7b188b73 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -255,9 +255,15 @@ class RegressionTest: #: Number of tasks required by this test. #: - #: If the number of tasks is set to ``0``, ReFrame will try to flexibly - #: allocate the number of tasks, based on the command line option - #: ``--flex-alloc-tasks``. + #: If the number of tasks is set to a number ``<=0``, ReFrame will try + #: to flexibly allocate the number of tasks, based on the command line + #: option ``--flex-alloc-tasks``. + #: A negative number is used to indicate the minimum number of tasks + #: required for the test. + #: In this case the minimum number of tasks is the absolute value of + #: the number. + #: Setting ``num_tasks`` to ``0`` is equivalent to setting it to + #: ``-num_tasks_per_node``. #: #: :type: integral #: :default: ``1`` @@ -269,6 +275,9 @@ class RegressionTest: #: (see `Flexible task allocation #: `__) #: if the number of tasks is set to ``0``. + #: .. versionchanged:: 2.16 + #: Negative ``num_tasks`` is allowed for specifying the minimum + #: number of tasks required by the test. num_tasks = fields.TypedField('num_tasks', int) #: Number of tasks per node required by this test. 
diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index b3e796aea6..a831660c35 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -221,14 +221,27 @@ def sched_exclusive_access(self): def prepare(self, commands, environs=None, **gen_opts): environs = environs or [] - if self.num_tasks == 0: + if self.num_tasks <= 0: + num_tasks_per_node = self.num_tasks_per_node or 1 + min_num_tasks = (-self.num_tasks if self.num_tasks else + num_tasks_per_node) + try: - self._num_tasks = self.guess_num_tasks() - getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' % - self._num_tasks) + guessed_num_tasks = self.guess_num_tasks() except NotImplementedError as e: - raise JobError('guessing number of tasks is not implemented ' - 'by the backend') from e + raise JobError('flexible task allocation is not supported by ' + 'this backend') from e + + if guessed_num_tasks < min_num_tasks: + nodes_required = min_num_tasks // num_tasks_per_node + nodes_found = guessed_num_tasks // num_tasks_per_node + raise JobError('could not find enough nodes: ' + 'required %s, found %s' % + (nodes_required, nodes_found)) + + self._num_tasks = guessed_num_tasks + getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' % + self._num_tasks) with shell.generate_script(self.script_filename, **gen_opts) as builder: @@ -257,25 +270,16 @@ def guess_num_tasks(self): # Try to guess the number of tasks now available_nodes = self.filter_nodes(available_nodes, self.options) - if not available_nodes: - options = ' '.join(self.sched_access + self.options) - raise JobError('could not find any node satisfying the ' - 'required criteria: %s' % options) if self.sched_flex_alloc_tasks == 'idle': available_nodes = {n for n in available_nodes if n.is_available()} - if not available_nodes: - raise JobError('could not find any idle nodes') - getlogger().debug( 'flex_alloc_tasks: selecting idle nodes: ' 'available nodes now: %s' % 
len(available_nodes)) num_tasks_per_node = self.num_tasks_per_node or 1 num_tasks = len(available_nodes) * num_tasks_per_node - getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' % - num_tasks) return num_tasks @abc.abstractmethod diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 7c9aebe1d8..7b48469c40 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -390,8 +390,7 @@ def test_guess_num_tasks(self): # monkey patch `get_partition_nodes()` to simulate extraction of # slurm nodes through the use of `scontrol show` self.testjob.get_partition_nodes = lambda: set() - with self.assertRaises(JobError): - self.testjob.guess_num_tasks() + self.assertEqual(self.testjob.guess_num_tasks(), 0) class TestSqueueJob(TestSlurmJob): @@ -708,6 +707,30 @@ def test_exclude_nodes_opt(self): self.prepare_job() self.assertEqual(self.testjob.num_tasks, 8) + def test_no_num_tasks_per_node(self): + self.testjob._num_tasks_per_node = None + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.prepare_job() + self.assertEqual(self.testjob.num_tasks, 1) + + def test_not_enough_idle_nodes(self): + self.testjob._sched_flex_alloc_tasks = 'idle' + self.testjob._num_tasks = -12 + with self.assertRaises(JobError): + self.prepare_job() + + def test_not_enough_nodes_constraint_partition(self): + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.testjob._num_tasks = -8 + with self.assertRaises(JobError): + self.prepare_job() + + def test_enough_nodes_constraint_partition(self): + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.testjob._num_tasks = -4 + self.prepare_job() + self.assertEqual(self.testjob.num_tasks, 4) + def prepare_job(self): self.testjob.prepare(['hostname'])