From cd81b9d63522067e37c7717dc3a24a624e46461a Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 18 Jan 2019 10:33:02 +0100 Subject: [PATCH 1/4] Support setting a min number of flex_alloc_tasks --- docs/advanced.rst | 2 +- docs/running.rst | 4 +++- reframe/core/pipeline.py | 12 +++++++++--- reframe/core/schedulers/__init__.py | 15 ++++++++++++--- unittests/test_schedulers.py | 18 ++++++++++++++++++ 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/docs/advanced.rst b/docs/advanced.rst index b99d10601f..d5e55f3aa2 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -404,7 +404,7 @@ Flexible Regression Tests .. versionadded:: 2.15 -ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``0``. +ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``<=0``. In ReFrame's terminology, such tests are called `flexible`. By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line. Flexible tests are very useful for diagnostics tests, e.g., tests for checking the health of a whole set nodes. diff --git a/docs/running.rst b/docs/running.rst index 6a66434a1d..0fc7bc4a60 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -1006,7 +1006,9 @@ Controlling the Flexible Task Allocation .. versionadded:: 2.15 -ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``0``. +ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to a value ``<=0``. +Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks). +A zero value indicates the default minimum number of tasks which is ``1``. By default, ReFrame will spawn such a test on all the idle nodes of the current system partition. This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option. This option accepts three values: diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index c8a45806dd..59207857f5 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -255,9 +255,12 @@ class RegressionTest: #: Number of tasks required by this test. #: - #: If the number of tasks is set to ``0``, ReFrame will try to flexibly - #: allocate the number of tasks, based on the command line option - #: ``--flex-alloc-tasks``. + #: If the number of tasks is set to a number ``<=0``, ReFrame will try + #: to flexibly allocate the number of tasks, based on the command line + #: option ``--flex-alloc-tasks``. A negative number is used to indicate + #: the minimum number of tasks valid for the test. In this case the + #: minimum number of tasks is the absolute value of the number, while + #: ``0`` is used when the minimum number of tasks is ``1``. #: #: :type: integral #: :default: ``1`` @@ -269,6 +272,9 @@ class RegressionTest: #: (see `Flexible task allocation #: `__) #: if the number of tasks is set to ``0``. + #: .. versionchanged:: 2.16 + #: Added support for specifying the minimum number of acceptable + #: tasks when negative numbers are specified. num_tasks = fields.TypedField('num_tasks', int) #: Number of tasks per node required by this test. diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index b3e796aea6..8bff82345a 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -221,9 +221,11 @@ def sched_exclusive_access(self): def prepare(self, commands, environs=None, **gen_opts): environs = environs or [] - if self.num_tasks == 0: + if self.num_tasks <= 0: try: - self._num_tasks = self.guess_num_tasks() + self._num_tasks = (self.guess_num_tasks(abs(self.num_tasks)) if + self.num_tasks != 0 else + self.guess_num_tasks()) getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' % self._num_tasks) except NotImplementedError as e: @@ -243,11 +245,15 @@ def prepare(self, commands, environs=None, **gen_opts): def emit_preamble(self): pass - def guess_num_tasks(self): + def guess_num_tasks(self, min_num_tasks=1): if isinstance(self.sched_flex_alloc_tasks, int): if self.sched_flex_alloc_tasks <= 0: raise JobError('invalid number of flex_alloc_tasks: %s' % self.sched_flex_alloc_tasks) + elif self.sched_flex_alloc_tasks < min_num_tasks: + raise JobError('invalid number of flex_alloc_tasks: %s > ' + '%s (min number of tasks)' % + self.sched_flex_alloc_tasks, min_num_tasks) return self.sched_flex_alloc_tasks @@ -274,6 +280,9 @@ def guess_num_tasks(self): num_tasks_per_node = self.num_tasks_per_node or 1 num_tasks = len(available_nodes) * num_tasks_per_node + if num_tasks < min_num_tasks: + raise JobError('could not schedule enough tasks') + getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' % num_tasks) return num_tasks diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 7c9aebe1d8..eba6735f7d 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -708,6 +708,24 @@ def test_exclude_nodes_opt(self): self.prepare_job() self.assertEqual(self.testjob.num_tasks, 8) + def test_not_enough_idle_nodes(self): + self.testjob._sched_flex_alloc_tasks = 'idle' + self.testjob._num_tasks = -12 + with self.assertRaises(JobError): + self.prepare_job() + + def test_not_enough_nodes_constraint_partition(self): + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.testjob._num_tasks = -8 + with self.assertRaises(JobError): + self.prepare_job() + + def test_enough_nodes_constraint_partition(self): + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.testjob._num_tasks = -4 + self.prepare_job() + self.assertEqual(self.testjob.num_tasks, 4) + def prepare_job(self): self.testjob.prepare(['hostname']) From b63c8afff8af4de41e37b48787b99b45bd0f7f9d Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 24 Jan 2019 16:52:06 +0100 Subject: [PATCH 2/4] Address PR comments --- docs/advanced.rst | 3 +++ docs/running.rst | 2 -- reframe/core/pipeline.py | 15 +++++++----- reframe/core/schedulers/__init__.py | 38 ++++++++++++----------------- unittests/test_schedulers.py | 9 +++++-- 5 files changed, 35 insertions(+), 32 deletions(-) diff --git a/docs/advanced.rst b/docs/advanced.rst index d5e55f3aa2..a76e90c2ca 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -406,6 +406,9 @@ Flexible Regression Tests ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to ``<=0``. In ReFrame's terminology, such tests are called `flexible`. +Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks). +A zero value indicates the default minimum number of tasks which is equal to :attr:`num_tasks_per_node `. + By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line. Flexible tests are very useful for diagnostics tests, e.g., tests for checking the health of a whole set nodes. In this example, we demonstrate this feature through a simple test that runs ``hostname``. diff --git a/docs/running.rst b/docs/running.rst index 0fc7bc4a60..1b9695c401 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -1007,8 +1007,6 @@ Controlling the Flexible Task Allocation .. versionadded:: 2.15 ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks ` attribute is set to a value ``<=0``. -Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks). -A zero value indicates the default minimum number of tasks which is ``1``. By default, ReFrame will spawn such a test on all the idle nodes of the current system partition. This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option. This option accepts three values: diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index 59207857f5..0d7b188b73 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -257,10 +257,13 @@ class RegressionTest: #: #: If the number of tasks is set to a number ``<=0``, ReFrame will try #: to flexibly allocate the number of tasks, based on the command line - #: option ``--flex-alloc-tasks``. A negative number is used to indicate - #: the minimum number of tasks valid for the test. In this case the - #: minimum number of tasks is the absolute value of the number, while - #: ``0`` is used when the minimum number of tasks is ``1``. + #: option ``--flex-alloc-tasks``. + #: A negative number is used to indicate the minimum number of tasks + #: required for the test. + #: In this case the minimum number of tasks is the absolute value of + #: the number, while + #: Setting ``num_tasks`` to ``0`` is equivalent to setting it to + #: ``-num_tasks_per_node``. #: #: :type: integral #: :default: ``1`` @@ -273,8 +276,8 @@ class RegressionTest: #: `__) #: if the number of tasks is set to ``0``. #: .. versionchanged:: 2.16 - #: Added support for specifying the minimum number of acceptable - #: tasks when negative numbers are specified. + #: Negative ``num_tasks`` is allowed for specifying the minimum + #: number of required tasks by the test. num_tasks = fields.TypedField('num_tasks', int) #: Number of tasks per node required by this test. diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 8bff82345a..7896720f6a 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -222,12 +222,22 @@ def sched_exclusive_access(self): def prepare(self, commands, environs=None, **gen_opts): environs = environs or [] if self.num_tasks <= 0: + num_tasks_per_node = (self.num_tasks_per_node if + self.num_tasks_per_node else 1) + min_num_tasks = (abs(self.num_tasks) if self.num_tasks < 0 else + num_tasks_per_node) try: - self._num_tasks = (self.guess_num_tasks(abs(self.num_tasks)) if - self.num_tasks != 0 else - self.guess_num_tasks()) - getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' % - self._num_tasks) + guessed_num_tasks = self.guess_num_tasks() + if guessed_num_tasks >= min_num_tasks: + self._num_tasks = guessed_num_tasks + getlogger().debug('flex_alloc_tasks: setting num_tasks ' + 'to %s' % self._num_tasks) + else: + raise JobError( + 'not enough nodes satisfying the minimum ' + 'number of tasks required: %s < %s' % + (guessed_num_tasks, min_num_tasks)) + except NotImplementedError as e: raise JobError('guessing number of tasks is not implemented ' 'by the backend') from e @@ -245,15 +255,11 @@ def prepare(self, commands, environs=None, **gen_opts): def emit_preamble(self): pass - def guess_num_tasks(self, min_num_tasks=1): + def guess_num_tasks(self): if isinstance(self.sched_flex_alloc_tasks, int): if self.sched_flex_alloc_tasks <= 0: raise JobError('invalid number of flex_alloc_tasks: %s' % self.sched_flex_alloc_tasks) - elif self.sched_flex_alloc_tasks < min_num_tasks: - raise JobError('invalid number of flex_alloc_tasks: %s > ' - '%s (min number of tasks)' % - self.sched_flex_alloc_tasks, min_num_tasks) return self.sched_flex_alloc_tasks @@ -263,28 +269,16 @@ def guess_num_tasks(self, min_num_tasks=1): # Try to guess the number of tasks now available_nodes = self.filter_nodes(available_nodes, self.options) - if not available_nodes: - options = ' '.join(self.sched_access + self.options) - raise JobError('could not find any node satisfying the ' - 'required criteria: %s' % options) if self.sched_flex_alloc_tasks == 'idle': available_nodes = {n for n in available_nodes if n.is_available()} - if not available_nodes: - raise JobError('could not find any idle nodes') - getlogger().debug( 'flex_alloc_tasks: selecting idle nodes: ' 'available nodes now: %s' % len(available_nodes)) num_tasks_per_node = self.num_tasks_per_node or 1 num_tasks = len(available_nodes) * num_tasks_per_node - if num_tasks < min_num_tasks: - raise JobError('could not schedule enough tasks') - - getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' % - num_tasks) return num_tasks @abc.abstractmethod diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index eba6735f7d..7b48469c40 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -390,8 +390,7 @@ def test_guess_num_tasks(self): # monkey patch `get_partition_nodes()` to simulate extraction of # slurm nodes through the use of `scontrol show` self.testjob.get_partition_nodes = lambda: set() - with self.assertRaises(JobError): - self.testjob.guess_num_tasks() + self.assertEqual(self.testjob.guess_num_tasks(), 0) class TestSqueueJob(TestSlurmJob): @@ -708,6 +707,12 @@ def test_exclude_nodes_opt(self): self.prepare_job() self.assertEqual(self.testjob.num_tasks, 8) + def test_no_num_tasks_per_node(self): + self.testjob._num_tasks_per_node = None + self.testjob.options = ['-C f1,f2', '--partition=p1,p2'] + self.prepare_job() + self.assertEqual(self.testjob.num_tasks, 1) + def test_not_enough_idle_nodes(self): self.testjob._sched_flex_alloc_tasks = 'idle' self.testjob._num_tasks = -12 From e300d5fcfe8328c14ff7f7dd3596c4bd1f5fb48a Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 25 Jan 2019 12:29:27 +0100 Subject: [PATCH 3/4] Address PR comments (version 2) --- reframe/core/schedulers/__init__.py | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 7896720f6a..a42866871f 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -222,25 +222,25 @@ def sched_exclusive_access(self): def prepare(self, commands, environs=None, **gen_opts): environs = environs or [] if self.num_tasks <= 0: - num_tasks_per_node = (self.num_tasks_per_node if - self.num_tasks_per_node else 1) - min_num_tasks = (abs(self.num_tasks) if self.num_tasks < 0 else + num_tasks_per_node = self.num_tasks_per_node or 1 + min_num_tasks = (-self.num_tasks if self.num_tasks else num_tasks_per_node) + try: guessed_num_tasks = self.guess_num_tasks() - if guessed_num_tasks >= min_num_tasks: - self._num_tasks = guessed_num_tasks - getlogger().debug('flex_alloc_tasks: setting num_tasks ' - 'to %s' % self._num_tasks) - else: - raise JobError( - 'not enough nodes satisfying the minimum ' - 'number of tasks required: %s < %s' % - (guessed_num_tasks, min_num_tasks)) - except NotImplementedError as e: - raise JobError('guessing number of tasks is not implemented ' - 'by the backend') from e + raise JobError('flexible task allocation is not supported by ' + 'this backend') from e + + if guessed_num_tasks < min_num_tasks: + raise JobError( + 'could not find enough nodes: required %s, found %s' % + (min_num_tasks // num_tasks_per_node, + guessed_num_tasks // num_tasks_per_node)) + else: + self._num_tasks = guessed_num_tasks + getlogger().debug('flex_alloc_tasks: setting num_tasks ' + 'to %s' % self._num_tasks) with shell.generate_script(self.script_filename, **gen_opts) as builder: From 80a98278a9664568faa85b61a8c5d5921cf906c6 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 25 Jan 2019 13:09:03 +0100 Subject: [PATCH 4/4] Code styling fine tuning --- reframe/core/schedulers/__init__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index a42866871f..a831660c35 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -233,14 +233,15 @@ def prepare(self, commands, environs=None, **gen_opts): 'this backend') from e if guessed_num_tasks < min_num_tasks: - raise JobError( - 'could not find enough nodes: required %s, found %s' % - (min_num_tasks // num_tasks_per_node, - guessed_num_tasks // num_tasks_per_node)) - else: - self._num_tasks = guessed_num_tasks - getlogger().debug('flex_alloc_tasks: setting num_tasks ' - 'to %s' % self._num_tasks) + nodes_required = min_num_tasks // num_tasks_per_node + nodes_found = guessed_num_tasks // num_tasks_per_node + raise JobError('could not find enough nodes: ' + 'required %s, found %s' % + (nodes_required, nodes_found)) + + self._num_tasks = guessed_num_tasks + getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' % + self._num_tasks) with shell.generate_script(self.script_filename, **gen_opts) as builder: