diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 3f4bd4fa32..622822b642 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -334,3 +334,9 @@ def finished(self): raise JobNotStartedError('cannot poll an unstarted job') return self.scheduler.finished(self) + + +class Node(abc.ABC): + @abc.abstractmethod + def is_available(self): + '''Return ``True`` if this node is available, ``False`` otherwise.''' diff --git a/reframe/core/schedulers/local.py b/reframe/core/schedulers/local.py index 4052c7b6b5..561ebf2ca0 100644 --- a/reframe/core/schedulers/local.py +++ b/reframe/core/schedulers/local.py @@ -56,10 +56,10 @@ def emit_preamble(self, job): return [] def allnodes(self): - return [socket.gethostname()] + return [_LocalNode(socket.gethostname())] def filternodes(self, job, nodes): - return [socket.gethostname()] + return [_LocalNode(socket.gethostname())] def _kill_all(self, job): '''Send SIGKILL to all the processes of the spawned job.''' @@ -169,3 +169,11 @@ def finished(self, job): return False return True + + +class _LocalNode(sched.Node): + def __init__(self, name): + self._name = name + + def is_available(self): + return True diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 4d846bcc6f..37e729f6dd 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -166,7 +166,7 @@ def allnodes(self): raise JobError('could not retrieve node information') from e node_descriptions = completed.stdout.splitlines() - return create_nodes(node_descriptions) + return _create_nodes(node_descriptions) def _get_default_partition(self): completed = _run_strict('scontrol -a show -o partitions') @@ -272,13 +272,13 @@ def _get_reservation_nodes(self, reservation): completed = _run_strict('scontrol -a show -o %s' % reservation_nodes) node_descriptions = completed.stdout.splitlines() - return create_nodes(node_descriptions) + return 
_create_nodes(node_descriptions) def _get_nodes_by_name(self, nodespec): completed = os_ext.run_command('scontrol -a show -o node %s' % nodespec) node_descriptions = completed.stdout.splitlines() - return create_nodes(node_descriptions) + return _create_nodes(node_descriptions) def _set_nodelist(self, job, nodespec): if job.nodelist is not None: @@ -485,16 +485,16 @@ def cancel(self, job): self._cancelled = True -def create_nodes(descriptions): +def _create_nodes(descriptions): nodes = set() for descr in descriptions: with suppress(JobError): - nodes.add(SlurmNode(descr)) + nodes.add(_SlurmNode(descr)) return nodes -class SlurmNode: +class _SlurmNode(sched.Node): '''Class representing a Slurm node.''' def __init__(self, node_descr): diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index a061f2d8ef..47b2508270 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -16,7 +16,7 @@ from reframe.core.launchers.registry import getlauncher from reframe.core.schedulers import Job from reframe.core.schedulers.registry import getscheduler -from reframe.core.schedulers.slurm import SlurmNode, create_nodes +from reframe.core.schedulers.slurm import _SlurmNode, _create_nodes class _TestJob(abc.ABC): @@ -304,8 +304,14 @@ def test_cancel_term_ignore(self): self.assertProcessDied(sleep_pid) def test_guess_num_tasks(self): + # We want to trigger bug #1087 (GitHub), that's why we set allocation + # policy to idle. 
self.testjob.num_tasks = 0 - assert self.testjob.guess_num_tasks() == 1 + self.testjob._sched_flex_alloc_nodes = 'idle' + self.prepare() + self.testjob.submit() + self.testjob.wait() + assert self.testjob.num_tasks == 1 class TestSlurmJob(_TestJob, unittest.TestCase): @@ -611,7 +617,7 @@ def create_dummy_nodes(obj): 'Node invalid_node2 not found'] - return create_nodes(node_descriptions) + return _create_nodes(node_descriptions) def create_reservation_nodes(self, res): return {n for n in self.testjob.scheduler.allnodes() @@ -906,15 +912,15 @@ def setUp(self): 'failed [reframe_user@01 Jan 2018]' ) - self.allocated_node = SlurmNode(allocated_node_description) - self.allocated_node_copy = SlurmNode(allocated_node_description) - self.idle_node = SlurmNode(idle_node_description) - self.idle_drained = SlurmNode(idle_drained_node_description) - self.no_partition_node = SlurmNode(no_partition_node_description) + self.allocated_node = _SlurmNode(allocated_node_description) + self.allocated_node_copy = _SlurmNode(allocated_node_description) + self.idle_node = _SlurmNode(idle_node_description) + self.idle_drained = _SlurmNode(idle_drained_node_description) + self.no_partition_node = _SlurmNode(no_partition_node_description) def test_no_node_name(self): with self.assertRaises(JobError): - SlurmNode(self.no_name_node_description) + _SlurmNode(self.no_name_node_description) def test_states(self): self.assertEqual(self.allocated_node.states, {'ALLOCATED'})