From 6fd2cfad2dc0432b5c5d271ecd89b8882f7ff068 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 3 Dec 2018 10:38:26 +0100 Subject: [PATCH 1/2] Allow 'SlurmNode' attributes to be None * The `SlurmNode` attributes: partitions, active_features and `state` can now be `None`. * Throw a `JobError` when the `NodeName` cannot be extracted. * Adjust the unit tests to reflect the above changes. --- reframe/core/schedulers/slurm.py | 29 ++++++++++------- unittests/test_schedulers.py | 56 +++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index e45d0e6ead..c2b7c2e95a 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -414,12 +414,14 @@ class SlurmNode: def __init__(self, node_descr): self._name = self._extract_attribute('NodeName', node_descr) - self._partitions = set(self._extract_attribute( - 'Partitions', node_descr).split(',')) - self._active_features = set(self._extract_attribute( - 'ActiveFeatures', node_descr).split(',')) - self._states = set( - self._extract_attribute('State', node_descr).split('+')) + if not self._name: + raise JobError('could not extract NodeName from node description') + + self._partitions = self._extract_attribute( + 'Partitions', node_descr, delim=',') + self._active_features = self._extract_attribute( + 'ActiveFeatures', node_descr, delim=',') + self._states = self._extract_attribute('State', node_descr, delim='+') def __eq__(self, other): if not isinstance(other, type(self)): @@ -431,7 +433,8 @@ def __hash__(self): return hash(self.name) def is_available(self): - return self._states == {'IDLE'} + return (self._states == {'IDLE'} and + all([self._partitions, self._active_features, self._states])) def is_down(self): return bool({'DOWN', 'DRAIN', 'MAINT', 'NO_RESPOND'} & self._states) @@ -452,13 +455,15 @@ def partitions(self): def states(self): return self._states - def 
_extract_attribute(self, attr_name, node_descr): + def _extract_attribute(self, attr_name, node_descr, delim=None): attr_match = re.search(r'%s=(\S+)' % attr_name, node_descr) if attr_match: - return attr_match.group(1) - else: - raise JobError("could not extract attribute '%s' from " - "node description" % attr_name) + if delim: + return set(attr_match.group(1).split(delim)) + else: + return attr_match.group(1) + + return None def __str__(self): return self._name diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 13f05270f5..7c9aebe1d8 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -774,10 +774,54 @@ def setUp(self): 'failed [reframe_user@01 Jan 2018]' ) + no_partition_node_description = ( + 'NodeName=nid00004 Arch=x86_64 CoresPerSocket=12 ' + 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' + 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' + 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' + 'Sockets=1 Boards=1 State=IDLE+DRAIN ' + 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' + 'MCS_label=N/A BootTime=01 Jan 2018 ' + 'SlurmdStartTime=01 Jan 2018 ' + 'CfgTRES=cpu=24,mem=32220M ' + 'AllocTRES= CapWatts=n/a CurrentWatts=100 ' + 'LowestJoules=100000000 ConsumedJoules=0 ' + 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' + 'ExtSensorsTemp=n/s Reason=Foo/ ' + 'failed [reframe_user@01 Jan 2018]' + ) + + self.no_name_node_description = ( + 'Arch=x86_64 CoresPerSocket=12 ' + 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' + 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' + 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' + 'Sockets=1 Boards=1 State=IDLE+DRAIN ' + 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' + 'MCS_label=N/A Partitions=p1,p2 ' + 'BootTime=01 Jan 2018 ' + 'SlurmdStartTime=01 Jan 2018 ' + 'CfgTRES=cpu=24,mem=32220M ' 
+ 'AllocTRES= CapWatts=n/a CurrentWatts=100 ' + 'LowestJoules=100000000 ConsumedJoules=0 ' + 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' + 'ExtSensorsTemp=n/s Reason=Foo/ ' + 'failed [reframe_user@01 Jan 2018]' + ) + self.allocated_node = SlurmNode(allocated_node_description) self.allocated_node_copy = SlurmNode(allocated_node_description) self.idle_node = SlurmNode(idle_node_description) self.idle_drained = SlurmNode(idle_drained_node_description) + self.no_partition_node = SlurmNode(no_partition_node_description) + + def test_no_node_name(self): + with self.assertRaises(JobError): + SlurmNode(self.no_name_node_description) def test_states(self): self.assertEqual(self.allocated_node.states, {'ALLOCATED'}) @@ -794,10 +838,11 @@ def test_hash(self): def test_attributes(self): self.assertEqual(self.allocated_node.name, 'nid00001') - self.assertEqual(self.allocated_node.partitions, - {'p1', 'p2'}) - self.assertEqual(self.allocated_node.active_features, - {'f1', 'f2'}) + self.assertEqual(self.allocated_node.partitions, {'p1', 'p2'}) + self.assertEqual(self.allocated_node.active_features, {'f1', 'f2'}) + self.assertEqual(self.no_partition_node.name, 'nid00004') + self.assertEqual(self.no_partition_node.partitions, None) + self.assertEqual(self.no_partition_node.active_features, {'f1', 'f2'}) def test_str(self): self.assertEqual('nid00001', str(self.allocated_node)) @@ -806,8 +851,9 @@ def test_is_available(self): self.assertFalse(self.allocated_node.is_available()) self.assertTrue(self.idle_node.is_available()) self.assertFalse(self.idle_drained.is_available()) + self.assertFalse(self.no_partition_node.is_available()) def test_is_down(self): self.assertFalse(self.allocated_node.is_down()) self.assertFalse(self.idle_node.is_down()) - self.assertTrue(self.idle_drained.is_down()) + self.assertTrue(self.no_partition_node.is_down()) From 1a4f5d825c93b5c12a6004f56be90a01faf5460e Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 4 Dec 2018 07:22:15 +0100 Subject: 
[PATCH 2/2] Address PR comments --- reframe/core/schedulers/slurm.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index c2b7c2e95a..dd9a7a27a3 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -418,10 +418,10 @@ def __init__(self, node_descr): raise JobError('could not extract NodeName from node description') self._partitions = self._extract_attribute( - 'Partitions', node_descr, delim=',') + 'Partitions', node_descr, sep=',') self._active_features = self._extract_attribute( - 'ActiveFeatures', node_descr, delim=',') - self._states = self._extract_attribute('State', node_descr, delim='+') + 'ActiveFeatures', node_descr, sep=',') + self._states = self._extract_attribute('State', node_descr, sep='+') def __eq__(self, other): if not isinstance(other, type(self)): @@ -433,8 +433,8 @@ def __hash__(self): return hash(self.name) def is_available(self): - return (self._states == {'IDLE'} and - all([self._partitions, self._active_features, self._states])) + return all([self._states == {'IDLE'}, self._partitions, + self._active_features, self._states]) def is_down(self): return bool({'DOWN', 'DRAIN', 'MAINT', 'NO_RESPOND'} & self._states) @@ -455,13 +455,11 @@ def partitions(self): def states(self): return self._states - def _extract_attribute(self, attr_name, node_descr, delim=None): + def _extract_attribute(self, attr_name, node_descr, sep=None): attr_match = re.search(r'%s=(\S+)' % attr_name, node_descr) if attr_match: - if delim: - return set(attr_match.group(1).split(delim)) - else: - return attr_match.group(1) + attr = attr_match.group(1) + return set(attr_match.group(1).split(sep)) if sep else attr return None