From 6a2294de2c4e63fe4c1b6b66bc093eaa120e9c3c Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 11 Feb 2019 10:01:14 +0100 Subject: [PATCH 1/3] Use the 'default' partition with 'flex-alloc-tasks' * Flexible node allocations take into account only nodes belonging to the `default` partition if a partition is not specified. --- reframe/core/schedulers/slurm.py | 21 +++++++++++++++++---- unittests/test_schedulers.py | 11 +++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 50eaf2d0b1..3804dcf837 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -165,6 +165,16 @@ def _get_all_nodes(self): node_descriptions = completed.stdout.splitlines() return {SlurmNode(descr) for descr in node_descriptions} + def _get_default_partition(self): + completed = os_ext.run_command('scontrol -a show -o partitions', + check=True) + partition_match = re.search(r'PartitionName=(?P<partition>\S+)\s+' + r'.*Default=YES.*', completed.stdout) + if not partition_match: + raise JobError('could not retrieve a default partition') + + return {partition_match.group('partition')} + def get_partition_nodes(self): nodes = self._get_all_nodes() return self.filter_nodes(nodes, self.sched_access) @@ -191,10 +201,13 @@ def filter_nodes(self, nodes, options): if partitions: partitions = set(partitions.strip().split(',')) - nodes = {n for n in nodes if n.partitions >= partitions} - getlogger().debug( - 'flex_alloc_tasks: filtering nodes by partition(s) %s: ' - 'available nodes now: %s' % (partitions, len(nodes))) + else: + partitions = self._get_default_partition() + + nodes = {n for n in nodes if n.partitions >= partitions} + getlogger().debug( + 'flex_alloc_tasks: filtering nodes by partition(s) %s: ' + 'available nodes now: %s' % (partitions, len(nodes))) if constraints: constraints = set(constraints.strip().split(',')) diff --git a/unittests/test_schedulers.py 
b/unittests/test_schedulers.py index 7b48469c40..76982d5de0 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -496,7 +496,7 @@ def create_dummy_nodes(obj): 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=MAINT+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' - 'MCS_label=N/A Partitions=p1,p2 ' + 'MCS_label=N/A Partitions=p1,p2,pdef ' 'BootTime=01 Jan 2018 ' 'SlurmdStartTime=01 Jan 2018 ' 'CfgTRES=cpu=24,mem=32220M ' @@ -514,7 +514,7 @@ def create_dummy_nodes(obj): 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=MAINT+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' - 'MCS_label=N/A Partitions=p2,p3' + 'MCS_label=N/A Partitions=p2,p3,pdef ' 'BootTime=01 Jan 2018 ' 'SlurmdStartTime=01 Jan 2018 ' 'CfgTRES=cpu=24,mem=32220M ' @@ -532,7 +532,7 @@ def create_dummy_nodes(obj): 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' - 'MCS_label=N/A Partitions=p1,p3 ' + 'MCS_label=N/A Partitions=p1,p3,pdef ' 'BootTime=01 Jan 2018 ' 'SlurmdStartTime=01 Jan 2018 ' 'CfgTRES=cpu=24,mem=32220M ' @@ -550,7 +550,7 @@ def create_dummy_nodes(obj): 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' - 'MCS_label=N/A Partitions=p1,p3 ' + 'MCS_label=N/A Partitions=p1,p3,pdef ' 'BootTime=01 Jan 2018 ' 'SlurmdStartTime=01 Jan 2018 ' 'CfgTRES=cpu=24,mem=32220M ' @@ -583,6 +583,9 @@ def setUp(self): # monkey patch `_get_all_nodes` to simulate extraction of # slurm nodes through the use of `scontrol show` self.testjob._get_all_nodes = self.create_dummy_nodes + # monkey patch `_get_default_partition` to simulate extraction + # of the default partition + self.testjob._get_default_partition = lambda: {'pdef'} self.testjob._sched_flex_alloc_tasks = 'all' self.testjob._num_tasks_per_node = 4 self.testjob._num_tasks = 0 From 
f0214ac1ce86dc8bf20fe86348b3c51a6079b063 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 11 Feb 2019 10:14:17 +0100 Subject: [PATCH 2/3] Fix bug in unittests --- unittests/test_schedulers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 76982d5de0..03ec81a7a4 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -390,6 +390,9 @@ def test_guess_num_tasks(self): # monkey patch `get_partition_nodes()` to simulate extraction of # slurm nodes through the use of `scontrol show` self.testjob.get_partition_nodes = lambda: set() + # monkey patch `_get_default_partition()` to simulate extraction + # of the default partition through the use of `scontrol show` + self.testjob._get_default_partition = lambda: {'pdef'} self.assertEqual(self.testjob.guess_num_tasks(), 0) From 6bcb02bd3a0fa3d77061ae7b952488850753a10d Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Wed, 13 Feb 2019 11:35:44 +0100 Subject: [PATCH 3/3] Address PR comments --- reframe/core/schedulers/slurm.py | 12 ++++++------ unittests/test_schedulers.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 98a00facf0..a0c9f5620a 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -162,14 +162,13 @@ def _get_all_nodes(self): return {SlurmNode(descr) for descr in node_descriptions} def _get_default_partition(self): - completed = os_ext.run_command('scontrol -a show -o partitions', - check=True) + completed = _run_strict('scontrol -a show -o partitions') partition_match = re.search(r'PartitionName=(?P<partition>\S+)\s+' r'.*Default=YES.*', completed.stdout) - if not partition_match: - raise JobError('could not retrieve a default partition') + if partition_match: + return partition_match.group('partition') - return {partition_match.group('partition')} + return None def 
get_partition_nodes(self): nodes = self._get_all_nodes() @@ -198,7 +197,8 @@ def filter_nodes(self, nodes, options): if partitions: partitions = set(partitions.strip().split(',')) else: - partitions = self._get_default_partition() + default_partition = self._get_default_partition() + partitions = {default_partition} if default_partition else set() nodes = {n for n in nodes if n.partitions >= partitions} getlogger().debug( diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 0063f4cde6..cbab7176e6 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -398,7 +398,7 @@ def test_guess_num_tasks(self): self.testjob.get_partition_nodes = lambda: set() # monkey patch `_get_default_partition()` to simulate extraction # of the default partition through the use of `scontrol show` - self.testjob._get_default_partition = lambda: {'pdef'} + self.testjob._get_default_partition = lambda: 'pdef' self.assertEqual(self.testjob.guess_num_tasks(), 0) @@ -594,7 +594,7 @@ def setUp(self): self.testjob._get_all_nodes = self.create_dummy_nodes # monkey patch `_get_default_partition` to simulate extraction # of the default partition - self.testjob._get_default_partition = lambda: {'pdef'} + self.testjob._get_default_partition = lambda: 'pdef' self.testjob._sched_flex_alloc_tasks = 'all' self.testjob._num_tasks_per_node = 4 self.testjob._num_tasks = 0