Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 41 additions & 40 deletions cscs-checks/system/slurm/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,70 +2,61 @@
import reframe.utility.sanity as sn


# Base class for Slurm simple binary tests
class SlurmSimpleBaseCheck(rfm.RunOnlyRegressionTest):
    '''Base class for Slurm simple binary tests.

    Sets the valid systems, programming environment, tags, tasks per node
    and maintainers shared by the run-only Slurm checks in this file.
    '''

    def __init__(self):
        super().__init__()
        self.valid_systems = ['daint:gpu', 'daint:mc',
                              'dom:gpu', 'dom:mc',
                              'kesch:cn', 'kesch:pn']
        self.valid_prog_environs = ['PrgEnv-cray']

        # 'single-node' is part of the default tag set and is assigned
        # exactly once here. There is deliberately no setup() override
        # that re-adds it later: a subclass that calls
        # self.tags.remove('single-node') in its __init__ must not get the
        # tag back when the test is set up.
        self.tags = {'slurm', 'maintenance', 'ops',
                     'production', 'single-node'}
        self.num_tasks_per_node = 1
        if self.current_system.name == 'kesch':
            self.exclusive_access = True

        self.maintainers = ['RS', 'VK']

# Base class for Slurm tests that require compiling some code


class SlurmCompiledBaseCheck(rfm.RegressionTest):
    '''Base class for Slurm tests that require compiling some code.

    Sets the valid systems, programming environment, tags, tasks per node
    and maintainers shared by the compiled Slurm checks in this file.
    '''

    def __init__(self):
        super().__init__()
        self.valid_systems = ['daint:gpu', 'daint:mc',
                              'dom:gpu', 'dom:mc',
                              'kesch:cn', 'kesch:pn']
        self.valid_prog_environs = ['PrgEnv-cray']

        # The tag set is assigned exactly once and already contains
        # 'single-node'; no setup() override re-adds it afterwards, so a
        # subclass that removes the tag in its __init__ keeps it removed.
        self.tags = {'slurm', 'maintenance', 'ops',
                     'production', 'single-node'}
        self.num_tasks_per_node = 1
        if self.current_system.name == 'kesch':
            self.exclusive_access = True

        self.maintainers = ['RS', 'VK']


@rfm.simple_test
class HostnameCheck(SlurmSimpleBaseCheck):
    '''Run /bin/hostname and expect one matching node name per task.'''

    def __init__(self):
        super().__init__()
        self.executable = '/bin/hostname'

        # Node-name regex for each supported partition, keyed by the
        # partition's full name. The patterns are anchored with ^ and $ so
        # that only a line consisting solely of the node name counts as a
        # match (NOTE(review): this relies on sn.findall matching in
        # multi-line mode -- confirm against the ReFrame version in use).
        self.hostname_patt = {
            'kesch:cn': r'^keschcn-\d{4}$',
            'kesch:pn': r'^keschpn-\d{4}$',
            'daint:gpu': r'^nid\d{5}$',
            'daint:mc': r'^nid\d{5}$',
            'dom:gpu': r'^nid\d{5}$',
            'dom:mc': r'^nid\d{5}$',
        }

    @rfm.run_before('sanity')
    def set_sanity_patterns(self):
        '''Require as many hostname matches in stdout as there are tasks.

        The pattern is selected by the full name of the partition the test
        is currently running on, so this is done in a pre-sanity hook
        rather than in __init__.
        '''
        partname = self.current_partition.fullname
        num_matches = sn.count(
            sn.findall(self.hostname_patt[partname], self.stdout)
        )
        self.sanity_patterns = sn.assert_eq(self.num_tasks, num_matches)


@rfm.simple_test
Expand All @@ -79,10 +70,24 @@ def __init__(self):
self.executable = '/bin/echo'
self.executable_opts = ['$MY_VAR']
self.variables = {'MY_VAR': 'TEST123456!'}
self.tags.remove('single-node')
num_matches = sn.count(sn.findall(r'TEST123456!', self.stdout))
self.sanity_patterns = sn.assert_eq(self.num_tasks, num_matches)


@rfm.simple_test
class RequiredConstraintCheck(SlurmSimpleBaseCheck):
    '''Run ``srun hostname`` on the login partitions and expect the
    missing-constraint error message on stderr.'''

    def __init__(self):
        super().__init__()
        self.valid_systems = ['daint:login', 'dom:login']
        self.executable = 'srun'
        self.executable_opts = ['hostname']

        # Message the check expects srun to print when neither
        # "-C gpu" nor "-C mc" was requested.
        constraint_error = (
            r'error: You have to specify, at least, what sort of node you '
            r'need: -C gpu for GPU enabled nodes, or -C mc for multicore '
            r'nodes.'
        )
        self.sanity_patterns = sn.assert_found(constraint_error, self.stderr)

@rfm.simple_test
class RequestLargeMemoryNodeCheck(SlurmSimpleBaseCheck):
def __init__(self):
Expand All @@ -94,10 +99,8 @@ def __init__(self):
self.stdout, 'mem', float)
self.sanity_patterns = sn.assert_bounded(mem_obtained, 122.0, 128.0)

@rfm.run_before('run')
def set_memory_limit(self):
    '''Append ``--mem=120000`` to the job options before the run stage.

    This is a pipeline hook; it takes the place of a setup() override
    whose only purpose was to pass this additional option to Slurm.
    '''
    self.job.options += ['--mem=120000']


Expand Down Expand Up @@ -141,10 +144,8 @@ def __init__(self):
self.executable = 'cat /proc/cray_xt/cname'
self.sanity_patterns = sn.assert_found(r'c0-0.*', self.stdout)

@rfm.run_before('run')
def set_slurm_constraint(self):
    '''Set the job options to ``--constraint=c0-0`` before the run stage.

    This is a pipeline hook; it takes the place of a setup() override
    whose only purpose was to pass this additional option to Slurm.
    '''
    # NOTE(review): this assignment replaces any job options already set,
    # whereas the memory-limit hooks in this file append with `+=` --
    # confirm that discarding earlier options is intended.
    self.job.options = ['--constraint=c0-0']


Expand All @@ -160,6 +161,6 @@ def __init__(self):
r'(exceeded memory limit)|(Out Of Memory)', self.stderr
)

@rfm.run_before('run')
def set_memory_limit(self):
    '''Append ``--mem=2000`` to the job options before the run stage.

    This is a pipeline hook; it takes the place of a setup() override
    that only forwarded to the parent before adding this option.
    '''
    self.job.options += ['--mem=2000']