From b5a12077ae9b70f59328be863908cd660aaf06fc Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Thu, 18 Feb 2021 15:07:27 +0100 Subject: [PATCH 1/8] Slurm test created --- cscs-checks/system/slurm/slurm_check.py | 61 +++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cscs-checks/system/slurm/slurm_check.py diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py new file mode 100644 index 0000000000..95baa732e1 --- /dev/null +++ b/cscs-checks/system/slurm/slurm_check.py @@ -0,0 +1,61 @@ +# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: BSD-3-Clause + +import os + +import reframe as rfm +import reframe.utility.sanity as sn + +# TODO: sinfo, do we want to check if the normal, long, debug, etc... partitions are present? +# TODO: scontrol, do we want to scontrol something specific? +@rfm.parameterized_test(['squeue'], + ['sacct'], + ['sinfo'], + ['scontrol']) +class SlurmCheck(rfm.RunOnlyRegressionTest): + def __init__(self): + self.descr = 'File system slurm test base' + # TODO: test from cn as well + self.valid_systems = ['daint:login', 'dom:login'] + self.valid_prog_environs = ['builtin'] + self.num_tasks = 1 + self.num_tasks_per_node = 1 + self.sanity_patterns = sn.assert_found(r'0', self.stdout) + self.perf_patterns = { + 'real_time': sn.extractsingle(r'\nreal.+m(?P<real_time>\S+)s', + self.stderr, 'real_time', float) + } + self.reference = { + 'daint:login': { + 'real_time': (0.1, None, 0.1, 's') + }, + 'dom:login': { + 'real_time': (0.1, None, 0.1, 's') + } + } + # TODO: system is not always relevant +# self.reference = { +# '/project/csstaff/bignamic': { +# 'size': (1000, None, 0.1, 'MB'), +# 'real_time': (5.0, None, 0.1, 's') +# }, +# '/users/bignamic': { +# 'size': (900, None, 0.1, 'MB'), +# 'real_time': (5.0, None, 0.1, 's') +# }, +# '/scratch/snx3*/bignamic': { +# 
'size': (900, None, 0.1, 'MB'), +# 'real_time': (5.0, None, 0.1, 's') +# } +# } + self.executable = 'time ' + variant + if variant == 'sacct': + self.executable_opts = ['-a'] + elif variant == 'scontrol': + self.executable_opts = ['show partitions'] + + self.postrun_cmds = ['echo $?'] + self.tags = {'ops', 'diagnostic'} + self.maintainers = ['CB'] From edda8838b07795f49b7e28a3b93458bce7afd27f Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Thu, 18 Feb 2021 16:33:18 +0100 Subject: [PATCH 2/8] Missing input variant parameter added, performance numbers updated --- cscs-checks/system/slurm/slurm_check.py | 42 ++++++++++--------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index 95baa732e1..0da6e1c8a6 100644 --- a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -8,16 +8,16 @@ import reframe as rfm import reframe.utility.sanity as sn -# TODO: sinfo, do we want to check if the normal, long, debug, etc... partitions are present? + +# TODO: sinfo, check if the normal, long, debug, etc... partitions are present? # TODO: scontrol, do we want to scontrol something specific? 
@rfm.parameterized_test(['squeue'], ['sacct'], ['sinfo'], ['scontrol']) class SlurmCheck(rfm.RunOnlyRegressionTest): - def __init__(self): - self.descr = 'File system slurm test base' - # TODO: test from cn as well + def __init__(self, variant): + self.descr = 'Slurm command test' self.valid_systems = ['daint:login', 'dom:login'] self.valid_prog_environs = ['builtin'] self.num_tasks = 1 @@ -28,34 +28,26 @@ def __init__(self): self.stderr, 'real_time', float) } self.reference = { - 'daint:login': { - 'real_time': (0.1, None, 0.1, 's') + 'squeue': { + 'real_time': (0.02, None, 0.1, 's') + }, + 'sacct': { + 'real_time': (0.7, None, 0.1, 's') + }, + 'sinfo': { + 'real_time': (0.02, None, 0.1, 's') }, - 'dom:login': { - 'real_time': (0.1, None, 0.1, 's') + 'scontrol': { + 'real_time': (0.01, None, 0.1, 's') } } - # TODO: system is not always relevant -# self.reference = { -# '/project/csstaff/bignamic': { -# 'size': (1000, None, 0.1, 'MB'), -# 'real_time': (5.0, None, 0.1, 's') -# }, -# '/users/bignamic': { -# 'size': (900, None, 0.1, 'MB'), -# 'real_time': (5.0, None, 0.1, 's') -# }, -# '/scratch/snx3*/bignamic': { -# 'size': (900, None, 0.1, 'MB'), -# 'real_time': (5.0, None, 0.1, 's') -# } -# } + self.executable = 'time ' + variant if variant == 'sacct': self.executable_opts = ['-a'] elif variant == 'scontrol': self.executable_opts = ['show partitions'] - + self.postrun_cmds = ['echo $?'] - self.tags = {'ops', 'diagnostic'} + self.tags = {'ops', 'diagnostic', 'health'} self.maintainers = ['CB'] From 0b6965d389c0222c9f4a7ed2f3df2dd01b1d4483 Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Fri, 12 Mar 2021 16:38:18 +0100 Subject: [PATCH 3/8] parameterized_test removed and parameter builtin is used instead. 
Executable options and reference performances updated --- cscs-checks/system/slurm/slurm_check.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index 0da6e1c8a6..e8a0df25b7 100644 --- a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -11,12 +11,11 @@ # TODO: sinfo, check if the normal, long, debug, etc... partitions are present? # TODO: scontrol, do we want to scontrol something specific? -@rfm.parameterized_test(['squeue'], - ['sacct'], - ['sinfo'], - ['scontrol']) +@rfm.simple_test class SlurmCheck(rfm.RunOnlyRegressionTest): - def __init__(self, variant): + slurm_command = parameter(['squeue', 'sacct', 'sinfo', 'scontrol']) + + def __init__(self): self.descr = 'Slurm command test' self.valid_systems = ['daint:login', 'dom:login'] self.valid_prog_environs = ['builtin'] @@ -32,7 +31,7 @@ def __init__(self, variant): 'real_time': (0.02, None, 0.1, 's') }, 'sacct': { - 'real_time': (0.7, None, 0.1, 's') + 'real_time': (0.1, None, 0.1, 's') }, 'sinfo': { 'real_time': (0.02, None, 0.1, 's') @@ -42,10 +41,8 @@ def __init__(self, variant): } } - self.executable = 'time ' + variant - if variant == 'sacct': - self.executable_opts = ['-a'] - elif variant == 'scontrol': + self.executable = 'time ' + self.slurm_command + if self.slurm_command == 'scontrol': self.executable_opts = ['show partitions'] self.postrun_cmds = ['echo $?'] From f41442c60248866ea8c123b186e1b7559f65a906 Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Mon, 15 Mar 2021 10:51:46 +0100 Subject: [PATCH 4/8] Code cleaning, copyright updated and second mantainer added --- cscs-checks/system/slurm/slurm_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index e8a0df25b7..8b11858dc8 100644 --- 
a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -1,4 +1,4 @@ -# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause @@ -9,7 +9,6 @@ import reframe.utility.sanity as sn -# TODO: sinfo, check if the normal, long, debug, etc... partitions are present? # TODO: scontrol, do we want to scontrol something specific? @rfm.simple_test class SlurmCheck(rfm.RunOnlyRegressionTest): @@ -47,4 +46,4 @@ def __init__(self): self.postrun_cmds = ['echo $?'] self.tags = {'ops', 'diagnostic', 'health'} - self.maintainers = ['CB'] + self.maintainers = ['CB', 'VH'] From 1bc780dd101b44b889442d36a8b1031663d5af8c Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Tue, 16 Mar 2021 17:05:14 +0100 Subject: [PATCH 5/8] Sanity check and performance pattern updated --- cscs-checks/system/slurm/slurm_check.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index 8b11858dc8..2411ecac08 100644 --- a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -20,9 +20,8 @@ def __init__(self): self.valid_prog_environs = ['builtin'] self.num_tasks = 1 self.num_tasks_per_node = 1 - self.sanity_patterns = sn.assert_found(r'0', self.stdout) self.perf_patterns = { - 'real_time': sn.extractsingle(r'\nreal.+m(?P<real_time>\S+)s', + 'real_time': sn.extractsingle(r'real (?P<real_time>\S+)', self.stderr, 'real_time', float) } self.reference = { @@ -40,10 +39,13 @@ def __init__(self): } } - self.executable = 'time ' + self.slurm_command + self.executable = 'time -p ' + self.slurm_command if self.slurm_command == 'scontrol': self.executable_opts = ['show partitions'] - self.postrun_cmds = ['echo $?'] self.tags = {'ops', 'diagnostic', 
'health'} self.maintainers = ['CB', 'VH'] + + @rfm.run_before('sanity') + def set_sanity(self): + self.sanity_patterns = sn.assert_eq(self.job.exitcode, 0) From 9880270cbd58fe6abd3ef1178887feb9ec4b76ca Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Mon, 22 Mar 2021 18:35:13 +0100 Subject: [PATCH 6/8] sinfo and scontrol checks removed --- cscs-checks/system/slurm/slurm_check.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index 2411ecac08..2fbeec23fa 100644 --- a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -9,10 +9,9 @@ import reframe.utility.sanity as sn -# TODO: scontrol, do we want to scontrol something specific? @rfm.simple_test class SlurmCheck(rfm.RunOnlyRegressionTest): - slurm_command = parameter(['squeue', 'sacct', 'sinfo', 'scontrol']) + slurm_command = parameter(['squeue', 'sacct']) def __init__(self): self.descr = 'Slurm command test' @@ -30,18 +29,10 @@ def __init__(self): }, 'sacct': { 'real_time': (0.1, None, 0.1, 's') - }, - 'sinfo': { - 'real_time': (0.02, None, 0.1, 's') - }, - 'scontrol': { - 'real_time': (0.01, None, 0.1, 's') } } self.executable = 'time -p ' + self.slurm_command - if self.slurm_command == 'scontrol': - self.executable_opts = ['show partitions'] self.tags = {'ops', 'diagnostic', 'health'} self.maintainers = ['CB', 'VH'] From fc70eafbd3aebf5b919d119c7f655141df5edfdd Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Mon, 22 Mar 2021 18:55:51 +0100 Subject: [PATCH 7/8] useless import os removed --- cscs-checks/system/slurm/slurm_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py index 2fbeec23fa..cd302e3191 100644 --- a/cscs-checks/system/slurm/slurm_check.py +++ b/cscs-checks/system/slurm/slurm_check.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: BSD-3-Clause 
-import os import reframe as rfm import reframe.utility.sanity as sn From 14cb915d4ec4e921eb691aa5453a5428eff9340a Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 9 Apr 2021 17:44:08 +0200 Subject: [PATCH 8/8] Update test syntax and move tests into slurm.py --- cscs-checks/system/slurm/slurm.py | 33 ++++++++++++++++++++ cscs-checks/system/slurm/slurm_check.py | 41 ------------------------- 2 files changed, 33 insertions(+), 41 deletions(-) delete mode 100644 cscs-checks/system/slurm/slurm_check.py diff --git a/cscs-checks/system/slurm/slurm.py b/cscs-checks/system/slurm/slurm.py index 41c2fbe649..ac42ca4792 100644 --- a/cscs-checks/system/slurm/slurm.py +++ b/cscs-checks/system/slurm/slurm.py @@ -257,3 +257,36 @@ def reference_meminfo(self): 'pilatus:mc': 250, } return reference_meminfo[self.current_partition.fullname] + + +@rfm.simple_test +class slurm_response_check(rfm.RunOnlyRegressionTest): + command = parameter(['squeue', 'sacct']) + descr = 'Slurm command test' + valid_systems = ['daint:login', 'dom:login'] + valid_prog_environs = ['builtin'] + num_tasks = 1 + num_tasks_per_node = 1 + reference = { + 'squeue': { + 'real_time': (0.02, None, 0.1, 's') + }, + 'sacct': { + 'real_time': (0.1, None, 0.1, 's') + } + } + executable = 'time -p' + tags = {'diagnostic', 'health'} + maintainers = ['CB', 'VH'] + + @rfm.run_before('run') + def set_exec_opts(self): + self.executable_opts = [self.command] + + @rfm.run_before('sanity') + def set_sanity(self): + self.sanity_patterns = sn.assert_eq(self.job.exitcode, 0) + self.perf_patterns = { + 'real_time': sn.extractsingle(r'real (?P<real_time>\S+)', + self.stderr, 'real_time', float) + } diff --git a/cscs-checks/system/slurm/slurm_check.py b/cscs-checks/system/slurm/slurm_check.py deleted file mode 100644 index cd302e3191..0000000000 --- a/cscs-checks/system/slurm/slurm_check.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich) -# ReFrame Project Developers. 
See the top-level LICENSE file for details. -# -# SPDX-License-Identifier: BSD-3-Clause - - -import reframe as rfm -import reframe.utility.sanity as sn - - -@rfm.simple_test -class SlurmCheck(rfm.RunOnlyRegressionTest): - slurm_command = parameter(['squeue', 'sacct']) - - def __init__(self): - self.descr = 'Slurm command test' - self.valid_systems = ['daint:login', 'dom:login'] - self.valid_prog_environs = ['builtin'] - self.num_tasks = 1 - self.num_tasks_per_node = 1 - self.perf_patterns = { - 'real_time': sn.extractsingle(r'real (?P<real_time>\S+)', - self.stderr, 'real_time', float) - } - self.reference = { - 'squeue': { - 'real_time': (0.02, None, 0.1, 's') - }, - 'sacct': { - 'real_time': (0.1, None, 0.1, 's') - } - } - - self.executable = 'time -p ' + self.slurm_command - - self.tags = {'ops', 'diagnostic', 'health'} - self.maintainers = ['CB', 'VH'] - - @rfm.run_before('sanity') - def set_sanity(self): - self.sanity_patterns = sn.assert_eq(self.job.exitcode, 0)