From 8b1ff976498ba6e4ee6e129cd93d16a5f6b14178 Mon Sep 17 00:00:00 2001 From: Sergei Kliavinek Date: Thu, 9 Sep 2021 16:06:23 +0300 Subject: [PATCH 1/2] add redesigned cp2k tests --- cscs-checks/apps/cp2k/cp2k_check.py | 311 +++++++++--------- hpctestlib/apps/cp2k/nve.py | 91 +++++ .../apps/cp2k/src/GTH_BASIS_SETS | 0 .../apps/cp2k/src/H2O-256.inp | 0 .../apps/cp2k/src/POTENTIAL | 0 5 files changed, 249 insertions(+), 153 deletions(-) create mode 100644 hpctestlib/apps/cp2k/nve.py rename {cscs-checks => hpctestlib}/apps/cp2k/src/GTH_BASIS_SETS (100%) rename {cscs-checks => hpctestlib}/apps/cp2k/src/H2O-256.inp (100%) rename {cscs-checks => hpctestlib}/apps/cp2k/src/POTENTIAL (100%) diff --git a/cscs-checks/apps/cp2k/cp2k_check.py b/cscs-checks/apps/cp2k/cp2k_check.py index 89ce306594..101362b6d1 100644 --- a/cscs-checks/apps/cp2k/cp2k_check.py +++ b/cscs-checks/apps/cp2k/cp2k_check.py @@ -5,173 +5,178 @@ import reframe as rfm import reframe.utility.sanity as sn +from hpctestlib.apps.cp2k.nve import Cp2k_NVE + + +REFERENCE_CPU_PERFORMANCE_SMALL = { + 'dom:mc': { + 'maint': (202.2, None, 0.05, 's'), + 'prod': (202.2, None, 0.05, 's') + }, + 'daint:mc': { + 'maint': (180.9, None, 0.08, 's'), + 'prod': (180.9, None, 0.08, 's') + }, + 'eiger:mc': { + 'maint': (70.0, None, 0.08, 's'), + 'prod': (46.0, None, 0.05, 's') + }, + 'pilatus:mc': { + 'maint': (70.0, None, 0.08, 's'), + 'prod': (70.0, None, 0.08, 's') + }, +} + +REFERENCE_CPU_PERFORMANCE_LARGE = { + 'daint:mc': { + 'maint': (141.0, None, 0.05, 's'), + 'prod': (113.0, None, 0.05, 's') + }, + 'eiger:mc': { + 'maint': (46.0, None, 0.05, 's'), + 'prod': (46.0, None, 0.05, 's') + }, + 'pilatus:mc': { + 'maint': (46.0, None, 0.05, 's'), + 'prod': (46.0, None, 0.05, 's') + }, +} + +REFERENCE_CPU_PERFORMANCE = { + 'small': REFERENCE_CPU_PERFORMANCE_SMALL, + 'large': REFERENCE_CPU_PERFORMANCE_LARGE, +} + +REFERENCE_GPU_PERFORMANCE_SMALL = { + 'dom:mc': { + 'maint': (251.8, None, 0.15, 's'), + 'prod': (240.0, None, 0.05, 
's') + }, + 'daint:mc': { + 'maint': (241.3, None, 0.05, 's'), + 'prod': (241.3, None, 0.05, 's') + } +} + +REFERENCE_GPU_PERFORMANCE_LARGE = { + 'daint:mc': { + 'maint': (199.6, None, 0.06, 's'), + 'prod': (199.6, None, 0.06, 's') + } +} + +REFERENCE_GPU_PERFORMANCE = { + 'small': REFERENCE_GPU_PERFORMANCE_SMALL, + 'large': REFERENCE_GPU_PERFORMANCE_LARGE, +} + +REFERENCE_PERFORMANCE = { + 'cpu': REFERENCE_CPU_PERFORMANCE, + 'gpu': REFERENCE_GPU_PERFORMANCE, +} + + +@rfm.simple_test +class cp2k_check(Cp2k_NVE): + modules = ['CP2K'] + maintainers = ['LM'] + tags = {'scs'} + strict_check = False + extra_resources = { + 'switches': { + 'num_switches': 1 + } + } + scale = parameter(['small', 'large']) + mode = parameter(['prod', 'maint']) - -class Cp2kCheck(rfm.RunOnlyRegressionTest): - def __init__(self): + @run_after('init') + def env_define(self): if self.current_system.name in ['eiger', 'pilatus']: self.valid_prog_environs = ['cpeGNU'] else: self.valid_prog_environs = ['builtin'] - self.modules = ['CP2K'] - self.executable = 'cp2k.psmp' - self.executable_opts = ['H2O-256.inp'] - - energy = sn.extractsingle( - r'\s+ENERGY\| Total FORCE_EVAL \( QS \) ' - r'energy [\[\(]a\.u\.[\]\)]:\s+(?P\S+)', - self.stdout, 'energy', float, item=-1 - ) - energy_reference = -4404.2323 - energy_diff = sn.abs(energy-energy_reference) - self.sanity_patterns = sn.all([ - sn.assert_found(r'PROGRAM STOPPED IN', self.stdout), - sn.assert_eq(sn.count(sn.extractall( - r'(?i)(?PSTEP NUMBER)', - self.stdout, 'step_count')), 10), - sn.assert_lt(energy_diff, 1e-4) - ]) - - self.perf_patterns = { - 'time': sn.extractsingle(r'^ CP2K(\s+[\d\.]+){4}\s+(?P\S+)', - self.stdout, 'perf', float) - } - - self.maintainers = ['LM'] - self.tags = {'scs'} - self.strict_check = False - self.extra_resources = { - 'switches': { - 'num_switches': 1 - } - } + @run_after('init') + def set_tags(self): + self.tags |= {'maintenance' if self.mode == 'maint' + else 'production'} - -@rfm.parameterized_test(*([s, v] - 
for s in ['small', 'large'] - for v in ['maint', 'prod'])) -class Cp2kCpuCheck(Cp2kCheck): - def __init__(self, scale, variant): - super().__init__() - self.descr = 'CP2K CPU check (version: %s, %s)' % (scale, variant) - self.valid_systems = ['daint:mc', 'eiger:mc', 'pilatus:mc'] - if scale == 'small': - self.valid_systems += ['dom:mc'] - if self.current_system.name in ['daint', 'dom']: - self.num_tasks = 216 - self.num_tasks_per_node = 36 - elif self.current_system.name in ['eiger', 'pilatus']: + @run_after('init') + def set_valid_systems(self): + if self.platform_name == 'cpu': + self.valid_systems = ['daint:mc', 'eiger:mc', 'pilatus:mc'] + else: + self.valid_systems = ['daint:gpu'] + + @run_after('init') + def set_description(self): + if self.platform_name == 'cpu': + self.descr = (f'CP2K {self.platform_name} check' + f'(version: {self.scale}, {self.mode})') + + @run_after('init') + def set_num_tasks(self): + if self.platform_name == 'cpu': + if self.scale == 'small': + self.valid_systems += ['dom:mc'] + if self.current_system.name in ['daint', 'dom']: + self.num_tasks = 216 + self.num_tasks_per_node = 36 + elif self.current_system.name in ['eiger', 'pilatus']: + self.num_tasks = 96 + self.num_tasks_per_node = 16 + self.num_cpus_per_task = 16 + self.num_tasks_per_core = 1 + self.use_multithreading = False + self.variables = { + 'MPICH_OFI_STARTUP_CONNECT': '1', + 'OMP_NUM_THREADS': '8', + 'OMP_PLACES': 'cores', + 'OMP_PROC_BIND': 'close' + } + + else: + if self.current_system.name in ['daint', 'dom']: + self.num_tasks = 576 + self.num_tasks_per_node = 36 + elif self.current_system.name in ['eiger', 'pilatus']: + self.num_tasks = 256 + self.num_tasks_per_node = 16 + self.num_cpus_per_task = 16 + self.num_tasks_per_core = 1 + self.use_multithreading = False + self.variables = { + 'MPICH_OFI_STARTUP_CONNECT': '1', + 'OMP_NUM_THREADS': '8', + 'OMP_PLACES': 'cores', + 'OMP_PROC_BIND': 'close' + } + else: + self.num_gpus_per_node = 1 + self.num_tasks_per_node = 6 + 
self.num_cpus_per_task = 2 + if self.scale == 'small': + self.valid_systems += ['dom:gpu'] + self.num_tasks = 36 + else: self.num_tasks = 96 - self.num_tasks_per_node = 16 - self.num_cpus_per_task = 16 - self.num_tasks_per_core = 1 - self.use_multithreading = False - self.variables = { - 'MPICH_OFI_STARTUP_CONNECT': '1', - 'OMP_NUM_THREADS': '8', - 'OMP_PLACES': 'cores', - 'OMP_PROC_BIND': 'close' - } - else: - if self.current_system.name in ['daint', 'dom']: - self.num_tasks = 576 - self.num_tasks_per_node = 36 - elif self.current_system.name in ['eiger', 'pilatus']: - self.num_tasks = 256 - self.num_tasks_per_node = 16 - self.num_cpus_per_task = 16 - self.num_tasks_per_core = 1 - self.use_multithreading = False - self.variables = { - 'MPICH_OFI_STARTUP_CONNECT': '1', - 'OMP_NUM_THREADS': '8', - 'OMP_PLACES': 'cores', - 'OMP_PROC_BIND': 'close' - } - - references = { - 'maint': { - 'small': { - 'dom:mc': {'time': (202.2, None, 0.05, 's')}, - 'daint:mc': {'time': (180.9, None, 0.08, 's')}, - 'eiger:mc': {'time': (70.0, None, 0.08, 's')}, - 'pilatus:mc': {'time': (70.0, None, 0.08, 's')} - }, - 'large': { - 'daint:mc': {'time': (141.0, None, 0.05, 's')}, - 'eiger:mc': {'time': (46.0, None, 0.05, 's')}, - 'pilatus:mc': {'time': (46.0, None, 0.05, 's')} - } - }, - 'prod': { - 'small': { - 'dom:mc': {'time': (202.2, None, 0.05, 's')}, - 'daint:mc': {'time': (180.9, None, 0.08, 's')}, - 'eiger:mc': {'time': (70.0, None, 0.08, 's')}, - 'pilatus:mc': {'time': (70.0, None, 0.08, 's')} - }, - 'large': { - 'daint:mc': {'time': (113.0, None, 0.05, 's')}, - 'eiger:mc': {'time': (46.0, None, 0.05, 's')}, - 'pilatus:mc': {'time': (46.0, None, 0.05, 's')} - } + self.variables = { + 'CRAY_CUDA_MPS': '1', + 'OMP_NUM_THREADS': str(self.num_cpus_per_task) } - } - self.reference = references[variant][scale] - self.tags |= {'maintenance' if variant == 'maint' else 'production'} + @run_after('setup') + def set_reference(self): + self.reference = 
REFERENCE_PERFORMANCE[self.platform_name][self.scale] @run_before('run') def set_task_distribution(self): - self.job.options = ['--distribution=block:block'] + if self.platform_name == 'cpu': + self.job.options = ['--distribution=block:block'] @run_before('run') def set_cpu_binding(self): - self.job.launcher.options = ['--cpu-bind=cores'] - - -@rfm.parameterized_test(*([s, v] - for s in ['small', 'large'] - for v in ['maint', 'prod'])) -class Cp2kGpuCheck(Cp2kCheck): - def __init__(self, scale, variant): - super().__init__() - self.descr = 'CP2K GPU check (version: %s, %s)' % (scale, variant) - self.valid_systems = ['daint:gpu'] - self.num_gpus_per_node = 1 - if scale == 'small': - self.valid_systems += ['dom:gpu'] - self.num_tasks = 36 - else: - self.num_tasks = 96 - - self.num_tasks_per_node = 6 - self.num_cpus_per_task = 2 - self.variables = { - 'CRAY_CUDA_MPS': '1', - 'OMP_NUM_THREADS': str(self.num_cpus_per_task) - } - references = { - 'maint': { - 'small': { - 'dom:gpu': {'time': (251.8, None, 0.15, 's')}, - 'daint:gpu': {'time': (241.3, None, 0.05, 's')} - }, - 'large': { - 'daint:gpu': {'time': (199.6, None, 0.06, 's')} - } - }, - 'prod': { - 'small': { - 'dom:gpu': {'time': (240.0, None, 0.05, 's')}, - 'daint:gpu': {'time': (241.3, None, 0.05, 's')} - }, - 'large': { - 'daint:gpu': {'time': (199.6, None, 0.06, 's')} - } - } - } - self.reference = references[variant][scale] - self.tags |= {'maintenance' if variant == 'maint' else 'production'} + if self.platform_name == 'cpu': + self.job.launcher.options = ['--cpu-bind=cores'] diff --git a/hpctestlib/apps/cp2k/nve.py b/hpctestlib/apps/cp2k/nve.py new file mode 100644 index 0000000000..d29fadd49a --- /dev/null +++ b/hpctestlib/apps/cp2k/nve.py @@ -0,0 +1,91 @@ +# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import reframe as rfm +import reframe.utility.sanity as sn +import reframe.utility.typecheck as typ + + +class Cp2k_NVE(rfm.RunOnlyRegressionTest, pin_prefix=True): + '''Base class for the CP2K NVE Test. + + CP2K is a quantum chemistry and solid state physics software + package that can perform atomistic simulations of solid state, + liquid, molecular, periodic, material, crystal, and biological + systems. CP2K provides a general framework for different modeling + methods such as DFT using the mixed Gaussian and plane waves + approaches GPW and GAPW. (see cp2k.org). + + The presented abstract run-only class checks the performance of cp2k. + To do this, it is necessary to define in tests the reference + values of energy and possible deviations from this value. + This data is used to check if the task is being executed + correctly, that is, the final energy is correct + (approximately the reference). The default assumption + is that CP2K is already installed on the device under test. + ''' + + #: Parameter pack containing the platform ID + platform_name = parameter(['cpu', 'gpu']) + + #: Reference value of energy, that is used for the comparison + #: with the execution output on the sanity step. The absolute + #: difference between final energy value and reference value + #: should be smaller than energy_tolerance + #: + #: :type: float + #: :default: :class:`required` + energy_value = -4404.2323 + + #: Maximum deviation from the reference value of energy, + #: that is acceptable.
+ #: + #: :type: float + #: :default: :class:`required` + energy_tolerance = variable(float) + + #: :default: :class:`required` + num_tasks_per_node = required + + #: :default: :class:`required` + executable = required + + energy_tolerance = 1E-04 + executable = 'cp2k.psmp' + executable_opts = ['H2O-256.inp'] + + @performance_function('s', perf_key='time') + def set_perf_patterns(self): + return sn.extractsingle(r'^ CP2K(\s+[\d\.]+){4}\s+(?P\S+)', + self.stdout, 'perf', float) + + @run_before('performance') + def set_the_performance_dict(self): + self.perf_variables = {self.mode: + sn.make_performance_function( + sn.extractsingle( + r'^ CP2K(\s+[\d\.]+){4}\s+(?P' + r'\S+)', self.stdout, 'perf', + float), 's')} + + @sanity_function + def set_sanity_patterns(self): + '''Assert the obtained energy meets the specified tolerances.''' + + energy = sn.extractsingle( + r'\s+ENERGY\| Total FORCE_EVAL \( QS \) ' + r'energy [\[\(]a\.u\.[\]\)]:\s+(?P\S+)', + self.stdout, 'energy', float, item=-1 + ) + energy_diff = sn.abs(energy - self.energy_value) + ref_ener_diff = sn.abs(self.energy_tolerance) + + return sn.all([ + sn.assert_found(r'PROGRAM STOPPED IN', self.stdout), + sn.assert_eq(sn.count(sn.extractall( + r'(?i)(?PSTEP NUMBER)', + self.stdout, 'step_count')), 10), + sn.assert_lt(energy_diff, ref_ener_diff) + ]) diff --git a/cscs-checks/apps/cp2k/src/GTH_BASIS_SETS b/hpctestlib/apps/cp2k/src/GTH_BASIS_SETS similarity index 100% rename from cscs-checks/apps/cp2k/src/GTH_BASIS_SETS rename to hpctestlib/apps/cp2k/src/GTH_BASIS_SETS diff --git a/cscs-checks/apps/cp2k/src/H2O-256.inp b/hpctestlib/apps/cp2k/src/H2O-256.inp similarity index 100% rename from cscs-checks/apps/cp2k/src/H2O-256.inp rename to hpctestlib/apps/cp2k/src/H2O-256.inp diff --git a/cscs-checks/apps/cp2k/src/POTENTIAL b/hpctestlib/apps/cp2k/src/POTENTIAL similarity index 100% rename from cscs-checks/apps/cp2k/src/POTENTIAL rename to hpctestlib/apps/cp2k/src/POTENTIAL From 
ed4ac7fedb59825e827e3775d4046d6bd655a39b Mon Sep 17 00:00:00 2001 From: Sergei Kliavinek Date: Thu, 9 Sep 2021 19:10:35 +0300 Subject: [PATCH 2/2] delete unnecessary imports --- cscs-checks/apps/cp2k/cp2k_check.py | 1 - hpctestlib/apps/cp2k/nve.py | 1 - 2 files changed, 2 deletions(-) diff --git a/cscs-checks/apps/cp2k/cp2k_check.py b/cscs-checks/apps/cp2k/cp2k_check.py index 101362b6d1..9b3a3cf66a 100644 --- a/cscs-checks/apps/cp2k/cp2k_check.py +++ b/cscs-checks/apps/cp2k/cp2k_check.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD-3-Clause import reframe as rfm -import reframe.utility.sanity as sn from hpctestlib.apps.cp2k.nve import Cp2k_NVE diff --git a/hpctestlib/apps/cp2k/nve.py b/hpctestlib/apps/cp2k/nve.py index d29fadd49a..fa6d900470 100644 --- a/hpctestlib/apps/cp2k/nve.py +++ b/hpctestlib/apps/cp2k/nve.py @@ -5,7 +5,6 @@ import reframe as rfm import reframe.utility.sanity as sn -import reframe.utility.typecheck as typ class Cp2k_NVE(rfm.RunOnlyRegressionTest, pin_prefix=True):