From 5da55b94015f0372c180c6ae9f7d38a83017ec82 Mon Sep 17 00:00:00 2001 From: Sergei Kliavinek Date: Thu, 9 Sep 2021 15:57:42 +0300 Subject: [PATCH 1/3] add redesigned lammps tests --- cscs-checks/apps/lammps/lammps_check.py | 242 ++++++++++++++---------- hpctestlib/apps/lammps/nve.py | 113 +++++++++++ 2 files changed, 258 insertions(+), 97 deletions(-) create mode 100644 hpctestlib/apps/lammps/nve.py diff --git a/cscs-checks/apps/lammps/lammps_check.py b/cscs-checks/apps/lammps/lammps_check.py index 9a8fcee39e..253751d59c 100644 --- a/cscs-checks/apps/lammps/lammps_check.py +++ b/cscs-checks/apps/lammps/lammps_check.py @@ -3,59 +3,143 @@ # # SPDX-License-Identifier: BSD-3-Clause -import os - import reframe as rfm import reframe.utility.sanity as sn +from hpctestlib.apps.lammps.nve import LAMMPS_NVE + +dom_gpu_small = { + 'maint': (3457, -0.10, None, 'timesteps/s'), + 'prod': (3132, -0.05, None, 'timesteps/s'), +} + +daint_gpu_small = { + 'maint': (2524, -0.10, None, 'timesteps/s'), + 'prod': (2400, -0.40, None, 'timesteps/s'), +} + +REFERENCE_GPU_PERFORMANCE_SMALL = { + 'dom:gpu': dom_gpu_small, + 'daint:gpu': daint_gpu_small, +} + + +daint_gpu_large = { + 'maint': (3832, -0.05, None, 'timesteps/s'), + 'prod': (3260, -0.50, None, 'timesteps/s'), +} + +REFERENCE_GPU_PERFORMANCE_LARGE = { + 'daint:gpu': daint_gpu_large, +} + +dom_cpu_small = { + 'prod': (4394, -0.05, None, 'timesteps/s'), +} + +daint_cpu_small = { + 'prod': (3824, -0.10, None, 'timesteps/s'), +} + +eiger_cpu_small = { + 'prod': (4500, -0.10, None, 'timesteps/s'), +} + +pilatus_cpu_small = { + 'prod': (5000, -0.10, None, 'timesteps/s'), +} + +REFERENCE_CPU_PERFORMANCE_SMALL = { + 'dom:mc': dom_cpu_small, + 'daint:mc': daint_cpu_small, + 'eiger:mc': eiger_cpu_small, + 'pilatus:mc': pilatus_cpu_small + +} + +daint_cpu_large = { + 'prod': (5310, -0.65, None, 'timesteps/s'), +} + +eiger_cpu_large = { + 'prod': (6500, -0.10, None, 'timesteps/s'), +} + +pilatus_cpu_large = { + 'prod': (7500, -0.10, None, 
'timesteps/s'), +} + +REFERENCE_CPU_PERFORMANCE_LARGE = { + 'daint:mc': daint_cpu_large, + 'eiger:mc': eiger_cpu_large, + 'pilatus:mc': pilatus_cpu_large, + +} + +REFERENCE_PERFORMANCE = { + 'gpu': { + 'small': REFERENCE_GPU_PERFORMANCE_SMALL, + 'large': REFERENCE_GPU_PERFORMANCE_LARGE, + }, + 'cpu': { + 'small': REFERENCE_CPU_PERFORMANCE_SMALL, + 'large': REFERENCE_CPU_PERFORMANCE_LARGE, + }, +} + +def inherit_cpu_only(params): + return tuple(filter(lambda p: p[0] == 'cpu', params)) + +def inherit_gpu_only(params): + return tuple(filter(lambda p: p[0] == 'gpu', params)) -class LAMMPSBaseCheck(rfm.RunOnlyRegressionTest): - def __init__(self): + +class LAMMPSCheckCSCS(LAMMPS_NVE): + scale = parameter(['small', 'large']) + modules = ['LAMMPS'] + strict_check = False + extra_resources = { + 'switches': { + 'num_switches': 1 + } + } + + tags = {'scs', 'external-resources'} + maintainers = ['TR', 'VH'] + + @run_after('init') + def env_define(self): if self.current_system.name in ['eiger', 'pilatus']: self.valid_prog_environs = ['cpeGNU'] else: self.valid_prog_environs = ['builtin'] - self.modules = ['LAMMPS'] - - # Reset sources dir relative to the SCS apps prefix - self.sourcesdir = os.path.join(self.current_system.resourcesdir, - 'LAMMPS') - energy_reference = -4.6195 - energy = sn.extractsingle( - r'\s+500000(\s+\S+){3}\s+(?P<energy>\S+)\s+\S+\s\n', - self.stdout, 'energy', float) - self.perf_patterns = { - 'perf': sn.extractsingle(r'\s+(?P<perf>\S+) timesteps/s', - self.stdout, 'perf', float), - } - energy_diff = sn.abs(energy-energy_reference) - self.sanity_patterns = sn.all([ - sn.assert_found(r'Total wall time:', self.stdout), - sn.assert_lt(energy_diff, 6e-4) - ]) - self.strict_check = False - self.extra_resources = { - 'switches': { - 'num_switches': 1 - } - } - self.tags = {'scs', 'external-resources'} - self.maintainers = ['TR', 'VH'] + @run_after('init') + def set_tags(self): + self.tags |= {'maintenance' if self.mode == 'maint' + else 'production'} + + 
@run_before('performance') + def set_reference(self): + self.reference = REFERENCE_PERFORMANCE[self.platform_name][self.scale] + +@rfm.simple_test +class lammps_gpu_check(LAMMPSCheckCSCS): + platform = parameter(inherit_params=True, + filter_params=inherit_gpu_only) + mode = parameter(['prod', 'maint']) + valid_systems = ['daint:gpu'] + num_gpus_per_node = 1 -@rfm.parameterized_test(*([s, v] - for s in ['small', 'large'] - for v in ['prod', 'maint'])) -class LAMMPSGPUCheck(LAMMPSBaseCheck): - def __init__(self, scale, variant): - super().__init__() - self.valid_systems = ['daint:gpu'] + @run_before('run') + def set_executable_opts(self): self.executable = 'lmp_mpi' - self.executable_opts = ['-sf gpu', '-pk gpu 1', '-in in.lj.gpu'] + self.executable_opts = ['-sf gpu', + '-pk gpu 1', + '-in', self.input_file] self.variables = {'CRAY_CUDA_MPS': '1'} - self.num_gpus_per_node = 1 - if scale == 'small': + if self.scale == 'small': self.valid_systems += ['dom:gpu'] self.num_tasks = 12 self.num_tasks_per_node = 2 @@ -63,46 +147,17 @@ def __init__(self, scale, variant): self.num_tasks = 32 self.num_tasks_per_node = 2 - references = { - 'maint': { - 'small': { - 'dom:gpu': {'perf': (3457, -0.10, None, 'timesteps/s')}, - 'daint:gpu': {'perf': (2524, -0.10, None, 'timesteps/s')} - }, - 'large': { - 'daint:gpu': {'perf': (3832, -0.05, None, 'timesteps/s')} - } - }, - 'prod': { - 'small': { - 'dom:gpu': {'perf': (3132, -0.05, None, 'timesteps/s')}, - 'daint:gpu': {'perf': (2400, -0.40, None, 'timesteps/s')} - }, - 'large': { - 'daint:gpu': {'perf': (3260, -0.50, None, 'timesteps/s')} - } - }, - } - self.reference = references[variant][scale] - self.tags |= {'maintenance' if variant == 'maint' else 'production'} - -@rfm.parameterized_test(*([s, v] - for s in ['small', 'large'] - for v in ['prod'])) -class LAMMPSCPUCheck(LAMMPSBaseCheck): - def __init__(self, scale, variant): - super().__init__() - self.valid_systems = ['daint:mc', 'eiger:mc', 'pilatus:mc'] - if 
self.current_system.name in ['eiger', 'pilatus']: - self.executable = 'lmp_mpi' - self.executable_opts = ['-in in.lj.cpu'] - else: - self.executable = 'lmp_omp' - self.executable_opts = ['-sf omp', '-pk omp 1', '-in in.lj.cpu'] +@rfm.simple_test +class lammps_cpu_check(LAMMPSCheckCSCS): + platform = parameter(inherit_params=True, + filter_params=inherit_cpu_only) + mode = parameter(['prod']) + valid_systems = ['daint:mc', 'eiger:mc', 'pilatus:mc'] - self.scale = scale - if scale == 'small': + @run_after('init') + def set_num_tasks(self): + if self.scale == 'small': self.valid_systems += ['dom:mc'] self.num_tasks = 216 self.num_tasks_per_node = 36 @@ -112,22 +167,15 @@ def __init__(self, scale, variant): if self.current_system.name == 'eiger': self.num_tasks_per_node = 128 - self.num_tasks = 256 if self.scale == 'small' else 512 - - references = { - 'prod': { - 'small': { - 'dom:mc': {'perf': (4394, -0.05, None, 'timesteps/s')}, - 'daint:mc': {'perf': (3824, -0.10, None, 'timesteps/s')}, - 'eiger:mc': {'perf': (4500, -0.10, None, 'timesteps/s')}, - 'pilatus:mc': {'perf': (5000, -0.10, None, 'timesteps/s')} - }, - 'large': { - 'daint:mc': {'perf': (5310, -0.65, None, 'timesteps/s')}, - 'eiger:mc': {'perf': (6500, -0.10, None, 'timesteps/s')}, - 'pilatus:mc': {'perf': (7500, -0.10, None, 'timesteps/s')} - }, - } - self.reference = references[variant][scale] - self.tags |= {'maintenance' if variant == 'maint' else 'production'} + self.num_tasks = 256 if self.scale == 'small' else 512 + + @run_before('run') + def set_hierarchical_prgenvs(self): + if self.current_system.name in ['eiger', 'pilatus']: + self.executable = 'lmp_mpi' + self.executable_opts = ['-in', self.input_file] + else: + self.executable = 'lmp_omp' + self.executable_opts = ['-sf omp', + '-pk omp 1', + '-in', self.input_file] diff --git a/hpctestlib/apps/lammps/nve.py b/hpctestlib/apps/lammps/nve.py new file mode 100644 index 0000000000..3a99accf1b --- /dev/null +++ b/hpctestlib/apps/lammps/nve.py 
@@ -0,0 +1,113 @@ +# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: BSD-3-Clause + +import os + +import reframe as rfm +import reframe.utility.sanity as sn +import reframe.utility.typecheck as typ + + +class LAMMPS_NVE(rfm.RunOnlyRegressionTest, pin_prefix=True): + '''Base class for the LAMMPS NVE Test. + + LAMMPS is a classical molecular dynamics code with a focus + on materials modeling. It's an acronym for Large-scale + Atomic/Molecular Massively Parallel Simulator. + + LAMMPS has potentials for solid-state materials (metals, + semiconductors) and soft matter (biomolecules, polymers) + and coarse-grained or mesoscopic systems. It can be used + to model atoms or, more generically, as a parallel particle + simulator at the atomic, meso, or continuum scale. + (see lammps.org) + + The presented abstract run-only class checks the LAMMPS performance. + To do this, it is necessary to define in tests the name + of the running script (input file), as well as set the + reference values of energy and possible deviations from this + value. This data is used to check if the task is being + executed correctly, that is, the final energy is correct + (approximately the reference). The default assumption is that + LAMMPS is already installed on the device under test. + ''' + + #: Name of executed script + #: + #: :default: :class:`required` + input_file = variable(str) + + #: Reference value of energy, that is used for the comparison + #: with the execution output on the sanity step. The absolute + #: difference between final energy value and reference value + #: should be smaller than energy_tolerance + #: + #: :type: float + #: :default: :class:`required` + energy_value = variable(float) + + #: Maximum deviation from the reference value of energy, + #: that is acceptable. 
+ #: + #: :type: float + #: :default: :class:`required` + energy_tolerance = variable(float) + + #: :default: :class:`required` + num_tasks_per_node = required + + #: Parameter pack containing the platform ID and input file + platform = parameter([ + ('cpu', 'in.lj.cpu'), + ('gpu', 'in.lj.gpu') + ]) + + energy_value = -4.6195 + energy_tolerance = 6.0E-04 + + @run_after('init') + def unpack_platform_parameter(self): + '''Set the executable and input file.''' + + self.platform_name, self.input_file = self.platform + + @run_after('init') + def source_install(self): + # Reset sources dir relative to the SCS apps prefix + self.sourcesdir = os.path.join(self.current_system.resourcesdir, + 'LAMMPS') + + @performance_function('timesteps/s', perf_key='nve') + def set_perf_patterns(self): + return sn.extractsingle(r'\s+(?P<perf>\S+) timesteps/s', + self.stdout, 'perf', float) + + @run_before('performance') + def set_the_performance_dict(self): + self.perf_variables = {self.mode: + sn.make_performance_function( + sn.extractsingle( + r'\s+(?P<perf>\S+) timesteps/s', + self.stdout, 'perf', float), + 'timesteps/s')} + + @sanity_function + def set_sanity_patterns(self): + '''Standard sanity check for the LAMMPS. Compare the + reference value of energy with obtained from the executed + program. 
+ ''' + + energy = sn.extractsingle( + r'\s+500000(\s+\S+){3}\s+(?P<energy>\S+)\s+\S+\s\n', + self.stdout, 'energy', float) + energy_diff = sn.abs(energy - self.energy_value) + ref_ener_diff = sn.abs(self.energy_value * + self.energy_tolerance) + + return sn.all([ + sn.assert_found(r'Total wall time:', self.stdout), + sn.assert_lt(energy_diff, ref_ener_diff) + ]) From 2876c2efad8662d8b318e882b8f7cfa563fe3e21 Mon Sep 17 00:00:00 2001 From: Sergei Kliavinek Date: Thu, 9 Sep 2021 16:00:41 +0300 Subject: [PATCH 2/3] fix blank lines --- cscs-checks/apps/lammps/lammps_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cscs-checks/apps/lammps/lammps_check.py b/cscs-checks/apps/lammps/lammps_check.py index 253751d59c..89e6ec3e39 100644 --- a/cscs-checks/apps/lammps/lammps_check.py +++ b/cscs-checks/apps/lammps/lammps_check.py @@ -86,6 +86,7 @@ }, } + def inherit_cpu_only(params): return tuple(filter(lambda p: p[0] == 'cpu', params)) From 9c78fef9fb020cd349b1f3fbc953ed274cf74ef7 Mon Sep 17 00:00:00 2001 From: Sergei Kliavinek Date: Thu, 9 Sep 2021 19:07:37 +0300 Subject: [PATCH 3/3] delete unnecessary imports --- cscs-checks/apps/lammps/lammps_check.py | 1 - hpctestlib/apps/lammps/nve.py | 1 - 2 files changed, 2 deletions(-) diff --git a/cscs-checks/apps/lammps/lammps_check.py b/cscs-checks/apps/lammps/lammps_check.py index 89e6ec3e39..b0a6fa6a88 100644 --- a/cscs-checks/apps/lammps/lammps_check.py +++ b/cscs-checks/apps/lammps/lammps_check.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD-3-Clause import reframe as rfm -import reframe.utility.sanity as sn from hpctestlib.apps.lammps.nve import LAMMPS_NVE dom_gpu_small = { diff --git a/hpctestlib/apps/lammps/nve.py b/hpctestlib/apps/lammps/nve.py index 3a99accf1b..078e235eed 100644 --- a/hpctestlib/apps/lammps/nve.py +++ b/hpctestlib/apps/lammps/nve.py @@ -7,7 +7,6 @@ import reframe as rfm import reframe.utility.sanity as sn -import reframe.utility.typecheck as typ class LAMMPS_NVE(rfm.RunOnlyRegressionTest, 
pin_prefix=True):