Patch notes (free-form preamble; patch tools are expected to skip everything
before the first "diff --git" header).

Purpose
    Split the CSCS Amber check into a generic, site-independent library test
    (hpctestlib/apps/amber/nve.py, class amber_nve_check) and a thin
    site-specific subclass (cscs-checks/apps/amber/amber_check.py, class
    cscs_amber_check), and list the new hpctestlib page in docs/index.rst.

    * amber_nve_check is parametrized over benchmark_info (name, reference
      energy, tolerance) and variant ('mpi' or 'cuda'); it sets the input
      file and executable per variant, downloads the benchmark tarball,
      extracts ns/day as the performance metric and checks the energy
      against the reference.
    * cscs_amber_check adds the num_nodes parameter, restricts valid systems
      per (variant, num_nodes), derives num_tasks from the partition's
      processor information and selects the performance reference from the
      allref table keyed by node count and architecture.

Reconstruction notes
    * This patch was recovered from a copy in which all line breaks and
      indentation had been collapsed. Line structure and indentation were
      rebuilt following the code's syntax; blank context lines were
      re-inserted so that every hunk matches its header counts
      (197/122, 3/4 and 0/153).
    * Whitespace on removed/context lines is a best-effort reconstruction.
      Run "git apply --check" first; if it reports whitespace mismatches,
      retry with "--ignore-whitespace" or regenerate the patch from the tree.
    * The "index" hashes are kept for reference only; they describe the
      pre-repair content.

Repairs relative to the collapsed copy
    * Regex named groups restored: (?P<perf>...) and (?P<energy>...). The
      collapsed copy had "(?P\S+)", which is not a valid pattern and does not
      define the group names requested from sn.extractsingle.
    * Docstring link target restored for the Amber project page
      (TODO confirm the URL against upstream).
    * scope_systems: the 8-node MPI run now targets 'eiger:mc'. The removed
      code ran the "large" 8-node case on eiger, and the 8-node zen2
      references are the former eiger "large" values.
    * allref: dropped one surplus closing brace so the literal is balanced
      and the new-file line count equals the 122 announced in the hunk header.
    * nve.py: the doc comment of "variant" now uses the "#:" marker like the
      other documented attributes, and its ":type:" field is well-formed.

diff --git a/cscs-checks/apps/amber/amber_check.py b/cscs-checks/apps/amber/amber_check.py
index 4209c33b09..68e691687c 100644
--- a/cscs-checks/apps/amber/amber_check.py
+++ b/cscs-checks/apps/amber/amber_check.py
@@ -3,197 +3,122 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
+import contextlib
 import reframe as rfm
-import reframe.utility.sanity as sn
+from hpctestlib.apps.amber.nve import amber_nve_check
 
 
-# FIXME: Use tuples as dictionary keys as soon as
-# https://github.com/eth-cscs/reframe/issues/2022 is in
-daint_gpu_performance = {
-    'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'),
-    'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'),
-    'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'),
-    'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day'),
-}
-
-REFERENCE_GPU_PERFORMANCE = {
-    'daint:gpu': daint_gpu_performance,
-    'dom:gpu': daint_gpu_performance
-}
-
-daint_mc_performance_small = {
-    'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'),
-    'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'),
-    'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'),
-    'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day'),
-}
-
-eiger_mc_performance_small = {
-    'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'),
-    'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'),
-    'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'),
-    'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day'),
-}
-
-REFERENCE_CPU_PERFORMANCE_SMALL = {
-    'daint:mc': daint_mc_performance_small,
-    'dom:mc': daint_mc_performance_small,
-    'eiger:mc': eiger_mc_performance_small,
-    'pilatus:mc': eiger_mc_performance_small,
-}
-
-REFERENCE_CPU_PERFORMANCE_LARGE = {
-    'daint:mc': {
-        'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'),
-        'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'),
-        'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'),
-        'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day'),
-    },
-    'eiger:mc': {
-        'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'),
-        'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'),
-        'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'),
-        'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day'),
-    },
-}
-
-
-class AmberBaseCheck(rfm.RunOnlyRegressionTest):
-    valid_prog_environs = ['builtin']
-    strict_check = False
+@rfm.simple_test
+class cscs_amber_check(amber_nve_check):
     modules = ['Amber']
+    valid_prog_environs = ['builtin']
     extra_resources = {
         'switches': {
             'num_switches': 1
         }
     }
+    tags |= {'maintenance', 'production'}
     maintainers = ['VH', 'SO']
-    tags = {'scs', 'external-resources'}
-
-    benchmark = parameter([
-        # NVE simulations
-        'Cellulose_production_NVE',
-        'FactorIX_production_NVE',
-        'JAC_production_NVE_4fs',
-        'JAC_production_NVE',
-    ])
-
-    @run_after('init')
-    def download_files(self):
-        self.prerun_cmds = [
-            # cannot use wget because it is not installed on eiger
-            f'curl -LJO https://github.com/victorusu/amber_benchmark_suite'
-            f'/raw/main/amber_16_benchmark_suite/PME/{self.benchmark}.tar.bz2',
-            f'tar xf {self.benchmark}.tar.bz2'
-        ]
-
-    @run_after('init')
-    def set_energy_and_tolerance_reference(self):
-        self.ener_ref = {
-            # every system has a different reference energy and drift
-            'Cellulose_production_NVE': (-443246, 5.0E-05),
-            'FactorIX_production_NVE': (-234188, 1.0E-04),
-            'JAC_production_NVE_4fs': (-44810, 1.0E-03),
-            'JAC_production_NVE': (-58138, 5.0E-04),
-        }
-
-    @run_after('setup')
-    def set_executable_opts(self):
-        self.executable_opts = ['-O',
-                                '-i', self.input_file,
-                                '-o', self.output_file]
-        self.keep_files = [self.output_file]
-
-    @run_after('setup')
-    def set_sanity_patterns(self):
-        energy = sn.extractsingle(r' Etot\s+=\s+(?P<energy>\S+)',
-                                  self.output_file, 'energy', float, item=-2)
-        energy_reference = self.ener_ref[self.benchmark][0]
-        energy_diff = sn.abs(energy - energy_reference)
-        ref_ener_diff = sn.abs(self.ener_ref[self.benchmark][0] *
-                               self.ener_ref[self.benchmark][1])
-        self.sanity_patterns = sn.all([
-            sn.assert_found(r'Final Performance Info:', self.output_file),
-            sn.assert_lt(energy_diff, ref_ener_diff)
-        ])
-
-    @run_after('setup')
-    def set_generic_perf_references(self):
-        self.reference.update({'*': {
-            self.benchmark: (0, None, None, 'ns/day')
-        }})
-
-    @run_after('setup')
-    def set_perf_patterns(self):
-        self.perf_patterns = {
-            self.benchmark: sn.extractsingle(r'ns/day =\s+(?P<perf>\S+)',
-                                             self.output_file, 'perf',
-                                             float, item=1)
+    num_nodes = parameter([1, 4, 6, 8, 16])
+    allref = {
+        1: {
+            'p100': {
+                'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'),
+                'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'),
+                'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'),
+                'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day')
+            }
+        },
+        4: {
+            'zen2': {
+                'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'),
+                'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day')
+            }
+        },
+        6: {
+            'broadwell': {
+                'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'),
+                'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day')
+            }
+        },
+        8: {
+            'zen2': {
+                'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'),
+                'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'),
+                'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day')
+            }
+        },
+        16: {
+            'broadwell': {
+                'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'),
+                'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'),
+                'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day')
+            }
         }
-
-
-@rfm.simple_test
-class AmberGPUCheck(AmberBaseCheck):
-    num_tasks = 1
-    num_tasks_per_node = 1
-    num_gpus_per_node = 1
-    valid_systems = ['daint:gpu', 'dom:gpu']
-    executable = 'pmemd.cuda.MPI'
-    input_file = 'mdin.GPU'
-    output_file = 'amber.out'
-    descr = f'Amber GPU check'
-    tags = {'maintenance', 'production', 'health'}
-    reference = REFERENCE_GPU_PERFORMANCE
-
-
-@rfm.simple_test
-class AmberCPUCheck(AmberBaseCheck):
-    scale = parameter(['small', 'large'])
-    valid_systems = ['daint:mc', 'eiger:mc']
-    executable = 'pmemd.MPI'
-    input_file = 'mdin.CPU'
-    output_file = 'amber.out'
-    tags = {'maintenance', 'production'}
-
-    @run_after('init')
-    def set_description(self):
-        self.mydescr = f'Amber parallel {self.scale} CPU check'
+    }
 
     @run_after('init')
-    def set_additional_systems(self):
-        if self.scale == 'small':
-            self.valid_systems += ['dom:mc', 'pilatus:mc']
+    def scope_systems(self):
+        valid_systems = {
+            'cuda': {1: ['daint:gpu', 'dom:gpu']},
+            'mpi': {
+                4: ['eiger:mc', 'pilatus:mc'],
+                6: ['daint:mc', 'dom:mc'],
+                8: ['eiger:mc'],
+                16: ['daint:mc']
+            }
+        }
+        try:
+            self.valid_systems = valid_systems[self.variant][self.num_nodes]
+        except KeyError:
+            self.valid_systems = []
 
     @run_after('init')
     def set_hierarchical_prgenvs(self):
         if self.current_system.name in ['eiger', 'pilatus']:
             self.valid_prog_environs = ['cpeIntel']
 
+    @run_after('init')
+    def set_num_gpus_per_node(self):
+        if self.variant == 'cuda':
+            self.num_gpus_per_node = 1
+
     @run_after('setup')
-    def set_perf_reference(self):
-        if self.scale == 'small':
-            self.reference = REFERENCE_CPU_PERFORMANCE_SMALL
+    def skip_if_no_topo(self):
+        proc = self.current_partition.processor
+        pname = self.current_partition.fullname
+        if not proc.info:
+            self.skip(f'no topology information found for partition {pname!r}')
+
+    @run_after('setup')
+    def set_num_tasks(self):
+        if self.variant == 'cuda':
+            self.num_tasks_per_node = 1
         else:
-            self.reference = REFERENCE_CPU_PERFORMANCE_LARGE
+            proc = self.current_partition.processor
+            pname = self.current_partition.fullname
+            self.num_tasks_per_node = proc.num_cores
 
-    @run_after('init')
-    def set_num_tasks_cray_xc(self):
-        if self.current_system.name in ['daint', 'dom']:
-            self.num_tasks_per_node = 36
-            if self.scale == 'small':
-                self.num_nodes = 6
-            else:
-                self.num_nodes = 16
-            self.num_tasks = self.num_nodes * self.num_tasks_per_node
+        self.num_tasks = self.num_nodes * self.num_tasks_per_node
 
-    @run_after('init')
-    def set_num_tasks_cray_shasta(self):
-        if self.current_system.name in ['eiger', 'pilatus']:
-            self.num_tasks_per_node = 128
-            if self.scale == 'small':
-                self.num_nodes = 4
-            else:
-                # there are too many processors, the large jobs cannot start
-                # need to decrease to just 8 nodes
-                self.num_nodes = 8
-            self.num_tasks = self.num_nodes * self.num_tasks_per_node
+    @run_before('performance')
+    def set_perf_reference(self):
+        proc = self.current_partition.processor
+        pname = self.current_partition.fullname
+        if pname in ('daint:gpu', 'dom:gpu'):
+            arch = 'p100'
+        else:
+            arch = proc.arch
+
+        with contextlib.suppress(KeyError):
+            self.reference = {
+                pname: {
+                    'perf': self.allref[self.num_nodes][arch][self.benchmark]
+                }
+            }
diff --git a/docs/index.rst b/docs/index.rst
index 31137fd2a0..1b1a449de1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -56,3 +56,4 @@ Publications
    usecases
    migration_2_to_3
    manuals
+   hpctestlib
diff --git a/hpctestlib/apps/amber/nve.py b/hpctestlib/apps/amber/nve.py
new file mode 100644
index 0000000000..d1e8e7df92
--- /dev/null
+++ b/hpctestlib/apps/amber/nve.py
@@ -0,0 +1,153 @@
+# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
+# ReFrame Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+class amber_nve_check(rfm.RunOnlyRegressionTest, pin_prefix=True):
+    '''Amber NVE test.
+
+    `Amber <https://ambermd.org/>`__ is a suite of biomolecular simulation
+    programs. It began in the late 1970's, and is maintained by an active
+    development community.
+
+    This test is parametrized over the benchmark type (see
+    :attr:`benchmark_info`) and the variant of the code (see :attr:`variant`).
+    Each test instance executes the benchmark, validates numerically its output
+    and extracts and reports a performance metric.
+
+    '''
+
+    #: The output file to pass to the Amber executable.
+    #:
+    #: :type: :class:`str`
+    #: :required: No
+    #: :default: ``'amber.out'``
+    output_file = variable(str, value='amber.out')
+
+    #: The input file to use.
+    #:
+    #: This is set to ``mdin.CPU`` or ``mdin.GPU`` depending on the test
+    #: variant during initialization.
+    #:
+    #: :type: :class:`str`
+    #: :required: Yes
+    input_file = variable(str)
+
+    #: The name of the benchmark that this test encodes.
+    #:
+    #: This is set from the corresponding value in the :attr:`benchmark_info`
+    #: parameter pack during initialization.
+    #:
+    #: :type: :class:`str`
+    #: :required: Yes
+    benchmark = variable(str)
+
+    #: Energy value reference.
+    #:
+    #: This is set from the corresponding value in the :attr:`benchmark_info`
+    #: parameter pack during initialization.
+    #:
+    #: :type: `float`
+    #: :required: Yes
+    energy_ref = variable(float)
+
+    #: Energy value tolerance.
+    #:
+    #: This is set from the corresponding value in the :attr:`benchmark_info`
+    #: parameter pack during initialization.
+    #:
+    #: :type: `float`
+    #: :required: Yes
+    energy_tol = variable(float)
+
+    #: Parameter pack encoding the benchmark information.
+    #:
+    #: The first element of the tuple refers to the benchmark name,
+    #: the second is the energy reference and the third is the
+    #: tolerance threshold.
+    #:
+    #: :type: `Tuple[str, float, float]`
+    #: :values:
+    #:     .. code-block:: python
+    #:
+    #:         [
+    #:             ('Cellulose_production_NVE', -443246.0, 5.0E-05),
+    #:             ('FactorIX_production_NVE', -234188.0, 1.0E-04),
+    #:             ('JAC_production_NVE_4fs', -44810.0, 1.0E-03),
+    #:             ('JAC_production_NVE', -58138.0, 5.0E-04)
+    #:         ]
+    benchmark_info = parameter([
+        ('Cellulose_production_NVE', -443246.0, 5.0E-05),
+        ('FactorIX_production_NVE', -234188.0, 1.0E-04),
+        ('JAC_production_NVE_4fs', -44810.0, 1.0E-03),
+        ('JAC_production_NVE', -58138.0, 5.0E-04)
+    ])
+
+    #: Parameter encoding the variant of the test.
+    #:
+    #: :type: :class:`str`
+    #: :values: ``['mpi', 'cuda']``
+    variant = parameter(['mpi', 'cuda'])
+
+    # Test tags
+    #
+    # :required: No
+    # :default: ``{'sciapp', 'chemistry'}``
+    tags = {'sciapp', 'chemistry'}
+
+    #: See :attr:`~reframe.core.pipeline.RegressionTest.num_tasks`.
+    #:
+    #: The ``mpi`` variant of the test requires ``num_tasks > 1``.
+    #:
+    #: :required: Yes
+    num_tasks = required
+
+    @run_after('init')
+    def prepare_test(self):
+        self.benchmark, self.energy_ref, self.energy_tol = self.benchmark_info
+        self.descr = f'Amber NVE {self.benchmark} benchmark ({self.variant})'
+
+        params = {
+            'mpi': ('mdin.CPU', 'pmemd.MPI'),
+            'cuda': ('mdin.GPU', 'pmemd.cuda.MPI')
+        }
+        try:
+            self.input_file, self.executable = params[self.variant]
+        except KeyError:
+            raise ValueError(
+                f'test not set up for platform {self.variant!r}'
+            ) from None
+
+        self.prerun_cmds = [
+            f'curl -LJO https://github.com/victorusu/amber_benchmark_suite'
+            f'/raw/main/amber_16_benchmark_suite/PME/{self.benchmark}.tar.bz2',
+            f'tar xf {self.benchmark}.tar.bz2'
+        ]
+        self.executable_opts = ['-O',
+                                '-i', self.input_file,
+                                '-o', self.output_file]
+        self.keep_files = [self.output_file]
+
+    @performance_function('ns/day')
+    def perf(self):
+        '''The performance of the benchmark expressed in ``ns/day``.'''
+        return sn.extractsingle(r'ns/day =\s+(?P<perf>\S+)',
+                                self.output_file, 'perf', float, item=1)
+
+    @sanity_function
+    def assert_energy_readout(self):
+        '''Assert that the obtained energy meets the required tolerance.'''
+
+        energy = sn.extractsingle(r' Etot\s+=\s+(?P<energy>\S+)',
+                                  self.output_file, 'energy', float, item=-2)
+        energy_diff = sn.abs(energy - self.energy_ref)
+        ref_ener_diff = sn.abs(self.energy_ref *
+                               self.energy_tol)
+        return sn.all([
+            sn.assert_found(r'Final Performance Info:', self.output_file),
+            sn.assert_lt(energy_diff, ref_ener_diff)
+        ])