diff --git a/config/cscs.py b/config/cscs.py
index a0b1b2e3a8..28c5ae5739 100644
--- a/config/cscs.py
+++ b/config/cscs.py
@@ -114,7 +114,8 @@ class ReframeSettings:
                 'descr': 'Hybrid nodes (Haswell/P100)',
                 'max_jobs': 100,
                 'resources': {
-                    'switches': ['--switches={num_switches}']
+                    'switches': ['--switches={num_switches}'],
+                    'gres': ['--gres={gres}']
                 }
             },
@@ -135,7 +136,8 @@ class ReframeSettings:
                 'descr': 'Multicore nodes (Broadwell)',
                 'max_jobs': 100,
                 'resources': {
-                    'switches': ['--switches={num_switches}']
+                    'switches': ['--switches={num_switches}'],
+                    'gres': ['--gres={gres}']
                 }
             },
@@ -193,6 +195,9 @@ class ReframeSettings:
                                          'PrgEnv-intel', 'PrgEnv-pgi'],
                 'descr': 'Hybrid nodes (Haswell/P100)',
                 'max_jobs': 100,
+                'resources': {
+                    'gres': ['--gres={gres}']
+                }
             },
@@ -213,7 +218,7 @@ class ReframeSettings:
                 'descr': 'Multicore nodes (Broadwell)',
                 'max_jobs': 100,
                 'resources': {
-                    'switches': ['--switches={num_switches}']
+                    'gres': ['--gres={gres}']
                 }
             },
diff --git a/cscs-checks/apps/greasy/greasy_check.py b/cscs-checks/apps/greasy/greasy_check.py
new file mode 100644
index 0000000000..eaf4fd89f0
--- /dev/null
+++ b/cscs-checks/apps/greasy/greasy_check.py
@@ -0,0 +1,231 @@
+# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
+# ReFrame Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import os
+from datetime import datetime
+
+import reframe as rfm
+import reframe.utility.sanity as sn
+from reframe.core.launchers.registry import getlauncher
+
+
+def to_seconds(timestr):
+    return (datetime.strptime(timestr, '%H:%M:%S') -
+            datetime.strptime('00:00:00', '%H:%M:%S')).total_seconds()
+
+
+@rfm.required_version('>=2.19')
+@rfm.parameterized_test(
+    ['serial', 'gpu', 24, 12, 1, 1],
+    ['serial', 'mc', 72, 36, 1, 1],
+    ['openmp', 'gpu', 24, 3, 1, 4],
+    ['openmp', 'mc', 72, 9, 1, 4],
+    ['mpi', 'gpu', 24, 4, 3, 1],
+    ['mpi', 'mc', 72, 12, 3, 1],
+    ['mpi+openmp', 'gpu', 24, 3, 2, 2],
+    ['mpi+openmp', 'mc', 72, 6, 3, 2]
+)
+class GREASYCheck(rfm.RegressionTest):
+    def __init__(self, variant, partition, num_greasy_tasks,
+                 nworkers_per_node, nranks_per_worker, ncpus_per_worker):
+        self.valid_systems = ['daint:' + partition, 'dom:' + partition]
+        self.valid_prog_environs = ['PrgEnv-gnu']
+        self.sourcepath = 'tasks_mpi_openmp.c'
+        self.build_system = 'SingleSource'
+
+        # Sleep long enough to distinguish whether the tasks are running
+        # in parallel or not
+        self.sleep_time = 60
+        self.build_system.cflags = ['-DSLEEP_TIME=%d' % self.sleep_time]
+        if variant == 'openmp':
+            self.build_system.cflags += ['-fopenmp']
+        elif variant == 'mpi':
+            self.build_system.cflags += ['-D_MPI']
+        elif variant == 'mpi+openmp':
+            self.build_system.cflags += ['-fopenmp', '-D_MPI']
+
+        self.executable = 'tasks_mpi_openmp.x'
+        self.tasks_file = 'tasks.txt'
+        self.executable_opts = [self.tasks_file]
+        self.greasy_logfile = 'greasy.log'
+        self.keep_files = [self.tasks_file, self.greasy_logfile]
+        nnodes = 2
+        self.use_multithreading = False
+        self.num_greasy_tasks = num_greasy_tasks
+        self.nworkers_per_node = nworkers_per_node
+        self.nranks_per_worker = nranks_per_worker
+        self.num_tasks_per_node = nranks_per_worker * nworkers_per_node
+        self.num_tasks = self.num_tasks_per_node * nnodes
+        self.num_cpus_per_task = ncpus_per_worker
+        self.sanity_patterns = self.eval_sanity()
+
+        # The reference value is system agnostic. Add 10 s of slack per
+        # GREASY task to compensate for srun becoming slightly slower
+        # whenever the system is full.
+        refperf = (
+            (self.sleep_time + 10) * num_greasy_tasks /
+            nworkers_per_node / nnodes
+        )
+        self.reference = {
+            '*': {
+                'time': (refperf, None, 0.5, 's')
+            }
+        }
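+        # For example, for the ['serial', 'gpu', 24, 12, 1, 1] variant the
+        # 24 tasks fit into a single wave of 12 workers on each of the 2
+        # nodes, so refperf = (60 + 10) * 24 / 12 / 2 = 70 s.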
+        self.perf_patterns = {
+            'time': sn.extractsingle(r'Total time: (?P<perf>\S+)',
+                                     self.greasy_logfile,
+                                     'perf', to_seconds)
+        }
+        # On SLURM there is no need to set OMP_NUM_THREADS if one defines
+        # num_cpus_per_task, but we set it anyway for completeness and
+        # portability
+        self.variables = {
+            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
+            'GREASY_NWORKERS_PER_NODE': str(nworkers_per_node),
+            'GREASY_LOGFILE': self.greasy_logfile
+        }
+        self.modules = ['GREASY']
+        self.maintainers = ['VH', 'SK']
+        self.tags = {'production'}
+
+    @rfm.run_before('run')
+    def generate_tasks_file(self):
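+        # Write one command line per GREASY task; the resulting tasks.txt
+        # looks like:
+        #   ./tasks_mpi_openmp.x output-0
+        #   ./tasks_mpi_openmp.x output-1
+        #   ...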
+        with open(os.path.join(self.stagedir, self.tasks_file), 'w') as fp:
+            for i in range(self.num_greasy_tasks):
+                fp.write(f'./{self.executable} output-{i}\n')
+
+    @rfm.run_before('run')
+    def daint_dom_gpu_specific_workaround(self):
+        if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
+            self.variables['CRAY_CUDA_MPS'] = '1'
+            self.variables['CUDA_VISIBLE_DEVICES'] = '0'
+            self.variables['GPU_DEVICE_ORDINAL'] = '0'
+            self.extra_resources = {
+                'gres': {
+                    'gres': 'gpu:0,craynetwork:4'
+                }
+            }
+        elif self.current_partition.fullname in ['daint:mc', 'dom:mc']:
+            self.extra_resources = {
+                'gres': {
+                    'gres': 'craynetwork:72'
+                }
+            }
+
+    @rfm.run_before('run')
+    def change_executable_name(self):
+        # After compiling the code we can change the executable to be
+        # the greasy one
+        self.executable = 'greasy'
+
+    @rfm.run_before('run')
+    def set_launcher(self):
+        # The job launcher has to be changed to local, since greasy
+        # makes its own calls to srun
+        self.job.launcher = getlauncher('local')()
+
+    @sn.sanity_function
+    def eval_sanity(self):
+        output_files = [file for file in os.listdir(self.stagedir)
+                        if file.startswith('output-')]
+        num_greasy_tasks = len(output_files)
+        failure_msg = (f'Requested {self.num_greasy_tasks} task(s), but '
+                       f'executed only {num_greasy_tasks} task(s)')
+        sn.evaluate(sn.assert_eq(num_greasy_tasks, self.num_greasy_tasks,
+                                 msg=failure_msg))
+        num_ranks_per_worker = sn.getattr(self, 'nranks_per_worker')
+        num_cpus_per_task = sn.getattr(self, 'num_cpus_per_task')
+
+        def tid(match):
+            return int(match.group(1))
+
+        def num_threads(match):
+            return int(match.group(2))
+
+        def rank(match):
+            return int(match.group(3))
+
+        def num_ranks(match):
+            return int(match.group(4))
+
+        for output_file in output_files:
+            result = sn.findall(r'Hello, World from thread \s*(\d+) out '
+                                r'of \s*(\d+) from process \s*(\d+) out of '
+                                r'\s*(\d+)', output_file)
+
+            failure_msg = (f'Found {sn.count(result)} "Hello, World" '
+                           f'pattern(s) but expected '
+                           f'{num_ranks_per_worker * num_cpus_per_task} '
+                           f'pattern(s) inside the output file {output_file}')
+            sn.evaluate(sn.assert_eq(sn.count(result),
+                                     num_ranks_per_worker * num_cpus_per_task,
+                                     msg=failure_msg))
+
+            sn.evaluate(sn.all(
+                sn.chain(
+                    sn.map(
+                        lambda x: sn.assert_lt(
+                            tid(x), num_threads(x),
+                            msg=(f'Thread id {tid(x)} is not lower than the '
+                                 f'number of threads {num_threads(x)} in '
+                                 f'output file {output_file}')
+                        ), result
+                    ),
+                    sn.map(
+                        lambda x: sn.assert_lt(
+                            rank(x), num_ranks(x),
+                            msg=(f'Rank id {rank(x)} is not lower than the '
+                                 f'number of ranks {num_ranks(x)} in '
+                                 f'output file {output_file}')
+                        ), result
+                    ),
+                    sn.map(
+                        lambda x: sn.assert_lt(
+                            tid(x), self.num_cpus_per_task,
+                            msg=(f'Thread id {tid(x)} is not lower than the '
+                                 f'number of cpus per task '
+                                 f'{self.num_cpus_per_task} in output '
+                                 f'file {output_file}')
+                        ), result
+                    ),
+                    sn.map(
+                        lambda x: sn.assert_eq(
+                            num_threads(x), num_cpus_per_task,
+                            msg=(f'Found {num_threads(x)} threads rather '
+                                 f'than {self.num_cpus_per_task} in output '
+                                 f'file {output_file}')
+                        ), result
+                    ),
+                    sn.map(
+                        lambda x: sn.assert_lt(
+                            rank(x), num_ranks_per_worker,
+                            msg=(f'Rank id {rank(x)} is not lower than the '
+                                 f'number of ranks per worker '
+                                 f'{self.nranks_per_worker} in output file '
+                                 f'{output_file}')
+                        ), result
+                    ),
+                    sn.map(
+                        lambda x: sn.assert_eq(
+                            num_ranks(x), num_ranks_per_worker,
+                            msg=(f'Number of ranks {num_ranks(x)} is not '
+                                 f'equal to {self.nranks_per_worker} in '
+                                 f'output file {output_file}')
+                        ), result
+                    )
+                )
+            ))
+        sn.evaluate(sn.assert_found(r'Finished greasing', self.greasy_logfile))
+        sn.evaluate(sn.assert_found(
+            (f'INFO: Summary of {self.num_greasy_tasks} tasks: '
+             f'{self.num_greasy_tasks} OK, 0 FAILED, 0 CANCELLED, '
+             fr'0 INVALID\.'), self.greasy_logfile
+        ))
+
+        return True
diff --git a/cscs-checks/apps/greasy/src/tasks_mpi_openmp.c b/cscs-checks/apps/greasy/src/tasks_mpi_openmp.c
new file mode 100644
index 0000000000..527c456e1d
--- /dev/null
+++ b/cscs-checks/apps/greasy/src/tasks_mpi_openmp.c
@@ -0,0 +1,62 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#ifdef _MPI
+#include "mpi.h"
+#endif
+
+#define STRINGIZE_MACRO(A) #A
+#define STRINGIZE(A) STRINGIZE_MACRO(A)
+
+int main(int argc, char *argv[])
+{
+    int size = 1;
+    int rank = 0;
+    int tid = 0;
+    FILE *outputfile;
+
+    if (argc > 1)
+    {
+        outputfile = fopen(argv[1], "a");
+        if (outputfile == NULL) {
+            fprintf(stdout, "Error: unable to open output file %s\n", argv[1]);
+            // Fall back to stdout rather than writing to a NULL stream
+            outputfile = stdout;
+        }
+    }
+    else
+    {
+        outputfile = stdout;
+    }
+
+#ifdef _MPI
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+
+#ifdef _OPENMP
+    #pragma omp parallel default(shared) private(tid)
+#endif
+    {
+#ifdef _OPENMP
+        int nthreads = omp_get_num_threads();
+        tid = omp_get_thread_num();
+#else
+        int nthreads = 1;
+#endif
+        // Sleep for as long as necessary to distinguish whether we are
+        // running in parallel or not; if GREASY is running correctly, the
+        // test should take approximately this amount of time to run
+        sleep(atoi(STRINGIZE(SLEEP_TIME)));
+        fprintf(outputfile, "Hello, World from thread %d out of %d from process %d out of %d\n",
+                tid, nthreads, rank, size);
+    }
+
+#ifdef _MPI
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
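+
+// Each rank of a worker appends one line per OpenMP thread to its output
+// file, e.g.:
+//   Hello, World from thread 0 out of 2 from process 1 out of 2
+// These are exactly the four numbers parsed by the sanity regex in
+// greasy_check.py.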