From 367869d1b155893143806191a0ddf2130ad63bae Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 18 Jun 2021 16:50:56 +0200 Subject: [PATCH 01/10] Update TM checks for PrgEnv-nvidia --- cscs-checks/microbenchmarks/gpu/hooks.py | 7 +- .../gpu/kernel_latency/kernel_latency.py | 4 +- cscs-checks/prgenv/cpu_target_check.py | 21 ++--- .../prgenv/cuda-fortran/cuda_fortran_check.py | 40 ++++++---- cscs-checks/prgenv/environ_check.py | 77 +++++++++---------- cscs-checks/prgenv/openacc_checks.py | 27 +++++-- cscs-checks/prgenv/opencl_check.py | 12 ++- 7 files changed, 111 insertions(+), 77 deletions(-) diff --git a/cscs-checks/microbenchmarks/gpu/hooks.py b/cscs-checks/microbenchmarks/gpu/hooks.py index a3e5591612..eaafbff145 100644 --- a/cscs-checks/microbenchmarks/gpu/hooks.py +++ b/cscs-checks/microbenchmarks/gpu/hooks.py @@ -13,16 +13,15 @@ def set_gpu_arch(self): cs = self.current_system.name cp = self.current_partition.fullname + ce = self.current_environ.name self.gpu_arch = None # Nvidia options self.gpu_build = 'cuda' if cs in {'dom', 'daint'}: self.gpu_arch = '60' - self.modules = ['craype-accel-nvidia60', 'cdt-cuda'] - if cs == 'dom': - self.modules += ['cdt-cuda'] - + if ce != 'PrgEnv-nvidia': + self.modules = ['craype-accel-nvidia60', 'cdt-cuda'] elif cs in {'arola', 'tsa'}: self.gpu_arch = '70' self.modules = ['cuda/10.1.243'] diff --git a/cscs-checks/microbenchmarks/gpu/kernel_latency/kernel_latency.py b/cscs-checks/microbenchmarks/gpu/kernel_latency/kernel_latency.py index 7108fac0d4..8718acfafa 100644 --- a/cscs-checks/microbenchmarks/gpu/kernel_latency/kernel_latency.py +++ b/cscs-checks/microbenchmarks/gpu/kernel_latency/kernel_latency.py @@ -65,8 +65,8 @@ class gpu_kernel_latency_check(GpuKernelLatency): def set_valid_prog_environs(self): cs = self.current_system.name if cs in {'dom', 'daint'}: - self.valid_prog_environs = ['PrgEnv-cray', - 'PrgEnv-pgi', 'PrgEnv-gnu'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-gnu', 
'PrgEnv-nvidia'] elif cs in {'arolla', 'tsa'}: self.valid_prog_environs = ['PrgEnv-pgi'] elif cs in {'ault'}: diff --git a/cscs-checks/prgenv/cpu_target_check.py b/cscs-checks/prgenv/cpu_target_check.py index c8d9bb5288..a60bc0f6bf 100644 --- a/cscs-checks/prgenv/cpu_target_check.py +++ b/cscs-checks/prgenv/cpu_target_check.py @@ -9,15 +9,16 @@ @rfm.simple_test class CrayCPUTargetTest(rfm.RunOnlyRegressionTest): - def __init__(self): - self.descr = 'Checks whether CRAY_CPU_TARGET is set' - self.valid_systems = ['daint:login', 'dom:login'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', - 'PrgEnv-intel', 'PrgEnv-pgi'] - self.sourcesdir = None - self.executable = 'echo CRAY_CPU_TARGET=$CRAY_CPU_TARGET' + descr = 'Checks whether CRAY_CPU_TARGET is set' + valid_systems = ['daint:login', 'dom:login'] + valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', + 'PrgEnv-pgi', 'PrgEnv-nvidia'] + sourcesdir = None + executable = 'echo CRAY_CPU_TARGET=$CRAY_CPU_TARGET' + maintainers = ['TM', 'LM'] + tags = {'production', 'maintenance', 'craype'} + + @run_before('sanity') + def set_sanity(self): self.sanity_patterns = sn.assert_found(r'CRAY_CPU_TARGET=\S+', self.stdout) - - self.maintainers = ['TM', 'LM'] - self.tags = {'production', 'maintenance', 'craype'} diff --git a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py index a4176982ab..d47225a3f2 100644 --- a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py +++ b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py @@ -9,22 +9,34 @@ @rfm.simple_test class CUDAFortranCheck(rfm.RegressionTest): - def __init__(self): - self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.valid_prog_environs = ['PrgEnv-pgi'] - self.sourcepath = 'vecAdd_cuda.cuf' - self.modules = ['craype-accel-nvidia60'] - self.build_system = 'SingleSource' - self.build_system.fflags = ['-ta=tesla:cc60'] - self.num_gpus_per_node = 1 - result = sn.extractsingle(r'final 
result:\s+(?P\d+\.?\d*)', - self.stdout, 'result', float) - self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) - self.maintainers = ['TM', 'AJ'] - self.tags = {'production', 'craype'} + valid_systems = ['daint:gpu', 'dom:gpu'] + valid_prog_environs = ['PrgEnv-pgi', 'PrgEnv-nvidia'] + sourcepath = 'vecAdd_cuda.cuf' + build_system = 'SingleSource' + build_system.fflags = ['-ta=tesla:cc60'] + num_gpus_per_node = 1 + maintainers = ['TM', 'AJ'] + tags = {'production', 'craype'} + + @run_after('setup') + def set_modules(self): + if self.current_environ.name != 'PrgEnv-nvidia': + self.modules = ['craype-accel-nvidia60'] # FIXME: PGI 20.x does not support CUDA 11, see case #275674 @run_before('compile') def cudatoolkit_pgi_20x_workaround(self): - cudatoolkit_version = '10.2.89_3.29-7.0.2.1_3.27__g67354b4' + if self.current_system.name == 'daint': + cudatoolkit_version = '10.2.89_3.29-7.0.2.1_3.27__g67354b4' + else: + self.variables['CUDA_HOME'] = '$CUDATOOLKIT_HOME' + cudatoolkit_version = '10.2.89_3.28-2.1__g52c0314' + self.modules += [f'cudatoolkit/{cudatoolkit_version}'] + + @run_before('sanity') + def set_sanity(self): + result = sn.extractsingle(r'final result:\s+(?P\d+\.?\d*)', + self.stdout, 'result', float) + self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) + diff --git a/cscs-checks/prgenv/environ_check.py b/cscs-checks/prgenv/environ_check.py index dafee390b8..fd02e5a061 100644 --- a/cscs-checks/prgenv/environ_check.py +++ b/cscs-checks/prgenv/environ_check.py @@ -10,57 +10,57 @@ @rfm.simple_test class DefaultPrgEnvCheck(rfm.RunOnlyRegressionTest): - def __init__(self): - self.descr = 'Ensure PrgEnv-cray is loaded by default' - self.valid_prog_environs = ['builtin'] - self.valid_systems = ['daint:login', 'dom:login', - 'eiger:login', 'pilatus:login'] - self.executable = 'module' - self.maintainers = ['TM', 'CB'] - self.tags = {'production', 'craype'} - self.sanity_patterns = sn.assert_found(r'^PrgEnv-cray', self.stderr) + descr 
= 'Ensure PrgEnv-cray is loaded by default' + valid_prog_environs = ['builtin'] + valid_systems = ['daint:login', 'dom:login', + 'eiger:login', 'pilatus:login'] + executable = 'module' + executable_opts = ['--terse', 'list'] + maintainers = ['TM', 'CB'] + tags = {'production', 'craype'} - self.executable_opts = ['--terse', 'list'] - prgenv_patt = r'^PrgEnv-cray' - self.sanity_patterns = sn.assert_found(prgenv_patt, self.stderr) + @run_before('sanity') + def set_sanity(self): + self.sanity_patterns = sn.assert_found(r'^PrgEnv-cray', self.stderr) @rfm.simple_test class EnvironmentCheck(rfm.RunOnlyRegressionTest): - def __init__(self): - self.descr = 'Ensure programming environment is loaded correctly' - self.valid_systems = ['daint:login', 'dom:login', - 'eiger:login', 'pilatus:login'] - self.valid_prog_environs = ['PrgEnv-aocc', 'PrgEnv-cray', 'PrgEnv-gnu', - 'PrgEnv-intel', 'PrgEnv-pgi'] - self.executable = 'module' - self.executable_opts = ['--terse', 'list'] - - self.maintainers = ['TM', 'CB'] - self.tags = {'production', 'craype'} + descr = 'Ensure programming environment is loaded correctly' + valid_systems = ['daint:login', 'dom:login', + 'eiger:login', 'pilatus:login'] + valid_prog_environs = ['PrgEnv-aocc', 'PrgEnv-cray', 'PrgEnv-gnu', + 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] + executable = 'module' + executable_opts = ['--terse', 'list'] + maintainers = ['TM', 'CB'] + tags = {'production', 'craype'} @run_before('sanity') def set_sanity(self): module_patt = rf'^{self.current_environ.name}' - self.sanity_patterns = sn.assert_found(module_patt, self.stderr) class CrayVariablesCheck(rfm.RunOnlyRegressionTest): cray_module = parameter() - - def __init__(self): - self.descr = 'Check for standard Cray variables' - self.valid_prog_environs = ['builtin'] - self.executable = 'module' + descr = 'Check for standard Cray variables' + valid_prog_environs = ['builtin'] + executable = 'module' + tags = {'production', 'craype'} + maintainers = ['EK', 'TM'] + + 
@run_before('run') + def set_exec_opts(self): self.executable_opts = ['show', self.cray_module] + + @run_before('sanity') + def set_sanity(self): envvar_prefix = self.cray_module.upper().replace('-', '_') self.sanity_patterns = sn.all([ sn.assert_found(f'{envvar_prefix}_PREFIX', self.stderr), sn.assert_found(f'{envvar_prefix}_VERSION', self.stderr) ]) - self.tags = {'production', 'craype'} - self.maintainers = ['EK', 'TM'] @rfm.simple_test @@ -71,11 +71,10 @@ class CrayVariablesCheckDaint(CrayVariablesCheck): 'cray-petsc-complex-64', 'cray-python', 'cray-R', 'cray-tpsl', 'cray-tpsl-64', 'cudatoolkit', 'gcc', 'papi', 'pmi' ]) + valid_systems = ['daint:login', 'dom:login'] - def __init__(self): - super().__init__() - self.valid_systems = ['daint:login', 'dom:login'] - + @run_after('init') + def skip_modules(self): # FIXME: These modules should be fixed in later releases cdt = osext.cray_cdt_version() if ((cdt and cdt <= '20.11' and @@ -92,12 +91,10 @@ class CrayVariablesCheckEiger(CrayVariablesCheck): 'cray-mpich', 'cray-openshmemx', 'cray-parallel-netcdf', 'cray-pmi', 'cray-python', 'cray-R', 'gcc', 'papi' ]) + valid_systems = ['eiger:login'] - def __init__(self): - super().__init__() - self.valid_systems = ['eiger:login'] - + @run_after('init') + def skip_modules(self): # FIXME: These modules should be fixed in later releases - if self.cray_module in {'cray-fftw', 'cray-python', 'cray-mpich'}: self.valid_systems = [] diff --git a/cscs-checks/prgenv/openacc_checks.py b/cscs-checks/prgenv/openacc_checks.py index 442cf74709..30260e0712 100644 --- a/cscs-checks/prgenv/openacc_checks.py +++ b/cscs-checks/prgenv/openacc_checks.py @@ -17,7 +17,8 @@ def __init__(self, variant): self.num_tasks = 2 self.valid_systems = ['daint:gpu', 'dom:gpu', 'arolla:cn', 'tsa:cn'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-nvidia'] self.sourcesdir = 'src/openacc' if self.num_tasks == 1: self.sourcepath = 
'vecAdd_openacc_nompi.f90' @@ -26,9 +27,7 @@ def __init__(self, variant): else: self.sourcepath = 'vecAdd_openacc_mpi.f90' - if self.current_system.name in ['daint', 'dom']: - self.modules = ['craype-accel-nvidia60'] - elif self.current_system.name in ['arolla', 'tsa']: + if self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.variables = { 'CRAY_ACCEL_TARGET': 'nvidia70', @@ -46,11 +45,29 @@ def __init__(self, variant): self.maintainers = ['TM', 'AJ'] self.tags = {'production', 'craype'} + @run_after('setup') + def set_modules(self): + if (self.current_system.name in ['daint', 'dom'] and + self.current_environ.name != 'PrgEnv-nvidia'): + self.modules = ['craype-accel-nvidia60'] + + # FIXME: PGI 20.x does not support CUDA 11, see case #275674 + @run_before('compile') + def cudatoolkit_pgi_20x_workaround(self): + if self.current_system.name == 'daint': + cudatoolkit_version = '10.2.89_3.29-7.0.2.1_3.27__g67354b4' + else: + self.variables['CUDA_HOME'] = '$CUDATOOLKIT_HOME' + cudatoolkit_version = '10.2.89_3.28-2.1__g52c0314' + + self.modules += [f'cudatoolkit/{cudatoolkit_version}'] + @run_before('compile') def setflags(self): if self.current_environ.name.startswith('PrgEnv-cray'): self.build_system.fflags = ['-hacc', '-hnoomp'] - elif self.current_environ.name.startswith('PrgEnv-pgi'): + elif (self.current_environ.name.startswith('PrgEnv-pgi') or + self.current_environ.name == 'PrgEnv-nvidia'): if self.current_system.name in ['daint', 'dom']: self.build_system.fflags = ['-acc', '-ta=tesla:cc60'] elif self.current_system.name in ['arolla', 'tsa']: diff --git a/cscs-checks/prgenv/opencl_check.py b/cscs-checks/prgenv/opencl_check.py index 9dce7c4dd3..16b380b959 100644 --- a/cscs-checks/prgenv/opencl_check.py +++ b/cscs-checks/prgenv/opencl_check.py @@ -15,8 +15,8 @@ def __init__(self): self.tags = {'production', 'craype'} self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] - self.modules = 
['craype-accel-nvidia60'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-nvidia'] self.build_system = 'Make' self.sourcesdir = 'src/opencl' self.num_gpus_per_node = 1 @@ -24,6 +24,14 @@ def __init__(self): self.sanity_patterns = sn.assert_found('SUCCESS', self.stdout) + @run_after('setup') + def setup_nvidia(self): + if self.current_environ.name == 'PrgEnv-nvidia': + self.variables.update( + {'CUDATOOLKIT_HOME': '$CRAY_NVIDIA_PREFIX/cuda'}) + else: + self.modules = ['craype-accel-nvidia60'] + @run_before('compile') def setflags(self): if self.current_environ.name == 'PrgEnv-pgi': From 72e8c9c88d934fd64af87c30820484b93705c119 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 21 Jun 2021 13:23:00 +0200 Subject: [PATCH 02/10] Modernize more test + add PrgEnv-nvidia --- cscs-checks/compile/libsci_resolve.py | 74 +++++++++---------- cscs-checks/libraries/io/hdf5_compile_run.py | 59 +++++++++------ .../libraries/io/netcdf_compile_run.py | 49 +++++++----- .../prgenv/cuda-fortran/cuda_fortran_check.py | 7 +- 4 files changed, 108 insertions(+), 81 deletions(-) diff --git a/cscs-checks/compile/libsci_resolve.py b/cscs-checks/compile/libsci_resolve.py index 24d6574606..5c3fe908d8 100644 --- a/cscs-checks/compile/libsci_resolve.py +++ b/cscs-checks/compile/libsci_resolve.py @@ -8,37 +8,37 @@ class LibSciResolveBaseTest(rfm.CompileOnlyRegressionTest): - def __init__(self): - self.sourcesdir = 'src/libsci_resolve' - self.sourcepath = 'libsci_resolve.f90' - self.valid_systems = ['daint:login', 'daint:gpu', - 'dom:login', 'dom:gpu'] - self.modules = ['craype-haswell'] - self.maintainers = ['AJ', 'LM'] - self.tags = {'production', 'craype'} + sourcesdir = 'src/libsci_resolve' + sourcepath = 'libsci_resolve.f90' + valid_systems = ['daint:login', 'daint:gpu', 'dom:login', 'dom:gpu'] + modules = ['craype-haswell'] + maintainers = ['AJ', 'LM'] + tags = {'production', 'craype'} + + @run_after('setup') + def set_postbuild_cmds(self): + 
self.postbuild_cmds = [f'readelf -d {self.executable}'] -@rfm.parameterized_test(['craype-accel-nvidia35'], ['craype-accel-nvidia60']) +@rfm.simple_test class NvidiaResolveTest(LibSciResolveBaseTest): - def __init__(self, module_name): - super().__init__() - self.descr = f'Module {module_name} resolves libsci_acc' - self.build_system = 'SingleSource' + accel_nvidia_version = parameter(['35', '60']) + valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] + build_system = 'SingleSource' + compiler_version = '81' + + @run_after('init') + def set_description(self): + self.descr = (f'Module craype-accel-nvidia{self.accel_nvidia_version} ' + f'resolves libsci_acc') + + @run_after('init') + def update_tags(self): self.tags.add('health') - self.module_name = module_name - self.module_version = { - 'craype-accel-nvidia35': 'nv35', - 'craype-accel-nvidia60': 'nv60' - } - self.compiler_version = '81' - self.modules = ['craype-haswell', module_name] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] - self.prgenv_names = { - 'PrgEnv-cray': 'cray', - 'PrgEnv-gnu': 'gnu' - } - self.postbuild_cmds = [f'readelf -d {self.executable}'] + @run_after('setup') + def set_modules(self): + self.modules += [f'craype-accel-nvidia{self.accel_nvidia_version}'] @run_before('sanity') def set_sanity(self): @@ -46,10 +46,9 @@ def set_sanity(self): # libsci_acc_cray_nv35.so regex = (r'.*\(NEEDED\).*libsci_acc_(?P[A-Za-z]+)_' r'((?P[A-Za-z0-9]+)_)?(?P\S+)\.so') - prgenv = self.prgenv_names[self.current_environ.name] + prgenv = self.current_environ.name.split('-')[1] cver = self.compiler_version - mod_name = self.module_version[self.module_name] - + mod_name = f'nv{self.accel_nvidia_version}' if self.current_environ.name == 'PrgEnv-cray': cver_sanity = sn.assert_found(regex, self.stdout) else: @@ -67,14 +66,16 @@ def set_sanity(self): @rfm.simple_test class MKLResolveTest(LibSciResolveBaseTest): - def __init__(self): - super().__init__() - self.descr = '-mkl Resolves to MKL' - 
self.valid_prog_environs = ['PrgEnv-intel'] - self.build_system = 'SingleSource' + descr = '-mkl Resolves to MKL' + valid_prog_environs = ['PrgEnv-intel'] + build_system = 'SingleSource' + @run_before('compile') + def set_fflags(self): self.build_system.fflags = ['-mkl'] - self.postbuild_cmds = [f'readelf -d {self.executable}'] + + @run_before('sanity') + def set_sanity(self): regex = (r'.*\(NEEDED\).*libmkl_(?P[A-Za-z]+)_(?P\S+)' r'\.so') self.sanity_patterns = sn.all([ @@ -83,6 +84,3 @@ def __init__(self): sn.assert_eq( sn.extractsingle(regex, self.stdout, 'version'), 'lp64') ]) - - self.maintainers = ['AJ', 'LM'] - self.tags = {'production', 'craype'} diff --git a/cscs-checks/libraries/io/hdf5_compile_run.py b/cscs-checks/libraries/io/hdf5_compile_run.py index 079b676a1d..a2ce561eee 100644 --- a/cscs-checks/libraries/io/hdf5_compile_run.py +++ b/cscs-checks/libraries/io/hdf5_compile_run.py @@ -7,35 +7,57 @@ import reframe.utility.sanity as sn -@rfm.parameterized_test(*([lang, linkage] for lang in ['c', 'f90'] - for linkage in ['static', 'dynamic'])) +@rfm.simple_test class HDF5Test(rfm.RegressionTest): - def __init__(self, lang, linkage): + lang = parameter(['c', 'f90']) + linkage = parameter(['static', 'dynamic']) + valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'] + build_system = 'SingleSource' + modules = ['cray-hdf5'] + keep_files = ['h5dump_out.txt'] + num_tasks = 1 + num_tasks_per_node = 1 + postrun_cmds = ['h5dump h5ex_d_chunk.h5 > h5dump_out.txt'] + maintainers = ['SO', 'RS'] + tags = {'production', 'craype', 'health'} + + @run_after('init') + def set_description(self): lang_names = { 'c': 'C', 'f90': 'Fortran 90' } - self.linkage = linkage - self.descr = lang_names[lang] + ' HDF5 ' + linkage.capitalize() - self.sourcepath = f'h5ex_d_chunk.{lang}' - self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'] - if linkage == 'dynamic': + self.descr = (f'{lang_names[self.lang]} HDF5 ' + f'{self.linkage.capitalize()}') + + 
@run_after('init') + def set_valid_systems(self): + if self.linkage == 'dynamic': self.valid_systems += ['eiger:mc', 'pilatus:mc'] + @run_after('init') + def set_prog_environs(self): if self.current_system.name in ['eiger', 'pilatus']: - # no cray-hdf5 as of PE 21.02 with PrgEnv-intel on Eiger and - # Pilatus + # no cray-hdf5 as of PE 21.02 with PrgEnv-intel on Eiger & Pilatus self.valid_prog_environs = ['PrgEnv-aocc', 'PrgEnv-cray', 'PrgEnv-gnu'] else: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', - 'PrgEnv-intel', 'PrgEnv-pgi'] + 'PrgEnv-intel', 'PrgEnv-pgi', + 'PrgEnv-nvidia'] + + @run_after('setup') + def set_sourcepath(self): + self.sourcepath = f'h5ex_d_chunk.{self.lang}' - self.modules = ['cray-hdf5'] - self.keep_files = ['h5dump_out.txt'] + @run_after('setup') + def set_ldflags(self): + self.build_system.ldflags = [f'-{self.linkage}'] + @run_before('sanity') + def set_sanity(self): # C and Fortran write transposed matrix - if lang == 'c': + if self.lang == 'c': self.sanity_patterns = sn.all([ sn.assert_found(r'Data as written to disk by hyberslabs', self.stdout), @@ -79,12 +101,3 @@ def __init__(self, lang, linkage): sn.assert_found(r'\(7,0\): 1, 1, 0, 1, 1, 0', 'h5dump_out.txt'), ]) - - self.num_tasks = 1 - self.num_tasks_per_node = 1 - self.build_system = 'SingleSource' - self.build_system.ldflags = [f'-{linkage}'] - self.postrun_cmds = ['h5dump h5ex_d_chunk.h5 > h5dump_out.txt'] - - self.maintainers = ['SO', 'RS'] - self.tags = {'production', 'craype', 'health'} diff --git a/cscs-checks/libraries/io/netcdf_compile_run.py b/cscs-checks/libraries/io/netcdf_compile_run.py index 56407a45a5..79955cd8ec 100644 --- a/cscs-checks/libraries/io/netcdf_compile_run.py +++ b/cscs-checks/libraries/io/netcdf_compile_run.py @@ -9,26 +9,37 @@ import reframe.utility.sanity as sn -@rfm.parameterized_test(*([lang, linkage] for lang in ['cpp', 'c', 'f90'] - for linkage in ['dynamic', 'static'])) +@rfm.simple_test class NetCDFTest(rfm.RegressionTest): - def 
__init__(self, lang, linkage): - lang_names = { + lang = parameter(['cpp', 'c', 'f90']) + linkage = parameter(['dynamic', 'static']) + valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', + 'arolla:cn', 'tsa:cn'] + build_system = 'SingleSource' + num_tasks = 1 + num_tasks_per_node = 1 + maintainers = ['AJ', 'SO'] + tags = {'production', 'craype', 'external-resources', 'health'} + lang_names = { 'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90' - } - self.lang = lang - self.linkage = linkage - self.descr = f'{lang_names[lang]} NetCDF {linkage.capitalize()}' - self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'arolla:cn', 'tsa:cn'] - if linkage == 'dynamic': + } + + @run_after('init') + def set_description(self): + self.descr = (f'{self.lang_names[self.lang]} NetCDF ' + f'{self.linkage.capitalize()}') + + @run_after('init') + def setup_prgenvs(self): + if self.linkage == 'dynamic': self.valid_systems += ['eiger:mc', 'pilatus:mc'] if self.current_system.name in ['daint', 'dom']: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', - 'PrgEnv-intel', 'PrgEnv-pgi'] + 'PrgEnv-intel', 'PrgEnv-pgi', + 'PrgEnv-nvidia'] self.modules = ['cray-netcdf'] elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True @@ -41,15 +52,11 @@ def __init__(self, lang, linkage): else: self.valid_prog_environs = [] + @run_before('compile') + def set_sources(self): self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'netcdf') - self.build_system = 'SingleSource' - self.sourcepath = 'netcdf_read_write.' 
+ lang - self.num_tasks = 1 - self.num_tasks_per_node = 1 - self.sanity_patterns = sn.assert_found(r'SUCCESS', self.stdout) - self.maintainers = ['AJ', 'SO'] - self.tags = {'production', 'craype', 'external-resources', 'health'} + self.sourcepath = f'netcdf_read_write.{self.lang}' @run_before('compile') def setflags(self): @@ -71,3 +78,7 @@ def setflags(self): ] else: self.build_system.ldflags = [f'-{self.linkage}'] + + @run_before('sanity') + def set_sanity(self): + self.sanity_patterns = sn.assert_found(r'SUCCESS', self.stdout) diff --git a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py index d47225a3f2..832667b431 100644 --- a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py +++ b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py @@ -13,7 +13,6 @@ class CUDAFortranCheck(rfm.RegressionTest): valid_prog_environs = ['PrgEnv-pgi', 'PrgEnv-nvidia'] sourcepath = 'vecAdd_cuda.cuf' build_system = 'SingleSource' - build_system.fflags = ['-ta=tesla:cc60'] num_gpus_per_node = 1 maintainers = ['TM', 'AJ'] tags = {'production', 'craype'} @@ -22,6 +21,12 @@ class CUDAFortranCheck(rfm.RegressionTest): def set_modules(self): if self.current_environ.name != 'PrgEnv-nvidia': self.modules = ['craype-accel-nvidia60'] + else: + self.modules = ['cdt-cuda/21.05'] + + @run_before('compile') + def set_fflags(self): + self.build_system.fflags = ['-ta=tesla:cc60'] # FIXME: PGI 20.x does not support CUDA 11, see case #275674 @run_before('compile') From 80580f67b50cbe7dcafb4e82e83a49c26ff7be71 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 21 Jun 2021 13:47:25 +0200 Subject: [PATCH 03/10] Move fields to class level --- cscs-checks/libraries/io/hdf5_compile_run.py | 6 ++--- cscs-checks/prgenv/opencl_check.py | 27 ++++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/cscs-checks/libraries/io/hdf5_compile_run.py b/cscs-checks/libraries/io/hdf5_compile_run.py index 
a2ce561eee..297ab482e1 100644 --- a/cscs-checks/libraries/io/hdf5_compile_run.py +++ b/cscs-checks/libraries/io/hdf5_compile_run.py @@ -46,13 +46,13 @@ def set_prog_environs(self): 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] - @run_after('setup') + @run_before('compile') def set_sourcepath(self): self.sourcepath = f'h5ex_d_chunk.{self.lang}' - @run_after('setup') + @run_before('compile') def set_ldflags(self): - self.build_system.ldflags = [f'-{self.linkage}'] + self.build_system.ldflags = [f'-{self.linkage}'] @run_before('sanity') def set_sanity(self): diff --git a/cscs-checks/prgenv/opencl_check.py b/cscs-checks/prgenv/opencl_check.py index 16b380b959..c6d28552f1 100644 --- a/cscs-checks/prgenv/opencl_check.py +++ b/cscs-checks/prgenv/opencl_check.py @@ -10,23 +10,20 @@ @rfm.simple_test class OpenCLCheck(rfm.RegressionTest): - def __init__(self): - self.maintainers = ['TM', 'SK'] - self.tags = {'production', 'craype'} - - self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', - 'PrgEnv-nvidia'] - self.build_system = 'Make' - self.sourcesdir = 'src/opencl' - self.num_gpus_per_node = 1 - self.executable = 'vecAdd' - - self.sanity_patterns = sn.assert_found('SUCCESS', self.stdout) + valid_systems = ['daint:gpu', 'dom:gpu'] + valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-nvidia'] + build_system = 'Make' + sourcesdir = 'src/opencl' + num_gpus_per_node = 1 + executable = 'vecAdd' + maintainers = ['TM', 'SK'] + tags = {'production', 'craype'} @run_after('setup') def setup_nvidia(self): if self.current_environ.name == 'PrgEnv-nvidia': + # This is used by the Makefile for the OpenCL headers self.variables.update( {'CUDATOOLKIT_HOME': '$CRAY_NVIDIA_PREFIX/cuda'}) else: @@ -45,3 +42,7 @@ def cdt2006_pgi_workaround(self): if (self.current_environ.name == 'PrgEnv-pgi' and cdt == '20.08'): self.variables.update({'CUDA_HOME': '$CUDATOOLKIT_HOME'}) + + @run_before('sanity') + def set_sanity(self): + 
self.sanity_patterns = sn.assert_found('SUCCESS', self.stdout) From c344fb6e93f53500713ca82791dbf58d7d255b5b Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 21 Jun 2021 14:01:24 +0200 Subject: [PATCH 04/10] Resolve gpu hooks conflicts --- cscs-checks/microbenchmarks/gpu/hooks.py | 9 +++++++-- cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py | 1 - 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cscs-checks/microbenchmarks/gpu/hooks.py b/cscs-checks/microbenchmarks/gpu/hooks.py index eaafbff145..7a5f049690 100644 --- a/cscs-checks/microbenchmarks/gpu/hooks.py +++ b/cscs-checks/microbenchmarks/gpu/hooks.py @@ -13,15 +13,20 @@ def set_gpu_arch(self): cs = self.current_system.name cp = self.current_partition.fullname - ce = self.current_environ.name self.gpu_arch = None # Nvidia options self.gpu_build = 'cuda' if cs in {'dom', 'daint'}: self.gpu_arch = '60' - if ce != 'PrgEnv-nvidia': + if self.current_environ.name not in {'PrgEnv-nvidia'}: self.modules = ['craype-accel-nvidia60', 'cdt-cuda'] + else: + self.modules = ['cdt-cuda/21.05'] + + if cs == 'dom': + self.modules += ['cudatoolkit/11.1.0_3.39-4.1__g484e319'] + elif cs in {'arola', 'tsa'}: self.gpu_arch = '70' self.modules = ['cuda/10.1.243'] diff --git a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py index 832667b431..cf66339414 100644 --- a/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py +++ b/cscs-checks/prgenv/cuda-fortran/cuda_fortran_check.py @@ -44,4 +44,3 @@ def set_sanity(self): result = sn.extractsingle(r'final result:\s+(?P\d+\.?\d*)', self.stdout, 'result', float) self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) - From 5afb16a6b168721a4d64644aa29c6b1c4d95ae66 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 21 Jun 2021 14:24:45 +0200 Subject: [PATCH 05/10] Modernize openacc Fortran checks --- cscs-checks/prgenv/openacc_checks.py | 67 +++++++++++++++------------- 1 file 
changed, 37 insertions(+), 30 deletions(-) diff --git a/cscs-checks/prgenv/openacc_checks.py b/cscs-checks/prgenv/openacc_checks.py index 30260e0712..b821f4ac20 100644 --- a/cscs-checks/prgenv/openacc_checks.py +++ b/cscs-checks/prgenv/openacc_checks.py @@ -8,25 +8,41 @@ import reframe.utility.sanity as sn -@rfm.parameterized_test(['mpi'], ['nompi']) +@rfm.simple_test class OpenACCFortranCheck(rfm.RegressionTest): - def __init__(self, variant): - if variant == 'nompi': - self.num_tasks = 1 - else: - self.num_tasks = 2 + variant = parameter(['mpi', 'nompi']) + valid_systems = ['daint:gpu', 'dom:gpu', 'arolla:cn', 'tsa:cn'] + valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-nvidia'] + sourcesdir = 'src/openacc' + build_system = 'SingleSource' + num_gpus_per_node = 1 + num_tasks_per_node = 1 + maintainers = ['TM', 'AJ'] + tags = {'production', 'craype'} - self.valid_systems = ['daint:gpu', 'dom:gpu', 'arolla:cn', 'tsa:cn'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', - 'PrgEnv-nvidia'] - self.sourcesdir = 'src/openacc' - if self.num_tasks == 1: + @run_after('init') + def set_numtasks(self): + if self.variant == 'nompi': + self.num_tasks = 1 self.sourcepath = 'vecAdd_openacc_nompi.f90' if self.current_system.name in ['arolla', 'tsa']: self.valid_prog_environs = ['PrgEnv-pgi-nompi'] else: + self.num_tasks = 2 self.sourcepath = 'vecAdd_openacc_mpi.f90' + @run_after('setup') + def set_modules(self): + if (self.current_system.name in ['daint', 'dom'] and + self.current_environ.name != 'PrgEnv-nvidia'): + self.modules = ['craype-accel-nvidia60'] + + @run_after('setup') + def set_executable(self): + self.executable = self.name + + @run_before('compile') + def set_variables(self): if self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.variables = { @@ -34,33 +50,18 @@ def __init__(self, variant): 'MV2_USE_CUDA': '1' } - self.executable = self.name - self.build_system = 'SingleSource' - self.num_gpus_per_node = 1 - 
self.num_tasks_per_node = 1 - result = sn.extractsingle(r'final result:\s+(?P\d+\.?\d*)', - self.stdout, 'result', float) - self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) - - self.maintainers = ['TM', 'AJ'] - self.tags = {'production', 'craype'} - - @run_after('setup') - def set_modules(self): - if (self.current_system.name in ['daint', 'dom'] and - self.current_environ.name != 'PrgEnv-nvidia'): - self.modules = ['craype-accel-nvidia60'] - # FIXME: PGI 20.x does not support CUDA 11, see case #275674 @run_before('compile') def cudatoolkit_pgi_20x_workaround(self): + # FIXME: Align cudatoolkit versions when daint/dom are in sync if self.current_system.name == 'daint': cudatoolkit_version = '10.2.89_3.29-7.0.2.1_3.27__g67354b4' - else: + elif self.current_system.name == 'dom': self.variables['CUDA_HOME'] = '$CUDATOOLKIT_HOME' cudatoolkit_version = '10.2.89_3.28-2.1__g52c0314' - self.modules += [f'cudatoolkit/{cudatoolkit_version}'] + if self.current_system.name in {'daint', 'dom'}: + self.modules += [f'cudatoolkit/{cudatoolkit_version}'] @run_before('compile') def setflags(self): @@ -81,3 +82,9 @@ def cdt2008_pgi_workaround(self): if (self.current_environ.name == 'PrgEnv-pgi' and cdt == '20.08'): self.variables.update({'CUDA_HOME': '$CUDATOOLKIT_HOME'}) + + @run_before('sanity') + def set_sanity(self): + result = sn.extractsingle(r'final result:\s+(?P\d+\.?\d*)', + self.stdout, 'result', float) + self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) From 08d18396ea7e68efff241e4ee6c1238dd5590aa1 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 21 Jun 2021 17:30:57 +0200 Subject: [PATCH 06/10] Address PR comments --- cscs-checks/compile/libsci_resolve.py | 2 +- cscs-checks/libraries/io/hdf5_compile_run.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cscs-checks/compile/libsci_resolve.py b/cscs-checks/compile/libsci_resolve.py index 5c3fe908d8..d6e201d47d 100644 --- 
a/cscs-checks/compile/libsci_resolve.py +++ b/cscs-checks/compile/libsci_resolve.py @@ -22,7 +22,7 @@ def set_postbuild_cmds(self): @rfm.simple_test class NvidiaResolveTest(LibSciResolveBaseTest): - accel_nvidia_version = parameter(['35', '60']) + accel_nvidia_version = parameter(['60']) valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] build_system = 'SingleSource' compiler_version = '81' diff --git a/cscs-checks/libraries/io/hdf5_compile_run.py b/cscs-checks/libraries/io/hdf5_compile_run.py index 297ab482e1..ec692ac122 100644 --- a/cscs-checks/libraries/io/hdf5_compile_run.py +++ b/cscs-checks/libraries/io/hdf5_compile_run.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause import reframe as rfm +import reframe.utility.osext as osext import reframe.utility.sanity as sn @@ -46,6 +47,20 @@ def set_prog_environs(self): 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] + @run_after('setup') + def cdt_2105_skip(self): + # cray-hdf5 is supported only on PrgEnv-nvidia for cdt >= 21.05 + if self.current_environ.name == 'PrgEnv-nvidia': + self.skip_if( + osext.cray_cdt_version() < '21.05', + "cray-hdf5 is not supported for cdt < 21.05 on PrgEnv-nvidia" + ) + elif self.current_environ.name == 'PrgEnv-pgi': + self.skip_if( + osext.cray_cdt_version() >= '21.05', + "cray-hdf5 is not supported for cdt >= 21.05 on PrgEnv-pgi" + ) + @run_before('compile') def set_sourcepath(self): self.sourcepath = f'h5ex_d_chunk.{self.lang}' From 7240d21288ff3a2e81a4dc2bbbced668788bf330 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 22 Jun 2021 09:58:15 +0200 Subject: [PATCH 07/10] Apply cdt 21.05 workaround to netcdf tests --- cscs-checks/libraries/io/netcdf_compile_run.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cscs-checks/libraries/io/netcdf_compile_run.py b/cscs-checks/libraries/io/netcdf_compile_run.py index 79955cd8ec..7063e51c6b 100644 --- a/cscs-checks/libraries/io/netcdf_compile_run.py +++ b/cscs-checks/libraries/io/netcdf_compile_run.py 
@@ -6,6 +6,7 @@ import os import reframe as rfm +import reframe.utility.osext as osext import reframe.utility.sanity as sn @@ -52,6 +53,20 @@ def setup_prgenvs(self): else: self.valid_prog_environs = [] + @run_after('setup') + def cdt_2105_skip(self): + # cray-netcdf: PrgEnv-nvidia needs cdt >= 21.05, PrgEnv-pgi cdt < 21.05 + if self.current_environ.name == 'PrgEnv-nvidia': + self.skip_if( + osext.cray_cdt_version() < '21.05', + "cray-netcdf is not supported for cdt < 21.05 on PrgEnv-nvidia" + ) + elif self.current_environ.name == 'PrgEnv-pgi': + self.skip_if( + osext.cray_cdt_version() >= '21.05', + "cray-netcdf is not supported for cdt >= 21.05 on PrgEnv-pgi" + ) + @run_before('compile') def set_sources(self): self.sourcesdir = os.path.join(self.current_system.resourcesdir, From b76cbf0a2278315ed8cf736d8f32e6ce805e66dc Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 22 Jun 2021 10:22:54 +0200 Subject: [PATCH 08/10] Address PR comments (version 2) --- cscs-checks/compile/libsci_resolve.py | 9 +++++++++ cscs-checks/prgenv/cpu_target_check.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cscs-checks/compile/libsci_resolve.py b/cscs-checks/compile/libsci_resolve.py index d6e201d47d..2d5f2f8a5f 100644 --- a/cscs-checks/compile/libsci_resolve.py +++ b/cscs-checks/compile/libsci_resolve.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause import reframe as rfm +import reframe.utility.osext as osext import reframe.utility.sanity as sn @@ -74,6 +75,14 @@ class MKLResolveTest(LibSciResolveBaseTest): def set_fflags(self): self.build_system.fflags = ['-mkl'] + @run_before('compile') + def cdt_2105_workaround(self): + # FIXME: The mkl libraries are not found in cdt 21.05, CASE #285117 + if osext.cray_cdt_version() == '21.05': + self.build_system.ldflags += [ + '-L/opt/intel/oneapi/mkl/latest/lib/intel64/' + ] + @run_before('sanity') def set_sanity(self): regex = (r'.*\(NEEDED\).*libmkl_(?P[A-Za-z]+)_(?P\S+)' diff --git
a/cscs-checks/prgenv/cpu_target_check.py b/cscs-checks/prgenv/cpu_target_check.py index a60bc0f6bf..4c0fd705b7 100644 --- a/cscs-checks/prgenv/cpu_target_check.py +++ b/cscs-checks/prgenv/cpu_target_check.py @@ -10,7 +10,7 @@ @rfm.simple_test class CrayCPUTargetTest(rfm.RunOnlyRegressionTest): descr = 'Checks whether CRAY_CPU_TARGET is set' - valid_systems = ['daint:login', 'dom:login'] + valid_systems = ['daint:login', 'dom:login', 'pilatus:login'] valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] sourcesdir = None From 82b25b3756cb0c4ee3a60dea01264df76d114a91 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 28 Jun 2021 16:25:40 +0200 Subject: [PATCH 09/10] Enable CPU target test for eiger:login --- cscs-checks/prgenv/cpu_target_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/prgenv/cpu_target_check.py b/cscs-checks/prgenv/cpu_target_check.py index 4c0fd705b7..5156b7d9b6 100644 --- a/cscs-checks/prgenv/cpu_target_check.py +++ b/cscs-checks/prgenv/cpu_target_check.py @@ -10,7 +10,7 @@ @rfm.simple_test class CrayCPUTargetTest(rfm.RunOnlyRegressionTest): descr = 'Checks whether CRAY_CPU_TARGET is set' - valid_systems = ['daint:login', 'dom:login', 'pilatus:login'] + valid_systems = ['daint:login', 'dom:login', 'eiger:login', 'pilatus:login'] valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] sourcesdir = None From fdf84bab3756eff9b9fbe530c01f5eeedf52987d Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 1 Jul 2021 14:30:45 +0200 Subject: [PATCH 10/10] Fix PEP8 issue --- cscs-checks/prgenv/cpu_target_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cscs-checks/prgenv/cpu_target_check.py b/cscs-checks/prgenv/cpu_target_check.py index 5156b7d9b6..7cf9383d68 100644 --- a/cscs-checks/prgenv/cpu_target_check.py +++ b/cscs-checks/prgenv/cpu_target_check.py @@ -10,7 +10,8 @@ 
@rfm.simple_test class CrayCPUTargetTest(rfm.RunOnlyRegressionTest): descr = 'Checks whether CRAY_CPU_TARGET is set' - valid_systems = ['daint:login', 'dom:login', 'eiger:login', 'pilatus:login'] + valid_systems = ['daint:login', 'dom:login', 'eiger:login', + 'pilatus:login'] valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-nvidia'] sourcesdir = None