From 38bb2392e866f9cc433899f7613dfe6174fc1263 Mon Sep 17 00:00:00 2001 From: Matthias Kraushaar Date: Fri, 19 Jul 2019 14:45:54 +0200 Subject: [PATCH] MCH tests for Arolla/Tsa --- cscs-checks/mch/automatic_arrays_acc.py | 17 ++++++- cscs-checks/mch/collectives_halo.py | 39 ++++++++++++++- cscs-checks/mch/gpu_direct_acc.py | 22 ++++++++- cscs-checks/mch/gpu_direct_cuda.py | 11 ++++- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 56 ++++++++++++++++++++-- 5 files changed, 134 insertions(+), 11 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index f8fe77d371..dd4b1feb51 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -6,7 +6,7 @@ class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn', 'arolla:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom']: self.modules = ['craype-accel-nvidia60'] @@ -16,9 +16,18 @@ def __init__(self): # FIXME: workaround -- the variable should not be needed since # there is no GPUdirect in this check self.variables = {'MV2_USE_CUDA': '1'} + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.modules = [ + 'cuda92/toolkit/9.2.88', + 'craype-accel-nvidia70', + ] elif self.current_system.name == 'tsa': self.exclusive_access = True - self.modules = ['craype-accel-nvidia70', 'cuda10.0/toolkit/10.0.130'] + self.modules = [ + 'cuda10.0/toolkit/10.0.130', + 'craype-accel-nvidia70', + ] # This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 @@ -36,6 +45,7 @@ def __init__(self): self.arrays_reference = { 'PrgEnv-cce': { 'kesch:cn': {'time': (2.9E-04, None, 0.15)}, + 'arolla:cn': {'time': (2.9E-04, None, 0.15)}, 'tsa:cn': {'time': (2.9E-04, None, 
0.15)}, }, 'PrgEnv-cray': { @@ -47,6 +57,7 @@ def __init__(self): 'daint:gpu': {'time': (6.4E-05, None, 0.15)}, 'dom:gpu': {'time': (6.3E-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, + 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, 'tsa:cn': {'time': (1.4E-04, None, 0.15)}, } } @@ -68,6 +79,8 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] elif self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35,cuda9.2'] + elif self.current_system.name == 'arolla': + self.build_system.fflags += ['-ta=tesla,cc70,cuda9.2'] elif self.current_system.name == 'tsa': self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0'] else: diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 018236660c..1728e71f26 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -5,7 +5,7 @@ class CommunicationTestBase(rfm.RegressionTest): def __init__(self, variant, bench_reference): super().__init__() - self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn'] + self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.variables = {'G2G': '1'} self.executable = 'build/src/comm_overlap_benchmark' @@ -40,6 +40,34 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_60"' ] self.build_system.max_concurrency = 8 + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.num_tasks = 16 + self.num_gpus_per_node = 8 + self.modules = [ + 'cmake', + 'cuda92/toolkit/9.2.88', + ] + self.variables['MV2_USE_CUDA'] = '1' + self.variables['MPICH_RDMA_ENABLED_CUDA'] = '1' + self.build_system.config_opts += [ + '-DMPI_VENDOR=openmpi', + '-DCUDA_COMPUTE_CAPABILITY="sm_70"' + ] + elif self.current_system.name == 'tsa': + self.exclusive_access = True + self.num_tasks = 16 + self.num_gpus_per_node = 8 + self.modules = [ + 
'cmake', + 'cuda10.0/toolkit/10.0.130', + ] + self.variables['MV2_USE_CUDA'] = '1' + self.variables['MPICH_RDMA_ENABLED_CUDA'] = '1' + self.build_system.config_opts += [ + #'-DMPI_VENDOR=mvapich2', + '-DCUDA_COMPUTE_CAPABILITY="sm_70"' + ] else: self.num_tasks = 4 self.num_gpus_per_node = 1 @@ -77,6 +105,12 @@ def __init__(self, variant, bench_reference): 'kesch:cn': { 'elapsed_time': (ref, None, 0.15) }, + 'arolla:cn': { + 'elapsed_time': (ref, None, 0.15) + }, + 'tsa:cn': { + 'elapsed_time': (ref, None, 0.15) + }, 'daint': { 'elapsed_time': (ref, None, 0.15) }, @@ -93,7 +127,8 @@ def __init__(self, variant, bench_reference): def setup(self, *args, **kwargs): super().setup(*args, **kwargs) - if self.current_system.name == 'kesch': + #if self.current_system.name == 'kesch': + if self.current_system.name in {'kesch', 'tsa', 'arolla'}: self.job.launcher.options = ['--distribution=block:block', '--cpu_bind=q'] diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index ad25210387..58d295efc1 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ b/cscs-checks/mch/gpu_direct_acc.py @@ -7,7 +7,7 @@ class GpuDirectAccCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn', 'arolla:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom']: @@ -26,9 +26,25 @@ def __init__(self): self.num_tasks = 8 self.num_gpus_per_node = 8 self.num_tasks_per_node = 8 + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.modules = [ + 'cuda92/toolkit/9.2.88', + 'craype-accel-nvidia70', + ] + self.variables = { + 'MV2_USE_CUDA': '1', + 'G2G': '1' + } + self.num_tasks = 8 + self.num_gpus_per_node = 8 + self.num_tasks_per_node = 8 elif self.current_system.name == 'tsa': 
self.exclusive_access = True - self.modules = ['craype-accel-nvidia70', 'cuda10.0/toolkit/10.0.130'] + self.modules = [ + 'cuda10.0/toolkit/10.0.130', + 'craype-accel-nvidia70', + ] self.variables = { 'MV2_USE_CUDA': '1', 'G2G': '1' @@ -58,6 +74,8 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla:cc60', '-Mnorpath'] elif self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla:cc35'] + elif self.current_system.name == 'arolla': + self.build_system.fflags += ['-ta=tesla:cc70'] elif self.current_system.name == 'tsa': self.build_system.fflags += ['-ta=tesla:cc70'] diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index d81b265722..e040c799e2 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -8,7 +8,7 @@ class GpuDirectCudaCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'tests gpu-direct for CUDA' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn', 'arolla:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.sourcepath = 'gpu_direct_cuda.cu' self.build_system = 'SingleSource' @@ -26,6 +26,15 @@ def __init__(self): 'G2G': '1', } self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_37'] + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu'] + self.modules = ['cuda92/toolkit/9.2.88'] + self.variables = { + 'MV2_USE_CUDA': '1', + 'G2G': '1', + } + self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_70'] elif self.current_system.name == 'tsa': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu'] diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index 9eeed7fe79..0e8a495be3 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py 
@@ -7,8 +7,8 @@ class OpenaccCudaCpp(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'test for OpenACC, CUDA, MPI, and C++' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tsa:cn', 'arolla:cn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu'] self.build_system = 'Make' self.build_system.fflags = ['-O2'] if self.current_system.name in ['daint', 'dom']: @@ -24,9 +24,16 @@ def __init__(self): self.num_tasks_per_node = 8 self.num_gpus_per_node = 8 self.build_system.options = ['NVCC_FLAGS="-arch=compute_37"'] + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.modules = ['cuda92/toolkit/9.2.88'] + self.num_tasks = 8 + self.num_tasks_per_node = 8 + self.num_gpus_per_node = 8 + self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"'] elif self.current_system.name == 'tsa': self.exclusive_access = True - self.modules = ['craype-accel-nvidia70', 'cuda10.0/toolkit/10.0.130'] + self.modules = ['cuda10.0/toolkit/10.0.130'] self.num_tasks = 8 self.num_tasks_per_node = 8 self.num_gpus_per_node = 8 @@ -41,6 +48,11 @@ def __init__(self): 'MV2_USE_CUDA': '1', 'G2G': '1' } + elif self.current_system.name in ['arolla']: + self.variables = { + 'MV2_USE_CUDA': '1', + 'G2G': '1' + } elif self.current_system.name in ['tsa']: self.variables = { 'MV2_USE_CUDA': '1', @@ -65,12 +77,43 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla,cc35,cuda8.0'] self.build_system.ldflags = ['-acc', '-ta:tesla:cc35,cuda8.0', '-lstdc++'] + if environ.name == 'PrgEnv-pgi-nompi': + self.build_system.ldflags += [ + '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64', + '-lcublas', '-lcudart' + ] + elif self.current_system.name == 'arolla': + self.build_system.fflags += ['-ta=tesla,cc70,cuda9.2'] + 
self.build_system.ldflags = ['-acc', '-ta:tesla:cc70,cuda9.2', + '-lstdc++', '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64', + '-lcublas', '-lcudart' + ] + if environ.name == 'PrgEnv-pgi-nompi': + self.build_system.fflags += [ + '-ta=tesla,cc70,cuda9.2', + '-I${EBROOTOPENMPI}/include' + ] + self.build_system.ldflags += [ + '-L${EBROOTOPENMPI}/lib', '-lmpi_mpifh', + '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64', + '-lcublas', '-lcudart' + ] elif self.current_system.name == 'tsa': self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0'] self.build_system.ldflags = ['-acc', '-ta:tesla:cc70,cuda10.0', - '-lstdc++' '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64', + '-lstdc++', '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64', '-lcublas', '-lcudart' ] + if environ.name == 'PrgEnv-pgi-nompi': + self.build_system.fflags += [ + '-ta=tesla,cc70,cuda10.0', + '-I${EBROOTOPENMPI}/include' + ] + self.build_system.ldflags += [ + '-L${EBROOTOPENMPI}/lib', '-lmpi_mpifh', + '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64', + '-lcublas', '-lcudart' + ] elif environ.name.startswith('PrgEnv-gnu'): self.build_system.ldflags = ['-lstdc++'] if self.current_system.name == 'kesch': @@ -78,6 +121,11 @@ def setup(self, partition, environ, **job_opts): '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64', '-lcublas', '-lcudart' ] + if self.current_system.name == 'arolla': + self.build_system.ldflags += [ + '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64', + '-lcublas', '-lcudart' + ] if self.current_system.name == 'tsa': self.build_system.ldflags += [ '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64',