Merged

Changes from all commits (63 commits)
4d29b82
Starting point for ReFrame on Tsa 7.6
Jul 15, 2019
7bd539e
Adding tsa-cn018
Jul 15, 2019
b826382
Adding new tests
Jul 15, 2019
566147b
Reverting to tmod
Jul 16, 2019
527e5bf
Updating hostname
Jul 16, 2019
a9bb2bb
Updating tests
Jul 17, 2019
61314ef
OpenAccCudaMpi
Jul 17, 2019
0c97972
OpenAccCudaMpi
Jul 17, 2019
8bb7f46
Update openacc_cuda_mpi_cppstd.py
kraushm Jul 19, 2019
0af517f
Merge pull request #1 from kraushm/patch-1
lucamar Jul 19, 2019
0727489
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 19, 2019
b7485bf
Merge branch '19.04' of github.com:lucamar/reframe into 19.04
Jul 19, 2019
e1f7709
Fixing craype-accel
Jul 19, 2019
38bb239
MCH tests for Arolla/Tsa
kraushm Jul 19, 2019
1d19861
Merge pull request #2 from kraushm/19.04
lucamar Jul 19, 2019
6782735
Removing PrgEnv-pgi-nompi
Jul 19, 2019
fcd36e9
Update openacccudampi
Jul 19, 2019
7e00644
Updating tests
Jul 19, 2019
6ed43b4
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 22, 2019
8a6e1bf
Adding DDT checks
Jul 24, 2019
cd62128
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 25, 2019
2f74a26
Updating tests
Jul 25, 2019
130d99b
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 29, 2019
2705133
Fixing link to libcuda with PrgEnv-cce
Aug 6, 2019
64a1bf5
Setting reference values for Tsawq
Aug 6, 2019
2074e4e
Adding kernel latency test
Aug 6, 2019
ef879bb
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Aug 8, 2019
b2ec5f2
Adding slurm checks
Aug 8, 2019
9dec01a
Changing deprecated syntax
Aug 12, 2019
b677bb8
Starting point for mapping devices
Aug 13, 2019
bf840f4
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Aug 19, 2019
e661537
Adding modified check AutomaticArrays
Aug 21, 2019
3874f26
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Aug 27, 2019
1bb5d0d
Adding custom PGI wrapper
Aug 27, 2019
732f911
OpenACC CUDA with CUDA 8.0
Aug 27, 2019
eb4d157
Reverting back to original source file
Aug 28, 2019
dc493da
Fixing PGI with MPI
Aug 28, 2019
1c01d87
Removing obsolete envs
Aug 28, 2019
620abb8
Fix gpudirectacc
Aug 28, 2019
cc03af3
Update automatic_arrays_acc.py
lucamar Aug 28, 2019
babdc65
Update collectives_halo.py
lucamar Aug 28, 2019
a2c2cae
Removing set_openacc_cuda_mpi
Aug 28, 2019
c623554
Merge branch 'kesch' of github.com:lucamar/reframe into kesch
Aug 28, 2019
ba6cfe5
Reverting back to master
Aug 28, 2019
5ca9931
Fixing line length
Aug 28, 2019
b68b685
Removing tsa config file
Aug 28, 2019
3463005
Removing trailing whitespace
Aug 28, 2019
a2d276c
Removing craype-accel-nvidia
Aug 28, 2019
e508c58
Adjusting line length
Aug 28, 2019
612530e
Removing trailing characters
Aug 28, 2019
20e1e72
Fixing typo in OpenMPI module name
Aug 29, 2019
15fa08e
Merge branch 'master' into kesch
lucamar Aug 29, 2019
60f9620
Restoring check_ruby
Aug 30, 2019
4a37e64
Merge branch 'kesch' of github.com:lucamar/reframe into kesch
Aug 30, 2019
7468e85
Updating reference value for Kesch
Aug 30, 2019
c6b8613
Merge branch 'master' into kesch
lucamar Aug 30, 2019
29661b8
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Sep 3, 2019
41b71bb
Moving closing bracket to the next line
Sep 3, 2019
f503983
Fixing collectives according to review
Sep 3, 2019
d99ee94
Removing obsolete comment
Sep 3, 2019
9b637f1
Re-inserting cuda 8.0 module in gpu_direct_cuda
Sep 3, 2019
176a5a0
Adjusting format according to review
Sep 3, 2019
af86008
Fixing typo in collectives
Sep 3, 2019
6 changes: 4 additions & 2 deletions config/cscs.py
@@ -348,8 +348,10 @@ class ReframeSettings:
},
'PrgEnv-pgi': {
'type': 'ProgEnvironment',
'modules': ['PE/17.06',
'PrgEnv-pgi/18.5'],
'modules': [
'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26',
'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0'
],
'cc': 'mpicc',
'cxx': 'mpicxx',
'ftn': 'mpif90',
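For context, the PrgEnv-pgi entry above sits in the site configuration's mapping of programming environments. A minimal stand-alone sketch of the updated entry, assuming the usual mapping of system name to environment definitions around it (whether this hunk belongs to the kesch section is an assumption; the entry itself mirrors the diff):

environments = {
    'kesch': {
        'PrgEnv-pgi': {
            'type': 'ProgEnvironment',
            'modules': [
                'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26',
                'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0'
            ],
            'cc': 'mpicc',
            'cxx': 'mpicxx',
            'ftn': 'mpif90',
        }
    }
}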
14 changes: 7 additions & 7 deletions cscs-checks/mch/automatic_arrays_acc.py
@@ -7,18 +7,18 @@ class AutomaticArraysCheck(rfm.RegressionTest):
def __init__(self):
super().__init__()
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
'PrgEnv-cray-c2sm-gpu',
'PrgEnv-pgi-c2sm-gpu']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
# FIXME: workaround -- the variable should not be needed since
# there is no GPUdirect in this check
self.variables = {'MV2_USE_CUDA': '1'}

self.variables = {
'CRAY_ACCEL_TARGET': 'nvidia35',
'MV2_USE_CUDA': '1'
}
# This test requires an MPI compiler, although it uses a single task
self.num_tasks = 1
self.num_gpus_per_node = 1
@@ -56,7 +56,7 @@ def setup(self, partition, environ, **job_opts):
envname = 'PrgEnv-pgi'
self.build_system.fflags += ['-acc']
if self.current_system.name == 'kesch':
self.build_system.fflags += ['-ta=tesla,cc35,cuda8.0']
self.build_system.fflags += ['-ta=tesla,cc35']
elif self.current_system.name in ['daint', 'dom']:
self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath']
else:
14 changes: 5 additions & 9 deletions cscs-checks/mch/collectives_halo.py
@@ -2,7 +2,7 @@
import reframe.utility.sanity as sn


class CommunicationTestBase(rfm.RegressionTest):
class CollectivesBaseTest(rfm.RegressionTest):
def __init__(self, variant, bench_reference):
super().__init__()
self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn']
@@ -23,7 +23,7 @@ def __init__(self, variant, bench_reference):
self.num_gpus_per_node = 16
self.num_tasks_per_node = 16
self.num_tasks_per_socket = 8
self.modules = ['craype-accel-nvidia35', 'cmake']
self.modules = ['cmake']
self.variables['MV2_USE_CUDA'] = '1'
self.build_system.config_opts += [
'-DMPI_VENDOR=mvapich2',
@@ -63,6 +63,7 @@ def __init__(self, variant, bench_reference):
'default': 0.0138493
}
}

if self.current_system.name == 'dom':
sysname = 'daint'
else:
@@ -98,11 +99,8 @@ def setup(self, *args, **kwargs):
'--cpu_bind=q']


# the values default, nocomm and nocomp refer to the different parts
# of the check where the time is measured; default == all
# nocomm == no communication nocomp == no computation
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class AlltoallvTest(CommunicationTestBase):
class AlltoallvTest(CollectivesBaseTest):
def __init__(self, variant):
super().__init__(variant,
{
@@ -117,14 +115,13 @@ def __init__(self, variant):
'default': 0.0138493
}
})
self.descr = 'Alltoall communication test'
self.strict_check = False
self.sourcesdir = 'https://github.com/cosunae/comm_overlap_bench'
self.prebuild_cmd = ['git checkout alltoallv']


@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class HaloExchangeTest(CommunicationTestBase):
class HaloExchangeTest(CollectivesBaseTest):
def __init__(self, variant):
super().__init__(variant,
{
@@ -139,6 +136,5 @@ def __init__(self, variant):
'default': 2.53509
}
})
self.descr = 'Halo-cell exchange test'
self.sourcesdir = 'https://github.com/MeteoSwiss-APN/comm_overlap_bench.git'
self.prebuild_cmd = ['git checkout barebones']
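As the comment in the diff explains, the default, nocomm and nocomp parameters select which part of the benchmark is timed: the full run, the run without communication, and the run without computation. A trimmed sketch of how that parameterization expands into separate test cases (class bodies and the reference value are simplified placeholders, not the real check):

import reframe as rfm


class CollectivesBaseTest(rfm.RegressionTest):
    # Simplified stand-in for the base class above; the real __init__ also
    # sets valid systems, modules, build options and per-system references
    # derived from bench_reference.
    def __init__(self, variant, bench_reference):
        super().__init__()
        self.descr = 'collectives benchmark, {} variant'.format(variant)
        self.bench_reference = bench_reference
        self.strict_check = False


# Each argument list becomes one generated test: default times everything,
# nocomm disables communication, nocomp disables computation.
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class AlltoallvSketch(CollectivesBaseTest):
    def __init__(self, variant):
        # Placeholder reference value for illustration only.
        super().__init__(variant, {'kesch:cn': {'default': 0.0}})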
4 changes: 2 additions & 2 deletions cscs-checks/mch/cuda_stress_test.py
@@ -11,7 +11,7 @@ def __init__(self):
if self.current_system.name == 'kesch':
self.exclusive_access = True
self.valid_prog_environs = ['PrgEnv-gnu-nompi']
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
else:
self.valid_prog_environs = ['PrgEnv-gnu']
self.modules = ['craype-accel-nvidia60']
@@ -33,7 +33,7 @@ def __init__(self):
'time': (1.39758, None, 0.05)
},
'kesch:cn': {
'time': (2.12769, None, 0.05)
'time': (2.25, None, 0.05)
}
}
self.tags = {'production', 'mch'}
2 changes: 1 addition & 1 deletion cscs-checks/mch/g2g_meteoswiss_check.py
@@ -14,7 +14,7 @@ def __init__(self, g2g):
# 'PrgEnv-gnu-c2sm-gpu' will be added later
self.valid_prog_environs = ['PrgEnv-gnu']
self.exclusive_access = True
self.modules = ['cmake', 'craype-accel-nvidia35']
self.modules = ['cmake']
self.pre_run = ["export EXECUTABLE=$(ls src/ | "
"grep 'GNU.*MVAPICH.*CUDA.*kesch.*')"]
self.executable = 'build/src/comm_overlap_benchmark'
8 changes: 3 additions & 5 deletions cscs-checks/mch/gpu_direct_acc.py
@@ -10,10 +10,7 @@ def __init__(self):
self.descr = 'tests gpu-direct for Fortran OpenACC'
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']

# FIXME: temporary workaround until the mvapich module is fixed;
# 'PrgEnv-pgi-c2sm-gpu' will be added later
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cray-c2sm-gpu',
'PrgEnv-pgi']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
self.variables = {'MPICH_RDMA_ENABLED_CUDA': '1'}
@@ -22,8 +19,9 @@ def __init__(self):
self.num_tasks_per_node = 1
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.variables = {
'CRAY_ACCEL_TARGET': 'nvidia35',
'MV2_USE_CUDA': '1',
'G2G': '1'
}
2 changes: 1 addition & 1 deletion cscs-checks/mch/gpu_direct_cuda.py
@@ -22,7 +22,7 @@ def __init__(self):
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.valid_prog_environs = ['PrgEnv-gnu']
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1',
95 changes: 75 additions & 20 deletions cscs-checks/mch/openacc_cuda_mpi_cppstd.py
@@ -7,33 +7,54 @@ class OpenaccCudaCpp(rfm.RegressionTest):
def __init__(self):
super().__init__()
self.descr = 'test for OpenACC, CUDA, MPI, and C++'
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu']
self.valid_systems = ['daint:gpu', 'dom:gpu',
'kesch:cn', 'arolla:cn', 'tsa:cn']
self.valid_prog_environs = ['PrgEnv-cce', 'PrgEnv-cray',
'PrgEnv-pgi', 'PrgEnv-gnu']
self.build_system = 'Make'
self.build_system.fflags = ['-O2']

if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
self.num_tasks = 12
self.num_tasks_per_node = 12
self.num_gpus_per_node = 1
self.build_system.options = ['NVCC_FLAGS="-arch=compute_60"']
self.variables = {
'MPICH_RDMA_ENABLED_CUDA': '1',
'CRAY_CUDA_MPS': '1'
}
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_37"']
# FIXME: temporary workaround until the mvapich module is fixed;
# 'PrgEnv-{pgi,gnu}-c2sm-gpu' will be added later
self.valid_prog_environs += ['PrgEnv-cray-c2sm-gpu']

if self.current_system.name in ['daint', 'dom']:
self.variables = {
'MPICH_RDMA_ENABLED_CUDA': '1',
'CRAY_CUDA_MPS': '1'
'MV2_USE_CUDA': '1',
'G2G': '1'
}
elif self.current_system.name in ['kesch']:
elif self.current_system.name == 'arolla':
self.exclusive_access = True
self.modules = ['cuda92/toolkit/9.2.88',
'craype-accel-nvidia70']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1'
}
elif self.current_system.name == 'tsa':
self.exclusive_access = True
self.modules = ['cuda10.0/toolkit/10.0.130',
'craype-accel-nvidia70']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1'
@@ -47,6 +68,20 @@ def __init__(self):
def setup(self, partition, environ, **job_opts):
if environ.name.startswith('PrgEnv-cray'):
self.build_system.fflags += ['-hacc', '-hnoomp']

elif environ.name.startswith('PrgEnv-cce'):
self.build_system.fflags += ['-hacc', '-hnoomp']
if self.current_system.name == 'arolla':
self.build_system.ldflags = [
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'tsa':
self.build_system.ldflags = [
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64',
'-lcublas', '-lcudart'
]

elif environ.name.startswith('PrgEnv-pgi'):
self.build_system.fflags += ['-acc']
if self.current_system.name in ['daint', 'dom']:
@@ -55,19 +90,39 @@ def setup(self, partition, environ, **job_opts):
'-Mnorpath', '-lstdc++']
elif self.current_system.name == 'kesch':
self.build_system.fflags += ['-ta=tesla,cc35,cuda8.0']
self.build_system.ldflags = ['-acc', '-ta:tesla:cc35,cuda8.0',
'-lstdc++']
if environ.name == 'PrgEnv-pgi-nompi':
self.build_system.ldflags += [
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
]
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc35,cuda8.0', '-lstdc++',
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'arolla':
self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0']
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++',
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'tsa':
self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0']
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++',
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64',
'-lcublas', '-lcudart'
]

elif environ.name.startswith('PrgEnv-gnu'):
self.build_system.ldflags = ['-lstdc++']
if self.current_system.name == 'kesch':
self.build_system.ldflags += [
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64'
]
if self.current_system.name == 'arolla':
self.build_system.ldflags += [
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64'
]
if self.current_system.name == 'tsa':
self.build_system.ldflags += [
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64']
self.build_system.ldflags += ['-lcublas', '-lcudart']

super().setup(partition, environ, **job_opts)
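Overall, the updated check follows ReFrame's usual pattern: modules are chosen by branching on self.current_system.name in __init__, and compiler flags by branching on environ.name in setup(). A heavily trimmed sketch of that pattern for the two new systems, showing only the PGI path (module names and flags are copied from the diff; the @rfm.simple_test registration and every omitted detail are assumptions or simplifications):

import reframe as rfm


@rfm.simple_test
class OpenaccCudaCppSketch(rfm.RegressionTest):
    def __init__(self):
        super().__init__()
        self.valid_systems = ['arolla:cn', 'tsa:cn']
        self.valid_prog_environs = ['PrgEnv-pgi']
        self.build_system = 'Make'
        self.build_system.fflags = ['-O2']
        self.exclusive_access = True
        # Per-system CUDA toolchain modules, as in the diff above.
        if self.current_system.name == 'arolla':
            self.modules = ['cuda92/toolkit/9.2.88', 'craype-accel-nvidia70']
        elif self.current_system.name == 'tsa':
            self.modules = ['cuda10.0/toolkit/10.0.130',
                            'craype-accel-nvidia70']
        self.variables = {'MV2_USE_CUDA': '1', 'G2G': '1'}

    def setup(self, partition, environ, **job_opts):
        # Per-compiler OpenACC flags; both systems target cc70 (Volta) GPUs.
        if environ.name.startswith('PrgEnv-pgi'):
            self.build_system.fflags += ['-acc', '-ta=tesla,cc70,cuda10.0']
            self.build_system.ldflags = ['-acc', '-ta:tesla:cc70,cuda10.0',
                                         '-lstdc++']
        super().setup(partition, environ, **job_opts)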
2 changes: 2 additions & 0 deletions cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90
@@ -35,6 +35,7 @@ program openacc_cuda_mpi_cppstd
!$acc end host_data
!$acc end data


if(mpi_rank == 0) then

! Allocate and initialize arrays on the GPU
@@ -92,6 +93,7 @@ program openacc_cuda_mpi_cppstd
deallocate(f1)
deallocate(f2)
deallocate(f3)
write (*,*) "Result: OK"
end if

call MPI_Finalize(ierr);