Merged

Changes from all commits (63 commits)
4d29b82
Starting point for ReFrame on Tsa 7.6
Jul 15, 2019
7bd539e
Adding tsa-cn018
Jul 15, 2019
b826382
Adding new tests
Jul 15, 2019
566147b
Reverting to tmod
Jul 16, 2019
527e5bf
Updating hostname
Jul 16, 2019
a9bb2bb
Updating tests
Jul 17, 2019
61314ef
OpenAccCudaMpi
Jul 17, 2019
0c97972
OpenAccCudaMpi
Jul 17, 2019
8bb7f46
Update openacc_cuda_mpi_cppstd.py
kraushm Jul 19, 2019
0af517f
Merge pull request #1 from kraushm/patch-1
lucamar Jul 19, 2019
0727489
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 19, 2019
b7485bf
Merge branch '19.04' of github.com:lucamar/reframe into 19.04
Jul 19, 2019
e1f7709
Fixing craype-accel
Jul 19, 2019
38bb239
MCH tests for Arolla/Tsa
kraushm Jul 19, 2019
1d19861
Merge pull request #2 from kraushm/19.04
lucamar Jul 19, 2019
6782735
Removing PrgEnv-pgi-nompi
Jul 19, 2019
fcd36e9
Update openacccudampi
Jul 19, 2019
7e00644
Updating tests
Jul 19, 2019
6ed43b4
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 22, 2019
8a6e1bf
Adding DDT checks
Jul 24, 2019
cd62128
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 25, 2019
2f74a26
Updating tests
Jul 25, 2019
130d99b
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Jul 29, 2019
2705133
Fixing link to libcuda with PrgEnv-cce
Aug 6, 2019
64a1bf5
Setting reference values for Tsawq
Aug 6, 2019
2074e4e
Adding kernel latency test
Aug 6, 2019
ef879bb
Merge branch 'master' of github.com:eth-cscs/reframe into 19.04
Aug 8, 2019
b2ec5f2
Adding slurm checks
Aug 8, 2019
9dec01a
Changing deprecated syntax
Aug 12, 2019
b677bb8
Starting point for mapping devices
Aug 13, 2019
bf840f4
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Aug 19, 2019
e661537
Adding modified check AutomaticArrays
Aug 21, 2019
3874f26
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Aug 27, 2019
1bb5d0d
Adding custom PGI wrapper
Aug 27, 2019
732f911
OpenACC CUDA with CUDA 8.0
Aug 27, 2019
eb4d157
Reverting back to original source file
Aug 28, 2019
dc493da
Fixing PGI with MPI
Aug 28, 2019
1c01d87
Removing obsolete envs
Aug 28, 2019
620abb8
Fix gpudirectacc
Aug 28, 2019
cc03af3
Update automatic_arrays_acc.py
lucamar Aug 28, 2019
babdc65
Update collectives_halo.py
lucamar Aug 28, 2019
a2c2cae
Removing set_openacc_cuda_mpi
Aug 28, 2019
c623554
Merge branch 'kesch' of github.com:lucamar/reframe into kesch
Aug 28, 2019
ba6cfe5
Reverting back to master
Aug 28, 2019
5ca9931
Fixing line length
Aug 28, 2019
b68b685
Removing tsa config file
Aug 28, 2019
3463005
Removing trailing whitespace
Aug 28, 2019
a2d276c
Removing craype-accel-nvidia
Aug 28, 2019
e508c58
Adjusting line length
Aug 28, 2019
612530e
Removing trailing characters
Aug 28, 2019
20e1e72
Fixing typo in OpenMPI module name
Aug 29, 2019
15fa08e
Merge branch 'master' into kesch
lucamar Aug 29, 2019
60f9620
Restoring check_ruby
Aug 30, 2019
4a37e64
Merge branch 'kesch' of github.com:lucamar/reframe into kesch
Aug 30, 2019
7468e85
Updating reference value for Kesch
Aug 30, 2019
c6b8613
Merge branch 'master' into kesch
lucamar Aug 30, 2019
29661b8
Merge branch 'master' of github.com:eth-cscs/reframe into kesch
Sep 3, 2019
41b71bb
Moving closing bracket to the next line
Sep 3, 2019
f503983
Fixing collectives according to review
Sep 3, 2019
d99ee94
Removing obsolete comment
Sep 3, 2019
9b637f1
Re-inserting cuda 8.0 module in gpu_direct_cuda
Sep 3, 2019
176a5a0
Adjusting format according to review
Sep 3, 2019
af86008
Fixing typo in collectives
Sep 3, 2019
6 changes: 4 additions & 2 deletions config/cscs.py
@@ -348,8 +348,10 @@ class ReframeSettings:
},
'PrgEnv-pgi': {
'type': 'ProgEnvironment',
'modules': ['PE/17.06',
'PrgEnv-pgi/18.5'],
'modules': [
'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26',
'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0'
],
'cc': 'mpicc',
'cxx': 'mpicxx',
'ftn': 'mpif90',
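For context, the PrgEnv-pgi entry above sits in the site configuration's mapping of programming environments. A minimal stand-alone sketch of the updated entry, assuming the usual mapping of system name to environment definitions around it (whether this hunk belongs to the kesch section is an assumption; the entry itself mirrors the diff):

environments = {
    'kesch': {
        'PrgEnv-pgi': {
            'type': 'ProgEnvironment',
            'modules': [
                'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26',
                'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0'
            ],
            'cc': 'mpicc',
            'cxx': 'mpicxx',
            'ftn': 'mpif90',
        }
    }
}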
14 changes: 7 additions & 7 deletions cscs-checks/mch/automatic_arrays_acc.py
@@ -7,18 +7,18 @@ class AutomaticArraysCheck(rfm.RegressionTest):
def __init__(self):
super().__init__()
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
'PrgEnv-cray-c2sm-gpu',
'PrgEnv-pgi-c2sm-gpu']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
# FIXME: workaround -- the variable should not be needed since
# there is no GPUdirect in this check
self.variables = {'MV2_USE_CUDA': '1'}

self.variables = {
'CRAY_ACCEL_TARGET': 'nvidia35',
'MV2_USE_CUDA': '1'
}
# This test requires an MPI compiler, although it uses a single task
self.num_tasks = 1
self.num_gpus_per_node = 1
@@ -56,7 +56,7 @@ def setup(self, partition, environ, **job_opts):
envname = 'PrgEnv-pgi'
self.build_system.fflags += ['-acc']
if self.current_system.name == 'kesch':
self.build_system.fflags += ['-ta=tesla,cc35,cuda8.0']
self.build_system.fflags += ['-ta=tesla,cc35']
elif self.current_system.name in ['daint', 'dom']:
self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath']
else:
14 changes: 5 additions & 9 deletions cscs-checks/mch/collectives_halo.py
@@ -2,7 +2,7 @@
import reframe.utility.sanity as sn


class CommunicationTestBase(rfm.RegressionTest):
class CollectivesBaseTest(rfm.RegressionTest):
def __init__(self, variant, bench_reference):
super().__init__()
self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn']
@@ -23,7 +23,7 @@ def __init__(self, variant, bench_reference):
self.num_gpus_per_node = 16
self.num_tasks_per_node = 16
self.num_tasks_per_socket = 8
self.modules = ['craype-accel-nvidia35', 'cmake']
self.modules = ['cmake']
self.variables['MV2_USE_CUDA'] = '1'
self.build_system.config_opts += [
'-DMPI_VENDOR=mvapich2',
@@ -63,6 +63,7 @@ def __init__(self, variant, bench_reference):
'default': 0.0138493
}
}

if self.current_system.name == 'dom':
sysname = 'daint'
else:
@@ -98,11 +99,8 @@ def setup(self, *args, **kwargs):
'--cpu_bind=q']


# the values default, nocomm and nocomp refer to the different parts
# of the check where the time is measured; default == all
# nocomm == no communication nocomp == no computation
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class AlltoallvTest(CommunicationTestBase):
class AlltoallvTest(CollectivesBaseTest):
def __init__(self, variant):
super().__init__(variant,
{
@@ -117,14 +115,13 @@ def __init__(self, variant):
'default': 0.0138493
}
})
self.descr = 'Alltoall communication test'
self.strict_check = False
self.sourcesdir = 'https://github.com/cosunae/comm_overlap_bench'
self.prebuild_cmd = ['git checkout alltoallv']


@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class HaloExchangeTest(CommunicationTestBase):
class HaloExchangeTest(CollectivesBaseTest):
def __init__(self, variant):
super().__init__(variant,
{
@@ -139,6 +136,5 @@ def __init__(self, variant):
'default': 2.53509
}
})
self.descr = 'Halo-cell exchange test'
self.sourcesdir = 'https://github.com/MeteoSwiss-APN/comm_overlap_bench.git'
self.prebuild_cmd = ['git checkout barebones']
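As the comment in the diff explains, the default, nocomm and nocomp parameters select which part of the benchmark is timed: the full run, the run without communication, and the run without computation. A trimmed sketch of how that parameterization expands into separate test cases (class bodies and the reference value are simplified placeholders, not the real check):

import reframe as rfm


class CollectivesBaseTest(rfm.RegressionTest):
    # Simplified stand-in for the base class above; the real __init__ also
    # sets valid systems, modules, build options and per-system references
    # derived from bench_reference.
    def __init__(self, variant, bench_reference):
        super().__init__()
        self.descr = 'collectives benchmark, {} variant'.format(variant)
        self.bench_reference = bench_reference
        self.strict_check = False


# Each argument list becomes one generated test: default times everything,
# nocomm disables communication, nocomp disables computation.
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
class AlltoallvSketch(CollectivesBaseTest):
    def __init__(self, variant):
        # Placeholder reference value for illustration only.
        super().__init__(variant, {'kesch:cn': {'default': 0.0}})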
4 changes: 2 additions & 2 deletions cscs-checks/mch/cuda_stress_test.py
@@ -11,7 +11,7 @@ def __init__(self):
if self.current_system.name == 'kesch':
self.exclusive_access = True
self.valid_prog_environs = ['PrgEnv-gnu-nompi']
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
else:
self.valid_prog_environs = ['PrgEnv-gnu']
self.modules = ['craype-accel-nvidia60']
@@ -33,7 +33,7 @@ def __init__(self):
'time': (1.39758, None, 0.05)
},
'kesch:cn': {
'time': (2.12769, None, 0.05)
'time': (2.25, None, 0.05)
}
}
self.tags = {'production', 'mch'}
2 changes: 1 addition & 1 deletion cscs-checks/mch/g2g_meteoswiss_check.py
@@ -14,7 +14,7 @@ def __init__(self, g2g):
# 'PrgEnv-gnu-c2sm-gpu' will be added later
self.valid_prog_environs = ['PrgEnv-gnu']
self.exclusive_access = True
self.modules = ['cmake', 'craype-accel-nvidia35']
self.modules = ['cmake']
self.pre_run = ["export EXECUTABLE=$(ls src/ | "
"grep 'GNU.*MVAPICH.*CUDA.*kesch.*')"]
self.executable = 'build/src/comm_overlap_benchmark'
8 changes: 3 additions & 5 deletions cscs-checks/mch/gpu_direct_acc.py
@@ -10,10 +10,7 @@ def __init__(self):
self.descr = 'tests gpu-direct for Fortran OpenACC'
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']

# FIXME: temporary workaround until the mvapich module is fixed;
# 'PrgEnv-pgi-c2sm-gpu' will be added later
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cray-c2sm-gpu',
'PrgEnv-pgi']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
self.variables = {'MPICH_RDMA_ENABLED_CUDA': '1'}
@@ -22,8 +19,9 @@ def __init__(self):
self.num_tasks_per_node = 1
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.variables = {
'CRAY_ACCEL_TARGET': 'nvidia35',
'MV2_USE_CUDA': '1',
'G2G': '1'
}
2 changes: 1 addition & 1 deletion cscs-checks/mch/gpu_direct_cuda.py
@@ -22,7 +22,7 @@ def __init__(self):
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.valid_prog_environs = ['PrgEnv-gnu']
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1',
95 changes: 75 additions & 20 deletions cscs-checks/mch/openacc_cuda_mpi_cppstd.py
@@ -7,33 +7,54 @@ class OpenaccCudaCpp(rfm.RegressionTest):
def __init__(self):
super().__init__()
self.descr = 'test for OpenACC, CUDA, MPI, and C++'
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu']
self.valid_systems = ['daint:gpu', 'dom:gpu',
'kesch:cn', 'arolla:cn', 'tsa:cn']
self.valid_prog_environs = ['PrgEnv-cce', 'PrgEnv-cray',
'PrgEnv-pgi', 'PrgEnv-gnu']
self.build_system = 'Make'
self.build_system.fflags = ['-O2']

if self.current_system.name in ['daint', 'dom']:
self.modules = ['craype-accel-nvidia60']
self.num_tasks = 12
self.num_tasks_per_node = 12
self.num_gpus_per_node = 1
self.build_system.options = ['NVCC_FLAGS="-arch=compute_60"']
self.variables = {
'MPICH_RDMA_ENABLED_CUDA': '1',
'CRAY_CUDA_MPS': '1'
}
elif self.current_system.name == 'kesch':
self.exclusive_access = True
self.modules = ['craype-accel-nvidia35']
self.modules = ['cudatoolkit/8.0.61']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_37"']
# FIXME: temporary workaround until the mvapich module is fixed;
# 'PrgEnv-{pgi,gnu}-c2sm-gpu' will be added later
self.valid_prog_environs += ['PrgEnv-cray-c2sm-gpu']

if self.current_system.name in ['daint', 'dom']:
self.variables = {
'MPICH_RDMA_ENABLED_CUDA': '1',
'CRAY_CUDA_MPS': '1'
'MV2_USE_CUDA': '1',
'G2G': '1'
}
elif self.current_system.name in ['kesch']:
elif self.current_system.name == 'arolla':
self.exclusive_access = True
self.modules = ['cuda92/toolkit/9.2.88',
'craype-accel-nvidia70']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1'
}
elif self.current_system.name == 'tsa':
self.exclusive_access = True
self.modules = ['cuda10.0/toolkit/10.0.130',
'craype-accel-nvidia70']
self.num_tasks = 8
self.num_tasks_per_node = 8
self.num_gpus_per_node = 8
self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"']
self.variables = {
'MV2_USE_CUDA': '1',
'G2G': '1'
@@ -47,6 +68,20 @@ def __init__(self):
def setup(self, partition, environ, **job_opts):
if environ.name.startswith('PrgEnv-cray'):
self.build_system.fflags += ['-hacc', '-hnoomp']

elif environ.name.startswith('PrgEnv-cce'):
self.build_system.fflags += ['-hacc', '-hnoomp']
if self.current_system.name == 'arolla':
self.build_system.ldflags = [
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'tsa':
self.build_system.ldflags = [
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64',
'-lcublas', '-lcudart'
]

elif environ.name.startswith('PrgEnv-pgi'):
self.build_system.fflags += ['-acc']
if self.current_system.name in ['daint', 'dom']:
@@ -55,19 +90,39 @@ def setup(self, partition, environ, **job_opts):
'-Mnorpath', '-lstdc++']
elif self.current_system.name == 'kesch':
self.build_system.fflags += ['-ta=tesla,cc35,cuda8.0']
self.build_system.ldflags = ['-acc', '-ta:tesla:cc35,cuda8.0',
'-lstdc++']
if environ.name == 'PrgEnv-pgi-nompi':
self.build_system.ldflags += [
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
]
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc35,cuda8.0', '-lstdc++',
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'arolla':
self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0']
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++',
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64',
'-lcublas', '-lcudart'
]
elif self.current_system.name == 'tsa':
self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0']
self.build_system.ldflags = [
'-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++',
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64',
'-lcublas', '-lcudart'
]

elif environ.name.startswith('PrgEnv-gnu'):
self.build_system.ldflags = ['-lstdc++']
if self.current_system.name == 'kesch':
self.build_system.ldflags += [
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64',
'-lcublas', '-lcudart'
'-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64'
]
if self.current_system.name == 'arolla':
self.build_system.ldflags += [
'-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64'
]
if self.current_system.name == 'tsa':
self.build_system.ldflags += [
'-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64']
self.build_system.ldflags += ['-lcublas', '-lcudart']

super().setup(partition, environ, **job_opts)
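Overall, the updated check follows ReFrame's usual pattern: modules are chosen by branching on self.current_system.name in __init__, and compiler flags by branching on environ.name in setup(). A heavily trimmed sketch of that pattern for the two new systems, showing only the PGI path (module names and flags are copied from the diff; the @rfm.simple_test registration and every omitted detail are assumptions or simplifications):

import reframe as rfm


@rfm.simple_test
class OpenaccCudaCppSketch(rfm.RegressionTest):
    def __init__(self):
        super().__init__()
        self.valid_systems = ['arolla:cn', 'tsa:cn']
        self.valid_prog_environs = ['PrgEnv-pgi']
        self.build_system = 'Make'
        self.build_system.fflags = ['-O2']
        self.exclusive_access = True
        # Per-system CUDA toolchain modules, as in the diff above.
        if self.current_system.name == 'arolla':
            self.modules = ['cuda92/toolkit/9.2.88', 'craype-accel-nvidia70']
        elif self.current_system.name == 'tsa':
            self.modules = ['cuda10.0/toolkit/10.0.130',
                            'craype-accel-nvidia70']
        self.variables = {'MV2_USE_CUDA': '1', 'G2G': '1'}

    def setup(self, partition, environ, **job_opts):
        # Per-compiler OpenACC flags; both systems target cc70 (Volta) GPUs.
        if environ.name.startswith('PrgEnv-pgi'):
            self.build_system.fflags += ['-acc', '-ta=tesla,cc70,cuda10.0']
            self.build_system.ldflags = ['-acc', '-ta:tesla:cc70,cuda10.0',
                                         '-lstdc++']
        super().setup(partition, environ, **job_opts)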
2 changes: 2 additions & 0 deletions cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90
@@ -35,6 +35,7 @@ program openacc_cuda_mpi_cppstd
!$acc end host_data
!$acc end data


if(mpi_rank == 0) then

! Allocate and initialize arrays on the GPU
@@ -92,6 +93,7 @@ program openacc_cuda_mpi_cppstd
deallocate(f1)
deallocate(f2)
deallocate(f3)
write (*,*) "Result: OK"
end if

call MPI_Finalize(ierr);