From c672693fa7562b8c71ac96abb4313a169adbd9eb Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 29 Aug 2019 11:32:55 +0200 Subject: [PATCH 001/104] Removing modules system --- config/tsa76.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/tsa76.py b/config/tsa76.py index 582be266a4..7ff40d51d9 100644 --- a/config/tsa76.py +++ b/config/tsa76.py @@ -253,9 +253,8 @@ class ReframeSettings: }, 'tsa': { - 'descr': 'TsaTDS MCH', + 'descr': 'Tsa MCH', 'hostnames': [r'tsa-\w+\d+'], - 'modules_system': 'tmod', 'resourcesdir': '/apps/common/UES/reframe/resources', 'partitions': { 'login': { From 506d82f221e3cc4002fa1adfe79812cccedc0a3c Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 29 Aug 2019 11:34:27 +0200 Subject: [PATCH 002/104] Inserting modules system back --- config/tsa76.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config/tsa76.py b/config/tsa76.py index 7ff40d51d9..0994819860 100644 --- a/config/tsa76.py +++ b/config/tsa76.py @@ -255,6 +255,7 @@ class ReframeSettings: 'tsa': { 'descr': 'Tsa MCH', 'hostnames': [r'tsa-\w+\d+'], + 'modules_system': 'tmod', 'resourcesdir': '/apps/common/UES/reframe/resources', 'partitions': { 'login': { From 5f5371baa0716617838fc439b1305b9670c97695 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 29 Aug 2019 12:29:50 +0200 Subject: [PATCH 003/104] Fixing pattern for sanity check --- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index 5c835b89e9..c0865eb24b 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -56,7 +56,7 @@ def __init__(self): } self.executable = 'openacc_cuda_mpi_cppstd' - self.sanity_patterns = sn.assert_found(r'Result:\s+OK', self.stdout) + self.sanity_patterns = sn.assert_found(r'Result\s:\s+OK', self.stdout) self.maintainers = ['AJ', 'VK'] self.tags = {'production', 'mch'} From 
74faa696527ee9e5378e0b397698db2b316630df Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 18 Sep 2019 14:54:03 +0200 Subject: [PATCH 004/104] Removing blank line --- cscs-checks/cuda/cuda_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index b5c26887c9..71a9e1dc42 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -12,7 +12,6 @@ def __init__(self): if self.current_system.name == 'kesch': self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-gnu-nompi'] - self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CUDA', 'essentials') if self.current_system.name == 'kesch': From f5cf9702db421dddeebfb650abe7eb1c22586dc6 Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 4 Oct 2019 19:10:48 +0200 Subject: [PATCH 005/104] ReFrame configuration for Arolla --- config/cscs.py | 98 ++++++++----------------- cscs-checks/mch/automatic_arrays_acc.py | 20 ++++- 2 files changed, 48 insertions(+), 70 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index 08ebe41874..1812d295c9 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ -239,9 +239,9 @@ class ReframeSettings: } }, - 'tsa': { - 'descr': 'Tsa MCH', - 'hostnames': [r'tsa-\w+\d+'], + 'arolla': { + 'descr': 'Arolla MCH', + 'hostnames': [r'arolla-\w+\d+'], 'modules_system': 'tmod', 'resourcesdir': '/apps/common/UES/reframe/resources', 'partitions': { @@ -250,7 +250,7 @@ class ReframeSettings: 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], - 'descr': 'Tsa login nodes', + 'descr': 'Arolla login nodes', }, 'pn': { 'scheduler': 'nativeslurm', @@ -258,7 +258,7 @@ class ReframeSettings: 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], - 'descr': 'Tsa post-processing nodes', + 'descr': 'Arolla post-processing nodes', }, 'cn': { 'scheduler': 'nativeslurm', @@ -266,7 +266,7 @@ 
class ReframeSettings: 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', 'PrgEnv-gnu', 'PrgEnv-gnu-nompi', 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], - 'descr': 'Tsa compute nodes', + 'descr': 'Arolla compute nodes', 'resources': { '_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'], } @@ -407,83 +407,45 @@ class ReframeSettings: 'cxx': 'g++', 'ftn': 'gfortran', }, - 'PrgEnv-cray-c2sm': { - 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/cray-env/base'], - }, - 'PrgEnv-cray-c2sm-gpu': { + }, + + 'arolla': { + 'PrgEnv-pgi-nompi': { 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/cray-env/gpu'], + 'modules': ['PrgEnv-pgi/18.10'], + 'cc': 'pgcc', + 'cxx': 'pgc++', + 'ftn': 'pgf90', }, - 'PrgEnv-pgi-c2sm': { + 'PrgEnv-pgi': { 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/pgi-env/base'], + 'modules': ['PrgEnv-pgi/18.10'], 'cc': 'mpicc', 'cxx': 'mpicxx', - 'ftn': 'mpif90', + 'ftn': 'mpifort', }, - 'PrgEnv-pgi-c2sm-gpu': { + 'PrgEnv-cce': { 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/pgi-env/gpu'], - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpif90', + 'modules': ['PrgEnv-cce/18.12'], }, - 'PrgEnv-gnu-c2sm': { + 'PrgEnv-cce-nompi': { 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/gnu-env/base'], - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpif90', + 'modules': ['PrgEnv-cce/18.12'] }, - 'PrgEnv-gnu-c2sm-gpu': { + 'PrgEnv-gnu': { 'type': 'ProgEnvironment', - 'modules': ['c2sm-rcm/1.00.00-kesch', - 'c2sm/gnu-env/gpu'], + 'modules': ['PrgEnv-gnu/18.12'], 'cc': 'mpicc', 'cxx': 'mpicxx', 'ftn': 'mpif90', }, - }, - - 'tsa': { - 'descr': 'Tsa MCH', - 'hostnames': [r'tsa-\w+\d+'], - 'modules_system': 'tmod', - 'resourcesdir': '/apps/common/UES/reframe/resources', - 'partitions': { - 'login': { - 'scheduler': 'local', - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', - 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], - 'descr': 'Tsa 
login nodes', - }, - 'pn': { - 'scheduler': 'nativeslurm', - 'access': ['--partition=pn-regression'], - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', - 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], - 'descr': 'Tsa post-processing nodes', - }, - 'cn': { - 'scheduler': 'nativeslurm', - 'access': ['--partition=cn-regression'], - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-gnu', 'PrgEnv-gnu-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], - 'descr': 'Tsa compute nodes', - 'resources': { - '_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'], - } - } - } + 'PrgEnv-gnu-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-gnu/18.12'], + 'cc': 'gcc', + 'cxx': 'g++', + 'ftn': 'gfortran', + }, }, 'leone': { diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index b19647b79f..84beed0a6e 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -6,8 +6,9 @@ class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] + self.valid_systems = ['daint:gpu', 'dom:gpu', + 'kesch:cn', 'arolla:cn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom']: self.modules = ['craype-accel-nvidia60'] elif self.current_system.name == 'kesch': @@ -19,6 +20,12 @@ def __init__(self): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.variables = { + 'CRAY_ACCEL_TARGET': 'nvidia35', + 'MV2_USE_CUDA': '1' + } # This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 self.num_gpus_per_node = 1 @@ -38,10 +45,14 @@ def __init__(self): 'dom:gpu': {'time': (7.5E-05, None, 0.15)}, 'kesch:cn': {'time': (2.9E-04, None, 0.15)}, }, + 'PrgEnv-cce': { + 
'arolla:cn': {'time': (2.9E-04, None, 0.15)}, + }, 'PrgEnv-pgi': { 'daint:gpu': {'time': (6.4E-05, None, 0.15)}, 'dom:gpu': {'time': (7.5e-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, + 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, } } @@ -52,11 +63,16 @@ def setup(self, partition, environ, **job_opts): if environ.name.startswith('PrgEnv-cray'): envname = 'PrgEnv-cray' self.build_system.fflags += ['-hacc', '-hnoomp'] + elif environ.name.startswith('PrgEnv-cce'): + envname = 'PrgEnv-cce' + self.build_system.fflags += ['-hacc', '-hnoomp'] elif environ.name.startswith('PrgEnv-pgi'): envname = 'PrgEnv-pgi' self.build_system.fflags += ['-acc'] if self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35'] + elif self.current_system.name == 'arolla': + self.build_system.fflags += ['-ta=tesla,cc70'] elif self.current_system.name in ['daint', 'dom']: self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] else: From 1f8001d6e18a6ff813cc8b2711ca9ead1ebb4887 Mon Sep 17 00:00:00 2001 From: Luca Date: Mon, 21 Oct 2019 17:24:12 +0200 Subject: [PATCH 006/104] Adding cuda_stress_test on Arolla --- cscs-checks/mch/cuda_stress_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index 3824718d13..d99766e79a 100644 --- a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -7,11 +7,16 @@ class CudaStressTest(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'MCH CUDA stress test' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', + 'arolla:cn'] if self.current_system.name == 'kesch': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['cudatoolkit/8.0.61'] + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu-nompi'] 
+ self.modules = ['cuda92/toolkit/9.2.88'] else: self.valid_prog_environs = ['PrgEnv-gnu'] self.modules = ['craype-accel-nvidia60'] From 1b7f5d77e33e0dc7580ab1c437d97a2472f7b54b Mon Sep 17 00:00:00 2001 From: Luca Date: Mon, 21 Oct 2019 17:28:35 +0200 Subject: [PATCH 007/104] Adding gpu_direct_acc on Arolla --- cscs-checks/mch/gpu_direct_acc.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index a03608a503..8c26eed48d 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ b/cscs-checks/mch/gpu_direct_acc.py @@ -8,8 +8,8 @@ class GpuDirectAccCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] - + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', + 'arolla:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom']: self.modules = ['craype-accel-nvidia60'] @@ -28,6 +28,17 @@ def __init__(self): self.num_tasks = 8 self.num_gpus_per_node = 8 self.num_tasks_per_node = 8 + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.modules = ['cuda92/toolkit/9.2.88'] + self.variables = { + 'CRAY_ACCEL_TARGET': 'nvidia70', + 'MV2_USE_CUDA': '1', + 'G2G': '1' + } + self.num_tasks = 8 + self.num_gpus_per_node = 8 + self.num_tasks_per_node = 8 self.sourcepath = 'gpu_direct_acc.F90' self.build_system = 'SingleSource' @@ -48,5 +59,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla:cc60', '-Mnorpath'] elif self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla:cc35'] + elif self.current_system.name == 'arolla': + self.build_system.fflags += ['-ta=tesla:cc70'] super().setup(partition, environ, **job_opts) From f889ebbec42aa878db8a9ed1397920c124d5c783 Mon Sep 17 00:00:00 2001 From: Luca Date: Mon, 21 Oct 2019 
17:33:55 +0200 Subject: [PATCH 008/104] Adding gpu_direct_cuda on Arolla --- cscs-checks/mch/gpu_direct_cuda.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index 3f07dfd9ea..cb0e8cecb6 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -8,7 +8,8 @@ class GpuDirectCudaCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'tests gpu-direct for CUDA' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', + 'arolla:cn', 'tiger:gpu'] # FIXME: temporary workaround until the mvapich module is fixed # 'PrgEnv-gnu-c2sm-gpu' will be added later self.valid_prog_environs = ['PrgEnv-gnu'] @@ -28,6 +29,15 @@ def __init__(self): 'G2G': '1', } self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_37'] + elif self.current_system.name == 'arolla': + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu'] + self.modules = ['cuda92/toolkit/9.2.88'] + self.variables = { + 'MV2_USE_CUDA': '1', + 'G2G': '1', + } + self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_70'] self.num_tasks = 2 self.num_gpus_per_node = 1 From 76ab5010af9d96ec335b18b7302937d6b14dc96c Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 15 Nov 2019 18:17:02 +0100 Subject: [PATCH 009/104] ReFrame configuration for Tsa --- config/cscs.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/config/cscs.py b/config/cscs.py index 3a3d7bf841..2b9f18f08a 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ -337,6 +337,41 @@ class ReframeSettings: } }, + 'tsa': { + 'descr': 'Tsa MCH', + 'hostnames': [r'tsa-\w+\d+'], + 'modules_system': 'tmod', + 'resourcesdir': '/apps/common/UES/reframe/resources', + 'partitions': { + 'login': { + 'scheduler': 'local', + 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', + 
'PrgEnv-pgi', 'PrgEnv-pgi-nompi', + 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], + 'descr': 'Tsa login nodes', + }, + 'pn': { + 'scheduler': 'nativeslurm', + 'access': ['--partition=pn-regression'], + 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', + 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', + 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], + 'descr': 'Tsa post-processing nodes', + }, + 'cn': { + 'scheduler': 'nativeslurm', + 'access': ['--partition=cn-regression'], + 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', + 'PrgEnv-gnu', 'PrgEnv-gnu-nompi', + 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], + 'descr': 'Tsa compute nodes', + 'resources': { + '_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'], + } + } + } + }, + 'leone': { 'descr': 'Leone', 'hostnames': ['leone'], @@ -512,6 +547,45 @@ class ReframeSettings: }, }, + 'tsa': { + 'PrgEnv-pgi-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-pgi/19.5'], + 'cc': 'pgcc', + 'cxx': 'pgc++', + 'ftn': 'pgf90', + }, + 'PrgEnv-pgi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-pgi/19.5'], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpifort', + }, + 'PrgEnv-cce': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-cce/19.04'], + }, + 'PrgEnv-cce-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-cce/19.04'] + }, + 'PrgEnv-gnu': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-gnu/18.1'], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpif90', + }, + 'PrgEnv-gnu-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-gnu/18.1'], + 'cc': 'gcc', + 'cxx': 'g++', + 'ftn': 'gfortran', + }, + }, + 'leone': { 'PrgEnv-gnu': { 'type': 'ProgEnvironment', From 4ef2c6c37218de700eeba96d32787f49adbe11eb Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 15 Nov 2019 19:07:31 +0100 Subject: [PATCH 010/104] AutomaticArrays on Tsa --- cscs-checks/mch/automatic_arrays_acc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index 
a2fd4b7c54..02645fd502 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -7,7 +7,7 @@ class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'arolla:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] @@ -20,7 +20,7 @@ def __init__(self): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } - elif self.current_system.name == 'arolla': + elif self.current_system.name in ['arolla','tsa']: self.exclusive_access = True self.variables = { 'CRAY_ACCEL_TARGET': 'nvidia70', @@ -47,12 +47,14 @@ def __init__(self): }, 'PrgEnv-cce': { 'arolla:cn': {'time': (2.9E-04, None, 0.15)}, + 'tsa:cn': {'time': (2.9E-04, None, 0.15)}, }, 'PrgEnv-pgi': { 'daint:gpu': {'time': (7.5E-05, None, 0.15)}, 'dom:gpu': {'time': (7.5e-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, + 'tsa:cn': {'time': (1.4E-04, None, 0.15)}, } } @@ -71,7 +73,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-acc'] if self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35'] - elif self.current_system.name == 'arolla': + elif self.current_system.name in ['arolla','tsa']: self.build_system.fflags += ['-ta=tesla,cc70'] elif self.current_system.name in ['daint', 'dom', 'tiger']: self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] From de394961a14366d14483b3a8c61fceee7ac1578e Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 17 Dec 2019 10:03:13 +0100 Subject: [PATCH 011/104] Update settings --- config/cscs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index 2b9f18f08a..f58d517273 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ 
-550,25 +550,25 @@ class ReframeSettings: 'tsa': { 'PrgEnv-pgi-nompi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/19.5'], + 'modules': ['PrgEnv-pgi/19.9'], 'cc': 'pgcc', 'cxx': 'pgc++', 'ftn': 'pgf90', }, 'PrgEnv-pgi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/19.5'], + 'modules': ['PrgEnv-pgi/19.9'], 'cc': 'mpicc', 'cxx': 'mpicxx', 'ftn': 'mpifort', }, 'PrgEnv-cce': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-cce/19.04'], + 'modules': ['PrgEnv-cce/19.04','cuda10.0/toolkit/10.0.130'], }, 'PrgEnv-cce-nompi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-cce/19.04'] + 'modules': ['PrgEnv-cce/19.04','cuda10.0/toolkit/10.0.130'] }, 'PrgEnv-gnu': { 'type': 'ProgEnvironment', From 71237860c6e0890cc10359a68619a0d488eb4f8d Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 17 Dec 2019 10:22:24 +0100 Subject: [PATCH 012/104] Update configuration on Tsa --- config/cscs.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index 996187cc4c..ed648646aa 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ -342,24 +342,21 @@ class ReframeSettings: 'partitions': { 'login': { 'scheduler': 'local', - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', + 'environs': ['PrgEnv-pgi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], 'descr': 'Tsa login nodes', }, 'pn': { 'scheduler': 'nativeslurm', 'access': ['--partition=pn-regression'], - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi', + 'environs': ['PrgEnv-pgi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu', 'PrgEnv-gnu-nompi'], 'descr': 'Tsa post-processing nodes', }, 'cn': { 'scheduler': 'nativeslurm', 'access': ['--partition=cn-regression'], - 'environs': ['PrgEnv-cce', 'PrgEnv-cce-nompi', - 'PrgEnv-gnu', 'PrgEnv-gnu-nompi', + 'environs': ['PrgEnv-gnu', 'PrgEnv-gnu-nompi', 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], 'descr': 'Tsa compute nodes', 'resources': { @@ -559,24 +556,16 
@@ class ReframeSettings: 'cxx': 'mpicxx', 'ftn': 'mpifort', }, - 'PrgEnv-cce': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-cce/19.04','cuda10.0/toolkit/10.0.130'], - }, - 'PrgEnv-cce-nompi': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-cce/19.04','cuda10.0/toolkit/10.0.130'] - }, 'PrgEnv-gnu': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/18.1'], + 'modules': ['PrgEnv-gnu/19.2'], 'cc': 'mpicc', 'cxx': 'mpicxx', 'ftn': 'mpif90', }, 'PrgEnv-gnu-nompi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/18.1'], + 'modules': ['PrgEnv-gnu/19.2'], 'cc': 'gcc', 'cxx': 'g++', 'ftn': 'gfortran', From efcaab6528631888fdfd11c705e61a7b03257759 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 17 Dec 2019 11:23:57 +0100 Subject: [PATCH 013/104] Updating checks for latest PrgEnv setup on Tsa --- cscs-checks/mch/automatic_arrays_acc.py | 15 ++---- cscs-checks/mch/collectives_halo.py | 39 ++++++++++++++-- cscs-checks/mch/cuda_stress_test.py | 12 +++-- cscs-checks/mch/gpu_direct_acc.py | 9 ++-- cscs-checks/mch/gpu_direct_cuda.py | 6 +-- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 54 +++------------------- 6 files changed, 57 insertions(+), 78 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index 02645fd502..50285852ec 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -7,7 +7,7 @@ class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'arolla:cn', 'tsa:cn'] + 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] @@ -20,12 +20,8 @@ def __init__(self): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name == 'tsa': 
self.exclusive_access = True - self.variables = { - 'CRAY_ACCEL_TARGET': 'nvidia70', - 'MV2_USE_CUDA': '1' - } # This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 self.num_gpus_per_node = 1 @@ -45,15 +41,10 @@ def __init__(self): 'dom:gpu': {'time': (5.7E-05, None, 0.15)}, 'kesch:cn': {'time': (2.9E-04, None, 0.15)}, }, - 'PrgEnv-cce': { - 'arolla:cn': {'time': (2.9E-04, None, 0.15)}, - 'tsa:cn': {'time': (2.9E-04, None, 0.15)}, - }, 'PrgEnv-pgi': { 'daint:gpu': {'time': (7.5E-05, None, 0.15)}, 'dom:gpu': {'time': (7.5e-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, - 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, 'tsa:cn': {'time': (1.4E-04, None, 0.15)}, } } @@ -73,7 +64,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-acc'] if self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35'] - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name == 'tsa': self.build_system.fflags += ['-ta=tesla,cc70'] elif self.current_system.name in ['daint', 'dom', 'tiger']: self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 770ecd6daf..da670d2c66 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -4,7 +4,8 @@ class CollectivesBaseTest(rfm.RegressionTest): def __init__(self, variant, bench_reference): - self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn', 'tiger:gpu', + 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.variables = {'G2G': '1'} self.executable = 'build/src/comm_overlap_benchmark' @@ -29,6 +30,18 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_37"' ] self.build_system.max_concurrency = 1 + elif self.current_system.name == 'tsa': + self.exclusive_access = True + 
self.num_tasks = 144 + self.num_gpus_per_node = 8 + self.num_tasks_per_node = 16 + self.num_tasks_per_socket = 8 + self.modules = ['cmake'] + self.build_system.config_opts += [ + '-DMPI_VENDOR=openmpi', + '-DCUDA_COMPUTE_CAPABILITY="sm_70"' + ] + self.build_system.max_concurrency = 1 elif self.current_system.name in {'daint', 'dom', 'tiger'}: self.num_tasks = 4 self.num_gpus_per_node = 1 @@ -64,7 +77,12 @@ def __init__(self, variant, bench_reference): 'nocomm': 0.0171947, 'nocomp': 0.0137893, 'default': 0.0138493 - } + }, + 'tsa': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, } if self.current_system.name == 'dom': @@ -87,6 +105,9 @@ def __init__(self, variant, bench_reference): 'dom': { 'elapsed_time': (ref, None, 0.15) }, + 'tsa:cn': { + 'elapsed_time': (ref, None, 0.15) + }, '*': { 'elapsed_time': (ref, None, None) } @@ -116,7 +137,12 @@ def __init__(self, variant): 'nocomm': 0.0171947, 'nocomp': 0.0137893, 'default': 0.0138493 - } + }, + 'tsa': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, }) self.strict_check = False self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git' @@ -137,7 +163,12 @@ def __init__(self, variant): 'nocomm': 0.978306, 'nocomp': 1.36716, 'default': 2.53509 - } + }, + 'tsa': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, }) self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git' self.prebuild_cmd = ['git checkout barebones'] diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index 1cda732889..2b80422d2e 100644 --- a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -7,19 +7,18 @@ class CudaStressTest(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'MCH CUDA stress test' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] - 'arolla:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'tsa:cn'] if 
self.current_system.name == 'kesch': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['cudatoolkit/8.0.61'] - elif self.current_system.name == 'arolla': + elif self.current_system.name == 'tsa': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] - self.modules = ['cuda92/toolkit/9.2.88'] + self.modules = ['cuda/10.1.243'] else: self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['craype-accel-nvidia60'] self.sourcepath = 'cuda_stencil_test.cu' self.build_system = 'SingleSource' @@ -39,6 +38,9 @@ def __init__(self): }, 'kesch:cn': { 'time': (2.25, None, 0.05) + }, + 'tsa:cn': { + 'time': (2.25, None, 0.05) } } self.tags = {'production', 'mch', 'craype'} diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index 49b7591f6e..4717a9e7ba 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ b/cscs-checks/mch/gpu_direct_acc.py @@ -10,7 +10,7 @@ def __init__(self): super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'arolla:cn'] + 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] @@ -36,12 +36,9 @@ def __init__(self): self.num_tasks = 8 self.num_gpus_per_node = 8 self.num_tasks_per_node = 8 - elif self.current_system.name == 'arolla': + elif self.current_system.name == 'tsa': self.exclusive_access = True - self.modules = ['cuda92/toolkit/9.2.88'] self.variables = { - 'CRAY_ACCEL_TARGET': 'nvidia70', - 'MV2_USE_CUDA': '1', 'G2G': '1' } self.num_tasks = 8 @@ -68,7 +65,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla:cc60', '-Mnorpath'] elif self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla:cc35'] - elif self.current_system.name == 'arolla': + elif self.current_system.name == 'tsa': 
self.build_system.fflags += ['-ta=tesla:cc70'] super().setup(partition, environ, **job_opts) diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index cb0e8cecb6..a2c02597d6 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -9,7 +9,7 @@ def __init__(self): super().__init__() self.descr = 'tests gpu-direct for CUDA' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', - 'arolla:cn', 'tiger:gpu'] + 'tiger:gpu', 'tsa:cn'] # FIXME: temporary workaround until the mvapich module is fixed # 'PrgEnv-gnu-c2sm-gpu' will be added later self.valid_prog_environs = ['PrgEnv-gnu'] @@ -29,12 +29,10 @@ def __init__(self): 'G2G': '1', } self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_37'] - elif self.current_system.name == 'arolla': + elif self.current_system.name == 'tsa': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['cuda92/toolkit/9.2.88'] self.variables = { - 'MV2_USE_CUDA': '1', 'G2G': '1', } self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_70'] diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index 18cf468ef3..93656ac3cb 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -8,7 +8,7 @@ def __init__(self): super().__init__() self.descr = 'test for OpenACC, CUDA, MPI, and C++' self.valid_systems = ['daint:gpu', 'dom:gpu', 'tiger:gpu', - 'kesch:cn', 'arolla:cn', 'tsa:cn', 'tiger:gpu'] + 'kesch:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cce', 'PrgEnv-cray', 'PrgEnv-pgi'] self.build_system = 'Make' @@ -35,28 +35,14 @@ def __init__(self): 'MV2_USE_CUDA': '1', 'G2G': '1' } - elif self.current_system.name == 'arolla': - self.exclusive_access = True - self.modules = ['cuda92/toolkit/9.2.88', - 'craype-accel-nvidia70'] - self.num_tasks = 8 - self.num_tasks_per_node = 8 - self.num_gpus_per_node = 8 - self.build_system.options = 
['NVCC_FLAGS="-arch=compute_70"'] - self.variables = { - 'MV2_USE_CUDA': '1', - 'G2G': '1' - } elif self.current_system.name == 'tsa': self.exclusive_access = True - self.modules = ['cuda10.0/toolkit/10.0.130', - 'craype-accel-nvidia70'] + self.modules = ['cuda/10.1.243'] self.num_tasks = 8 self.num_tasks_per_node = 8 self.num_gpus_per_node = 8 self.build_system.options = ['NVCC_FLAGS="-arch=compute_70"'] self.variables = { - 'MV2_USE_CUDA': '1', 'G2G': '1' } @@ -69,19 +55,6 @@ def setup(self, partition, environ, **job_opts): if environ.name.startswith('PrgEnv-cray'): self.build_system.fflags += ['-hacc', '-hnoomp'] - elif environ.name.startswith('PrgEnv-cce'): - self.build_system.fflags += ['-hacc', '-hnoomp'] - if self.current_system.name == 'arolla': - self.build_system.ldflags = [ - '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64', - '-lcublas', '-lcudart' - ] - elif self.current_system.name == 'tsa': - self.build_system.ldflags = [ - '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64', - '-lcublas', '-lcudart' - ] - elif environ.name.startswith('PrgEnv-pgi'): self.build_system.fflags += ['-acc'] if self.current_system.name in ['daint', 'dom', 'tiger']: @@ -95,19 +68,11 @@ def setup(self, partition, environ, **job_opts): '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64', '-lcublas', '-lcudart' ] - elif self.current_system.name == 'arolla': - self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0'] - self.build_system.ldflags = [ - '-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++', - '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64', - '-lcublas', '-lcudart' - ] elif self.current_system.name == 'tsa': - self.build_system.fflags += ['-ta=tesla,cc70,cuda10.0'] + self.build_system.fflags += ['-ta=tesla,cc70,cuda10.1'] self.build_system.ldflags = [ - '-acc', '-ta:tesla:cc70,cuda10.0', '-lstdc++', - '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64', - '-lcublas', '-lcudart' + '-acc', '-ta:tesla:cc70,cuda10.1', '-lstdc++', + '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart' ] 
elif environ.name.startswith('PrgEnv-gnu'): @@ -116,13 +81,8 @@ def setup(self, partition, environ, **job_opts): self.build_system.ldflags += [ '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64' ] - if self.current_system.name == 'arolla': - self.build_system.ldflags += [ - '-L/cm/shared/apps/cuda92/toolkit/9.2.88/lib64' - ] - if self.current_system.name == 'tsa': + elif self.current_system.name == 'tsa': self.build_system.ldflags += [ - '-L/cm/shared/apps/cuda10.0/toolkit/10.0.130/lib64'] - self.build_system.ldflags += ['-lcublas', '-lcudart'] + '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart'] super().setup(partition, environ, **job_opts) From 6c3c4a418855e18ab1f25eb0aed680d725ed92e5 Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 17 Jan 2020 11:57:58 +0100 Subject: [PATCH 014/104] Fixing config and adding helloworld --- config/cscs.py | 3 +++ cscs-checks/mch/collectives_halo.py | 2 +- cscs-checks/prgenv/helloworld.py | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index e57a7b90ea..9cc4cf9628 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ -539,6 +539,9 @@ class ReframeSettings: 'PrgEnv-gnu': { 'type': 'ProgEnvironment', 'modules': ['PrgEnv-gnu/19.2'], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpifort', }, 'PrgEnv-gnu-nompi': { 'type': 'ProgEnvironment', diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index f3c66542f1..57c2099f7a 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -32,7 +32,7 @@ def __init__(self, variant, bench_reference): self.build_system.max_concurrency = 1 elif self.current_system.name == 'tsa': self.exclusive_access = True - self.num_tasks = 144 + self.num_tasks = 32 self.num_gpus_per_node = 8 self.num_tasks_per_node = 16 self.num_tasks_per_socket = 8 diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index d7ad04c8fe..8db6455e8b 100644 --- 
a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -18,16 +18,16 @@ def __init__(self, variant, lang, linkage): self.sourcepath = 'hello_world' self.build_system = 'SingleSource' self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'leone:normal', 'tiger:gpu'] + 'kesch:cn', 'tsa:cn', 'tiger:gpu'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cray_classic', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi'] - if self.current_system.name == 'kesch': + if self.current_system.name in ['kesch', 'tsa']: self.exclusive_access = True - # Removing static compilation from kesch - if (self.current_system.name in ['kesch', 'leone'] and + # Removing static compilation from kesch and tsa + if (self.current_system.name in ['kesch', 'tsa'] and linkage == 'static'): self.valid_prog_environs = [] @@ -101,7 +101,7 @@ def compile(self): class HelloWorldTestSerial(HelloWorldBaseTest): def __init__(self, lang, linkage, **kwargs): super().__init__('serial', lang, linkage, **kwargs) - self.valid_systems += ['kesch:pn'] + self.valid_systems += ['kesch:pn', 'tsa:pn'] self.sourcepath += '_serial.' + lang self.descr += ' Serial ' + linkage.capitalize() self.prgenv_flags = { @@ -118,7 +118,9 @@ def __init__(self, lang, linkage, **kwargs): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - + elif self.current_system.name == 'tsa' and linkage == 'dynamic': + self.valid_prog_environs += ['PrgEnv-pgi-nompi', + 'PrgEnv-gnu-nompi'] @rfm.required_version('>=2.14') @rfm.parameterized_test(*([lang, linkage] @@ -127,7 +129,7 @@ def __init__(self, lang, linkage, **kwargs): class HelloWorldTestOpenMP(HelloWorldBaseTest): def __init__(self, lang, linkage): super().__init__('openmp', lang, linkage) - self.valid_systems += ['kesch:pn'] + self.valid_systems += ['kesch:pn', 'tsa:pn'] self.sourcepath += '_openmp.' 
+ lang self.descr += ' OpenMP ' + str.capitalize(linkage) self.prgenv_flags = { @@ -147,6 +149,9 @@ def __init__(self, lang, linkage): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] + elif self.current_system.name == 'tsa' and linkage == 'dynamic': + self.valid_prog_environs += ['PrgEnv-pgi-nompi', + 'PrgEnv-gnu-nompi'] # On SLURM there is no need to set OMP_NUM_THREADS if one defines # num_cpus_per_task, but adding for completeness and portability From e7bd62a8712348aaf2cf99b21bbc8283477d6bee Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 17 Jan 2020 12:41:25 +0100 Subject: [PATCH 015/104] OpenACC checks --- cscs-checks/prgenv/openacc_checks.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cscs-checks/prgenv/openacc_checks.py b/cscs-checks/prgenv/openacc_checks.py index 09f86a2777..6270bdd878 100644 --- a/cscs-checks/prgenv/openacc_checks.py +++ b/cscs-checks/prgenv/openacc_checks.py @@ -11,13 +11,17 @@ def __init__(self, variant): else: self.num_tasks = 2 - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.num_tasks == 1: self.sourcepath = 'vecAdd_openacc.f90' if self.current_system.name == 'kesch': self.valid_prog_environs = ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi'] + elif self.current_system.name == 'tsa': + self.valid_prog_environs = ['PrgEnv-pgi-nompi'] + else: self.sourcepath = 'vecAdd_openacc_mpi.f90' @@ -29,6 +33,12 @@ def __init__(self, variant): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } + elif self.current_system.name == 'tsa': + self.exclusive_access = True + self.variables = { + 'CRAY_ACCEL_TARGET': 'nvidia70', + 'MV2_USE_CUDA': '1' + } self.executable = self.name self.build_system = 'SingleSource' @@ -47,7 +57,9 @@ def setup(self, partition, environ, **job_opts): elif 
environ.name.startswith('PrgEnv-pgi'): if self.current_system.name in ['daint', 'dom', 'tiger']: self.build_system.fflags = ['-acc', '-ta=tesla:cc60'] - else: + elif self.current_system.name == 'kesch': self.build_system.fflags = ['-acc', '-ta=tesla:cc35'] + elif self.current_system.name == 'tsa': + self.build_system.fflags = ['-acc', '-ta=tesla:cc70'] super().setup(partition, environ, **job_opts) From 26e388c3291552bfc61b8846ce2b0b7801a26d80 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 21 Jan 2020 09:48:45 +0100 Subject: [PATCH 016/104] Updating checks to include arolla --- cscs-checks/mch/automatic_arrays_acc.py | 7 ++++--- cscs-checks/mch/collectives_halo.py | 24 +++++++++++++++++++--- cscs-checks/mch/cuda_stress_test.py | 7 +++++-- cscs-checks/mch/gpu_direct_acc.py | 6 +++--- cscs-checks/mch/gpu_direct_cuda.py | 4 ++-- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 8 ++++---- cscs-checks/prgenv/helloworld.py | 14 ++++++------- cscs-checks/prgenv/openacc_checks.py | 6 +++--- 8 files changed, 49 insertions(+), 27 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index 2ff54b783b..b1adae3fb3 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -7,7 +7,7 @@ class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] @@ -20,7 +20,7 @@ def __init__(self): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla','tsa']: self.exclusive_access = True # This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 @@ -42,6 +42,7 @@ def __init__(self): 
'kesch:cn': {'time': (2.9E-04, None, 0.15)}, }, 'PrgEnv-pgi': { + 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, 'daint:gpu': {'time': (7.5E-05, None, 0.15)}, 'dom:gpu': {'time': (7.5e-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, @@ -64,7 +65,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-acc'] if self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35'] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla','tsa']: self.build_system.fflags += ['-ta=tesla,cc70'] elif self.current_system.name in ['daint', 'dom', 'tiger']: self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 57c2099f7a..42f9bfbc80 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -5,7 +5,7 @@ class CollectivesBaseTest(rfm.RegressionTest): def __init__(self, variant, bench_reference): self.valid_systems = ['dom:gpu', 'daint:gpu', 'kesch:cn', 'tiger:gpu', - 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.variables = {'G2G': '1'} self.executable = 'build/src/comm_overlap_benchmark' @@ -30,7 +30,7 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_37"' ] self.build_system.max_concurrency = 1 - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla','tsa']: self.exclusive_access = True self.num_tasks = 32 self.num_gpus_per_node = 8 @@ -68,6 +68,11 @@ def __init__(self, variant, bench_reference): self.stdout, 1, float, -1) } ref_values = { + 'arolla': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, 'kesch': { 'nocomm': 5.7878, 'nocomp': 5.62155, @@ -96,6 +101,9 @@ def __init__(self, variant, bench_reference): ref = 0.0 self.reference = { + 'arolla:cn': { + 'elapsed_time': (ref, None, 0.15) + }, 'kesch:cn': { 'elapsed_time': (ref, None, 0.15) 
}, @@ -118,7 +126,7 @@ def __init__(self, variant, bench_reference): def setup(self, *args, **kwargs): super().setup(*args, **kwargs) - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.job.launcher.options = ['--distribution=block:block', '--cpu_bind=q'] @@ -128,6 +136,11 @@ class AlltoallvTest(CollectivesBaseTest): def __init__(self, variant): super().__init__(variant, { + 'arolla': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, 'kesch': { 'nocomm': 6.89819, 'nocomp': 6.98276, @@ -154,6 +167,11 @@ class HaloExchangeTest(CollectivesBaseTest): def __init__(self, variant): super().__init__(variant, { + 'arolla': { + 'nocomm': 5.7878, + 'nocomp': 5.62155, + 'default': 5.53777 + }, 'kesch': { 'nocomm': 5.7878, 'nocomp': 54.2012, diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index 161b81b774..ccb3519959 100644 --- a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -8,12 +8,12 @@ def __init__(self): super().__init__() self.descr = 'MCH CUDA stress test' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] if self.current_system.name == 'kesch': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['cudatoolkit/8.0.61'] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['cuda/10.1.243'] @@ -30,6 +30,9 @@ def __init__(self): 'time': sn.extractsingle(r'Timing: (\S+)', self.stdout, 1, float) } self.reference = { + 'arolla:cn': { + 'time': (2.25, None, 0.05) + }, 'daint:gpu': { 'time': (1.41184, None, 0.05) }, diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index bfb299f81a..7448ea5bd3 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ 
b/cscs-checks/mch/gpu_direct_acc.py @@ -10,7 +10,7 @@ def __init__(self): super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'tsa:cn'] + 'arollai:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] @@ -36,7 +36,7 @@ def __init__(self): self.num_tasks = 8 self.num_gpus_per_node = 8 self.num_tasks_per_node = 8 - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.variables = { 'G2G': '1' @@ -65,7 +65,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-ta=tesla:cc60', '-Mnorpath'] elif self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla:cc35'] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.build_system.fflags += ['-ta=tesla:cc70'] super().setup(partition, environ, **job_opts) diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index 7eb5dcb91e..188d1d155b 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -9,7 +9,7 @@ def __init__(self): super().__init__() self.descr = 'tests gpu-direct for CUDA' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', - 'tiger:gpu', 'tsa:cn'] + 'tiger:gpu', 'arolla:cn', 'tsa:cn'] # FIXME: temporary workaround until the mvapich module is fixed # 'PrgEnv-gnu-c2sm-gpu' will be added later self.valid_prog_environs = ['PrgEnv-gnu'] @@ -29,7 +29,7 @@ def __init__(self): 'G2G': '1', } self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_37'] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu'] self.variables = { diff --git 
a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index c45c24d500..aba07b1244 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -8,7 +8,7 @@ def __init__(self): super().__init__() self.descr = 'test for OpenACC, CUDA, MPI, and C++' self.valid_systems = ['daint:gpu', 'dom:gpu', 'tiger:gpu', - 'kesch:cn', 'tsa:cn'] + 'arolla:cn', 'kesch:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cce', 'PrgEnv-cray', 'PrgEnv-pgi'] self.build_system = 'Make' @@ -35,7 +35,7 @@ def __init__(self): 'MV2_USE_CUDA': '1', 'G2G': '1' } - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.modules = ['cuda/10.1.243'] self.num_tasks = 8 @@ -68,7 +68,7 @@ def setup(self, partition, environ, **job_opts): '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64', '-lcublas', '-lcudart' ] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla','tsa']: self.build_system.fflags += ['-ta=tesla,cc70,cuda10.1'] self.build_system.ldflags = [ '-acc', '-ta:tesla:cc70,cuda10.1', '-lstdc++', @@ -81,7 +81,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.ldflags += [ '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64' ] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla','tsa']: self.build_system.ldflags += [ '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart'] diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index 8db6455e8b..50f77b2948 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -18,16 +18,16 @@ def __init__(self, variant, lang, linkage): self.sourcepath = 'hello_world' self.build_system = 'SingleSource' self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'tsa:cn', 'tiger:gpu'] + 'kesch:cn', 'arolla:cn', 'tsa:cn', 'tiger:gpu'] self.valid_prog_environs = 
['PrgEnv-cray', 'PrgEnv-cray_classic', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi'] - if self.current_system.name in ['kesch', 'tsa']: + if self.current_system.name in ['kesch', 'arolla', 'tsa']: self.exclusive_access = True # Removing static compilation from kesch and tsa - if (self.current_system.name in ['kesch', 'tsa'] and + if (self.current_system.name in ['kesch', 'arolla', 'tsa'] and linkage == 'static'): self.valid_prog_environs = [] @@ -101,7 +101,7 @@ def compile(self): class HelloWorldTestSerial(HelloWorldBaseTest): def __init__(self, lang, linkage, **kwargs): super().__init__('serial', lang, linkage, **kwargs) - self.valid_systems += ['kesch:pn', 'tsa:pn'] + self.valid_systems += ['kesch:pn', 'arolla:pn', 'tsa:pn'] self.sourcepath += '_serial.' + lang self.descr += ' Serial ' + linkage.capitalize() self.prgenv_flags = { @@ -118,7 +118,7 @@ def __init__(self, lang, linkage, **kwargs): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name == 'tsa' and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] and linkage == 'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] @@ -129,7 +129,7 @@ def __init__(self, lang, linkage, **kwargs): class HelloWorldTestOpenMP(HelloWorldBaseTest): def __init__(self, lang, linkage): super().__init__('openmp', lang, linkage) - self.valid_systems += ['kesch:pn', 'tsa:pn'] + self.valid_systems += ['kesch:pn', 'arolla:pn', 'tsa:pn'] self.sourcepath += '_openmp.' 
+ lang self.descr += ' OpenMP ' + str.capitalize(linkage) self.prgenv_flags = { @@ -149,7 +149,7 @@ def __init__(self, lang, linkage): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name == 'tsa' and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] and linkage == 'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] diff --git a/cscs-checks/prgenv/openacc_checks.py b/cscs-checks/prgenv/openacc_checks.py index 6270bdd878..03ce5eb0fd 100644 --- a/cscs-checks/prgenv/openacc_checks.py +++ b/cscs-checks/prgenv/openacc_checks.py @@ -12,7 +12,7 @@ def __init__(self, variant): self.num_tasks = 2 self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.num_tasks == 1: self.sourcepath = 'vecAdd_openacc.f90' @@ -33,7 +33,7 @@ def __init__(self, variant): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.variables = { 'CRAY_ACCEL_TARGET': 'nvidia70', @@ -59,7 +59,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags = ['-acc', '-ta=tesla:cc60'] elif self.current_system.name == 'kesch': self.build_system.fflags = ['-acc', '-ta=tesla:cc35'] - elif self.current_system.name == 'tsa': + elif self.current_system.name in ['arolla', 'tsa']: self.build_system.fflags = ['-acc', '-ta=tesla:cc70'] super().setup(partition, environ, **job_opts) From 9699ccb848a5ab02ca0613304ebdd9d30429cfe6 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 21 Jan 2020 10:20:37 +0100 Subject: [PATCH 017/104] Fixing Arolla environments --- config/cscs.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index 9cc4cf9628..1fa82b5b2f 100644 --- a/config/cscs.py +++ 
b/config/cscs.py @@ -492,29 +492,28 @@ class ReframeSettings: 'arolla': { 'PrgEnv-pgi-nompi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/18.10'], + 'modules': ['PrgEnv-pgi/19.9'], 'cc': 'pgcc', 'cxx': 'pgc++', 'ftn': 'pgf90', }, 'PrgEnv-pgi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/18.10', - 'openmpi/4.0.1-pgi-18.10-gcc-7.4.0-2.31.1-cuda-9.2'], + 'modules': ['PrgEnv-pgi/19.9'], 'cc': 'mpicc', 'cxx': 'mpicxx', 'ftn': 'mpifort', }, 'PrgEnv-gnu': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/18.12'], + 'modules': ['PrgEnv-gnu/19.2'], 'cc': 'mpicc', 'cxx': 'mpicxx', - 'ftn': 'mpif90', + 'ftn': 'mpifort', }, 'PrgEnv-gnu-nompi': { 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/18.12'], + 'modules': ['PrgEnv-gnu/19.2'], 'cc': 'gcc', 'cxx': 'g++', 'ftn': 'gfortran', From 05e508862dbd7ea598ef45d289f5c579ed23f2a9 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 21 Jan 2020 10:23:22 +0100 Subject: [PATCH 018/104] Fixing typo in arolla name --- cscs-checks/mch/gpu_direct_acc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index 7448ea5bd3..8857160598 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ b/cscs-checks/mch/gpu_direct_acc.py @@ -10,7 +10,7 @@ def __init__(self): super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'arollai:cn', 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] From 9cc854933308a3c1b9b256454f910ed57545d5a9 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 21 Jan 2020 10:24:10 +0100 Subject: [PATCH 019/104] Changing tasks for collectives checks --- cscs-checks/mch/collectives_halo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/collectives_halo.py 
b/cscs-checks/mch/collectives_halo.py index 42f9bfbc80..0985bd3924 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -32,7 +32,7 @@ def __init__(self, variant, bench_reference): self.build_system.max_concurrency = 1 elif self.current_system.name in ['arolla','tsa']: self.exclusive_access = True - self.num_tasks = 32 + self.num_tasks = 144 self.num_gpus_per_node = 8 self.num_tasks_per_node = 16 self.num_tasks_per_socket = 8 From 338a364a19c972ba738cf41238fab4783c781f57 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 21 Jan 2020 15:11:37 +0100 Subject: [PATCH 020/104] Reverting test to run on two nodes --- cscs-checks/mch/collectives_halo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 0985bd3924..42f9bfbc80 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -32,7 +32,7 @@ def __init__(self, variant, bench_reference): self.build_system.max_concurrency = 1 elif self.current_system.name in ['arolla','tsa']: self.exclusive_access = True - self.num_tasks = 144 + self.num_tasks = 32 self.num_gpus_per_node = 8 self.num_tasks_per_node = 16 self.num_tasks_per_socket = 8 From f8b0d6e299b8bc9a08547bb875290746bf71c0fc Mon Sep 17 00:00:00 2001 From: lucamar Date: Fri, 24 Jan 2020 15:38:09 +0100 Subject: [PATCH 021/104] Update automatic_arrays_acc.py --- cscs-checks/mch/automatic_arrays_acc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index b1adae3fb3..391cbc0c33 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -20,7 +20,7 @@ def __init__(self): 'CRAY_ACCEL_TARGET': 'nvidia35', 'MV2_USE_CUDA': '1' } - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True # 
This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 @@ -65,7 +65,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.fflags += ['-acc'] if self.current_system.name == 'kesch': self.build_system.fflags += ['-ta=tesla,cc35'] - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.build_system.fflags += ['-ta=tesla,cc70'] elif self.current_system.name in ['daint', 'dom', 'tiger']: self.build_system.fflags += ['-ta=tesla,cc60', '-Mnorpath'] From 7994f2e39e30ba0d71de63ff6b34f1d390335fee Mon Sep 17 00:00:00 2001 From: lucamar Date: Fri, 24 Jan 2020 15:38:33 +0100 Subject: [PATCH 022/104] Update collectives_halo.py --- cscs-checks/mch/collectives_halo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 42f9bfbc80..909551c318 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -30,7 +30,7 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_37"' ] self.build_system.max_concurrency = 1 - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.num_tasks = 32 self.num_gpus_per_node = 8 From 7bbe786e6c7daab1400441f7107b7e3f7119e235 Mon Sep 17 00:00:00 2001 From: lucamar Date: Fri, 24 Jan 2020 15:40:33 +0100 Subject: [PATCH 023/104] Update openacc_cuda_mpi_cppstd.py --- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index aba07b1244..64c90cff65 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -68,7 +68,7 @@ def setup(self, partition, environ, **job_opts): '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64', '-lcublas', 
'-lcudart' ] - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.build_system.fflags += ['-ta=tesla,cc70,cuda10.1'] self.build_system.ldflags = [ '-acc', '-ta:tesla:cc70,cuda10.1', '-lstdc++', @@ -81,7 +81,7 @@ def setup(self, partition, environ, **job_opts): self.build_system.ldflags += [ '-L/global/opt/nvidia/cudatoolkit/8.0.61/lib64' ] - elif self.current_system.name in ['arolla','tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.build_system.ldflags += [ '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart'] From 72b59d8fb317d61985d1c6ae6620d77af61eba66 Mon Sep 17 00:00:00 2001 From: lucamar Date: Fri, 24 Jan 2020 15:41:42 +0100 Subject: [PATCH 024/104] Update helloworld.py --- cscs-checks/prgenv/helloworld.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index 50f77b2948..9c1b38ba76 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -118,7 +118,8 @@ def __init__(self, lang, linkage, **kwargs): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] + and linkage == 'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] @@ -149,7 +150,8 @@ def __init__(self, lang, linkage): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] + and linkage == 'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] From 2670d87ce3c1556066c4245df492e3f1276a7a84 Mon Sep 17 00:00:00 2001 From: lucamar Date: Fri, 24 Jan 2020 15:42:30 +0100 Subject: [PATCH 025/104] Update openacc_checks.py --- 
cscs-checks/prgenv/openacc_checks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cscs-checks/prgenv/openacc_checks.py b/cscs-checks/prgenv/openacc_checks.py index 03ce5eb0fd..1890243daa 100644 --- a/cscs-checks/prgenv/openacc_checks.py +++ b/cscs-checks/prgenv/openacc_checks.py @@ -12,7 +12,7 @@ def __init__(self, variant): self.num_tasks = 2 self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', - 'arolla:cn', 'tsa:cn'] + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] if self.num_tasks == 1: self.sourcepath = 'vecAdd_openacc.f90' @@ -21,7 +21,6 @@ def __init__(self, variant): 'PrgEnv-pgi-nompi'] elif self.current_system.name == 'tsa': self.valid_prog_environs = ['PrgEnv-pgi-nompi'] - else: self.sourcepath = 'vecAdd_openacc_mpi.f90' From f891f422bb2b2783e399de3032b6e8b3d453f8d7 Mon Sep 17 00:00:00 2001 From: Matthias Kraushaar Date: Tue, 4 Feb 2020 15:22:14 +0100 Subject: [PATCH 026/104] Change location of and-operator in helloworld --- cscs-checks/prgenv/helloworld.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index 9c1b38ba76..c8b321d474 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -118,8 +118,8 @@ def __init__(self, lang, linkage, **kwargs): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] - and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] and + linkage == 'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] @@ -150,8 +150,8 @@ def __init__(self, lang, linkage): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] - and linkage == 'dynamic': + elif self.current_system.name in ['arolla', 'tsa'] and + linkage == 
'dynamic': self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] From cb3b2a70d5ef7ad93855c31ba3a7ad45b49f6416 Mon Sep 17 00:00:00 2001 From: Matthias Kraushaar Date: Tue, 4 Feb 2020 16:22:01 +0100 Subject: [PATCH 027/104] Add parentheses --- cscs-checks/prgenv/helloworld.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index c8b321d474..f762767171 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -118,8 +118,8 @@ def __init__(self, lang, linkage, **kwargs): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] and - linkage == 'dynamic': + elif (self.current_system.name in ['arolla', 'tsa'] and + linkage == 'dynamic'): self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] @@ -150,8 +150,8 @@ def __init__(self, lang, linkage): self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] - elif self.current_system.name in ['arolla', 'tsa'] and - linkage == 'dynamic': + elif (self.current_system.name in ['arolla', 'tsa'] and + linkage == 'dynamic'): self.valid_prog_environs += ['PrgEnv-pgi-nompi', 'PrgEnv-gnu-nompi'] From edb1bb39da7ea7b32e45eb9f0f46a3340a93c82f Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 12:17:06 +0100 Subject: [PATCH 028/104] Adding cuda and slurm checks --- cscs-checks/cuda/cuda_checks.py | 16 +++++++++++++++- cscs-checks/system/slurm/slurm.py | 28 ++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index d26e05cbf4..b294387d14 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -7,23 +7,34 @@ class CudaCheck(rfm.RegressionTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 
'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] + if self.current_system.name == 'kesch': self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-gnu-nompi'] + elif: self.current_system.name in ['arolla', 'tsa']: + self.valid_prog_environs += ['PrgEnv-gnu-nompi'] self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CUDA', 'essentials') + if self.current_system.name == 'kesch': self.modules = ['cudatoolkit/8.0.61'] + elif: self.current_system.name in ['arolla', 'tsa']: + self.modules = ['cuda/10.1.243'] else: self.modules = ['craype-accel-nvidia60'] self.num_gpus_per_node = 1 self.nvidia_sm = '60' + if self.current_system.name == 'kesch': self.exclusive_access = True self.nvidia_sm = '37' + elif: self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.nvidia_sm = '70' self.maintainers = ['AJ', 'SK'] self.tags = {'production', 'craype', 'external-resources'} @@ -88,6 +99,9 @@ def __init__(self): self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] self.variables = {'G2G': '0'} self.num_gpus_per_node = 2 + elif: self.current_system.name in ['arolla', 'tsa']: + self.valid_prog_environs = ['PrgEnv-gnu'] + self.num_gpus_per_node = 2 else: self.variables = {'CRAY_CUDA_MPS': '1'} diff --git a/cscs-checks/system/slurm/slurm.py b/cscs-checks/system/slurm/slurm.py index 17093f63fb..9497b161d1 100644 --- a/cscs-checks/system/slurm/slurm.py +++ b/cscs-checks/system/slurm/slurm.py @@ -8,12 +8,14 @@ class SlurmSimpleBaseCheck(rfm.RunOnlyRegressionTest): def __init__(self): self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn'] - self.valid_prog_environs = ['PrgEnv-cray'] + 'kesch:cn', 'kesch:pn', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] self.tags = {'slurm', 'maintenance', 'ops', 'production', 
'single-node'} self.num_tasks_per_node = 1 - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.maintainers = ['RS', 'VH'] @@ -25,12 +27,14 @@ class SlurmCompiledBaseCheck(rfm.RegressionTest): def __init__(self): self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn'] - self.valid_prog_environs = ['PrgEnv-cray'] + 'kesch:cn', 'kesch:pn', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] self.tags = {'slurm', 'maintenance', 'ops', 'production', 'single-node'} self.num_tasks_per_node = 1 - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.maintainers = ['RS', 'VH'] @@ -42,6 +46,10 @@ def __init__(self): super().__init__() self.executable = '/bin/hostname' self.hostname_patt = { + 'arolla:cn': r'^arolla-cn\d{3}$', + 'arolla:pn': r'^arolla-pp\d{3}$', + 'tsa:cn': r'^tsa-cn\d{3}$', + 'tsa:pn': r'^tsa-pp\d{3}$', 'kesch:cn': r'^keschcn-\d{4}$', 'kesch:pn': r'^keschpn-\d{4}$', 'daint:gpu': r'^nid\d{5}$', @@ -66,7 +74,9 @@ def __init__(self): self.num_tasks = 2 self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn'] + 'kesch:cn', 'kesch:pn', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn'] self.executable = '/bin/echo' self.executable_opts = ['$MY_VAR'] self.variables = {'MY_VAR': 'TEST123456!'} @@ -108,7 +118,9 @@ def set_memory_limit(self): class DefaultRequestGPU(SlurmSimpleBaseCheck): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', + 'arolla:cn', 'kesch:cn', + 'tsa:cn'] self.executable = 'nvidia-smi' self.sanity_patterns = sn.assert_found( r'NVIDIA-SMI.*Driver Version.*', self.stdout) From c8ced78d8b5925ed61d414e1e8d5bc9c1392416b Mon Sep 17 00:00:00 2001 From: Luca Date: 
Wed, 5 Feb 2020 13:26:57 +0100 Subject: [PATCH 029/104] Adding cuda checks --- cscs-checks/cuda/cuda_checks.py | 8 ++++---- cscs-checks/cuda/multi_gpu.py | 20 ++++++++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index 4f29b1aff9..401d3b7128 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -13,14 +13,14 @@ def __init__(self): if self.current_system.name == 'kesch': self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-gnu-nompi'] - elif: self.current_system.name in ['arolla', 'tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.valid_prog_environs += ['PrgEnv-gnu-nompi'] self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CUDA', 'essentials') if self.current_system.name == 'kesch': self.modules = ['cudatoolkit/8.0.61'] - elif: self.current_system.name in ['arolla', 'tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.modules = ['cuda/10.1.243'] else: self.modules = ['craype-accel-nvidia60'] @@ -31,7 +31,7 @@ def __init__(self): if self.current_system.name == 'kesch': self.exclusive_access = True self.nvidia_sm = '37' - elif: self.current_system.name in ['arolla', 'tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True self.nvidia_sm = '70' @@ -98,7 +98,7 @@ def __init__(self): self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] self.variables = {'G2G': '0'} self.num_gpus_per_node = 2 - elif: self.current_system.name in ['arolla', 'tsa']: + elif self.current_system.name in ['arolla', 'tsa']: self.valid_prog_environs = ['PrgEnv-gnu'] self.num_gpus_per_node = 2 else: diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index fb085135be..4fe2404976 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -9,9 +9,10 @@ class GpuBandwidthCheck(rfm.RegressionTest): def __init__(self): super().__init__() - 
self.valid_systems = ['kesch:cn', 'daint:gpu', 'dom:gpu', 'tiger:gpu'] + self.valid_systems = ['kesch:cn', 'daint:gpu', 'dom:gpu', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.exclusive_access = True @@ -24,6 +25,8 @@ def __init__(self): nvidia_sm = '60' if self.current_system.name == 'kesch': nvidia_sm = '37' + elif self.current_system.name in ['arolla', 'tsa']: + nvidia_sm = '70' self.build_system.cxxflags = ['-I.', '-m64', '-arch=sm_%s' % nvidia_sm] self.sourcepath = 'bandwidthtestflex.cu' @@ -41,8 +44,11 @@ def __init__(self): if self.current_system.name in ['daint', 'dom', 'tiger']: self.modules = ['craype-accel-nvidia60'] self.num_gpus_per_node = 1 - else: - self.modules = ['craype-accel-nvidia35'] + elif self.current_system.name == 'kesch': + self.modules = ['cudatoolkit/8.0.61'] + self.num_gpus_per_node = 8 + elif self.current_system.name in ['arolla', 'tsa']: + self.modules = ['cuda/10.1.243'] self.num_gpus_per_node = 8 # perf_patterns and reference will be set by the sanity check function @@ -50,6 +56,9 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { + 'arolla:cn:h2d': (7583, -0.1, None, 'MB/s'), + 'arolla:cn:d2h': (7584, -0.1, None, 'MB/s'), + 'arolla:cn:d2d': (137408, -0.1, None, 'MB/s'), 'daint:gpu:h2d': (11881, -0.1, None, 'MB/s'), 'daint:gpu:d2h': (12571, -0.1, None, 'MB/s'), 'daint:gpu:d2d': (499000, -0.1, None, 'MB/s'), @@ -62,6 +71,9 @@ def __init__(self): 'tiger:gpu:h2d': (0, None, None, 'MB/s'), 'tiger:gpu:d2h': (0, None, None, 'MB/s'), 'tiger:gpu:d2d': (0, None, None, 'MB/s'), + 'tsa:cn:h2d': (7583, -0.1, None, 'MB/s'), + 'tsa:cn:d2h': (7584, -0.1, None, 'MB/s'), + 'tsa:cn:d2d': (137408, -0.1, None, 'MB/s'), } self.tags = {'diagnostic', 'benchmark', 'mch', 'craype', 'external-resources'} From 
0f1b0ad90f550946e0f02e82f8dd4bac60731758 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 14:30:01 +0100 Subject: [PATCH 030/104] Adding cuda_gdb test --- .../tools/profiling_and_debugging/cuda_gdb.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py index c764f88f55..ada2b834fd 100644 --- a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py +++ b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py @@ -10,7 +10,8 @@ class CudaGdbCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.valid_prog_environs = ['PrgEnv-gnu'] - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 self.sourcesdir = 'src/Cuda' @@ -21,21 +22,28 @@ def __init__(self): if self.current_system.name == 'kesch': self.exclusive_access = True self.modules = ['cudatoolkit/8.0.61'] + nvidia_sm = '37' + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.modules = ['cuda/10.1.243'] + nvidia_sm = '70' else: self.modules = ['craype-accel-nvidia60'] + nvidia_sm = '60' self.build_system = 'Make' self.build_system.makefile = 'Makefile_cuda_gdb' self.build_system.cflags = ['-g', '-D_CSCS_ITMAX=1', '-DUSE_MPI', '-fopenmp'] - nvidia_sm = '37' if self.current_system.name == 'kesch' else '60' self.build_system.cxxflags = ['-g', '-G', '-arch=sm_%s' % nvidia_sm] self.build_system.ldflags = ['-g', '-fopenmp', '-lstdc++'] - # FIXME: workaround until the kesch programming environment is fixed if self.current_system.name == 'kesch': self.build_system.ldflags = ['-g', '-fopenmp', '-lcublas', '-lcudart', '-lm'] + elif self.current_system.name in ['arolla', 'tsa']: + self.build_system.ldflags += ['-L$EBROOTCUDA/lib64', + '-lcudart', '-lm'] 
self.sanity_patterns = sn.all([ sn.assert_found(r'^Breakpoint 1 at .*: file ', self.stdout), From bd714a5823b1c051f0e14fdaf7115107b51aa08c Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 14:45:31 +0100 Subject: [PATCH 031/104] Adding ddt and nvprof checks --- .../tools/profiling_and_debugging/ddt.py | 21 ++++++++++++------- .../tools/profiling_and_debugging/nvprof.py | 11 +++++++--- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/ddt.py b/cscs-checks/tools/profiling_and_debugging/ddt.py index 650fdc1b7d..055b8a6adb 100644 --- a/cscs-checks/tools/profiling_and_debugging/ddt.py +++ b/cscs-checks/tools/profiling_and_debugging/ddt.py @@ -27,12 +27,9 @@ def __init__(self, lang, extension): self.modules = ['ddt'] self.prgenv_flags = { - # 'PrgEnv-cray': ' -O2 -homp', 'PrgEnv-gnu': ['-g', '-O2', '-fopenmp'], - # 'PrgEnv-intel': ' -O2 -qopenmp', - # 'PrgEnv-pgi': ' -O2 -mp' } - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.num_tasks = 1 @@ -70,9 +67,9 @@ class DdtCpuCheck(DdtCheck): def __init__(self, lang, extension): super().__init__(lang, extension) self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'tiger:gpu'] + 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] - if self.current_system.name == 'kesch' and self.lang == 'C': + if self.current_system.name in ['arolla', 'kesch', 'tsa'] and self.lang == 'C': self.build_system.ldflags = ['-lm'] residual_pattern = '_jacobi.%s:%d,residual' @@ -98,14 +95,17 @@ def __init__(self, lang, extension): class DdtGpuCheck(DdtCheck): def __init__(self, lang, extension): super().__init__(lang, extension) - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 self.system_modules = { + 
'arolla': ['cuda/10.1.243'], 'daint': ['craype-accel-nvidia60'], 'dom': ['craype-accel-nvidia60'], 'kesch': ['cudatoolkit/8.0.61'], 'tiger': ['craype-accel-nvidia60'], + 'tsa': ['cuda/10.1.243'] } sysname = self.current_system.name self.modules += self.system_modules.get(sysname, []) @@ -113,6 +113,8 @@ def __init__(self, lang, extension): # as long as cuda/9 will not be the default, we will need: if sysname in {'daint', 'kesch'}: self.variables = {'ALLINEA_FORCE_CUDA_VERSION': '8.0'} + elif sysname in {'arolla', 'tsa'}: + self.variables = {'ALLINEA_FORCE_CUDA_VERSION': '10.1'} self.ddt_options = [ '--offline --output=ddtreport.txt ', @@ -123,6 +125,11 @@ def __init__(self, lang, extension): if self.current_system.name == 'kesch': arch = 'sm_37' self.build_system.ldflags = ['-lm', '-lcudart'] + elif self.current_system.name in ['arolla', 'tsa']: + arch = 'sm_70' + self.build_system.ldflags = ['-lstdc++', '-lm', + '-L$EBROOTCUDA/lib64', + '-lcudart'] else: arch = 'sm_60' self.build_system.ldflags = ['-lstdc++'] diff --git a/cscs-checks/tools/profiling_and_debugging/nvprof.py b/cscs-checks/tools/profiling_and_debugging/nvprof.py index 8d5946408a..00655934ca 100644 --- a/cscs-checks/tools/profiling_and_debugging/nvprof.py +++ b/cscs-checks/tools/profiling_and_debugging/nvprof.py @@ -8,7 +8,8 @@ class NvprofCheck(rfm.RegressionTest): def __init__(self): super().__init__() self.descr = 'Checks the nvprof tool' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 @@ -32,12 +33,16 @@ def __init__(self): self.build_system.cxxflags = ['-g', '-G'] self.build_system.ldflags = ['-g', '-fopenmp', '-std=c99', '-lstdc++'] - # FIXME temporary workaround - # the programming environment should be adapted / fixed if self.current_system.name == 'kesch': 
self.exclusive_access = True self.modules = ['cudatoolkit/8.0.61'] self.build_system.ldflags += ['-lcudart', '-lm'] + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.modules = ['cuda/10.1.243'] + self.build_system.ldflags = ['-lstdc++', '-lm', + '-L$EBROOTCUDA/lib64', + '-lcudart'] else: self.modules = ['craype-accel-nvidia60'] From 80b27aadf0c26391ff847ce902c16cfe9e670e35 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 16:03:50 +0100 Subject: [PATCH 032/104] Adding flexible check mpi --- cscs-checks/prgenv/mpi.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cscs-checks/prgenv/mpi.py b/cscs-checks/prgenv/mpi.py index ae04401402..fdb97bc7e1 100644 --- a/cscs-checks/prgenv/mpi.py +++ b/cscs-checks/prgenv/mpi.py @@ -70,12 +70,15 @@ class MpiHelloTest(rfm.RegressionTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn', 'tiger:gpu'] + 'kesch:cn', 'kesch:pn', 'tiger:gpu', + 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray'] if self.current_system.name == 'kesch': self.exclusive_access = True - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', - 'PrgEnv-intel'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu'] self.descr = 'MPI Hello World' self.sourcepath = 'mpi_helloworld.c' From b92593b681c97cfd3b7644de71b447d2d150e83c Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 17:17:36 +0100 Subject: [PATCH 033/104] Minor update of helloworld --- cscs-checks/prgenv/helloworld.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index ef2f75468a..cac15c5b87 100644 --- a/cscs-checks/prgenv/helloworld.py +++ 
b/cscs-checks/prgenv/helloworld.py @@ -17,7 +17,8 @@ def __init__(self, variant, lang, linkage): self.sourcepath = 'hello_world' self.build_system = 'SingleSource' self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'arolla:cn', 'tsa:cn', 'tiger:gpu'] + 'kesch:cn', 'tiger:gpu','arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cray_classic', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi'] @@ -25,8 +26,8 @@ def __init__(self, variant, lang, linkage): if self.current_system.name in ['kesch', 'arolla', 'tsa']: self.exclusive_access = True - # Removing static compilation from kesch and tsa - if (self.current_system.name in ['kesch', 'arolla', 'tsa'] and + # Removing static compilation from kesch + if (self.current_system.name in ['kesch'] and linkage == 'static'): self.valid_prog_environs = [] From 10ea4de9ad97223e18148b3fc74722703c99cae5 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 17:57:57 +0100 Subject: [PATCH 034/104] NetCDF tests --- .../libraries/io/netcdf_compile_run.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/cscs-checks/libraries/io/netcdf_compile_run.py b/cscs-checks/libraries/io/netcdf_compile_run.py index 9dcbbf297f..e97eb87f10 100644 --- a/cscs-checks/libraries/io/netcdf_compile_run.py +++ b/cscs-checks/libraries/io/netcdf_compile_run.py @@ -18,7 +18,8 @@ def __init__(self, lang, linkage): self.linkage = linkage self.descr = lang_names[lang] + ' NetCDF ' + linkage.capitalize() self.valid_systems = ['daint:gpu', 'daint:mc', - 'dom:gpu', 'dom:mc', 'kesch:cn', 'tiger:gpu'] + 'dom:gpu', 'dom:mc', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] if self.current_system.name in ['daint', 'dom', 'tiger']: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi'] @@ -30,6 +31,9 @@ def __init__(self, lang, linkage): if lang != 'f90': self.valid_prog_environs += ['PrgEnv-cray-nompi'] + elif 
self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu-nompi', 'PrgEnv-pgi-nompi'] self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'netcdf') @@ -82,5 +86,23 @@ def setflags(self): '-I$EBROOTNETCDFMINCPLUSPLUS/include', '-I$EBROOTNETCDFMINFORTRAN/include' ] + elif self.current_system.name in ['arolla', 'tsa']: + self.modules = ['netcdf', + 'netcdf-c++', + 'netcdf-fortran'] + self.build_system.cppflags = [ + '-I$EBROOTNETCDF/include', + '-I$EBROOTNETCDFMINCPLUSPLUS/include', + '-I$EBROOTNETCDFMINFORTRAN/include' + ] + self.build_system.ldflags = [ + '-L$EBROOTNETCDF/lib', + '-L$EBROOTNETCDFMINCPLUSPLUS/lib', + '-L$EBROOTNETCDFMINFORTRAN/lib', + '-L$EBROOTNETCDF/lib64', + '-L$EBROOTNETCDFMINCPLUSPLUS/lib64', + '-L$EBROOTNETCDFMINFORTRAN/lib64', + '-lnetcdf', '-lnetcdf_c++4', '-lnetcdff' + ] else: self.build_system.ldflags = ['-%s' % self.linkage] From 37d20ca0ca5611371641524e7e3eb6b2e3c862b4 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 18:15:08 +0100 Subject: [PATCH 035/104] Adding halo exchange --- .../microbenchmarks/mpi/halo_cell_exchange.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index 697a518eee..9747ac2fac 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -10,7 +10,8 @@ def __init__(self): self.sourcepath = 'halo_cell_exchange.c' self.build_system = 'SingleSource' self.build_system.cflags = ['-O2'] - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', + 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'] self.num_tasks = 6 @@ -62,6 +63,17 @@ def __init__(self): } self.reference = { + 'arolla:cn': { + 'time_2_10': (2.280450e-06, 
None, 0.50, 's'), + 'time_2_10000': (8.059907e-06, None, 0.50, 's'), + 'time_2_1000000': (5.959686e-04, None, 0.50, 's'), + 'time_4_10': (2.951527e-06, None, 0.50, 's'), + 'time_4_10000': (1.258132e-05, None, 0.50, 's'), + 'time_4_1000000': (8.539153e-04, None, 0.50, 's'), + 'time_6_10': (3.740311e-06, None, 0.50, 's'), + 'time_6_10000': (1.448979e-05, None, 0.50, 's'), + 'time_6_1000000': (8.432294e-04, None, 0.50, 's') + }, 'dom:gpu': { 'time_2_10': (3.925395e-06, None, 0.50, 's'), 'time_2_10000': (9.721279e-06, None, 0.50, 's'), @@ -95,6 +107,17 @@ def __init__(self): 'time_6_10000': (1.448979e-05, None, 0.50, 's'), 'time_6_1000000': (8.432294e-04, None, 0.50, 's') }, + 'tsa:cn': { + 'time_2_10': (2.280450e-06, None, 0.50, 's'), + 'time_2_10000': (8.059907e-06, None, 0.50, 's'), + 'time_2_1000000': (5.959686e-04, None, 0.50, 's'), + 'time_4_10': (2.951527e-06, None, 0.50, 's'), + 'time_4_10000': (1.258132e-05, None, 0.50, 's'), + 'time_4_1000000': (8.539153e-04, None, 0.50, 's'), + 'time_6_10': (3.740311e-06, None, 0.50, 's'), + 'time_6_10000': (1.448979e-05, None, 0.50, 's'), + 'time_6_1000000': (8.432294e-04, None, 0.50, 's') + }, '*': { 'time_2_10': (0, None, None, 's'), 'time_2_10000': (0, None, None, 's'), From dd7ce1f6324556f212a519a15c48e55357b1cdbf Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 18:27:58 +0100 Subject: [PATCH 036/104] Adding alloc_speed --- .../microbenchmarks/alloc_speed/alloc_speed.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py index 3a3a7de168..82e153d8ad 100644 --- a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py +++ b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py @@ -15,7 +15,9 @@ def __init__(self, hugepages): 'tiger:gpu'] self.valid_prog_environs = ['PrgEnv-gnu'] if hugepages == 'no': - self.valid_systems += ['kesch:cn', 'kesch:pn'] + self.valid_systems += 
['kesch:cn', 'kesch:pn', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn'] else: if self.current_system.name in {'dom', 'daint', 'tiger'}: self.modules = ['craype-hugepages%s' % hugepages] @@ -34,6 +36,12 @@ def __init__(self, hugepages): } self.sys_reference = { 'no': { + 'arolla:cn': { + 'time': (1.60, None, 0.10, 's') + }, + 'arolla:pn': { + 'time': (0.70, None, 0.10, 's') + }, 'dom:gpu': { 'time': (1.22, None, 0.05, 's') }, @@ -52,6 +60,12 @@ def __init__(self, hugepages): 'kesch:pn': { 'time': (0.70, None, 0.10, 's') }, + 'tsa:cn': { + 'time': (1.60, None, 0.10, 's') + }, + 'tsa:pn': { + 'time': (0.70, None, 0.10, 's') + }, '*': { 'time': (0, None, None, 's') } From a181bd1deb4d32aa14b8f9158042692958c84893 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 18:35:56 +0100 Subject: [PATCH 037/104] Adding dgemm --- cscs-checks/microbenchmarks/dgemm/dgemm.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cscs-checks/microbenchmarks/dgemm/dgemm.py b/cscs-checks/microbenchmarks/dgemm/dgemm.py index 49621fd658..aff32e5888 100644 --- a/cscs-checks/microbenchmarks/dgemm/dgemm.py +++ b/cscs-checks/microbenchmarks/dgemm/dgemm.py @@ -17,12 +17,14 @@ def __init__(self): self.valid_systems = [ 'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn', 'tiger:gpu' + 'kesch:cn', 'kesch:pn', 'tiger:gpu', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn' ] if self.current_system.name in ['daint', 'dom', 'tiger']: self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel'] - if self.current_system.name == 'kesch': + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.num_tasks = 0 @@ -31,12 +33,16 @@ def __init__(self): self.build_system = 'SingleSource' self.build_system.cflags = ['-O3'] self.sys_reference = { + 'arolla:cn': (300.0, -0.15, None, 'Gflop/s'), + 'arolla:pn': (300.0, -0.15, None, 'Gflop/s'), 'daint:gpu': (300.0, -0.15, None, 'Gflop/s'), 'daint:mc': 
(860.0, -0.15, None, 'Gflop/s'), 'dom:gpu': (300.0, -0.15, None, 'Gflop/s'), 'dom:mc': (860.0, -0.15, None, 'Gflop/s'), 'kesch:cn': (300.0, -0.15, None, 'Gflop/s'), 'kesch:pn': (300.0, -0.15, None, 'Gflop/s'), + 'tsa:cn': (300.0, -0.15, None, 'Gflop/s'), + 'tsa:pn': (300.0, -0.15, None, 'Gflop/s'), } self.maintainers = ['AJ', 'VH'] @@ -62,7 +68,9 @@ def setup(self, partition, environ, **job_opts): self.num_cpus_per_task = 36 elif partition.fullname in ['tiger:gpu']: self.num_cpus_per_task = 18 - elif partition.fullname in ['kesch:cn', 'kesch:pn']: + elif partition.fullname in ['arolla:cn', 'arolla:pn', + 'kesch:cn', 'kesch:pn', + 'tsa:cn', 'tsa:pn']: self.num_cpus_per_task = 12 self.build_system.cflags += ['-I$EBROOTOPENBLAS/include'] self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas', From e1616c1d23324bd63ce5e3ce927589496d34121c Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 18:45:39 +0100 Subject: [PATCH 038/104] Adding gpu burn --- cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py index aacf6eb5fe..7a063c9235 100644 --- a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py +++ b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py @@ -8,7 +8,9 @@ class GpuBurnTest(rfm.RegressionTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', + 'kesch:cn', 'tiger:gpu' + 'arolla:cn', 'tsa:cn'] self.descr = 'GPU burn test' self.valid_prog_environs = ['PrgEnv-gnu'] @@ -22,6 +24,12 @@ def __init__(self): self.executable_opts = ['-d', '40'] self.num_gpus_per_node = 16 gpu_arch = '37' + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.modules = ['cuda/10.1.243'] + self.executable_opts = ['-d', '40'] + 
self.num_gpus_per_node = 8 + gpu_arch = '70' elif self.current_system.name in {'daint', 'dom', 'tiger'}: self.modules = ['craype-accel-nvidia60'] self.executable_opts = ['-d', '20'] From 2d7f26114568a18a05b027cb78cca3fba7318b55 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 19:10:08 +0100 Subject: [PATCH 039/104] Adding OSU benchmarks --- cscs-checks/microbenchmarks/osu/osu_tests.py | 54 ++++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py index 5f4998eff5..9f876e4573 100644 --- a/cscs-checks/microbenchmarks/osu/osu_tests.py +++ b/cscs-checks/microbenchmarks/osu/osu_tests.py @@ -57,12 +57,17 @@ def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'tiger:gpu', - 'kesch:cn', 'kesch:pn', 'leone:normal'] + 'kesch:cn', 'kesch:pn', + 'arolla:cn', 'arolla:pn', + 'tsa:cn', 'tsa:pn' ] self.valid_prog_environs = ['PrgEnv-cray'] if self.current_system.name == 'kesch': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel'] + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-pgi'] self.descr = 'Flexible Alltoall OSU test' self.build_system = 'Make' @@ -152,6 +157,9 @@ def __init__(self): if self.current_system.name == 'kesch': self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-pgi'] else: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel'] @@ -172,11 +180,15 @@ class P2PCPUBandwidthTest(P2PBaseTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'tiger:gpu', - 'dom:gpu', 'dom:mc', 'kesch:cn'] + 'dom:gpu', 'dom:mc', 'kesch:cn', + 
'arolla:cn', 'tsa:cn'] self.executable = './p2p_osu_bw' self.executable_opts = ['-x', '100', '-i', '1000'] self.reference = { + 'arolla:cn': { + 'bw': (6311.48, -0.15, None, 'MB/s') + }, 'daint:gpu': { 'bw': (9798.29, -0.1, None, 'MB/s') }, @@ -196,6 +208,9 @@ def __init__(self): 'kesch:cn': { 'bw': (6311.48, -0.15, None, 'MB/s') }, + 'tsa:cn': { + 'bw': (6311.48, -0.15, None, 'MB/s') + }, '*': { 'bw': (0, None, None, 'MB/s') } @@ -212,11 +227,15 @@ class P2PCPULatencyTest(P2PBaseTest): def __init__(self): super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'tiger:gpu', - 'dom:gpu', 'dom:mc', 'kesch:cn'] + 'dom:gpu', 'dom:mc', 'kesch:cn', + 'arolla:cn', 'tsa:cn'] self.executable_opts = ['-x', '100', '-i', '1000'] self.executable = './p2p_osu_latency' self.reference = { + 'arolla:cn': { + 'latency': (1.57, None, 0.1, 'us') + }, 'daint:gpu': { 'latency': (1.16, None, 1.0, 'us') }, @@ -236,6 +255,9 @@ def __init__(self): 'kesch:cn': { 'latency': (1.17, None, 0.1, 'us') }, + 'tsa:cn': { + 'latency': (1.57, None, 0.1, 'us') + }, '*': { 'latency': (0, None, None, 'us') } @@ -251,13 +273,17 @@ def __init__(self): class G2GBandwidthTest(P2PBaseTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.num_gpus_per_node = 1 self.executable = './p2p_osu_bw' self.executable_opts = ['-x', '100', '-i', '1000', '-d', 'cuda', 'D', 'D'] self.reference = { + 'arolla:cn': { + 'bw': (6288.98, -0.1, None, 'MB/s') + }, 'dom:gpu': { 'bw': (8897.86, -0.1, None, 'MB/s') }, @@ -267,6 +293,9 @@ def __init__(self): 'kesch:cn': { 'bw': (6288.98, -0.1, None, 'MB/s') }, + 'tsa:cn': { + 'bw': (6288.98, -0.1, None, 'MB/s') + }, '*': { 'bw': (0, None, None, 'MB/s') } @@ -282,6 +311,10 @@ def __init__(self): elif self.current_system.name == 'kesch': self.modules = ['cudatoolkit/8.0.61'] self.variables = {'MV2_USE_CUDA': '1'} 
+ elif self.current_system.name in ['arolla', 'tsa']: + self.modules = ['cuda/10.1.243'] + self.build_system.ldflags = ['-L$EBROOTCUDA/lib64', + '-lcudart', '-lcuda'] self.build_system.cppflags = ['-D_ENABLE_CUDA_'] @@ -291,13 +324,17 @@ def __init__(self): class G2GLatencyTest(P2PBaseTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.num_gpus_per_node = 1 self.executable = './p2p_osu_latency' self.executable_opts = ['-x', '100', '-i', '1000', '-d', 'cuda', 'D', 'D'] self.reference = { + 'arolla:cn': { + 'latency': (23.09, None, 0.1, 'us') + }, 'dom:gpu': { 'latency': (5.49, None, 0.1, 'us') }, @@ -307,6 +344,9 @@ def __init__(self): 'kesch:cn': { 'latency': (23.09, None, 0.1, 'us') }, + 'tsa:cn': { + 'latency': (23.09, None, 0.1, 'us') + }, '*': { 'latency': (0, None, None, 'us') } @@ -322,5 +362,9 @@ def __init__(self): elif self.current_system.name == 'kesch': self.modules = ['cudatoolkit/8.0.61'] self.variables = {'MV2_USE_CUDA': '1'} + elif self.current_system.name in ['arolla', 'tsa']: + self.modules = ['cuda/10.1.243'] + self.build_system.ldflags = ['-L$EBROOTCUDA/lib64', + '-lcudart', '-lcuda'] self.build_system.cppflags = ['-D_ENABLE_CUDA_'] From f4c51140840a0b62dba7e0081098614f16d52c22 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 5 Feb 2020 19:10:46 +0100 Subject: [PATCH 040/104] NCO and CDO --- cscs-checks/tools/io/cdo.py | 9 ++++++--- cscs-checks/tools/io/nco.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cscs-checks/tools/io/cdo.py b/cscs-checks/tools/io/cdo.py index 31ae46496c..492bfcb93d 100644 --- a/cscs-checks/tools/io/cdo.py +++ b/cscs-checks/tools/io/cdo.py @@ -30,8 +30,8 @@ def __init__(self): self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CDO-NCO') self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 
'kesch:pn'] - if self.current_system.name == 'kesch': + 'kesch:pn', 'arolla:pn', 'tsa:pn'] + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['cdo'] @@ -87,7 +87,10 @@ def __init__(self): r'(?i)error|conflict|unsupported|failure', self.stderr) def setup(self, partition, environ, **job_opts): - nco_name = 'nco' if self.current_system.name == 'kesch' else 'NCO' + if self.current_system.name in ['arolla', 'kesch', 'tsa']: + nco_name = 'nco' + else: + nco_name = 'NCO' self.pre_run = ['module load %s' % nco_name] super().setup(partition, environ, **job_opts) diff --git a/cscs-checks/tools/io/nco.py b/cscs-checks/tools/io/nco.py index 69c9269950..283fffe335 100644 --- a/cscs-checks/tools/io/nco.py +++ b/cscs-checks/tools/io/nco.py @@ -23,8 +23,8 @@ def __init__(self): self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CDO-NCO') self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:pn'] - if self.current_system.name == 'kesch': + 'kesch:pn', 'arolla:pn', 'tsa:pn'] + if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] self.modules = ['nco'] @@ -84,7 +84,10 @@ def __init__(self): r'(?i)error|conflict|unsupported|failure', self.stderr) def setup(self, partition, environ, **job_opts): - cdo_name = 'cdo' if self.current_system.name == 'kesch' else 'CDO' + if self.current_system.name in ['arolla', 'kesch', 'tsa']: + cdo_name = 'cdo' + else: + cdo_name = 'CDO' self.pre_run = ['module load %s' % cdo_name] super().setup(partition, environ, **job_opts) From 6c7bfcb6c6a06b7f7b159f44507242f32d217044 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 10:06:19 +0100 Subject: [PATCH 041/104] Adding stream --- cscs-checks/microbenchmarks/stream/stream.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git 
a/cscs-checks/microbenchmarks/stream/stream.py b/cscs-checks/microbenchmarks/stream/stream.py index fc34cfa229..a7fddb88c0 100644 --- a/cscs-checks/microbenchmarks/stream/stream.py +++ b/cscs-checks/microbenchmarks/stream/stream.py @@ -15,8 +15,8 @@ def __init__(self): self.descr = 'STREAM Benchmark' self.exclusive_access = True self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn', 'leone:normal', - 'tiger:gpu'] + 'kesch:cn', 'kesch:pn', 'tiger:gpu', + 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray_classic'] @@ -36,12 +36,17 @@ def __init__(self): self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] cray_flags = self.prgenv_flags['PrgEnv-cray_classic'] self.prgenv_flags['PrgEnv-cray'] = cray_flags + elif self.current_system.name in ['arolla', 'tsa']: + self.exclusive_access = True + self.valid_prog_environs = ['PrgEnv-gnu'] self.sourcepath = 'stream.c' self.build_system = 'SingleSource' self.num_tasks = 1 self.num_tasks_per_node = 1 self.stream_cpus_per_task = { + 'arolla:cn': 12, + 'arolla:pn': 12, 'daint:gpu': 12, 'daint:mc': 36, 'dom:gpu': 12, @@ -50,6 +55,8 @@ def __init__(self): 'kesch:pn': 24, 'leone:normal': 16, 'monch:compute': 20, + 'tsa:cn': 12, + 'tsa:pn': 12, } self.variables = { 'OMP_PLACES': 'threads', @@ -79,13 +86,17 @@ def __init__(self): '*': {'triad': (0.0, None, None, 'MB/s')}, }, 'PrgEnv-gnu': { + 'arolla:cn': {'triad': (47000, -0.05, None, 'MB/s')}, + 'arolla:pn': {'triad': (84400, -0.05, None, 'MB/s')}, 'daint:gpu': {'triad': (43800, -0.05, None, 'MB/s')}, 'daint:mc': {'triad': (88500, -0.05, None, 'MB/s')}, 'dom:gpu': {'triad': (43800, -0.05, None, 'MB/s')}, 'dom:mc': {'triad': (87500, -0.05, None, 'MB/s')}, 'kesch:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'kesch:pn': {'triad': (84400, -0.05, None, 'MB/s')}, - 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, +# 'leone:normal': {'triad': 
(44767.0, -0.05, None, 'MB/s')}, + 'tsa:cn': {'triad': (47000, -0.05, None, 'MB/s')}, + 'tsa:pn': {'triad': (84400, -0.05, None, 'MB/s')}, '*': {'triad': (0.0, None, None, 'MB/s')}, }, 'PrgEnv-intel': { From 142eb16136a14dd274011f7a30338feaa008ac76 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 10:14:46 +0100 Subject: [PATCH 042/104] Adding kernel latency --- .../kernel_latency/kernel_latency.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py index 4361a2fe4d..8bb8844a15 100644 --- a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py +++ b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py @@ -8,7 +8,8 @@ class KernelLatencyTest(rfm.RegressionTest): def __init__(self, kernel_version): # List known partitions here so as to avoid specifying them every time # with --system - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', + 'arolla:cn', 'tsa:cn'] self.num_tasks = 0 self.num_tasks_per_node = 1 self.sourcepath = 'kernel_latency.cu' @@ -25,6 +26,11 @@ def __init__(self, kernel_version): self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] self.modules = ['craype-accel-nvidia35'] gpu_arch = '37' + elif self.current_system.name in ['arolla', 'tsa']: + self.num_gpus_per_node = 8 + self.valid_prog_environs = ['PrgEnv-pgi'] + self.modules = ['cuda/10.1.243'] + gpu_arch = '70' else: # Enable test when running on an unknown system self.num_gpus_per_node = 1 @@ -59,6 +65,8 @@ def __init__(self, kernel_version): } self.sys_reference = { 'sync': { + 'arolla:cn': { + 'latency': (12.0, None, 0.10, 'us') 'dom:gpu': { 'latency': (6.6, None, 0.10, 'us') }, @@ -67,12 +75,16 @@ def __init__(self, kernel_version): }, 'kesch:cn': { 'latency': (12.0, None, 0.10, 'us') + 'tsa:cn': { + 'latency': (12.0, 
None, 0.10, 'us') }, '*': { 'latency': (0.0, None, None, 'us') } }, 'async': { + 'arolla:cn': { + 'latency': (5.7, None, 0.10, 'us') 'dom:gpu': { 'latency': (2.2, None, 0.10, 'us') }, @@ -81,6 +93,8 @@ def __init__(self, kernel_version): }, 'kesch:cn': { 'latency': (5.7, None, 0.10, 'us') + 'tsa:cn': { + 'latency': (5.7, None, 0.10, 'us') }, '*': { 'latency': (0.0, None, None, 'us') From f8c2debe1ab8db8df1bb1adb2651f0f9f72dc58d Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 10:24:20 +0100 Subject: [PATCH 043/104] Minor fix --- cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py index 8bb8844a15..0a319f29bb 100644 --- a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py +++ b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py @@ -67,6 +67,7 @@ def __init__(self, kernel_version): 'sync': { 'arolla:cn': { 'latency': (12.0, None, 0.10, 'us') + }, 'dom:gpu': { 'latency': (6.6, None, 0.10, 'us') }, @@ -75,6 +76,7 @@ def __init__(self, kernel_version): }, 'kesch:cn': { 'latency': (12.0, None, 0.10, 'us') + }, 'tsa:cn': { 'latency': (12.0, None, 0.10, 'us') }, @@ -85,6 +87,7 @@ def __init__(self, kernel_version): 'async': { 'arolla:cn': { 'latency': (5.7, None, 0.10, 'us') + }, 'dom:gpu': { 'latency': (2.2, None, 0.10, 'us') }, @@ -93,6 +96,7 @@ def __init__(self, kernel_version): }, 'kesch:cn': { 'latency': (5.7, None, 0.10, 'us') + }, 'tsa:cn': { 'latency': (5.7, None, 0.10, 'us') }, From d2b103d3fc0526856711423b4ce84ca95641117b Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 12:40:21 +0100 Subject: [PATCH 044/104] Loading netcdf-fortran to fix a check --- cscs-checks/tools/io/cdo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/tools/io/cdo.py b/cscs-checks/tools/io/cdo.py index 492bfcb93d..e5bff11505 100644 --- 
a/cscs-checks/tools/io/cdo.py +++ b/cscs-checks/tools/io/cdo.py @@ -34,7 +34,7 @@ def __init__(self): if self.current_system.name in ['arolla', 'kesch', 'tsa']: self.exclusive_access = True self.valid_prog_environs = ['PrgEnv-gnu-nompi'] - self.modules = ['cdo'] + self.modules = ['cdo', 'netcdf-fortran'] else: self.valid_prog_environs = ['PrgEnv-gnu'] self.modules = ['CDO'] From f87c604f77e67e0d1119afd217f232773c434f40 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 15:32:12 +0100 Subject: [PATCH 045/104] Fixing PEP 8 format --- cscs-checks/mch/gpu_direct_cuda.py | 5 +++-- cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py | 2 +- cscs-checks/microbenchmarks/dgemm/dgemm.py | 2 +- cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py | 2 +- cscs-checks/microbenchmarks/osu/osu_tests.py | 6 +++--- cscs-checks/microbenchmarks/stream/stream.py | 4 ++-- cscs-checks/prgenv/helloworld.py | 2 +- cscs-checks/system/slurm/slurm.py | 2 +- cscs-checks/tools/profiling_and_debugging/cuda_gdb.py | 2 +- cscs-checks/tools/profiling_and_debugging/ddt.py | 7 ++++--- 10 files changed, 18 insertions(+), 16 deletions(-) diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index fda135da88..15a32de0ae 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -7,7 +7,7 @@ class GpuDirectCudaCheck(rfm.RegressionTest): def __init__(self): self.descr = 'tests gpu-direct for CUDA' - self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] self.sourcepath = 'gpu_direct_cuda.cu' @@ -41,4 +41,5 @@ def __init__(self): self.stdout, 'result', float) self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) self.maintainers = ['AJ', 'MKr'] - self.tags = {'production', 'mch', 'craype'} \ No newline at end of file + self.tags = {'production', 'mch', 'craype'} + diff --git 
a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py index 82e153d8ad..f4f0ea8c60 100644 --- a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py +++ b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py @@ -15,7 +15,7 @@ def __init__(self, hugepages): 'tiger:gpu'] self.valid_prog_environs = ['PrgEnv-gnu'] if hugepages == 'no': - self.valid_systems += ['kesch:cn', 'kesch:pn', + self.valid_systems += ['kesch:cn', 'kesch:pn', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] else: diff --git a/cscs-checks/microbenchmarks/dgemm/dgemm.py b/cscs-checks/microbenchmarks/dgemm/dgemm.py index aff32e5888..7bacde7b4f 100644 --- a/cscs-checks/microbenchmarks/dgemm/dgemm.py +++ b/cscs-checks/microbenchmarks/dgemm/dgemm.py @@ -68,7 +68,7 @@ def setup(self, partition, environ, **job_opts): self.num_cpus_per_task = 36 elif partition.fullname in ['tiger:gpu']: self.num_cpus_per_task = 18 - elif partition.fullname in ['arolla:cn', 'arolla:pn', + elif partition.fullname in ['arolla:cn', 'arolla:pn', 'kesch:cn', 'kesch:pn', 'tsa:cn', 'tsa:pn']: self.num_cpus_per_task = 12 diff --git a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py index 7a063c9235..d589828edf 100644 --- a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py +++ b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py @@ -8,7 +8,7 @@ class GpuBurnTest(rfm.RegressionTest): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu' 'arolla:cn', 'tsa:cn'] self.descr = 'GPU burn test' diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py index 9f876e4573..5b804923b0 100644 --- a/cscs-checks/microbenchmarks/osu/osu_tests.py +++ b/cscs-checks/microbenchmarks/osu/osu_tests.py @@ -59,7 +59,7 @@ def __init__(self): 'dom:gpu', 'dom:mc', 'tiger:gpu', 
'kesch:cn', 'kesch:pn', 'arolla:cn', 'arolla:pn', - 'tsa:cn', 'tsa:pn' ] + 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray'] if self.current_system.name == 'kesch': self.exclusive_access = True @@ -159,7 +159,7 @@ def __init__(self): self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True - self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-pgi'] + self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-pgi'] else: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel'] @@ -364,7 +364,7 @@ def __init__(self): self.variables = {'MV2_USE_CUDA': '1'} elif self.current_system.name in ['arolla', 'tsa']: self.modules = ['cuda/10.1.243'] - self.build_system.ldflags = ['-L$EBROOTCUDA/lib64', + self.build_system.ldflags = ['-L$EBROOTCUDA/lib64', '-lcudart', '-lcuda'] self.build_system.cppflags = ['-D_ENABLE_CUDA_'] diff --git a/cscs-checks/microbenchmarks/stream/stream.py b/cscs-checks/microbenchmarks/stream/stream.py index a7fddb88c0..ea23f2930c 100644 --- a/cscs-checks/microbenchmarks/stream/stream.py +++ b/cscs-checks/microbenchmarks/stream/stream.py @@ -15,7 +15,7 @@ def __init__(self): self.descr = 'STREAM Benchmark' self.exclusive_access = True self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'kesch:pn', 'tiger:gpu', + 'kesch:cn', 'kesch:pn', 'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', @@ -94,7 +94,7 @@ def __init__(self): 'dom:mc': {'triad': (87500, -0.05, None, 'MB/s')}, 'kesch:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'kesch:pn': {'triad': (84400, -0.05, None, 'MB/s')}, -# 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, +# 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, 'tsa:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'tsa:pn': {'triad': (84400, -0.05, None, 'MB/s')}, '*': {'triad': (0.0, None, None, 
'MB/s')}, diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index a5bcc79ffe..0ae195df31 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -18,7 +18,7 @@ def __init__(self, variant, lang, linkage): self.sourcepath = 'hello_world' self.build_system = 'SingleSource' self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', - 'kesch:cn', 'tiger:gpu','arolla:cn', 'arolla:pn', + 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cray_classic', diff --git a/cscs-checks/system/slurm/slurm.py b/cscs-checks/system/slurm/slurm.py index 9497b161d1..f7832c1a1e 100644 --- a/cscs-checks/system/slurm/slurm.py +++ b/cscs-checks/system/slurm/slurm.py @@ -118,7 +118,7 @@ def set_memory_limit(self): class DefaultRequestGPU(SlurmSimpleBaseCheck): def __init__(self): super().__init__() - self.valid_systems = ['daint:gpu', 'dom:gpu', + self.valid_systems = ['daint:gpu', 'dom:gpu', 'arolla:cn', 'kesch:cn', 'tsa:cn'] self.executable = 'nvidia-smi' diff --git a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py index ada2b834fd..7bca40efcc 100644 --- a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py +++ b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py @@ -42,7 +42,7 @@ def __init__(self): self.build_system.ldflags = ['-g', '-fopenmp', '-lcublas', '-lcudart', '-lm'] elif self.current_system.name in ['arolla', 'tsa']: - self.build_system.ldflags += ['-L$EBROOTCUDA/lib64', + self.build_system.ldflags += ['-L$EBROOTCUDA/lib64', '-lcudart', '-lm'] self.sanity_patterns = sn.all([ diff --git a/cscs-checks/tools/profiling_and_debugging/ddt.py b/cscs-checks/tools/profiling_and_debugging/ddt.py index 055b8a6adb..63c5cb7e81 100644 --- a/cscs-checks/tools/profiling_and_debugging/ddt.py +++ b/cscs-checks/tools/profiling_and_debugging/ddt.py @@ -69,7 +69,8 @@ def __init__(self, lang, 
extension): self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] - if self.current_system.name in ['arolla', 'kesch', 'tsa'] and self.lang == 'C': + if self.current_system.name in ['arolla', 'kesch', 'tsa'] \ + and self.lang == 'C': self.build_system.ldflags = ['-lm'] residual_pattern = '_jacobi.%s:%d,residual' @@ -127,8 +128,8 @@ def __init__(self, lang, extension): self.build_system.ldflags = ['-lm', '-lcudart'] elif self.current_system.name in ['arolla', 'tsa']: arch = 'sm_70' - self.build_system.ldflags = ['-lstdc++', '-lm', - '-L$EBROOTCUDA/lib64', + self.build_system.ldflags = ['-lstdc++', '-lm', + '-L$EBROOTCUDA/lib64', '-lcudart'] else: arch = 'sm_60' From 893b4116b39055da082b1789044c9cd04cef5e44 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 6 Feb 2020 16:08:07 +0100 Subject: [PATCH 046/104] Adding Tsa to cscs-ci.py --- config/cscs-ci.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/config/cscs-ci.py b/config/cscs-ci.py index 7cf36e510b..239a1237d2 100644 --- a/config/cscs-ci.py +++ b/config/cscs-ci.py @@ -65,6 +65,24 @@ class ReframeSettings: } } }, + 'tsa': { + 'descr': 'Tsa MCH', + 'hostnames': [r'tsa-\w+\d+'], + 'modules_system': 'tmod', + 'resourcesdir': '/apps/common/UES/reframe/resources', + 'partitions': { + 'cn': { + 'scheduler': 'nativeslurm', + 'access': ['--partition=cn-regression'], + 'environs': ['PrgEnv-gnu', 'PrgEnv-gnu-nompi', + 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], + 'descr': 'Tsa compute nodes', + 'resources': { + '_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'], + } + } + } + }, 'generic': { 'descr': 'Generic example system', 'partitions': { From 3612bfc5393a1f4ec433f68d23e9720c936601fc Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 7 Feb 2020 14:52:13 +0100 Subject: [PATCH 047/104] Adding environments of Kesch adn Tsa to cscs-ci.py --- config/cscs-ci.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git 
a/config/cscs-ci.py b/config/cscs-ci.py index 239a1237d2..1165df2e0e 100644 --- a/config/cscs-ci.py +++ b/config/cscs-ci.py @@ -98,6 +98,82 @@ class ReframeSettings: }, 'environments': { + + 'kesch': { + 'PrgEnv-pgi-nompi': { + 'modules': ['PE/17.06', + 'PrgEnv-pgi/18.5'], + 'cc': 'pgcc', + 'cxx': 'pgc++', + 'ftn': 'pgf90', + }, + 'PrgEnv-pgi': { + 'modules': [ + 'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26', + 'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0' + ], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpifort', + }, + 'PrgEnv-cray': { + 'modules': ['PE/17.06', + 'PrgEnv-CrayCCE/17.06'], + }, + 'PrgEnv-cray-nompi': { + 'modules': ['PE/17.06', + 'PrgEnv-cray'], + }, + 'PrgEnv-gnu': { + 'modules': ['PE/17.06', + 'gmvapich2/17.02_cuda_8.0_gdr'], + 'variables': { + 'LD_PRELOAD': '$(pkg-config --variable=libdir mvapich2-gdr)/libmpi.so' + }, + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpif90', + }, + 'PrgEnv-gnu-nompi': { + 'modules': ['PE/17.06', + 'PrgEnv-gnu'], + 'cc': 'gcc', + 'cxx': 'g++', + 'ftn': 'gfortran', + }, + }, + + 'tsa': { + 'PrgEnv-pgi-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-pgi/19.9'], + 'cc': 'pgcc', + 'cxx': 'pgc++', + 'ftn': 'pgf90', + }, + 'PrgEnv-pgi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-pgi/19.9'], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpifort', + }, + 'PrgEnv-gnu': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-gnu/19.2'], + 'cc': 'mpicc', + 'cxx': 'mpicxx', + 'ftn': 'mpifort', + }, + 'PrgEnv-gnu-nompi': { + 'type': 'ProgEnvironment', + 'modules': ['PrgEnv-gnu/19.2'], + 'cc': 'gcc', + 'cxx': 'g++', + 'ftn': 'gfortran', + }, + }, + '*': { 'PrgEnv-cray': { 'modules': ['PrgEnv-cray'], From c4b91e3957c896154211376ea5426889900231f9 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:27:01 +0100 Subject: [PATCH 048/104] Update nvprof.py --- cscs-checks/tools/profiling_and_debugging/nvprof.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/cscs-checks/tools/profiling_and_debugging/nvprof.py b/cscs-checks/tools/profiling_and_debugging/nvprof.py index 00655934ca..7fbb002561 100644 --- a/cscs-checks/tools/profiling_and_debugging/nvprof.py +++ b/cscs-checks/tools/profiling_and_debugging/nvprof.py @@ -6,7 +6,6 @@ @rfm.simple_test class NvprofCheck(rfm.RegressionTest): def __init__(self): - super().__init__() self.descr = 'Checks the nvprof tool' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] From ed619d9d17b3e278c37fc69b99eec3f053ea6387 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:28:13 +0100 Subject: [PATCH 049/104] Update ddt.py --- cscs-checks/tools/profiling_and_debugging/ddt.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/ddt.py b/cscs-checks/tools/profiling_and_debugging/ddt.py index 63c5cb7e81..6cf4ed40fe 100644 --- a/cscs-checks/tools/profiling_and_debugging/ddt.py +++ b/cscs-checks/tools/profiling_and_debugging/ddt.py @@ -68,9 +68,8 @@ def __init__(self, lang, extension): super().__init__(lang, extension) self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] - - if self.current_system.name in ['arolla', 'kesch', 'tsa'] \ - and self.lang == 'C': + if (self.current_system.name in ['arolla', 'kesch', 'tsa'] + and self.lang == 'C'): self.build_system.ldflags = ['-lm'] residual_pattern = '_jacobi.%s:%d,residual' From 7a6027cbe7a26f5164e95903d665d37005ee5471 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:29:19 +0100 Subject: [PATCH 050/104] Update cuda_checks.py --- cscs-checks/cuda/cuda_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index 401d3b7128..c6baef9014 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -5,7 +5,6 @@ class CudaCheck(rfm.RegressionTest): def 
__init__(self): - super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] From 42a48d6e7972de4c96ccd6a97d996084fd409f4c Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:29:52 +0100 Subject: [PATCH 051/104] Update cuda_checks.py --- cscs-checks/cuda/cuda_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index c6baef9014..365b942bb8 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -8,7 +8,6 @@ def __init__(self): self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu'] - if self.current_system.name == 'kesch': self.valid_prog_environs += ['PrgEnv-cray-nompi', 'PrgEnv-gnu-nompi'] From f14f49c0a92dcfcd88dbc0e6b244192485bbf8ee Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:30:35 +0100 Subject: [PATCH 052/104] Update multi_gpu.py --- cscs-checks/cuda/multi_gpu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index 4fe2404976..d30812c883 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -8,7 +8,6 @@ @rfm.simple_test class GpuBandwidthCheck(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_systems = ['kesch:cn', 'daint:gpu', 'dom:gpu', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-gnu'] From 07b9f7ae01d14a07e790b19a8c64037f439dc3ec Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:31:21 +0100 Subject: [PATCH 053/104] Update netcdf_compile_run.py --- cscs-checks/libraries/io/netcdf_compile_run.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cscs-checks/libraries/io/netcdf_compile_run.py b/cscs-checks/libraries/io/netcdf_compile_run.py index 
203de28d6d..bb82d93d91 100644 --- a/cscs-checks/libraries/io/netcdf_compile_run.py +++ b/cscs-checks/libraries/io/netcdf_compile_run.py @@ -86,9 +86,7 @@ def setflags(self): '-I$EBROOTNETCDFMINFORTRAN/include' ] elif self.current_system.name in ['arolla', 'tsa']: - self.modules = ['netcdf', - 'netcdf-c++', - 'netcdf-fortran'] + self.modules = ['netcdf', 'netcdf-c++', 'netcdf-fortran'] self.build_system.cppflags = [ '-I$EBROOTNETCDF/include', '-I$EBROOTNETCDFMINCPLUSPLUS/include', From eae471a8b61c0f9a35fd32921085bb615af7701e Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:31:43 +0100 Subject: [PATCH 054/104] Update automatic_arrays_acc.py --- cscs-checks/mch/automatic_arrays_acc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index 391cbc0c33..04fba21824 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -5,7 +5,6 @@ @rfm.simple_test class AutomaticArraysCheck(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-cce', 'PrgEnv-pgi'] From aee8196f55f2f6b3ee559e16d456e59a37ee53ee Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:32:09 +0100 Subject: [PATCH 055/104] Update cuda_checks.py --- cscs-checks/cuda/cuda_checks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index 365b942bb8..a8e8098fbb 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -13,6 +13,7 @@ def __init__(self): 'PrgEnv-gnu-nompi'] elif self.current_system.name in ['arolla', 'tsa']: self.valid_prog_environs += ['PrgEnv-gnu-nompi'] + self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'CUDA', 'essentials') From b20f64c637fb090477ad75ac1ee9dcb866cb9fe4 Mon Sep 17 
00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:32:35 +0100 Subject: [PATCH 056/104] Update cuda_checks.py --- cscs-checks/cuda/cuda_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index a8e8098fbb..10993a4c25 100644 --- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -23,7 +23,6 @@ def __init__(self): self.modules = ['cuda/10.1.243'] else: self.modules = ['craype-accel-nvidia60'] - self.num_gpus_per_node = 1 self.nvidia_sm = '60' From 10e66a29017999e56d8e02d34f57fb9aa5912e41 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:35:17 +0100 Subject: [PATCH 057/104] Update collectives_halo.py --- cscs-checks/mch/collectives_halo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 909551c318..3278feab40 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -116,9 +116,6 @@ def __init__(self, variant, bench_reference): 'tsa:cn': { 'elapsed_time': (ref, None, 0.15) }, - '*': { - 'elapsed_time': (ref, None, None) - } } self.maintainers = ['AJ', 'MKr'] From a301013963fc0724beaa0e64490fc3e89dcb3a55 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:36:00 +0100 Subject: [PATCH 058/104] Update gpu_direct_acc.py --- cscs-checks/mch/gpu_direct_acc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/mch/gpu_direct_acc.py b/cscs-checks/mch/gpu_direct_acc.py index 8857160598..1b16c4e369 100644 --- a/cscs-checks/mch/gpu_direct_acc.py +++ b/cscs-checks/mch/gpu_direct_acc.py @@ -7,7 +7,6 @@ @rfm.simple_test class GpuDirectAccCheck(rfm.RegressionTest): def __init__(self): - super().__init__() self.descr = 'tests gpu-direct for Fortran OpenACC' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] From 3d278c28c1c65b3303721e4dac24bc35d70f8ad0 Mon Sep 17 00:00:00 2001 From: 
lucamar Date: Thu, 13 Feb 2020 08:36:49 +0100 Subject: [PATCH 059/104] Update cuda_gdb.py --- cscs-checks/tools/profiling_and_debugging/cuda_gdb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py index 7bca40efcc..a26aacd99f 100644 --- a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py +++ b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py @@ -8,7 +8,6 @@ @rfm.simple_test class CudaGdbCheck(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_prog_environs = ['PrgEnv-gnu'] self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] From 7d6fb6129f1e4d374de0e5b4941962d84681f21b Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:37:45 +0100 Subject: [PATCH 060/104] Update cuda_stress_test.py --- cscs-checks/mch/cuda_stress_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index ccb3519959..2a24a03d6f 100644 --- a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -5,7 +5,6 @@ @rfm.simple_test class CudaStressTest(rfm.RegressionTest): def __init__(self): - super().__init__() self.descr = 'MCH CUDA stress test' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu', 'arolla:cn', 'tsa:cn'] From e769f9759e1659022f11e150369c53b992052cf8 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 08:38:04 +0100 Subject: [PATCH 061/104] Update openacc_cuda_mpi_cppstd.py --- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index 64c90cff65..afb2b2b208 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -5,7 +5,6 @@ @rfm.simple_test class OpenaccCudaCpp(rfm.RegressionTest): def 
__init__(self): - super().__init__() self.descr = 'test for OpenACC, CUDA, MPI, and C++' self.valid_systems = ['daint:gpu', 'dom:gpu', 'tiger:gpu', 'arolla:cn', 'kesch:cn', 'tsa:cn'] From 146d08bc5a7a1eda0265eec58a7b55d4221b58d7 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 08:46:26 +0100 Subject: [PATCH 062/104] Removing leone and monch from cscs.py --- config/cscs.py | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/config/cscs.py b/config/cscs.py index 1fa82b5b2f..c4b2b60d7f 100644 --- a/config/cscs.py +++ b/config/cscs.py @@ -363,52 +363,6 @@ class ReframeSettings: } }, - 'leone': { - 'descr': 'Leone', - 'hostnames': ['leone'], - 'modules_system': 'tmod', - 'resourcesdir': '/apps/common/UES/reframe/resources', - 'partitions': { - 'login': { - 'scheduler': 'local', - 'environs': ['PrgEnv-gnu'], - 'descr': 'Leone login nodes', - 'max_jobs': 1 - }, - - 'normal': { - 'scheduler': 'nativeslurm', - 'environs': ['PrgEnv-gnu'], - 'descr': ('Leone compute nodes - ' - 'default partition'), - 'max_jobs': 10 - }, - } - }, - - 'monch': { - 'descr': 'Monch PASC', - 'hostnames': ['monch'], - 'modules_system': 'tmod', - 'resourcesdir': '/apps/common/UES/reframe/resources', - 'partitions': { - 'login': { - 'scheduler': 'local', - 'environs': ['PrgEnv-gnu'], - 'descr': 'Monch login nodes', - 'max_jobs': 1 - }, - - 'compute': { - 'scheduler': 'slurm+mpirun', - 'access': ['--partition=compute'], - 'environs': ['PrgEnv-gnu'], - 'descr': 'Monch compute nodes', - 'max_jobs': 10 - } - } - }, - 'generic': { 'descr': 'Generic example system', 'partitions': { From e39d741ebb7567730c5b4beeb0937e25991b6963 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 08:52:43 +0100 Subject: [PATCH 063/104] Removing environments for Kesch and Tsa from cscs-ci.py --- config/cscs-ci.py | 76 ----------------------------------------------- 1 file changed, 76 deletions(-) diff --git a/config/cscs-ci.py b/config/cscs-ci.py index 
1165df2e0e..239a1237d2 100644 --- a/config/cscs-ci.py +++ b/config/cscs-ci.py @@ -98,82 +98,6 @@ class ReframeSettings: }, 'environments': { - - 'kesch': { - 'PrgEnv-pgi-nompi': { - 'modules': ['PE/17.06', - 'PrgEnv-pgi/18.5'], - 'cc': 'pgcc', - 'cxx': 'pgc++', - 'ftn': 'pgf90', - }, - 'PrgEnv-pgi': { - 'modules': [ - 'PE/17.06', 'pgi/18.5-gcc-5.4.0-2.26', - 'openmpi/4.0.1-pgi-18.5-gcc-5.4.0-2.26-cuda-8.0' - ], - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpifort', - }, - 'PrgEnv-cray': { - 'modules': ['PE/17.06', - 'PrgEnv-CrayCCE/17.06'], - }, - 'PrgEnv-cray-nompi': { - 'modules': ['PE/17.06', - 'PrgEnv-cray'], - }, - 'PrgEnv-gnu': { - 'modules': ['PE/17.06', - 'gmvapich2/17.02_cuda_8.0_gdr'], - 'variables': { - 'LD_PRELOAD': '$(pkg-config --variable=libdir mvapich2-gdr)/libmpi.so' - }, - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpif90', - }, - 'PrgEnv-gnu-nompi': { - 'modules': ['PE/17.06', - 'PrgEnv-gnu'], - 'cc': 'gcc', - 'cxx': 'g++', - 'ftn': 'gfortran', - }, - }, - - 'tsa': { - 'PrgEnv-pgi-nompi': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/19.9'], - 'cc': 'pgcc', - 'cxx': 'pgc++', - 'ftn': 'pgf90', - }, - 'PrgEnv-pgi': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-pgi/19.9'], - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpifort', - }, - 'PrgEnv-gnu': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/19.2'], - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpifort', - }, - 'PrgEnv-gnu-nompi': { - 'type': 'ProgEnvironment', - 'modules': ['PrgEnv-gnu/19.2'], - 'cc': 'gcc', - 'cxx': 'g++', - 'ftn': 'gfortran', - }, - }, - '*': { 'PrgEnv-cray': { 'modules': ['PrgEnv-cray'], From ad1c7aacfd5300c5158a3ba75e09925bfe47d38f Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 08:55:36 +0100 Subject: [PATCH 064/104] Format adjusted in cuda_checks.py --- cscs-checks/cuda/cuda_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/cuda/cuda_checks.py b/cscs-checks/cuda/cuda_checks.py index 10993a4c25..0ce2d7f848 100644 
--- a/cscs-checks/cuda/cuda_checks.py +++ b/cscs-checks/cuda/cuda_checks.py @@ -25,7 +25,6 @@ def __init__(self): self.modules = ['craype-accel-nvidia60'] self.num_gpus_per_node = 1 self.nvidia_sm = '60' - if self.current_system.name == 'kesch': self.exclusive_access = True self.nvidia_sm = '37' From 79cd40f1de85e4ea8d17a1a8c98f402f62f100ff Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:00:43 +0100 Subject: [PATCH 065/104] Format adjusted in gpu_direct_cuda.py --- cscs-checks/mch/gpu_direct_cuda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/mch/gpu_direct_cuda.py b/cscs-checks/mch/gpu_direct_cuda.py index 15a32de0ae..302b8bd812 100644 --- a/cscs-checks/mch/gpu_direct_cuda.py +++ b/cscs-checks/mch/gpu_direct_cuda.py @@ -42,4 +42,3 @@ def __init__(self): self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5) self.maintainers = ['AJ', 'MKr'] self.tags = {'production', 'mch', 'craype'} - From 4cc1e23b2e3395fecf377760e1659a89a5d4b55c Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:01:41 +0100 Subject: [PATCH 066/104] Format adjusted in openacc_cuda_mpi_cppstd.py --- cscs-checks/mch/openacc_cuda_mpi_cppstd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py index afb2b2b208..fcc88ec05d 100644 --- a/cscs-checks/mch/openacc_cuda_mpi_cppstd.py +++ b/cscs-checks/mch/openacc_cuda_mpi_cppstd.py @@ -82,6 +82,7 @@ def setup(self, partition, environ, **job_opts): ] elif self.current_system.name in ['arolla', 'tsa']: self.build_system.ldflags += [ - '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart'] + '-L$EBROOTCUDA/lib64', '-lcublas', '-lcudart' + ] super().setup(partition, environ, **job_opts) From 6de6be0c2949bbd0bc48585e83d3f7caec3650a6 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:14:57 +0100 Subject: [PATCH 067/104] Content adjusted in alloc_speed.py --- 
cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py index f4f0ea8c60..cbbb2b5ef0 100644 --- a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py +++ b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py @@ -66,9 +66,6 @@ def __init__(self, hugepages): 'tsa:pn': { 'time': (0.70, None, 0.10, 's') }, - '*': { - 'time': (0, None, None, 's') - } }, '2M': { 'dom:gpu': { From ccf29224eb3a1ad43fa28aae0aa390e07f4ce73d Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:15:36 +0100 Subject: [PATCH 068/104] Startin reference numbers for Arolla and Tsa in gpu_burn_test.py --- cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py index d589828edf..7dd9f80f18 100644 --- a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py +++ b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py @@ -7,7 +7,6 @@ @rfm.simple_test class GpuBurnTest(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu' 'arolla:cn', 'tsa:cn'] @@ -56,6 +55,10 @@ def __init__(self): } self.reference = { + 'arolla:cn': { + 'perf': (950, -0.10, None, 'Gflop/s'), + 'max_temp': (0, None, None, 'Celsius') + }, 'dom:gpu': { 'perf': (4115, -0.10, None, 'Gflop/s'), 'max_temp': (0, None, None, 'Celsius') @@ -68,6 +71,10 @@ def __init__(self): 'perf': (950, -0.10, None, 'Gflop/s'), 'max_temp': (0, None, None, 'Celsius') }, + 'tsa:cn': { + 'perf': (950, -0.10, None, 'Gflop/s'), + 'max_temp': (0, None, None, 'Celsius') + }, '*': { 'perf': (0, None, None, 'Gflop/s'), 'max_temp': (0, None, None, 'Celsius') From b41e764dc390d8fee972481d30c2bab240eb2bb3 Mon Sep 17 00:00:00 2001 From: Luca 
Date: Thu, 13 Feb 2020 09:17:29 +0100 Subject: [PATCH 069/104] Removed * reference numbers from kernel_latency.py --- .../microbenchmarks/kernel_latency/kernel_latency.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py index 0a319f29bb..05858c73d4 100644 --- a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py +++ b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py @@ -80,9 +80,6 @@ def __init__(self, kernel_version): 'tsa:cn': { 'latency': (12.0, None, 0.10, 'us') }, - '*': { - 'latency': (0.0, None, None, 'us') - } }, 'async': { 'arolla:cn': { @@ -100,9 +97,6 @@ def __init__(self, kernel_version): 'tsa:cn': { 'latency': (5.7, None, 0.10, 'us') }, - '*': { - 'latency': (0.0, None, None, 'us') - } }, } From 59cb741fbb071ec4b3a835ccc83d567806f5d772 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:18:27 +0100 Subject: [PATCH 070/104] Removed * reference numbers from halo_cell_exchange.py --- cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index 9747ac2fac..66d2c7aa12 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -118,17 +118,6 @@ def __init__(self): 'time_6_10000': (1.448979e-05, None, 0.50, 's'), 'time_6_1000000': (8.432294e-04, None, 0.50, 's') }, - '*': { - 'time_2_10': (0, None, None, 's'), - 'time_2_10000': (0, None, None, 's'), - 'time_2_1000000': (0, None, None, 's'), - 'time_4_10': (0, None, None, 's'), - 'time_4_10000': (0, None, None, 's'), - 'time_4_1000000': (0, None, None, 's'), - 'time_6_10': (0, None, None, 's'), - 'time_6_10000': (0, None, None, 's'), - 'time_6_1000000': (0, None, None, 's') - } } self.maintainers = ['AJ'] From 
efc07a0b7e8e1ee077ff3f0de171a0ed994202f7 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:23:41 +0100 Subject: [PATCH 071/104] Removed * reference numbers from osu_tests.py --- cscs-checks/microbenchmarks/osu/osu_tests.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py index 5b804923b0..0e3b921041 100644 --- a/cscs-checks/microbenchmarks/osu/osu_tests.py +++ b/cscs-checks/microbenchmarks/osu/osu_tests.py @@ -54,7 +54,6 @@ def __init__(self, variant): @rfm.simple_test class FlexAlltoallTest(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'tiger:gpu', 'kesch:cn', 'kesch:pn', @@ -211,9 +210,6 @@ def __init__(self): 'tsa:cn': { 'bw': (6311.48, -0.15, None, 'MB/s') }, - '*': { - 'bw': (0, None, None, 'MB/s') - } } self.perf_patterns = { 'bw': sn.extractsingle(r'^4194304\s+(?P\S+)', From 2a4a717a5334bf8b037712591434b5330a56d54a Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:25:07 +0100 Subject: [PATCH 072/104] Removed * reference numbers from stream.py --- cscs-checks/microbenchmarks/stream/stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/stream/stream.py b/cscs-checks/microbenchmarks/stream/stream.py index ea23f2930c..ca448603e0 100644 --- a/cscs-checks/microbenchmarks/stream/stream.py +++ b/cscs-checks/microbenchmarks/stream/stream.py @@ -97,7 +97,6 @@ def __init__(self): # 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, 'tsa:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'tsa:pn': {'triad': (84400, -0.05, None, 'MB/s')}, - '*': {'triad': (0.0, None, None, 'MB/s')}, }, 'PrgEnv-intel': { 'daint:gpu': {'triad': (59500, -0.05, None, 'MB/s')}, From 09d18e8ad6018c36027ede6d838a886ef5205d91 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:26:08 +0100 Subject: [PATCH 073/104] Removed super() from mpi.py --- 
cscs-checks/prgenv/mpi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/prgenv/mpi.py b/cscs-checks/prgenv/mpi.py index fdb97bc7e1..4ddb23b47c 100644 --- a/cscs-checks/prgenv/mpi.py +++ b/cscs-checks/prgenv/mpi.py @@ -29,7 +29,6 @@ class MpiInitTest(rfm.RegressionTest): ''' def __init__(self, required_thread): - super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'tiger:gpu'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', From 8a75a9569e4cd6df27b9b5d89bce0bf32159296f Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:30:38 +0100 Subject: [PATCH 074/104] Removed **kwargs from helloworld.py --- cscs-checks/prgenv/helloworld.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cscs-checks/prgenv/helloworld.py b/cscs-checks/prgenv/helloworld.py index 0ae195df31..ea15475e3c 100644 --- a/cscs-checks/prgenv/helloworld.py +++ b/cscs-checks/prgenv/helloworld.py @@ -28,8 +28,7 @@ def __init__(self, variant, lang, linkage): self.exclusive_access = True # Removing static compilation from kesch - if (self.current_system.name in ['kesch'] and - linkage == 'static'): + if (self.current_system.name in ['kesch'] and linkage == 'static'): self.valid_prog_environs = [] self.compilation_time_seconds = None @@ -117,8 +116,8 @@ def cray_linker_workaround(self): for lang in ['cpp', 'c', 'f90'] for linkage in ['dynamic', 'static'])) class HelloWorldTestSerial(HelloWorldBaseTest): - def __init__(self, lang, linkage, **kwargs): - super().__init__('serial', lang, linkage, **kwargs) + def __init__(self, lang, linkage): + super().__init__('serial', lang, linkage) self.valid_systems += ['kesch:pn', 'arolla:pn', 'tsa:pn'] self.sourcepath += '_serial.' 
+ lang self.descr += ' Serial ' + linkage.capitalize() From e51caf03db46e2d57147522a51b14b5a894d4351 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:33:10 +0100 Subject: [PATCH 075/104] Adjusted format in cdo.py --- cscs-checks/tools/io/cdo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cscs-checks/tools/io/cdo.py b/cscs-checks/tools/io/cdo.py index e5bff11505..9e46bc177b 100644 --- a/cscs-checks/tools/io/cdo.py +++ b/cscs-checks/tools/io/cdo.py @@ -91,6 +91,7 @@ def setup(self, partition, environ, **job_opts): nco_name = 'nco' else: nco_name = 'NCO' + self.pre_run = ['module load %s' % nco_name] super().setup(partition, environ, **job_opts) From 4adef54db4cefc2d9f0c7da12464ebe4638fad73 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 09:33:42 +0100 Subject: [PATCH 076/104] Adjusted format in nco.py --- cscs-checks/tools/io/nco.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cscs-checks/tools/io/nco.py b/cscs-checks/tools/io/nco.py index 283fffe335..a1c26a9921 100644 --- a/cscs-checks/tools/io/nco.py +++ b/cscs-checks/tools/io/nco.py @@ -88,6 +88,7 @@ def setup(self, partition, environ, **job_opts): cdo_name = 'cdo' else: cdo_name = 'CDO' + self.pre_run = ['module load %s' % cdo_name] super().setup(partition, environ, **job_opts) From 12d6f9a02f017efdda44c24e32ceca0485406444 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 10:05:40 +0100 Subject: [PATCH 077/104] Removing reference values for arolla and tsa from cscs-checks/cuda/multi_gpu.py --- cscs-checks/cuda/multi_gpu.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index d30812c883..df1326e1b3 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -55,9 +55,6 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { - 'arolla:cn:h2d': (7583, -0.1, None, 'MB/s'), - 'arolla:cn:d2h': (7584, -0.1, None, 'MB/s'), - 'arolla:cn:d2d': (137408, -0.1, 
None, 'MB/s'), 'daint:gpu:h2d': (11881, -0.1, None, 'MB/s'), 'daint:gpu:d2h': (12571, -0.1, None, 'MB/s'), 'daint:gpu:d2d': (499000, -0.1, None, 'MB/s'), @@ -70,9 +67,6 @@ def __init__(self): 'tiger:gpu:h2d': (0, None, None, 'MB/s'), 'tiger:gpu:d2h': (0, None, None, 'MB/s'), 'tiger:gpu:d2d': (0, None, None, 'MB/s'), - 'tsa:cn:h2d': (7583, -0.1, None, 'MB/s'), - 'tsa:cn:d2h': (7584, -0.1, None, 'MB/s'), - 'tsa:cn:d2d': (137408, -0.1, None, 'MB/s'), } self.tags = {'diagnostic', 'benchmark', 'mch', 'craype', 'external-resources'} From 31e1f65ec0ca9d239b18cb86935736cdab6cad8a Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 10:14:49 +0100 Subject: [PATCH 078/104] Null reference values for Arolla and Tsa in cscs-checks/cuda/multi_gpu.py --- cscs-checks/cuda/multi_gpu.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index df1326e1b3..cf0d0a260b 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -55,6 +55,10 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { +# FIXME: numbers for Arolla and Tsa need to be updated (sanity check fail if they are not listed) + 'arolla:gpu:h2d': (0, None, None, 'MB/s'), + 'arolla:gpu:d2h': (0, None, None, 'MB/s'), + 'arolla:gpu:d2d': (0, None, None, 'MB/s'), 'daint:gpu:h2d': (11881, -0.1, None, 'MB/s'), 'daint:gpu:d2h': (12571, -0.1, None, 'MB/s'), 'daint:gpu:d2d': (499000, -0.1, None, 'MB/s'), @@ -67,6 +71,9 @@ def __init__(self): 'tiger:gpu:h2d': (0, None, None, 'MB/s'), 'tiger:gpu:d2h': (0, None, None, 'MB/s'), 'tiger:gpu:d2d': (0, None, None, 'MB/s'), + 'tsa:gpu:h2d': (0, None, None, 'MB/s'), + 'tsa:gpu:d2h': (0, None, None, 'MB/s'), + 'tsa:gpu:d2d': (0, None, None, 'MB/s'), } self.tags = {'diagnostic', 'benchmark', 'mch', 'craype', 'external-resources'} From 5ecdc455bca2a0c9466861c7eace8aa7107f274c Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 10:26:29 +0100 Subject: [PATCH 
079/104] Dummy reference values to avoid sanity failure for Arolla and Tsa in cscs-checks/cuda/multi_gpu.py --- cscs-checks/cuda/multi_gpu.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index cf0d0a260b..4374bd8dc6 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -55,10 +55,10 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { -# FIXME: numbers for Arolla and Tsa need to be updated (sanity check fail if they are not listed) - 'arolla:gpu:h2d': (0, None, None, 'MB/s'), - 'arolla:gpu:d2h': (0, None, None, 'MB/s'), - 'arolla:gpu:d2d': (0, None, None, 'MB/s'), +# FIXME: reference values for Arolla and Tsa need to be updated (sanity check fails if they are not defined) + 'arolla:cn:h2d': (7583, -0.1, None, 'MB/s'), + 'arolla:cn:d2h': (7584, -0.1, None, 'MB/s'), + 'arolla:cn:d2d': (137408, -0.1, None, 'MB/s'), 'daint:gpu:h2d': (11881, -0.1, None, 'MB/s'), 'daint:gpu:d2h': (12571, -0.1, None, 'MB/s'), 'daint:gpu:d2d': (499000, -0.1, None, 'MB/s'), @@ -71,9 +71,9 @@ def __init__(self): 'tiger:gpu:h2d': (0, None, None, 'MB/s'), 'tiger:gpu:d2h': (0, None, None, 'MB/s'), 'tiger:gpu:d2d': (0, None, None, 'MB/s'), - 'tsa:gpu:h2d': (0, None, None, 'MB/s'), - 'tsa:gpu:d2h': (0, None, None, 'MB/s'), - 'tsa:gpu:d2d': (0, None, None, 'MB/s'), + 'tsa:cn:h2d': (7583, -0.1, None, 'MB/s'), + 'tsa:cn:d2h': (7584, -0.1, None, 'MB/s'), + 'tsa:cn:d2d': (137408, -0.1, None, 'MB/s'), } self.tags = {'diagnostic', 'benchmark', 'mch', 'craype', 'external-resources'} From bd68fb23525566189908a52743bf0bda917c8378 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 10:27:47 +0100 Subject: [PATCH 080/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/dgemm/dgemm.py --- cscs-checks/microbenchmarks/dgemm/dgemm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/cscs-checks/microbenchmarks/dgemm/dgemm.py b/cscs-checks/microbenchmarks/dgemm/dgemm.py index 7bacde7b4f..e6efd535e3 100644 --- a/cscs-checks/microbenchmarks/dgemm/dgemm.py +++ b/cscs-checks/microbenchmarks/dgemm/dgemm.py @@ -33,16 +33,12 @@ def __init__(self): self.build_system = 'SingleSource' self.build_system.cflags = ['-O3'] self.sys_reference = { - 'arolla:cn': (300.0, -0.15, None, 'Gflop/s'), - 'arolla:pn': (300.0, -0.15, None, 'Gflop/s'), 'daint:gpu': (300.0, -0.15, None, 'Gflop/s'), 'daint:mc': (860.0, -0.15, None, 'Gflop/s'), 'dom:gpu': (300.0, -0.15, None, 'Gflop/s'), 'dom:mc': (860.0, -0.15, None, 'Gflop/s'), 'kesch:cn': (300.0, -0.15, None, 'Gflop/s'), 'kesch:pn': (300.0, -0.15, None, 'Gflop/s'), - 'tsa:cn': (300.0, -0.15, None, 'Gflop/s'), - 'tsa:pn': (300.0, -0.15, None, 'Gflop/s'), } self.maintainers = ['AJ', 'VH'] From 08c111b5dca9e70c23b85fa91206103b638e09ee Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:11:23 +0100 Subject: [PATCH 081/104] Removed additional programming environments from config/cscs-ci.py --- config/cscs-ci.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/cscs-ci.py b/config/cscs-ci.py index 239a1237d2..6c9fd2fec7 100644 --- a/config/cscs-ci.py +++ b/config/cscs-ci.py @@ -74,8 +74,7 @@ class ReframeSettings: 'cn': { 'scheduler': 'nativeslurm', 'access': ['--partition=cn-regression'], - 'environs': ['PrgEnv-gnu', 'PrgEnv-gnu-nompi', - 'PrgEnv-pgi', 'PrgEnv-pgi-nompi'], + 'environs': ['PrgEnv-gnu'], 'descr': 'Tsa compute nodes', 'resources': { '_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'], From 95211458818df126457e1369ab47ed34d71344b4 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:28:58 +0100 Subject: [PATCH 082/104] Removed reference values for Arolla and Tsa from cscs-checks/mch/collectives_halo.py --- cscs-checks/mch/collectives_halo.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py 
b/cscs-checks/mch/collectives_halo.py index 3278feab40..395d250a5d 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -68,11 +68,6 @@ def __init__(self, variant, bench_reference): self.stdout, 1, float, -1) } ref_values = { - 'arolla': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, 'kesch': { 'nocomm': 5.7878, 'nocomp': 5.62155, @@ -83,11 +78,6 @@ def __init__(self, variant, bench_reference): 'nocomp': 0.0137893, 'default': 0.0138493 }, - 'tsa': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, } if self.current_system.name == 'dom': @@ -101,9 +91,6 @@ def __init__(self, variant, bench_reference): ref = 0.0 self.reference = { - 'arolla:cn': { - 'elapsed_time': (ref, None, 0.15) - }, 'kesch:cn': { 'elapsed_time': (ref, None, 0.15) }, @@ -113,9 +100,6 @@ def __init__(self, variant, bench_reference): 'dom': { 'elapsed_time': (ref, None, 0.15) }, - 'tsa:cn': { - 'elapsed_time': (ref, None, 0.15) - }, } self.maintainers = ['AJ', 'MKr'] From 0e93ba93b339578eb3348a7b3347d67e67764947 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:30:11 +0100 Subject: [PATCH 083/104] Removed reference values for Arolla and Tsa from cscs-checks/mch/cuda_stress_test.py --- cscs-checks/mch/cuda_stress_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index 2a24a03d6f..06f1c37051 100644 --- a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -29,9 +29,6 @@ def __init__(self): 'time': sn.extractsingle(r'Timing: (\S+)', self.stdout, 1, float) } self.reference = { - 'arolla:cn': { - 'time': (2.25, None, 0.05) - }, 'daint:gpu': { 'time': (1.41184, None, 0.05) }, @@ -41,9 +38,6 @@ def __init__(self): 'kesch:cn': { 'time': (2.25, None, 0.05) }, - 'tsa:cn': { - 'time': (2.25, None, 0.05) - } } self.tags = {'production', 'mch', 'craype'} self.maintainers = ['MKr', 'AJ'] From 
e6fd735b21f0f563c7039617e82c23a52692f5ee Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:31:23 +0100 Subject: [PATCH 084/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py --- .../microbenchmarks/alloc_speed/alloc_speed.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py index cbbb2b5ef0..5e695fd159 100644 --- a/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py +++ b/cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py @@ -36,12 +36,6 @@ def __init__(self, hugepages): } self.sys_reference = { 'no': { - 'arolla:cn': { - 'time': (1.60, None, 0.10, 's') - }, - 'arolla:pn': { - 'time': (0.70, None, 0.10, 's') - }, 'dom:gpu': { 'time': (1.22, None, 0.05, 's') }, @@ -60,12 +54,6 @@ def __init__(self, hugepages): 'kesch:pn': { 'time': (0.70, None, 0.10, 's') }, - 'tsa:cn': { - 'time': (1.60, None, 0.10, 's') - }, - 'tsa:pn': { - 'time': (0.70, None, 0.10, 's') - }, }, '2M': { 'dom:gpu': { From 3dcec07e84441fc7967ed53a08e2ec6ed5d1dab0 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:32:58 +0100 Subject: [PATCH 085/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py --- .../microbenchmarks/gpu_burn/gpu_burn_test.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py index 7dd9f80f18..fdd565784e 100644 --- a/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py +++ b/cscs-checks/microbenchmarks/gpu_burn/gpu_burn_test.py @@ -55,10 +55,6 @@ def __init__(self): } self.reference = { - 'arolla:cn': { - 'perf': (950, -0.10, None, 'Gflop/s'), - 'max_temp': (0, None, None, 'Celsius') - }, 'dom:gpu': { 'perf': (4115, -0.10, None, 'Gflop/s'), 'max_temp': (0, None, None, 'Celsius') @@ -71,14 +67,6 
@@ def __init__(self): 'perf': (950, -0.10, None, 'Gflop/s'), 'max_temp': (0, None, None, 'Celsius') }, - 'tsa:cn': { - 'perf': (950, -0.10, None, 'Gflop/s'), - 'max_temp': (0, None, None, 'Celsius') - }, - '*': { - 'perf': (0, None, None, 'Gflop/s'), - 'max_temp': (0, None, None, 'Celsius') - } } self.num_tasks = 0 From 0aa014a6d49fc4790989a8fbc533f8855e2cd873 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:36:21 +0100 Subject: [PATCH 086/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/stream/stream.py --- cscs-checks/microbenchmarks/stream/stream.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cscs-checks/microbenchmarks/stream/stream.py b/cscs-checks/microbenchmarks/stream/stream.py index ca448603e0..e452b63058 100644 --- a/cscs-checks/microbenchmarks/stream/stream.py +++ b/cscs-checks/microbenchmarks/stream/stream.py @@ -45,8 +45,8 @@ def __init__(self): self.num_tasks = 1 self.num_tasks_per_node = 1 self.stream_cpus_per_task = { - 'arolla:cn': 12, - 'arolla:pn': 12, + 'arolla:cn': 16, + 'arolla:pn': 16, 'daint:gpu': 12, 'daint:mc': 36, 'dom:gpu': 12, @@ -55,8 +55,8 @@ def __init__(self): 'kesch:pn': 24, 'leone:normal': 16, 'monch:compute': 20, - 'tsa:cn': 12, - 'tsa:pn': 12, + 'tsa:cn': 16, + 'tsa:pn': 16, } self.variables = { 'OMP_PLACES': 'threads', @@ -86,8 +86,6 @@ def __init__(self): '*': {'triad': (0.0, None, None, 'MB/s')}, }, 'PrgEnv-gnu': { - 'arolla:cn': {'triad': (47000, -0.05, None, 'MB/s')}, - 'arolla:pn': {'triad': (84400, -0.05, None, 'MB/s')}, 'daint:gpu': {'triad': (43800, -0.05, None, 'MB/s')}, 'daint:mc': {'triad': (88500, -0.05, None, 'MB/s')}, 'dom:gpu': {'triad': (43800, -0.05, None, 'MB/s')}, @@ -95,8 +93,6 @@ def __init__(self): 'kesch:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'kesch:pn': {'triad': (84400, -0.05, None, 'MB/s')}, # 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, - 'tsa:cn': {'triad': (47000, -0.05, None, 'MB/s')}, - 'tsa:pn': 
{'triad': (84400, -0.05, None, 'MB/s')}, }, 'PrgEnv-intel': { 'daint:gpu': {'triad': (59500, -0.05, None, 'MB/s')}, From 2ab87fdfad664e6a8d75100be36d299e1106f64c Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:41:33 +0100 Subject: [PATCH 087/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py --- .../microbenchmarks/kernel_latency/kernel_latency.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py index 05858c73d4..4eefcf7bfd 100644 --- a/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py +++ b/cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py @@ -65,9 +65,6 @@ def __init__(self, kernel_version): } self.sys_reference = { 'sync': { - 'arolla:cn': { - 'latency': (12.0, None, 0.10, 'us') - }, 'dom:gpu': { 'latency': (6.6, None, 0.10, 'us') }, @@ -77,14 +74,8 @@ def __init__(self, kernel_version): 'kesch:cn': { 'latency': (12.0, None, 0.10, 'us') }, - 'tsa:cn': { - 'latency': (12.0, None, 0.10, 'us') - }, }, 'async': { - 'arolla:cn': { - 'latency': (5.7, None, 0.10, 'us') - }, 'dom:gpu': { 'latency': (2.2, None, 0.10, 'us') }, @@ -94,9 +85,6 @@ def __init__(self, kernel_version): 'kesch:cn': { 'latency': (5.7, None, 0.10, 'us') }, - 'tsa:cn': { - 'latency': (5.7, None, 0.10, 'us') - }, }, } From bf54819f6564620996587c3b6992ad8adf5e1645 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:42:38 +0100 Subject: [PATCH 088/104] Removed reference values for Arolla and Tsa from cscs-checks/mch/automatic_arrays_acc.py --- cscs-checks/mch/automatic_arrays_acc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index 04fba21824..bf1557e1cc 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -41,11 +41,9 @@ 
def __init__(self): 'kesch:cn': {'time': (2.9E-04, None, 0.15)}, }, 'PrgEnv-pgi': { - 'arolla:cn': {'time': (1.4E-04, None, 0.15)}, 'daint:gpu': {'time': (7.5E-05, None, 0.15)}, 'dom:gpu': {'time': (7.5e-05, None, 0.15)}, 'kesch:cn': {'time': (1.4E-04, None, 0.15)}, - 'tsa:cn': {'time': (1.4E-04, None, 0.15)}, } } From 1a74d70e59b6dce6a2c96fa7e60e06ff551c5994 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:44:21 +0100 Subject: [PATCH 089/104] Removed reference values for Arolla and Tsa from cscs-checks/mch/collectives_halo.py --- cscs-checks/mch/collectives_halo.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 395d250a5d..e39502ff4f 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -117,11 +117,6 @@ class AlltoallvTest(CollectivesBaseTest): def __init__(self, variant): super().__init__(variant, { - 'arolla': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, 'kesch': { 'nocomm': 6.89819, 'nocomp': 6.98276, @@ -132,11 +127,6 @@ def __init__(self, variant): 'nocomp': 0.0137893, 'default': 0.0138493 }, - 'tsa': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, }) self.strict_check = False self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git' @@ -148,11 +138,6 @@ class HaloExchangeTest(CollectivesBaseTest): def __init__(self, variant): super().__init__(variant, { - 'arolla': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, 'kesch': { 'nocomm': 5.7878, 'nocomp': 54.2012, @@ -163,11 +148,6 @@ def __init__(self, variant): 'nocomp': 1.36716, 'default': 2.53509 }, - 'tsa': { - 'nocomm': 5.7878, - 'nocomp': 5.62155, - 'default': 5.53777 - }, }) self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git' self.prebuild_cmd = ['git checkout barebones'] From 98895ac82ca9af4852e1ba2a3b8818250b779320 Mon Sep 17 00:00:00 2001 From: Luca 
Date: Thu, 13 Feb 2020 15:46:12 +0100 Subject: [PATCH 090/104] Removed reference values for Arolla and Tsa from cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py --- .../microbenchmarks/mpi/halo_cell_exchange.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index 66d2c7aa12..2eed26f42f 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -63,17 +63,6 @@ def __init__(self): } self.reference = { - 'arolla:cn': { - 'time_2_10': (2.280450e-06, None, 0.50, 's'), - 'time_2_10000': (8.059907e-06, None, 0.50, 's'), - 'time_2_1000000': (5.959686e-04, None, 0.50, 's'), - 'time_4_10': (2.951527e-06, None, 0.50, 's'), - 'time_4_10000': (1.258132e-05, None, 0.50, 's'), - 'time_4_1000000': (8.539153e-04, None, 0.50, 's'), - 'time_6_10': (3.740311e-06, None, 0.50, 's'), - 'time_6_10000': (1.448979e-05, None, 0.50, 's'), - 'time_6_1000000': (8.432294e-04, None, 0.50, 's') - }, 'dom:gpu': { 'time_2_10': (3.925395e-06, None, 0.50, 's'), 'time_2_10000': (9.721279e-06, None, 0.50, 's'), @@ -107,17 +96,6 @@ def __init__(self): 'time_6_10000': (1.448979e-05, None, 0.50, 's'), 'time_6_1000000': (8.432294e-04, None, 0.50, 's') }, - 'tsa:cn': { - 'time_2_10': (2.280450e-06, None, 0.50, 's'), - 'time_2_10000': (8.059907e-06, None, 0.50, 's'), - 'time_2_1000000': (5.959686e-04, None, 0.50, 's'), - 'time_4_10': (2.951527e-06, None, 0.50, 's'), - 'time_4_10000': (1.258132e-05, None, 0.50, 's'), - 'time_4_1000000': (8.539153e-04, None, 0.50, 's'), - 'time_6_10': (3.740311e-06, None, 0.50, 's'), - 'time_6_10000': (1.448979e-05, None, 0.50, 's'), - 'time_6_1000000': (8.432294e-04, None, 0.50, 's') - }, } self.maintainers = ['AJ'] From 53909665f128e63eb5b982c7873aeb03dfe8069f Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Feb 2020 15:48:30 +0100 Subject: [PATCH 091/104] Removed 
reference values for Arolla and Tsa from cscs-checks/microbenchmarks/osu/osu_tests.py --- cscs-checks/microbenchmarks/osu/osu_tests.py | 24 -------------------- 1 file changed, 24 deletions(-) diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py index 0e3b921041..9f2386309e 100644 --- a/cscs-checks/microbenchmarks/osu/osu_tests.py +++ b/cscs-checks/microbenchmarks/osu/osu_tests.py @@ -185,9 +185,6 @@ def __init__(self): self.executable_opts = ['-x', '100', '-i', '1000'] self.reference = { - 'arolla:cn': { - 'bw': (6311.48, -0.15, None, 'MB/s') - }, 'daint:gpu': { 'bw': (9798.29, -0.1, None, 'MB/s') }, @@ -207,9 +204,6 @@ def __init__(self): 'kesch:cn': { 'bw': (6311.48, -0.15, None, 'MB/s') }, - 'tsa:cn': { - 'bw': (6311.48, -0.15, None, 'MB/s') - }, } self.perf_patterns = { 'bw': sn.extractsingle(r'^4194304\s+(?P\S+)', @@ -229,9 +223,6 @@ def __init__(self): self.executable = './p2p_osu_latency' self.reference = { - 'arolla:cn': { - 'latency': (1.57, None, 0.1, 'us') - }, 'daint:gpu': { 'latency': (1.16, None, 1.0, 'us') }, @@ -251,9 +242,6 @@ def __init__(self): 'kesch:cn': { 'latency': (1.17, None, 0.1, 'us') }, - 'tsa:cn': { - 'latency': (1.57, None, 0.1, 'us') - }, '*': { 'latency': (0, None, None, 'us') } @@ -277,9 +265,6 @@ def __init__(self): 'cuda', 'D', 'D'] self.reference = { - 'arolla:cn': { - 'bw': (6288.98, -0.1, None, 'MB/s') - }, 'dom:gpu': { 'bw': (8897.86, -0.1, None, 'MB/s') }, @@ -289,9 +274,6 @@ def __init__(self): 'kesch:cn': { 'bw': (6288.98, -0.1, None, 'MB/s') }, - 'tsa:cn': { - 'bw': (6288.98, -0.1, None, 'MB/s') - }, '*': { 'bw': (0, None, None, 'MB/s') } @@ -328,9 +310,6 @@ def __init__(self): 'cuda', 'D', 'D'] self.reference = { - 'arolla:cn': { - 'latency': (23.09, None, 0.1, 'us') - }, 'dom:gpu': { 'latency': (5.49, None, 0.1, 'us') }, @@ -340,9 +319,6 @@ def __init__(self): 'kesch:cn': { 'latency': (23.09, None, 0.1, 'us') }, - 'tsa:cn': { - 'latency': (23.09, None, 0.1, 'us') - }, 
'*': { 'latency': (0, None, None, 'us') } From f6d657684353ff21e6b425e596727ed60db8c91d Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 16:13:33 +0100 Subject: [PATCH 092/104] Update multi_gpu.py --- cscs-checks/cuda/multi_gpu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index 4374bd8dc6..02c6fb7ac0 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -55,7 +55,8 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { -# FIXME: reference values for Arolla and Tsa need to be updated (sanity check fails if they are not defined) + # FIXME: reference values for Arolla and Tsa need to be updated + # (sanity check fails if they are not defined) 'arolla:cn:h2d': (7583, -0.1, None, 'MB/s'), 'arolla:cn:d2h': (7584, -0.1, None, 'MB/s'), 'arolla:cn:d2d': (137408, -0.1, None, 'MB/s'), From 2c30d67a28ed6fbca1071825c8af984e5ab456b0 Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 16:14:06 +0100 Subject: [PATCH 093/104] Update collectives_halo.py --- cscs-checks/mch/collectives_halo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index e39502ff4f..da22f93bd0 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -77,7 +77,7 @@ def __init__(self, variant, bench_reference): 'nocomm': 0.0171947, 'nocomp': 0.0137893, 'default': 0.0138493 - }, + } } if self.current_system.name == 'dom': From a4662a3647d28376939d201b83591a647e38217d Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 16:14:31 +0100 Subject: [PATCH 094/104] Update cuda_stress_test.py --- cscs-checks/mch/cuda_stress_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/mch/cuda_stress_test.py b/cscs-checks/mch/cuda_stress_test.py index 06f1c37051..a7c90ca127 100644 --- 
a/cscs-checks/mch/cuda_stress_test.py +++ b/cscs-checks/mch/cuda_stress_test.py @@ -37,7 +37,7 @@ def __init__(self): }, 'kesch:cn': { 'time': (2.25, None, 0.05) - }, + } } self.tags = {'production', 'mch', 'craype'} self.maintainers = ['MKr', 'AJ'] From 9159cf6d35ac79cb01b6bc7a1fd703c89961726f Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 16:15:41 +0100 Subject: [PATCH 095/104] Update stream.py --- cscs-checks/microbenchmarks/stream/stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/stream/stream.py b/cscs-checks/microbenchmarks/stream/stream.py index e452b63058..6a96315828 100644 --- a/cscs-checks/microbenchmarks/stream/stream.py +++ b/cscs-checks/microbenchmarks/stream/stream.py @@ -92,7 +92,6 @@ def __init__(self): 'dom:mc': {'triad': (87500, -0.05, None, 'MB/s')}, 'kesch:cn': {'triad': (47000, -0.05, None, 'MB/s')}, 'kesch:pn': {'triad': (84400, -0.05, None, 'MB/s')}, -# 'leone:normal': {'triad': (44767.0, -0.05, None, 'MB/s')}, }, 'PrgEnv-intel': { 'daint:gpu': {'triad': (59500, -0.05, None, 'MB/s')}, From 352b8a13333a1b9256d70a6cfb11d103fe90bc4d Mon Sep 17 00:00:00 2001 From: lucamar Date: Thu, 13 Feb 2020 16:17:48 +0100 Subject: [PATCH 096/104] Update mpi.py --- cscs-checks/prgenv/mpi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cscs-checks/prgenv/mpi.py b/cscs-checks/prgenv/mpi.py index 4ddb23b47c..3460fdfff1 100644 --- a/cscs-checks/prgenv/mpi.py +++ b/cscs-checks/prgenv/mpi.py @@ -67,7 +67,6 @@ def __init__(self, required_thread): @rfm.simple_test class MpiHelloTest(rfm.RegressionTest): def __init__(self): - super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn', 'kesch:pn', 'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'] From 5fe8be581d7e9d23e869ee50bf1d9787696d9eec Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 13 Feb 2020 17:42:21 +0100 Subject: [PATCH 097/104] Minor code style fixes --- cscs-checks/cuda/multi_gpu.py | 4 ++-- 
cscs-checks/mch/automatic_arrays_acc.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cscs-checks/cuda/multi_gpu.py b/cscs-checks/cuda/multi_gpu.py index 02c6fb7ac0..c7fe4510c7 100644 --- a/cscs-checks/cuda/multi_gpu.py +++ b/cscs-checks/cuda/multi_gpu.py @@ -55,8 +55,8 @@ def __init__(self): self.perf_patterns = {} self.reference = {} self.__bwref = { - # FIXME: reference values for Arolla and Tsa need to be updated - # (sanity check fails if they are not defined) + # FIXME: reference values for Arolla and Tsa need to be updated + # (sanity check fails if they are not defined) 'arolla:cn:h2d': (7583, -0.1, None, 'MB/s'), 'arolla:cn:d2h': (7584, -0.1, None, 'MB/s'), 'arolla:cn:d2d': (137408, -0.1, None, 'MB/s'), diff --git a/cscs-checks/mch/automatic_arrays_acc.py b/cscs-checks/mch/automatic_arrays_acc.py index bf1557e1cc..d8e6b7f38e 100644 --- a/cscs-checks/mch/automatic_arrays_acc.py +++ b/cscs-checks/mch/automatic_arrays_acc.py @@ -21,6 +21,7 @@ def __init__(self): } elif self.current_system.name in ['arolla', 'tsa']: self.exclusive_access = True + # This tets requires an MPI compiler, although it uses a single task self.num_tasks = 1 self.num_gpus_per_node = 1 @@ -33,7 +34,6 @@ def __init__(self): 'time': sn.extractsingle(r'Timing:\s+(?P