From 0c118d98441748f2b33c855f3ba7840f4ce8802a Mon Sep 17 00:00:00 2001 From: jgp Date: Wed, 23 Oct 2019 16:20:45 +0200 Subject: [PATCH 1/3] removing gdb4hpc from prod + fix for mpip --- cscs-checks/tools/profiling_and_debugging/gdb4hpc.py | 2 +- cscs-checks/tools/profiling_and_debugging/ipm_mpi_omp.py | 2 +- cscs-checks/tools/profiling_and_debugging/mpip_mpi_omp.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gdb4hpc.py b/cscs-checks/tools/profiling_and_debugging/gdb4hpc.py index 5353355a74..7acfabdb76 100644 --- a/cscs-checks/tools/profiling_and_debugging/gdb4hpc.py +++ b/cscs-checks/tools/profiling_and_debugging/gdb4hpc.py @@ -41,7 +41,7 @@ def __init__(self, lang, extension): 'OMP_PROC_BIND': 'true', } self.maintainers = ['JG'] - self.tags = {'production', 'craype'} + self.tags = {'craype'} # gdb4hpc has its own way to launch a debugging job and needs an # additional jobscript. The reframe jobscript can be copied for that # purpose, by adding the cray_debug_ comments around the job launch diff --git a/cscs-checks/tools/profiling_and_debugging/ipm_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/ipm_mpi_omp.py index ec879a5505..ca9afeede2 100644 --- a/cscs-checks/tools/profiling_and_debugging/ipm_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/ipm_mpi_omp.py @@ -22,7 +22,7 @@ def __init__(self, lang): 'PrgEnv-pgi': ['-O2', '-g', '-mp'] } ipm_ver = '2.0.6' - tc_ver = '19.09' + tc_ver = '19.10' self.ipm_modules = { 'PrgEnv-gnu': ['IPM/%s-CrayGNU-%s' % (ipm_ver, tc_ver)], 'PrgEnv-cray': ['IPM/%s-CrayCCE-%s' % (ipm_ver, tc_ver)], diff --git a/cscs-checks/tools/profiling_and_debugging/mpip_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/mpip_mpi_omp.py index 9527876394..8150096872 100644 --- a/cscs-checks/tools/profiling_and_debugging/mpip_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/mpip_mpi_omp.py @@ -22,6 +22,8 @@ def __init__(self, lang): 'PrgEnv-intel': 
['-g', '-qopenmp', '-O2'], 'PrgEnv-pgi': ['-g', '-mp', '-O2'] } + # unload xalt to avoid _buffer_decode error: + self.prebuild_cmd = ['module rm 2.7.10 ;module list -t'] self.modules = ['mpiP'] self.build_system = 'Make' self.num_iterations = 500 @@ -37,6 +39,8 @@ def __init__(self, lang): if lang == 'F90': self.build_system.max_concurrency = 1 + # unload xalt to avoid _buffer_decode error: + self.pre_run = ['module rm 2.7.10'] self.num_tasks = 96 self.num_tasks_per_node = 24 self.num_cpus_per_task = 1 @@ -66,7 +70,7 @@ def __init__(self, lang): # check performance report: sn.assert_found('Single collector task', self.rpt_file), sn.assert_eq(sn.extractsingle( - r'^.*_jacobi.*\s+(?P<mpi_isendline>\d+)\s.*Isend', + r'^\s+\d\s+\d\s_jacobi.{4}\s+(?P<mpi_isendline>\d+)\s.*Isend', self.rpt_file, 'mpi_isendline'), mpi_isendline), ]) self.maintainers = ['JG'] From 53d68714e9166705e20ffeccf416cbb687f7073a Mon Sep 17 00:00:00 2001 From: jgp Date: Wed, 23 Oct 2019 16:45:43 +0200 Subject: [PATCH 2/3] cudagdb --- .../tools/profiling_and_debugging/cuda_gdb.py | 18 ++++-------------- .../src/Cuda/.in.cudagdb | 3 +++ 2 files changed, 7 insertions(+), 14 deletions(-) create mode 100644 cscs-checks/tools/profiling_and_debugging/src/Cuda/.in.cudagdb diff --git a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py index dcdffe8c73..8c3d95f651 100644 --- a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py +++ b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py @@ -14,7 +14,8 @@ def __init__(self): self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 self.sourcesdir = 'src/Cuda' - self.executable = 'cuda-gdb cuda_gdb_check' + self.executable = 'cuda-gdb' + self.executable_opts = ['-x .in.cudagdb ./cuda_gdb_check'] if self.current_system.name == 'kesch': self.exclusive_access = True self.modules = ['cudatoolkit/8.0.61'] @@ -35,24 +36,13 @@ def __init__(self): '-lcudart', '-lm'] self.sanity_patterns = sn.all([ - sn.assert_found(r'^\(cuda-gdb\) 
Breakpoint 1 at .*: file ', - self.stdout), + sn.assert_found(r'^Breakpoint 1 at .*: file ', self.stdout), sn.assert_found(r'_jacobi-cuda-kernel.cu, line 59\.', self.stdout), - sn.assert_found(r'^\(cuda-gdb\) Starting program:', self.stdout), sn.assert_found(r'^\(cuda-gdb\) quit', self.stdout), sn.assert_lt(sn.abs(sn.extractsingle( - r'^\(cuda-gdb\)\s+\$1\s+=\s+(?P<result>\S+)', self.stdout, + r'\$1\s+=\s+(?P<result>\S+)', self.stdout, 'result', float)), 1e-5) ]) self.maintainers = ['MK', 'JG'] self.tags = {'production', 'craype'} - - def setup(self, partition, environ, **job_opts): - super().setup(partition, environ, **job_opts) - self.job.launcher = LauncherWrapper( - self.job.launcher, 'printf', [ - r"'break _jacobi-cuda-kernel.cu:59\n", - r"run\n", r"print *residue_d'", ' | ' - ] - ) diff --git a/cscs-checks/tools/profiling_and_debugging/src/Cuda/.in.cudagdb b/cscs-checks/tools/profiling_and_debugging/src/Cuda/.in.cudagdb new file mode 100644 index 0000000000..77acae68a6 --- /dev/null +++ b/cscs-checks/tools/profiling_and_debugging/src/Cuda/.in.cudagdb @@ -0,0 +1,3 @@ +break _jacobi-cuda-kernel.cu:59 +run +print *residue_d From 44e3021a7c59f95acd1514f44f5a1bb863707785 Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 24 Oct 2019 11:45:15 +0200 Subject: [PATCH 3/3] fix --- cscs-checks/tools/profiling_and_debugging/cuda_gdb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py index 8c3d95f651..b7a3f467ce 100644 --- a/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py +++ b/cscs-checks/tools/profiling_and_debugging/cuda_gdb.py @@ -16,6 +16,8 @@ def __init__(self): self.sourcesdir = 'src/Cuda' self.executable = 'cuda-gdb' self.executable_opts = ['-x .in.cudagdb ./cuda_gdb_check'] + # unload xalt to avoid runtime error: + self.pre_run = ['unset LD_PRELOAD'] if self.current_system.name == 'kesch': self.exclusive_access = True self.modules = ['cudatoolkit/8.0.61']