From dba8191b42bc4f7eb717f44f857021a7962cd79b Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 15:05:35 +0100 Subject: [PATCH 01/11] gperftools --- .../gperftools_mpi_omp.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py new file mode 100644 index 0000000000..41884652f5 --- /dev/null +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -0,0 +1,102 @@ +import os + +import reframe as rfm +import reframe.utility.sanity as sn + + +@rfm.required_version('>=2.14') +@rfm.parameterized_test(*([lang] for lang in ['Cpp', 'F90'])) +class GperftoolsMpiCheck(rfm.RegressionTest): + '''This test checks gperftools: + https://gperftools.github.io/gperftools/cpuprofile.html + ''' + def __init__(self, lang): + super().__init__() + self.valid_systems = ['daint:gpu', 'daint:mc', + 'dom:gpu', 'dom:mc'] + self.valid_prog_environs = ['PrgEnv-gnu'] + self.prgenv_flags = { + 'PrgEnv-cray': ['-g', '-h nomessage=3140', '-homp', '-O2'], + 'PrgEnv-gnu': ['-g', '-fopenmp', '-O2'], + 'PrgEnv-intel': ['-g', '-openmp', '-O2'], + 'PrgEnv-pgi': ['-g', '-mp', '-O2'] + } + # external pprof is needed to avoid "stack trace depth >= 2**32" errors + self.modules = ['gperftools', 'graphviz', 'pprof'] + self.build_system = 'Make' + self.iterations = 500 + self.build_system.cppflags = [ + '-DUSE_MPI', + '-D_CSCS_ITMAX=%s' % self.iterations, + ] + if lang == 'Cpp': + self.sourcesdir = os.path.join('src', 'C++') + else: + self.sourcesdir = os.path.join('src', lang) + + if lang == 'F90': + self.build_system.max_concurrency = 1 + + self.num_tasks = 96 + self.num_tasks_per_node = 24 + self.num_cpus_per_task = 1 + self.num_tasks_per_core = 2 + self.num_iterations = self.iterations + self.split_file = '0.sh' + self.executable = self.split_file + self.exe = './jacobi' + self.rpt_file = 'gperftools.rpt' + self.rpt_file_txt = '%s.txt' % self.rpt_file + self.rpt_file_pdf = '%s.pdf' % self.rpt_file + self.rpt_file_doc = '%s.doc' % self.rpt_file + self.variables = { + 'OMP_NUM_THREADS': str(self.num_cpus_per_task), + 'ITERATIONS': str(self.num_iterations), + 'OMP_PROC_BIND': 'true', + 'CRAYPE_LINK_TYPE': 'dynamic', + } + self.pre_run = [ + 'echo \'#!/bin/bash\' &> %s' + % self.split_file, + 'echo \'CPUPROFILE=`hostname`.$SLURM_PROCID\' %s >> %s' + % (self.exe, self.split_file), + 'chmod u+x %s' + % (self.split_file), + ] + self.post_run = [ + 'pprof --text --lines %s %s &> %s' + % (self.exe, '*.0', self.rpt_file_txt), + 'pprof --pdf %s %s &> %s' + % (self.exe, '*.0', self.rpt_file_pdf), + 'file %s &> %s' + % (self.rpt_file_pdf, self.rpt_file_doc) + ] + self.sanity_patterns = sn.all([ + # check job status: + sn.assert_found('SUCCESS', self.stdout), + # check txt report: + sn.assert_found(r'MPI_Allreduce', self.rpt_file_txt), + # check pdf report: + sn.assert_found('PDF document', self.rpt_file_doc), + ]) + self.perf_patterns = { + 'hotspot1': sn.extractsingle( + r'^\s+\d+ms\s+(?P\d+.\d+)%.*_jacobi.\w+:\d+', + self.rpt_file_txt, 'flatPercentage', float) + } + self.reference = { + 'daint:mc': { 'hotspot1': (26.0, -0.5, 0.5, '%') }, + 'daint:gpu': { 'hotspot1': (46.0, -0.5, 0.5, '%') }, + 'dom:mc': { 'hotspot1': (26.0, -0.5, 0.5, '%') }, + 'dom:gpu': { 'hotspot1': (46.0, -0.5, 0.5, '%') }, + } + self.maintainers = ['JG'] + self.tags = {'production'} + + def setup(self, environ, partition, **job_opts): + super().setup(environ, partition, **job_opts) + flags = self.prgenv_flags[self.current_environ.name] + self.build_system.cflags = flags + self.build_system.cxxflags = flags + self.build_system.fflags = flags + self.build_system.ldflags = flags + ['`pkg-config --libs libprofiler`'] From 975fa9e344cc4ad3c78290ddb2c10145c18abca2 Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 15:12:59 +0100 Subject: [PATCH 02/11] typo --- .../tools/profiling_and_debugging/gperftools_mpi_omp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 41884652f5..a7bf3b6e7b 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -85,10 +85,10 @@ def __init__(self, lang): self.rpt_file_txt, 'flatPercentage', float) } self.reference = { - 'daint:mc': { 'hotspot1': (26.0, -0.5, 0.5, '%') }, - 'daint:gpu': { 'hotspot1': (46.0, -0.5, 0.5, '%') }, - 'dom:mc': { 'hotspot1': (26.0, -0.5, 0.5, '%') }, - 'dom:gpu': { 'hotspot1': (46.0, -0.5, 0.5, '%') }, + 'daint:mc': {'hotspot1': (26.0, -0.5, 0.5, '%')}, + 'daint:gpu': {'hotspot1': (46.0, -0.5, 0.5, '%')}, + 'dom:mc': {'hotspot1': (26.0, -0.5, 0.5, '%')}, + 'dom:gpu': {'hotspot1': (46.0, -0.5, 0.5, '%')}, } self.maintainers = ['JG'] self.tags = {'production'} From a492fed67641225586e6f023be3ef581242c8133 Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 15:36:12 +0100 Subject: [PATCH 03/11] loosening perf limits --- .../tools/profiling_and_debugging/gperftools_mpi_omp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index a7bf3b6e7b..05144a25e5 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -85,8 +85,8 @@ def __init__(self, lang): self.rpt_file_txt, 'flatPercentage', float) } self.reference = { - 'daint:mc': {'hotspot1': (26.0, -0.5, 0.5, '%')}, - 'daint:gpu': {'hotspot1': (46.0, -0.5, 0.5, '%')}, + 'daint:mc': {'hotspot1': (26.0, -0.6, 0.6, '%')}, + 'daint:gpu': {'hotspot1': (46.0, -0.6, 0.6, '%')}, 'dom:mc': {'hotspot1': (26.0, -0.5, 0.5, '%')}, 'dom:gpu': {'hotspot1': (46.0, -0.5, 0.5, '%')}, } From 057fbbf6e43221347e6b4abfa977f54cb66ecead Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 15:51:46 +0100 Subject: [PATCH 04/11] loosening perf limits --- .../tools/profiling_and_debugging/gperftools_mpi_omp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 05144a25e5..f67f14b787 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -85,10 +85,10 @@ def __init__(self, lang): self.rpt_file_txt, 'flatPercentage', float) } self.reference = { - 'daint:mc': {'hotspot1': (26.0, -0.6, 0.6, '%')}, - 'daint:gpu': {'hotspot1': (46.0, -0.6, 0.6, '%')}, - 'dom:mc': {'hotspot1': (26.0, -0.5, 0.5, '%')}, - 'dom:gpu': {'hotspot1': (46.0, -0.5, 0.5, '%')}, + 'daint:mc': {'hotspot1': (26.0, -0.6, None, '%')}, + 'daint:gpu': {'hotspot1': (46.0, -0.6, None, '%')}, + 'dom:mc': {'hotspot1': (26.0, -0.5, None, '%')}, + 'dom:gpu': {'hotspot1': (46.0, -0.5, None, '%')}, } self.maintainers = ['JG'] self.tags = {'production'} From 1e237a741191b1ea038be21b8e76b0a50b93500a Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 15:57:39 +0100 Subject: [PATCH 05/11] pullrequestreview-217290604 --- .../profiling_and_debugging/gperftools_mpi_omp.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index f67f14b787..5b769fe300 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -56,26 +56,22 @@ def __init__(self, lang): 'CRAYPE_LINK_TYPE': 'dynamic', } self.pre_run = [ - 'echo \'#!/bin/bash\' &> %s' - % self.split_file, + 'echo \'#!/bin/bash\' &> %s' % self.split_file, 'echo \'CPUPROFILE=`hostname`.$SLURM_PROCID\' %s >> %s' % (self.exe, self.split_file), - 'chmod u+x %s' - % (self.split_file), + 'chmod u+x %s' % (self.split_file), ] self.post_run = [ 'pprof --text --lines %s %s &> %s' % (self.exe, '*.0', self.rpt_file_txt), - 'pprof --pdf %s %s &> %s' - % (self.exe, '*.0', self.rpt_file_pdf), - 'file %s &> %s' - % (self.rpt_file_pdf, self.rpt_file_doc) + 'pprof --pdf %s %s &> %s' % (self.exe, '*.0', self.rpt_file_pdf), + 'file %s &> %s' % (self.rpt_file_pdf, self.rpt_file_doc) ] self.sanity_patterns = sn.all([ # check job status: sn.assert_found('SUCCESS', self.stdout), # check txt report: - sn.assert_found(r'MPI_Allreduce', self.rpt_file_txt), + sn.assert_found('MPI_Allreduce', self.rpt_file_txt), # check pdf report: sn.assert_found('PDF document', self.rpt_file_doc), ]) From 887885307fb5ee9007850643f648605fd0b0c879 Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 16:34:38 +0100 Subject: [PATCH 06/11] discussion_r267816415 --- .../profiling_and_debugging/gperftools_mpi_omp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 5b769fe300..614fc190ea 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -57,15 +57,15 @@ def __init__(self, lang): } self.pre_run = [ 'echo \'#!/bin/bash\' &> %s' % self.split_file, - 'echo \'CPUPROFILE=`hostname`.$SLURM_PROCID\' %s >> %s' - % (self.exe, self.split_file), + 'echo \'CPUPROFILE=`hostname`.$SLURM_PROCID\' %s >> %s' % + (self.exe, self.split_file), 'chmod u+x %s' % (self.split_file), ] self.post_run = [ - 'pprof --text --lines %s %s &> %s' - % (self.exe, '*.0', self.rpt_file_txt), + 'pprof --text --lines %s %s &> %s' % + (self.exe, '*.0', self.rpt_file_txt), 'pprof --pdf %s %s &> %s' % (self.exe, '*.0', self.rpt_file_pdf), - 'file %s &> %s' % (self.rpt_file_pdf, self.rpt_file_doc) + 'file %s &> %s' % (self.rpt_file_pdf, self.rpt_file_doc) ] self.sanity_patterns = sn.all([ # check job status: From 18cdd5ccfefc09cb8c7473ea27d01d82080c7c4e Mon Sep 17 00:00:00 2001 From: jgp Date: Thu, 21 Mar 2019 16:40:17 +0100 Subject: [PATCH 07/11] discussion_r267817697 --- .../gperftools_mpi_omp.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 614fc190ea..3fba6cf59a 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -18,7 +18,7 @@ def __init__(self, lang): self.prgenv_flags = { 'PrgEnv-cray': ['-g', '-h nomessage=3140', '-homp', '-O2'], 'PrgEnv-gnu': ['-g', '-fopenmp', '-O2'], - 'PrgEnv-intel': ['-g', '-openmp', '-O2'], + 'PrgEnv-intel': ['-g', '-qopenmp', '-O2'], 'PrgEnv-pgi': ['-g', '-mp', '-O2'] } # external pprof is needed to avoid "stack trace depth >= 2**32" errors @@ -72,20 +72,13 @@ def __init__(self, lang): sn.assert_found('SUCCESS', self.stdout), # check txt report: sn.assert_found('MPI_Allreduce', self.rpt_file_txt), + sn.extractsingle( + r'^\s+\d+ms\s+(?P\d+.\d+)%.*_jacobi.\w+:\d+', + self.rpt_file_txt, 'flatPercentage', float + ), # check pdf report: sn.assert_found('PDF document', self.rpt_file_doc), ]) - self.perf_patterns = { - 'hotspot1': sn.extractsingle( - r'^\s+\d+ms\s+(?P\d+.\d+)%.*_jacobi.\w+:\d+', - self.rpt_file_txt, 'flatPercentage', float) - } - self.reference = { - 'daint:mc': {'hotspot1': (26.0, -0.6, None, '%')}, - 'daint:gpu': {'hotspot1': (46.0, -0.6, None, '%')}, - 'dom:mc': {'hotspot1': (26.0, -0.5, None, '%')}, - 'dom:gpu': {'hotspot1': (46.0, -0.5, None, '%')}, - } self.maintainers = ['JG'] self.tags = {'production'} From a4d85e788c0baa2fcc77aedf7acc9218b228ba58 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 21 Mar 2019 16:54:56 +0100 Subject: [PATCH 08/11] Use assert_found instead of extract_single --- .../tools/profiling_and_debugging/gperftools_mpi_omp.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 3fba6cf59a..cdadfa69b2 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -72,10 +72,8 @@ def __init__(self, lang): sn.assert_found('SUCCESS', self.stdout), # check txt report: sn.assert_found('MPI_Allreduce', self.rpt_file_txt), - sn.extractsingle( - r'^\s+\d+ms\s+(?P\d+.\d+)%.*_jacobi.\w+:\d+', - self.rpt_file_txt, 'flatPercentage', float - ), + sn.assert_found( + r'^\s+\d+ms\s+\d+.\d+%.*_jacobi.\w+:\d+', self.rpt_file_txt), # check pdf report: sn.assert_found('PDF document', self.rpt_file_doc), ]) From 79230e1e616d5795e537e43c133b1bf2b983c521 Mon Sep 17 00:00:00 2001 From: jgp Date: Wed, 27 Mar 2019 19:37:01 +0100 Subject: [PATCH 09/11] pullrequestreview-219552365 --- cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 3fba6cf59a..8bfb982677 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -5,7 +5,7 @@ @rfm.required_version('>=2.14') -@rfm.parameterized_test(*([lang] for lang in ['Cpp', 'F90'])) +@rfm.parameterized_test(['Cpp', 'F90']) class GperftoolsMpiCheck(rfm.RegressionTest): '''This test checks gperftools: https://gperftools.github.io/gperftools/cpuprofile.html From 2b0b76ff89bfa60b2b89769648616a4ffc9806b0 Mon Sep 17 00:00:00 2001 From: jgp Date: Wed, 27 Mar 2019 20:39:49 +0100 Subject: [PATCH 10/11] lang --- cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 0b1102d616..000640bbc1 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -11,7 +11,7 @@ class GperftoolsMpiCheck(rfm.RegressionTest): https://gperftools.github.io/gperftools/cpuprofile.html ''' def __init__(self, lang): - super().__init__() + super().__init__(lang) self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'] self.valid_prog_environs = ['PrgEnv-gnu'] From 04f6da3773cb4fc22315d3ddcef416f8430e6cc3 Mon Sep 17 00:00:00 2001 From: jgp Date: Wed, 27 Mar 2019 21:27:05 +0100 Subject: [PATCH 11/11] typo --- .../tools/profiling_and_debugging/gperftools_mpi_omp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py index 000640bbc1..c2d4c3e7c2 100644 --- a/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py +++ b/cscs-checks/tools/profiling_and_debugging/gperftools_mpi_omp.py @@ -5,13 +5,13 @@ @rfm.required_version('>=2.14') -@rfm.parameterized_test(['Cpp', 'F90']) +@rfm.parameterized_test(['Cpp'], ['F90']) class GperftoolsMpiCheck(rfm.RegressionTest): '''This test checks gperftools: https://gperftools.github.io/gperftools/cpuprofile.html ''' def __init__(self, lang): - super().__init__(lang) + super().__init__() self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'] self.valid_prog_environs = ['PrgEnv-gnu']