From 9cb242172ef26dd9535b899a503feb638848d165 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 15 Apr 2019 16:49:17 +0200 Subject: [PATCH 1/3] flexible hpcg gpu check --- .../microbenchmarks/hpcg/hpcg_benchmark.py | 62 ++++++++++++ .../microbenchmarks/hpcg/hpcg_check.py | 96 ------------------- 2 files changed, 62 insertions(+), 96 deletions(-) delete mode 100644 cscs-checks/microbenchmarks/hpcg/hpcg_check.py diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index f2a70aaebe..04e02abbe8 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -163,3 +163,65 @@ def setup(self, partition, environ, **job_opts): ]) super().setup(partition, environ, **job_opts) + + +@rfm.simple_test +class HPCG_GPUCheck(rfm.RunOnlyRegressionTest): + def __init__(self): + super().__init__() + self.maintainers = ['SK', 'VK'] + self.descr = 'HPCG benchmark on GPUs' + self.sourcesdir = os.path.join(self.current_system.resourcesdir, + 'HPCG') + + # there's no binary with support for CUDA 10 yet + self.valid_systems = ['daint:gpu'] + self.valid_prog_environs = ['PrgEnv-gnu'] + self.modules = ['craype-accel-nvidia60', 'craype-hugepages8M'] + self.executable = 'xhpcg_gpu_3.1' + self.pre_run = ['chmod +x %s' % self.executable] + self.num_tasks = 0 + self.num_tasks_per_node = 1 + self.num_cpus_per_task = 12 + self.variables = { + 'PMI_NO_FORK': '1', + 'MPICH_USE_DMAPP_COLL': '1', + 'OMP_SCHEDULE': 'static', + 'OMP_NUM_THREADS': str(self.num_cpus_per_task), + 'HUGETLB_VERBOSE': '0', + 'HUGETLB_DEFAULT_PAGE_SIZE': '8M', + } + + self.output_file = sn.getitem(sn.glob('*.yaml'), 0) + + self.reference = { + 'daint:gpu': { + 'gflops': (94.7, -0.1, None, 'Gflop/s') + }, + 'dom:gpu': { + 'gflops': (94.7, -0.1, None, 'Gflop/s') + }, + } + + @property + @sn.sanity_function + def num_tasks_assigned(self): + return self.job.num_tasks + + def setup(self, partition, environ, **job_opts): + + num_nodes = self.num_tasks_assigned / self.num_tasks_per_node + self.perf_patterns = { + 'gflops': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P\S+)', + self.output_file, 'perf', float) / num_nodes + } + + self.sanity_patterns = sn.all([ + sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', self.output_file))), + sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) + ]) + + super().setup(partition, environ, **job_opts) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_check.py b/cscs-checks/microbenchmarks/hpcg/hpcg_check.py deleted file mode 100644 index 89f7809600..0000000000 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_check.py +++ /dev/null @@ -1,96 +0,0 @@ -import os - -import reframe as rfm -import reframe.utility.sanity as sn - - -@rfm.simple_test -class HPCG_GPUCheck(rfm.RunOnlyRegressionTest): - def __init__(self): - super().__init__() - self.maintainers = ['VK'] - self.descr = 'HPCG check' - self.sourcesdir = os.path.join(self.current_system.resourcesdir, - 'HPCG') - - self.valid_systems = ['daint:gpu'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['craype-accel-nvidia60', 'craype-hugepages8M'] - self.executable = 'xhpcg_gpu_3.1' - self.num_tasks = 5304 - self.num_tasks_per_node = 1 - self.num_cpus_per_task = 12 - self.variables = { - 'PMI_NO_FORK': '1', - 'MPICH_USE_DMAPP_COLL': '1', - 'OMP_SCHEDULE': 'static', - 'OMP_NUM_THREADS': str(self.num_cpus_per_task), - 'HUGETLB_VERBOSE': '0', - 'HUGETLB_DEFAULT_PAGE_SIZE': '8M', - } - - output_file = sn.getitem(sn.glob('*.yaml'), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) - self.reference = { - 'daint:gpu': { - 'perf': (476744, -0.10, None) - }, - } - - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P\S+)', output_file, 'perf', float) - } - - -# FIXME: This test is obsolete; it is kept only for reference -@rfm.parameterized_test([2], [4], [6], [8]) -class HPCGMonchAcceptanceCheck(rfm.RegressionTest): - def __init__(self, num_tasks): - super().__init__() - self.tags = {'monch_acceptance'} - self.descr = 'HPCG monch acceptance check' - self.maintainers = ['VK'] - - self.valid_systems = ['monch:compute'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.sourcesdir = os.path.join(self.current_system.resourcesdir, - 'HPCG-CPU') - self.executable = './bin/xhpcg' - self.num_tasks = num_tasks - self.num_tasks_per_node = 1 - self.num_cpus_per_task = 20 - self.variables = { - 'MV2_ENABLE_AFFINITY': '0', - 'OMP_NUM_THREADS': str(self.num_cpus_per_task), - } - - self.prebuild_cmd = ['. configure MPI_GCC_OMP'] - output_file = sn.getitem(sn.glob('HPCG-Benchmark_*.txt'), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) - reference_by_nodes = { - 2: { - 'perf': (2.20716, -0.10, None), - }, - 4: { - 'perf': (4.28179, -0.10, None), - }, - 6: { - 'perf': (6.18806, -0.10, None), - }, - 8: { - 'perf': (8.16107, -0.10, None), - }, - } - self.reference = { - 'monch:compute': reference_by_nodes[num_tasks] - } - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of=\s*' - r'(?P\S+)', output_file, 'perf', float) - } - From 7e84bd1d00b506f9e5f355c8401fb629b3a66e5b Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 15 Apr 2019 16:57:14 +0200 Subject: [PATCH 2/3] pep8 --- cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index 04e02abbe8..152e6e6cec 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -212,10 +212,10 @@ def setup(self, partition, environ, **job_opts): num_nodes = self.num_tasks_assigned / self.num_tasks_per_node self.perf_patterns = { - 'gflops': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P\S+)', - self.output_file, 'perf', float) / num_nodes + 'gflops': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P\S+)', + self.output_file, 'perf', float) / num_nodes } self.sanity_patterns = sn.all([ @@ -224,4 +224,4 @@ def setup(self, partition, environ, **job_opts): sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) ]) - super().setup(partition, environ, **job_opts) + super().setup(partition, environ, **job_opts) From 47240f333e1b7da29951ec85830850727fad8f5e Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Thu, 18 Apr 2019 15:19:09 +0200 Subject: [PATCH 3/3] removed unnecessary setup method --- cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index 152e6e6cec..106d24c53f 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -1,3 +1,4 @@ +import os import reframe as rfm import reframe.utility.sanity as sn @@ -203,13 +204,6 @@ def __init__(self): }, } - @property - @sn.sanity_function - def num_tasks_assigned(self): - return self.job.num_tasks - - def setup(self, partition, environ, **job_opts): - num_nodes = self.num_tasks_assigned / self.num_tasks_per_node self.perf_patterns = { 'gflops': sn.extractsingle( @@ -224,4 +218,7 @@ def setup(self, partition, environ, **job_opts): sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) ]) - super().setup(partition, environ, **job_opts) + @property + @sn.sanity_function + def num_tasks_assigned(self): + return self.job.num_tasks