From 9f2772e9dd048fb1a65a0519a59120f57dbe4852 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Fri, 23 Nov 2018 14:25:45 +0100 Subject: [PATCH 01/16] reference implementation of the HPCG benchmark --- cscs-checks/apps/hpcg_check.py | 109 +++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 cscs-checks/apps/hpcg_check.py diff --git a/cscs-checks/apps/hpcg_check.py b/cscs-checks/apps/hpcg_check.py new file mode 100644 index 0000000000..6c22dbe819 --- /dev/null +++ b/cscs-checks/apps/hpcg_check.py @@ -0,0 +1,109 @@ +import os + +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheck(rfm.RegressionTest): + def __init__(self, **kwargs): + #super().__init__('hpcg_check', + # os.path.dirname(__file__), **kwargs) + super().__init__() + + self.descr = 'HPCG check' + self.valid_systems = ['daint:gpu'] + self.valid_prog_environs = ['PrgEnv-gnu'] + #self.modules = [] + #self.sourcesdir = os.path.join(self.current_system.resourcesdir, + # 'HPCG') + self.build_system = 'Make' + self.build_system.options = ['arch=MPI_GCC_OMP'] + self.sourcesdir = None + self.sourcepath = 'hpcg' + self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] + + self.executable = 'hpcg/bin/xhpcg' + #self.prerun = ['chmod +x %s' % self.executable] + output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.num_tasks = 12 + self.num_tasks_per_node = 12 + self.num_cpus_per_task = 1 + self.variables = { + #'CXX' : 'CC', + #'PMI_NO_FORK': '1', + #'MPICH_USE_DMAPP_COLL': '1', + 'OMP_SCHEDULE': 'static', + 'OMP_NUM_THREADS': str(self.num_cpus_per_task), + #'HUGETLB_VERBOSE': '0', + #'HUGETLB_DEFAULT_PAGE_SIZE': '8M', + } + self.reference = { + 'daint:gpu': { + 'perf': (2.2, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of=\s*' + 
r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] + + +# FIXME: This test is obsolete; it is kept only for reference +#class HPCGMonchAcceptanceCheck(RegressionTest): +# def __init__(self, num_tasks, **kwargs): +# super().__init__('hpcg_check_%s_nodes' % num_tasks, +# os.path.dirname(__file__), **kwargs) +# self.descr = 'HPCG check' +# self.valid_systems = ['monch:compute'] +# self.valid_prog_environs = ['PrgEnv-gnu'] +# self.sourcesdir = os.path.join(self.current_system.resourcesdir, +# 'HPCG-CPU') +# self.executable = './bin/xhpcg' +# output_file = sn.getitem(sn.glob('HPCG-Benchmark_*.txt'), 0) +# self.sanity_patterns = sn.assert_eq(4, sn.count( +# sn.findall(r'PASSED', output_file))) +# self.num_tasks = num_tasks +# self.num_tasks_per_node = 1 +# self.num_cpus_per_task = 20 +# self.prebuild_cmd = ['./configure MPI_GCC_OMP'] +# self.variables = { +# 'MV2_ENABLE_AFFINITY': '0', +# 'OMP_NUM_THREADS': str(self.num_cpus_per_task), +# } +# reference_by_nodes = { +# 2: { +# 'perf': (2.20716, -0.10, None), +# }, +# 4: { +# 'perf': (4.28179, -0.10, None), +# }, +# 6: { +# 'perf': (6.18806, -0.10, None), +# }, +# 8: { +# 'perf': (8.16107, -0.10, None), +# }, +# } +# self.reference = { +# 'monch:compute': reference_by_nodes[num_tasks] +# } +# self.perf_patterns = { +# 'perf': sn.extractsingle( +# r'HPCG result is VALID with a GFLOP\/s rating of=\s*' +# r'(?P<perf>\S+)', output_file, 'perf', float) +# } +# self.tags = {'monch_acceptance'} +# self.maintainers = ['VK'] +# +# +#def _get_checks(**kwargs): +# return [HPCGCheck(**kwargs), +# HPCGMonchAcceptanceCheck(2, **kwargs), +# HPCGMonchAcceptanceCheck(4, **kwargs), +# HPCGMonchAcceptanceCheck(6, **kwargs), +# HPCGMonchAcceptanceCheck(8, **kwargs)] From 421477879cb946a216a2c84d4caa64f67e6a585e Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 26 Nov 2018 15:57:08 +0100 Subject: [PATCH 02/16] moved hpcg checks to analytics --- .../{apps => analytics/hpcg}/hpcg_check.py | 18 ++++++++++-------- 1 file
changed, 10 insertions(+), 8 deletions(-) rename cscs-checks/{apps => analytics/hpcg}/hpcg_check.py (91%) diff --git a/cscs-checks/apps/hpcg_check.py b/cscs-checks/analytics/hpcg/hpcg_check.py similarity index 91% rename from cscs-checks/apps/hpcg_check.py rename to cscs-checks/analytics/hpcg/hpcg_check.py index 6c22dbe819..d0ab69255c 100644 --- a/cscs-checks/apps/hpcg_check.py +++ b/cscs-checks/analytics/hpcg/hpcg_check.py @@ -11,9 +11,9 @@ def __init__(self, **kwargs): super().__init__() self.descr = 'HPCG check' - self.valid_systems = ['daint:gpu'] + self.valid_systems = ['daint:mc', 'daint:gpu'] self.valid_prog_environs = ['PrgEnv-gnu'] - #self.modules = [] + self.modules = ['craype-hugepages8M'] #self.sourcesdir = os.path.join(self.current_system.resourcesdir, # 'HPCG') self.build_system = 'Make' @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] self.executable = 'hpcg/bin/xhpcg' - #self.prerun = ['chmod +x %s' % self.executable] + self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) self.sanity_patterns = sn.assert_eq(4, sn.count( sn.findall(r'PASSED', output_file))) @@ -32,16 +32,18 @@ def __init__(self, **kwargs): self.num_cpus_per_task = 1 self.variables = { #'CXX' : 'CC', - #'PMI_NO_FORK': '1', - #'MPICH_USE_DMAPP_COLL': '1', - 'OMP_SCHEDULE': 'static', + #'OMP_SCHEDULE': 'static', 'OMP_NUM_THREADS': str(self.num_cpus_per_task), #'HUGETLB_VERBOSE': '0', - #'HUGETLB_DEFAULT_PAGE_SIZE': '8M', + #'PMI_NO_FORK': '1', + #'MPICH_USE_DMAPP_COLL': '1', } self.reference = { 'daint:gpu': { - 'perf': (2.2, -0.1, 0.1) + 'perf': (7.6, -0.1, 0.1) + }, + 'daint:mc': { + 'perf': (13.4, -0.1, 0.1) }, } From dbb892519aecb27afdc38edb7b5a628684835279 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 26 Nov 2018 18:10:45 +0100 Subject: [PATCH 03/16] finished mkl hpcg test --- cscs-checks/analytics/hpcg/hpcg_check_mkl.py 
| 64 ++++++++ cscs-checks/analytics/hpcg/src/Make.CrayXC | 149 +++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 cscs-checks/analytics/hpcg/hpcg_check_mkl.py create mode 100644 cscs-checks/analytics/hpcg/src/Make.CrayXC diff --git a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py new file mode 100644 index 0000000000..c5ae000c82 --- /dev/null +++ b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py @@ -0,0 +1,64 @@ +import os + +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheck(rfm.RegressionTest): + def __init__(self, **kwargs): + #super().__init__('hpcg_check', + # os.path.dirname(__file__), **kwargs) + super().__init__() + + self.descr = 'HPCG check' + self.valid_systems = ['daint:mc'] + self.valid_prog_environs = ['PrgEnv-intel'] + self.modules = ['craype-hugepages8M'] + #self.sourcesdir = os.path.join(self.current_system.resourcesdir, + # 'HPCG') + self.build_system = 'Make' + #self.build_system.options = ['arch=MPI_GCC_OMP'] + self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', + 'mv Make.CrayXC setup', + './configure CrayXC'] + + self.num_tasks_per_core = 2 + self.num_tasks_per_node = 4 + self.num_cpus_per_task = 18 + self.num_tasks = self.num_tasks_per_node * 1 + problem_size = 104 + + self.variables = { + 'HUGETLB_VERBOSE': '0', + 'MPICH_MAX_THREAD_SAFETY' : 'multiple', + 'MPICH_USE_DMAPP_COLL': '1', + 'PMI_NO_FORK': '1', + 'KMP_HW_SUBSET' : '9c,2t', + 'KMP_AFFINITY' : 'granularity=fine,compact' + } + + self.executable = 'bin/xhpcg_avx2' + self.executable_opts = ['--nx=%d' % problem_size, + '--ny=%d' % problem_size, + '--nz=%d' % problem_size, '-t2'] + output_file = sn.getitem(sn.glob('n%d-%dp-%dt-*.yaml' % + (problem_size, + self.num_tasks, + self.num_cpus_per_task)), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.reference = { + #'daint:gpu': { + # 'perf': (12, -0.1, 0.1) + #}, + 
'daint:mc': { + 'perf': (22.3, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] diff --git a/cscs-checks/analytics/hpcg/src/Make.CrayXC b/cscs-checks/analytics/hpcg/src/Make.CrayXC new file mode 100644 index 0000000000..20195a2c43 --- /dev/null +++ b/cscs-checks/analytics/hpcg/src/Make.CrayXC @@ -0,0 +1,149 @@ +#=============================================================================== +# Copyright 2014-2018 Intel Corporation All Rights Reserved. +# +# The source code, information and material ("Material") contained herein is +# owned by Intel Corporation or its suppliers or licensors, and title to such +# Material remains with Intel Corporation or its suppliers or licensors. The +# Material contains proprietary information of Intel or its suppliers and +# licensors. The Material is protected by worldwide copyright laws and treaty +# provisions. No part of the Material may be used, copied, reproduced, +# modified, published, uploaded, posted, transmitted, distributed or disclosed +# in any way without Intel's prior express written permission. No license under +# any patent, copyright or other intellectual property rights in the Material +# is granted to or conferred upon you, either expressly, by implication, +# inducement, estoppel or otherwise. Any license under such intellectual +# property rights must be express and approved by Intel in writing. +# +# Unless otherwise agreed by Intel in writing, you may not remove or alter this +# notice or any other notice embedded in Materials by Intel or Intel's +# suppliers or licensors in any way. +#=============================================================================== + +# -- High Performance Conjugate Gradient Benchmark (HPCG) +# HPCG - 2.1 - January 31, 2014 +# +# Michael A.
Heroux +# Scalable Algorithms Group, Computing Research Center +# Sandia National Laboratories, Albuquerque, NM +# +# Piotr Luszczek +# Jack Dongarra +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2013 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#****************************************************************************** + +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s -f +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - HPCG Directory Structure / HPCG library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = . +SRCdir = $(TOPdir)/src +INCdir = $(TOPdir)/src +BINdir = $(TOPdir)/bin +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -Wl,--whole-archive,-ldmapp,--no-whole-archive +# +# +# ---------------------------------------------------------------------- +# - HPCG includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPCG_INCLUDES = -I$(INCdir) -I$(INCdir)/$(arch) $(MPinc) +HPCG_LIBS = $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPCG_NO_MPI Define to disable MPI +# -DHPCG_NO_OPENMP Define to disable OPENMP +# -DHPCG_DEBUG Define to enable debugging output +# -DHPCG_DETAILED_DEBUG Define to enable very detailed debugging output +# +# By default HPCG will: +# *) Build with MPI enabled. +# *) Build with OpenMP enabled. +# *) Not generate debugging output. 
+# +HPCG_OPTS = +# +# ---------------------------------------------------------------------- +# +HPCG_DEFS = -DMPICH_IGNORE_CXX_SEEK $(HPCG_OPTS) $(HPCG_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CXX = CC +CXXFLAGS = -xCORE-AVX2 -qopenmp -std=c++11 $(HPCG_DEFS) +ifeq (yes, $(DBG)) + CXXFLAGS += -O0 -g -DHPCG_DEBUG +else + CXXFLAGS += -O3 -DNDEBUG +endif +# +LINKER = $(CXX) +MKL_LIB=$(MKLROOT)/lib/intel64 + +LINKFLAGS = -z relro -z now -Wl,-R'$$ORIGIN/lib/intel64' -liomp5 -L$(MKL_LIB) -liomp5 -static-intel -mkl +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- +xhpcg_suff = _avx2 From e1de3614f25dc9e63c6836d9bf6f189078648d2a Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 27 Nov 2018 16:43:43 +0100 Subject: [PATCH 04/16] merged ref and mkl implementation in one file --- cscs-checks/analytics/hpcg/hpcg_benchmark.py | 109 ++++++++++++++++++ cscs-checks/analytics/hpcg/hpcg_check.py | 111 ------------------- cscs-checks/analytics/hpcg/hpcg_check_mkl.py | 64 ----------- 3 files changed, 109 insertions(+), 175 deletions(-) create mode 100644 cscs-checks/analytics/hpcg/hpcg_benchmark.py delete mode 100644 cscs-checks/analytics/hpcg/hpcg_check.py delete mode 100644 cscs-checks/analytics/hpcg/hpcg_check_mkl.py diff --git a/cscs-checks/analytics/hpcg/hpcg_benchmark.py b/cscs-checks/analytics/hpcg/hpcg_benchmark.py new file mode 100644 index 0000000000..667c0276c2 --- /dev/null +++ b/cscs-checks/analytics/hpcg/hpcg_benchmark.py @@ -0,0 +1,109 @@ +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheckRef(rfm.RegressionTest): + def __init__(self, **kwargs): + super().__init__() + + self.descr = 'HPCG reference benchmark' + self.valid_systems = ['daint:mc', 
'daint:gpu', 'dom:gpu', 'dom:mc'] + self.valid_prog_environs = ['PrgEnv-gnu'] + self.modules = ['craype-hugepages8M'] + self.build_system = 'Make' + self.build_system.options = ['arch=MPI_GCC_OMP'] + self.sourcesdir = None + self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] + + self.executable = 'hpcg/bin/xhpcg' + self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] + output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.num_tasks = 12 + self.num_tasks_per_node = 12 + self.num_cpus_per_task = 1 + + self.reference = { + 'daint:gpu': { + 'perf': (7.6, -0.1, 0.1) + }, + 'daint:mc': { + 'perf': (13.4, -0.1, 0.1) + }, + 'dom:gpu': { + 'perf': (7.6, -0.1, 0.1) + }, + 'dom:mc': { + 'perf': (13.4, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of=\s*' + r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] + +@rfm.simple_test +class HPCGCheckMKL(rfm.RegressionTest): + def __init__(self, **kwargs): + super().__init__() + + self.descr = 'HPCG benchmark Intel MKL implementation' + self.valid_systems = ['dom:mc'] + self.valid_prog_environs = ['PrgEnv-intel'] + self.modules = ['craype-hugepages8M'] + #self.sourcesdir needed for "CrayXC" config file + self.build_system = 'Make' + self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', + 'mv Make.CrayXC setup', + './configure CrayXC'] + + self.num_tasks = 0 + self.num_tasks_per_core = 2 + self.num_tasks_per_node = 4 + self.num_cpus_per_task = 18 + self.problem_size = 104 + + self.variables = { + 'HUGETLB_VERBOSE': '0', + 'MPICH_MAX_THREAD_SAFETY' : 'multiple', + 'MPICH_USE_DMAPP_COLL': '1', + 'PMI_NO_FORK': '1', + 'KMP_HW_SUBSET' : '9c,2t', + 'KMP_AFFINITY' : 'granularity=fine,compact' + } + + self.executable = 'bin/xhpcg_avx2' + self.executable_opts = ['--nx=%d' % self.problem_size, +
'--ny=%d' % self.problem_size, + '--nz=%d' % self.problem_size, '-t2'] + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', self.outfile_lazy))) + self.reference = { + 'dom:mc': { + 'perf': (22, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float) / (self.num_tasks_assigned/self.num_tasks_per_node) + } + self.maintainers = ['SK'] + + @property + @sn.sanity_function + def num_tasks_assigned(self): + return self.job.num_tasks + + @property + @sn.sanity_function + def outfile_lazy(self): + pattern = 'n%d-%dp-%dt-*.yaml' % (self.problem_size, + self.job.num_tasks, + self.num_cpus_per_task) + return sn.getitem(sn.glob(pattern), 0) diff --git a/cscs-checks/analytics/hpcg/hpcg_check.py b/cscs-checks/analytics/hpcg/hpcg_check.py deleted file mode 100644 index d0ab69255c..0000000000 --- a/cscs-checks/analytics/hpcg/hpcg_check.py +++ /dev/null @@ -1,111 +0,0 @@ -import os - -import reframe as rfm -import reframe.utility.sanity as sn - -@rfm.simple_test -class HPCGCheck(rfm.RegressionTest): - def __init__(self, **kwargs): - #super().__init__('hpcg_check', - # os.path.dirname(__file__), **kwargs) - super().__init__() - - self.descr = 'HPCG check' - self.valid_systems = ['daint:mc', 'daint:gpu'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['craype-hugepages8M'] - #self.sourcesdir = os.path.join(self.current_system.resourcesdir, - # 'HPCG') - self.build_system = 'Make' - self.build_system.options = ['arch=MPI_GCC_OMP'] - self.sourcesdir = None - self.sourcepath = 'hpcg' - self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] - - self.executable = 'hpcg/bin/xhpcg' - self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] - output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) -
self.num_tasks = 12 - self.num_tasks_per_node = 12 - self.num_cpus_per_task = 1 - self.variables = { - #'CXX' : 'CC', - #'OMP_SCHEDULE': 'static', - 'OMP_NUM_THREADS': str(self.num_cpus_per_task), - #'HUGETLB_VERBOSE': '0', - #'PMI_NO_FORK': '1', - #'MPICH_USE_DMAPP_COLL': '1', - } - self.reference = { - 'daint:gpu': { - 'perf': (7.6, -0.1, 0.1) - }, - 'daint:mc': { - 'perf': (13.4, -0.1, 0.1) - }, - } - - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of=\s*' - r'(?P<perf>\S+)', output_file, 'perf', float) - } - self.maintainers = ['SK'] - - -# FIXME: This test is obsolete; it is kept only for reference -#class HPCGMonchAcceptanceCheck(RegressionTest): -# def __init__(self, num_tasks, **kwargs): -# super().__init__('hpcg_check_%s_nodes' % num_tasks, -# os.path.dirname(__file__), **kwargs) -# self.descr = 'HPCG check' -# self.valid_systems = ['monch:compute'] -# self.valid_prog_environs = ['PrgEnv-gnu'] -# self.sourcesdir = os.path.join(self.current_system.resourcesdir, -# 'HPCG-CPU') -# self.executable = './bin/xhpcg' -# output_file = sn.getitem(sn.glob('HPCG-Benchmark_*.txt'), 0) -# self.sanity_patterns = sn.assert_eq(4, sn.count( -# sn.findall(r'PASSED', output_file))) -# self.num_tasks = num_tasks -# self.num_tasks_per_node = 1 -# self.num_cpus_per_task = 20 -# self.prebuild_cmd = ['./configure MPI_GCC_OMP'] -# self.variables = { -# 'MV2_ENABLE_AFFINITY': '0', -# 'OMP_NUM_THREADS': str(self.num_cpus_per_task), -# } -# reference_by_nodes = { -# 2: { -# 'perf': (2.20716, -0.10, None), -# }, -# 4: { -# 'perf': (4.28179, -0.10, None), -# }, -# 6: { -# 'perf': (6.18806, -0.10, None), -# }, -# 8: { -# 'perf': (8.16107, -0.10, None), -# }, -# } -# self.reference = { -# 'monch:compute': reference_by_nodes[num_tasks] -# } -# self.perf_patterns = { -# 'perf': sn.extractsingle( -# r'HPCG result is VALID with a GFLOP\/s rating of=\s*' -# r'(?P<perf>\S+)', output_file, 'perf', float) -# } -# self.tags = {'monch_acceptance'} -#
self.maintainers = ['VK'] -# -# -#def _get_checks(**kwargs): -# return [HPCGCheck(**kwargs), -# HPCGMonchAcceptanceCheck(2, **kwargs), -# HPCGMonchAcceptanceCheck(4, **kwargs), -# HPCGMonchAcceptanceCheck(6, **kwargs), -# HPCGMonchAcceptanceCheck(8, **kwargs)] diff --git a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py deleted file mode 100644 index c5ae000c82..0000000000 --- a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py +++ /dev/null @@ -1,64 +0,0 @@ -import os - -import reframe as rfm -import reframe.utility.sanity as sn - -@rfm.simple_test -class HPCGCheck(rfm.RegressionTest): - def __init__(self, **kwargs): - #super().__init__('hpcg_check', - # os.path.dirname(__file__), **kwargs) - super().__init__() - - self.descr = 'HPCG check' - self.valid_systems = ['daint:mc'] - self.valid_prog_environs = ['PrgEnv-intel'] - self.modules = ['craype-hugepages8M'] - #self.sourcesdir = os.path.join(self.current_system.resourcesdir, - # 'HPCG') - self.build_system = 'Make' - #self.build_system.options = ['arch=MPI_GCC_OMP'] - self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', - 'mv Make.CrayXC setup', - './configure CrayXC'] - - self.num_tasks_per_core = 2 - self.num_tasks_per_node = 4 - self.num_cpus_per_task = 18 - self.num_tasks = self.num_tasks_per_node * 1 - problem_size = 104 - - self.variables = { - 'HUGETLB_VERBOSE': '0', - 'MPICH_MAX_THREAD_SAFETY' : 'multiple', - 'MPICH_USE_DMAPP_COLL': '1', - 'PMI_NO_FORK': '1', - 'KMP_HW_SUBSET' : '9c,2t', - 'KMP_AFFINITY' : 'granularity=fine,compact' - } - - self.executable = 'bin/xhpcg_avx2' - self.executable_opts = ['--nx=%d' % problem_size, - '--ny=%d' % problem_size, - '--nz=%d' % problem_size, '-t2'] - output_file = sn.getitem(sn.glob('n%d-%dp-%dt-*.yaml' % - (problem_size, - self.num_tasks, - self.num_cpus_per_task)), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) - self.reference = { - #'daint:gpu': { - # 'perf': (12, -0.1, 
0.1) - #}, - 'daint:mc': { - 'perf': (22.3, -0.1, 0.1) - }, - } - - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P<perf>\S+)', output_file, 'perf', float) - } - self.maintainers = ['SK'] From 6658f73afc0b3ec4deef722d0ece16bf53845e2d Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Fri, 23 Nov 2018 14:25:45 +0100 Subject: [PATCH 05/16] reference implementation of the HPCG benchmark --- cscs-checks/apps/hpcg_check.py | 109 +++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 cscs-checks/apps/hpcg_check.py diff --git a/cscs-checks/apps/hpcg_check.py b/cscs-checks/apps/hpcg_check.py new file mode 100644 index 0000000000..6c22dbe819 --- /dev/null +++ b/cscs-checks/apps/hpcg_check.py @@ -0,0 +1,109 @@ +import os + +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheck(rfm.RegressionTest): + def __init__(self, **kwargs): + #super().__init__('hpcg_check', + # os.path.dirname(__file__), **kwargs) + super().__init__() + + self.descr = 'HPCG check' + self.valid_systems = ['daint:gpu'] + self.valid_prog_environs = ['PrgEnv-gnu'] + #self.modules = [] + #self.sourcesdir = os.path.join(self.current_system.resourcesdir, + # 'HPCG') + self.build_system = 'Make' + self.build_system.options = ['arch=MPI_GCC_OMP'] + self.sourcesdir = None + self.sourcepath = 'hpcg' + self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] + + self.executable = 'hpcg/bin/xhpcg' + #self.prerun = ['chmod +x %s' % self.executable] + output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.num_tasks = 12 + self.num_tasks_per_node = 12 + self.num_cpus_per_task = 1 + self.variables = { + #'CXX' : 'CC', + #'PMI_NO_FORK': '1', + #'MPICH_USE_DMAPP_COLL': '1', + 'OMP_SCHEDULE': 'static', + 'OMP_NUM_THREADS': str(self.num_cpus_per_task), +
#'HUGETLB_VERBOSE': '0', + #'HUGETLB_DEFAULT_PAGE_SIZE': '8M', + } + self.reference = { + 'daint:gpu': { + 'perf': (2.2, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of=\s*' + r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] + + +# FIXME: This test is obsolete; it is kept only for reference +#class HPCGMonchAcceptanceCheck(RegressionTest): +# def __init__(self, num_tasks, **kwargs): +# super().__init__('hpcg_check_%s_nodes' % num_tasks, +# os.path.dirname(__file__), **kwargs) +# self.descr = 'HPCG check' +# self.valid_systems = ['monch:compute'] +# self.valid_prog_environs = ['PrgEnv-gnu'] +# self.sourcesdir = os.path.join(self.current_system.resourcesdir, +# 'HPCG-CPU') +# self.executable = './bin/xhpcg' +# output_file = sn.getitem(sn.glob('HPCG-Benchmark_*.txt'), 0) +# self.sanity_patterns = sn.assert_eq(4, sn.count( +# sn.findall(r'PASSED', output_file))) +# self.num_tasks = num_tasks +# self.num_tasks_per_node = 1 +# self.num_cpus_per_task = 20 +# self.prebuild_cmd = ['./configure MPI_GCC_OMP'] +# self.variables = { +# 'MV2_ENABLE_AFFINITY': '0', +# 'OMP_NUM_THREADS': str(self.num_cpus_per_task), +# } +# reference_by_nodes = { +# 2: { +# 'perf': (2.20716, -0.10, None), +# }, +# 4: { +# 'perf': (4.28179, -0.10, None), +# }, +# 6: { +# 'perf': (6.18806, -0.10, None), +# }, +# 8: { +# 'perf': (8.16107, -0.10, None), +# }, +# } +# self.reference = { +# 'monch:compute': reference_by_nodes[num_tasks] +# } +# self.perf_patterns = { +# 'perf': sn.extractsingle( +# r'HPCG result is VALID with a GFLOP\/s rating of=\s*' +# r'(?P<perf>\S+)', output_file, 'perf', float) +# } +# self.tags = {'monch_acceptance'} +# self.maintainers = ['VK'] +# +# +#def _get_checks(**kwargs): +# return [HPCGCheck(**kwargs), +# HPCGMonchAcceptanceCheck(2, **kwargs), +# HPCGMonchAcceptanceCheck(4, **kwargs), +# HPCGMonchAcceptanceCheck(6, **kwargs), +# HPCGMonchAcceptanceCheck(8, **kwargs)] From
ac2f60b996571fd509c2c3eeb4de98a68b92c5eb Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 26 Nov 2018 15:57:08 +0100 Subject: [PATCH 06/16] moved hpcg checks to analytics --- .../{apps => analytics/hpcg}/hpcg_check.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) rename cscs-checks/{apps => analytics/hpcg}/hpcg_check.py (91%) diff --git a/cscs-checks/apps/hpcg_check.py b/cscs-checks/analytics/hpcg/hpcg_check.py similarity index 91% rename from cscs-checks/apps/hpcg_check.py rename to cscs-checks/analytics/hpcg/hpcg_check.py index 6c22dbe819..d0ab69255c 100644 --- a/cscs-checks/apps/hpcg_check.py +++ b/cscs-checks/analytics/hpcg/hpcg_check.py @@ -11,9 +11,9 @@ def __init__(self, **kwargs): super().__init__() self.descr = 'HPCG check' - self.valid_systems = ['daint:gpu'] + self.valid_systems = ['daint:mc', 'daint:gpu'] self.valid_prog_environs = ['PrgEnv-gnu'] - #self.modules = [] + self.modules = ['craype-hugepages8M'] #self.sourcesdir = os.path.join(self.current_system.resourcesdir, # 'HPCG') self.build_system = 'Make' @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] self.executable = 'hpcg/bin/xhpcg' - #self.prerun = ['chmod +x %s' % self.executable] + self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) self.sanity_patterns = sn.assert_eq(4, sn.count( sn.findall(r'PASSED', output_file))) @@ -32,16 +32,18 @@ def __init__(self, **kwargs): self.num_cpus_per_task = 1 self.variables = { #'CXX' : 'CC', - #'PMI_NO_FORK': '1', - #'MPICH_USE_DMAPP_COLL': '1', - 'OMP_SCHEDULE': 'static', + #'OMP_SCHEDULE': 'static', 'OMP_NUM_THREADS': str(self.num_cpus_per_task), #'HUGETLB_VERBOSE': '0', - #'HUGETLB_DEFAULT_PAGE_SIZE': '8M', + #'PMI_NO_FORK': '1', + #'MPICH_USE_DMAPP_COLL': '1', } self.reference = { 'daint:gpu': { - 'perf': (2.2, -0.1, 0.1) + 'perf': (7.6, -0.1, 0.1) + }, + 'daint:mc': { 
+ 'perf': (13.4, -0.1, 0.1) }, } From 2a7f772137adf8b3129485757506a60643e5ce66 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 26 Nov 2018 18:10:45 +0100 Subject: [PATCH 07/16] finished mkl hpcg test --- cscs-checks/analytics/hpcg/hpcg_check_mkl.py | 64 ++++++++ cscs-checks/analytics/hpcg/src/Make.CrayXC | 149 +++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 cscs-checks/analytics/hpcg/hpcg_check_mkl.py create mode 100644 cscs-checks/analytics/hpcg/src/Make.CrayXC diff --git a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py new file mode 100644 index 0000000000..c5ae000c82 --- /dev/null +++ b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py @@ -0,0 +1,64 @@ +import os + +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheck(rfm.RegressionTest): + def __init__(self, **kwargs): + #super().__init__('hpcg_check', + # os.path.dirname(__file__), **kwargs) + super().__init__() + + self.descr = 'HPCG check' + self.valid_systems = ['daint:mc'] + self.valid_prog_environs = ['PrgEnv-intel'] + self.modules = ['craype-hugepages8M'] + #self.sourcesdir = os.path.join(self.current_system.resourcesdir, + # 'HPCG') + self.build_system = 'Make' + #self.build_system.options = ['arch=MPI_GCC_OMP'] + self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', + 'mv Make.CrayXC setup', + './configure CrayXC'] + + self.num_tasks_per_core = 2 + self.num_tasks_per_node = 4 + self.num_cpus_per_task = 18 + self.num_tasks = self.num_tasks_per_node * 1 + problem_size = 104 + + self.variables = { + 'HUGETLB_VERBOSE': '0', + 'MPICH_MAX_THREAD_SAFETY' : 'multiple', + 'MPICH_USE_DMAPP_COLL': '1', + 'PMI_NO_FORK': '1', + 'KMP_HW_SUBSET' : '9c,2t', + 'KMP_AFFINITY' : 'granularity=fine,compact' + } + + self.executable = 'bin/xhpcg_avx2' + self.executable_opts = ['--nx=%d' % problem_size, + '--ny=%d' % problem_size, + '--nz=%d' % problem_size, '-t2'] + output_file = 
sn.getitem(sn.glob('n%d-%dp-%dt-*.yaml' % + (problem_size, + self.num_tasks, + self.num_cpus_per_task)), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.reference = { + #'daint:gpu': { + # 'perf': (12, -0.1, 0.1) + #}, + 'daint:mc': { + 'perf': (22.3, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] diff --git a/cscs-checks/analytics/hpcg/src/Make.CrayXC b/cscs-checks/analytics/hpcg/src/Make.CrayXC new file mode 100644 index 0000000000..20195a2c43 --- /dev/null +++ b/cscs-checks/analytics/hpcg/src/Make.CrayXC @@ -0,0 +1,149 @@ +#=============================================================================== +# Copyright 2014-2018 Intel Corporation All Rights Reserved. +# +# The source code, information and material ("Material") contained herein is +# owned by Intel Corporation or its suppliers or licensors, and title to such +# Material remains with Intel Corporation or its suppliers or licensors. The +# Material contains proprietary information of Intel or its suppliers and +# licensors. The Material is protected by worldwide copyright laws and treaty +# provisions. No part of the Material may be used, copied, reproduced, +# modified, published, uploaded, posted, transmitted, distributed or disclosed +# in any way without Intel's prior express written permission. No license under +# any patent, copyright or other intellectual property rights in the Material +# is granted to or conferred upon you, either expressly, by implication, +# inducement, estoppel or otherwise. Any license under such intellectual +# property rights must be express and approved by Intel in writing. +# +# Unless otherwise agreed by Intel in writing, you may not remove or alter this +# notice or any other notice embedded in Materials by Intel or Intel's +# suppliers or licensors in any way.
+#=============================================================================== + +# -- High Performance Conjugate Gradient Benchmark (HPCG) +# HPCG - 2.1 - January 31, 2014 +# +# Michael A. Heroux +# Scalable Algorithms Group, Computing Research Center +# Sandia National Laboratories, Albuquerque, NM +# +# Piotr Luszczek +# Jack Dongarra +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2013 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#****************************************************************************** + +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s -f +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - HPCG Directory Structure / HPCG library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = . +SRCdir = $(TOPdir)/src +INCdir = $(TOPdir)/src +BINdir = $(TOPdir)/bin +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. 
+# +MPdir = +MPinc = +MPlib = -Wl,--whole-archive,-ldmapp,--no-whole-archive +# +# +# ---------------------------------------------------------------------- +# - HPCG includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPCG_INCLUDES = -I$(INCdir) -I$(INCdir)/$(arch) $(MPinc) +HPCG_LIBS = $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPCG_NO_MPI Define to disable MPI +# -DHPCG_NO_OPENMP Define to disable OPENMP +# -DHPCG_DEBUG Define to enable debugging output +# -DHPCG_DETAILED_DEBUG Define to enable very detailed debugging output +# +# By default HPCG will: +# *) Build with MPI enabled. +# *) Build with OpenMP enabled. +# *) Not generate debugging output. +# +HPCG_OPTS = +# +# ---------------------------------------------------------------------- +# +HPCG_DEFS = -DMPICH_IGNORE_CXX_SEEK $(HPCG_OPTS) $(HPCG_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CXX = CC +CXXFLAGS = -xCORE-AVX2 -qopenmp -std=c++11 $(HPCG_DEFS) +ifeq (yes, $(DBG)) + CXXFLAGS += -O0 -g -DHPCG_DEBUG +else + CXXFLAGS += -O3 -DNDEBUG +endif +# +LINKER = $(CXX) +MKL_LIB=$(MKLROOT)/lib/intel64 + +LINKFLAGS = -z relro -z now -Wl,-R'$$ORIGIN/lib/intel64' -liomp5 -L$(MKL_LIB) -liomp5 -static-intel -mkl +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- +xhpcg_suff = _avx2 From abee44e4bc4bfe00bda90e46c19cf1fd4d7721dd Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 27 Nov 2018 16:43:43 +0100 Subject: [PATCH 08/16] merged ref and mkl implementation in one file --- cscs-checks/analytics/hpcg/hpcg_benchmark.py | 109 ++++++++++++++++++ cscs-checks/analytics/hpcg/hpcg_check.py | 111 
------------------- cscs-checks/analytics/hpcg/hpcg_check_mkl.py | 64 ----------- 3 files changed, 109 insertions(+), 175 deletions(-) create mode 100644 cscs-checks/analytics/hpcg/hpcg_benchmark.py delete mode 100644 cscs-checks/analytics/hpcg/hpcg_check.py delete mode 100644 cscs-checks/analytics/hpcg/hpcg_check_mkl.py diff --git a/cscs-checks/analytics/hpcg/hpcg_benchmark.py b/cscs-checks/analytics/hpcg/hpcg_benchmark.py new file mode 100644 index 0000000000..667c0276c2 --- /dev/null +++ b/cscs-checks/analytics/hpcg/hpcg_benchmark.py @@ -0,0 +1,109 @@ +import reframe as rfm +import reframe.utility.sanity as sn + +@rfm.simple_test +class HPCGCheckRef(rfm.RegressionTest): + def __init__(self, **kwargs): + super().__init__() + + self.descr = 'HPCG reference benchmark' + self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc'] + self.valid_prog_environs = ['PrgEnv-gnu'] + self.modules = ['craype-hugepages8M'] + self.build_system = 'Make' + self.build_system.options = ['arch=MPI_GCC_OMP'] + self.sourcesdir = None + self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] + + self.executable = 'hpcg/bin/xhpcg' + self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] + output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', output_file))) + self.num_tasks = 12 + self.num_tasks_per_node = 12 + self.num_cpus_per_task = 1 + + self.reference = { + 'daint:gpu': { + 'perf': (7.6, -0.1, 0.1) + }, + 'daint:mc': { + 'perf': (13.4, -0.1, 0.1) + }, + 'dom:gpu': { + 'perf': (7.6, -0.1, 0.1) + }, + 'dom:mc': { + 'perf': (13.4, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of=\s*' + r'(?P<perf>\S+)', output_file, 'perf', float) + } + self.maintainers = ['SK'] + +@rfm.simple_test +class HPCGCheckMKL(rfm.RegressionTest): + def __init__(self, **kwargs): + super().__init__() + + self.descr =
'HPCG benchmark Intel MKL implementation' + self.valid_systems = ['dom:mc'] + self.valid_prog_environs = ['PrgEnv-intel'] + self.modules = ['craype-hugepages8M'] + #self.sourcesdir needed for "CrayXC" config file + self.build_system = 'Make' + self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', + 'mv Make.CrayXC setup', + './configure CrayXC'] + + self.num_tasks = 0 + self.num_tasks_per_core = 2 + self.num_tasks_per_node = 4 + self.num_cpus_per_task = 18 + self.problem_size = 104 + + self.variables = { + 'HUGETLB_VERBOSE': '0', + 'MPICH_MAX_THREAD_SAFETY' : 'multiple', + 'MPICH_USE_DMAPP_COLL': '1', + 'PMI_NO_FORK': '1', + 'KMP_HW_SUBSET' : '9c,2t', + 'KMP_AFFINITY' : 'granularity=fine,compact' + } + + self.executable = 'bin/xhpcg_avx2' + self.executable_opts = ['--nx=%d' % self.problem_size, + '--ny=%d' % self.problem_size, + '--nz=%d' % self.problem_size, '-t2'] + self.sanity_patterns = sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', self.outfile_lazy))) + self.reference = { + 'dom:mc': { + 'perf': (22, -0.1, 0.1) + }, + } + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float) / (self.num_tasks_assigned/self.num_tasks_per_node) + } + self.maintainers = ['SK'] + + @property + @sn.sanity_function + def num_tasks_assigned(self): + return self.job.num_tasks + + @property + @sn.sanity_function + def outfile_lazy(self): + pattern = 'n%d-%dp-%dt-*.yaml' % (self.problem_size, + self.job.num_tasks, + self.num_cpus_per_task) + return sn.getitem(sn.glob(pattern), 0) diff --git a/cscs-checks/analytics/hpcg/hpcg_check.py b/cscs-checks/analytics/hpcg/hpcg_check.py deleted file mode 100644 index d0ab69255c..0000000000 --- a/cscs-checks/analytics/hpcg/hpcg_check.py +++ /dev/null @@ -1,111 +0,0 @@ -import os - -import reframe as rfm -import reframe.utility.sanity as sn - -@rfm.simple_test -class HPCGCheck(rfm.RegressionTest): - def __init__(self, **kwargs): -
#super().__init__('hpcg_check', - # os.path.dirname(__file__), **kwargs) - super().__init__() - - self.descr = 'HPCG check' - self.valid_systems = ['daint:mc', 'daint:gpu'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['craype-hugepages8M'] - #self.sourcesdir = os.path.join(self.current_system.resourcesdir, - # 'HPCG') - self.build_system = 'Make' - self.build_system.options = ['arch=MPI_GCC_OMP'] - self.sourcesdir = None - self.sourcepath = 'hpcg' - self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] - - self.executable = 'hpcg/bin/xhpcg' - self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] - output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) - self.num_tasks = 12 - self.num_tasks_per_node = 12 - self.num_cpus_per_task = 1 - self.variables = { - #'CXX' : 'CC', - #'OMP_SCHEDULE': 'static', - 'OMP_NUM_THREADS': str(self.num_cpus_per_task), - #'HUGETLB_VERBOSE': '0', - #'PMI_NO_FORK': '1', - #'MPICH_USE_DMAPP_COLL': '1', - } - self.reference = { - 'daint:gpu': { - 'perf': (7.6, -0.1, 0.1) - }, - 'daint:mc': { - 'perf': (13.4, -0.1, 0.1) - }, - } - - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of=\s*' - r'(?P<perf>\S+)', output_file, 'perf', float) - } - self.maintainers = ['SK'] - - -# FIXME: This test is obsolete; it is kept only for reference -#class HPCGMonchAcceptanceCheck(RegressionTest): -# def __init__(self, num_tasks, **kwargs): -# super().__init__('hpcg_check_%s_nodes' % num_tasks, -# os.path.dirname(__file__), **kwargs) -# self.descr = 'HPCG check' -# self.valid_systems = ['monch:compute'] -# self.valid_prog_environs = ['PrgEnv-gnu'] -# self.sourcesdir = os.path.join(self.current_system.resourcesdir, -# 'HPCG-CPU') -# self.executable = './bin/xhpcg' -# output_file = sn.getitem(sn.glob('HPCG-Benchmark_*.txt'), 0) -# self.sanity_patterns = sn.assert_eq(4,
sn.count( -# sn.findall(r'PASSED', output_file))) -# self.num_tasks = num_tasks -# self.num_tasks_per_node = 1 -# self.num_cpus_per_task = 20 -# self.prebuild_cmd = ['./configure MPI_GCC_OMP'] -# self.variables = { -# 'MV2_ENABLE_AFFINITY': '0', -# 'OMP_NUM_THREADS': str(self.num_cpus_per_task), -# } -# reference_by_nodes = { -# 2: { -# 'perf': (2.20716, -0.10, None), -# }, -# 4: { -# 'perf': (4.28179, -0.10, None), -# }, -# 6: { -# 'perf': (6.18806, -0.10, None), -# }, -# 8: { -# 'perf': (8.16107, -0.10, None), -# }, -# } -# self.reference = { -# 'monch:compute': reference_by_nodes[num_tasks] -# } -# self.perf_patterns = { -# 'perf': sn.extractsingle( -# r'HPCG result is VALID with a GFLOP\/s rating of=\s*' -# r'(?P<perf>\S+)', output_file, 'perf', float) -# } -# self.tags = {'monch_acceptance'} -# self.maintainers = ['VK'] -# -# -#def _get_checks(**kwargs): -# return [HPCGCheck(**kwargs), -# HPCGMonchAcceptanceCheck(2, **kwargs), -# HPCGMonchAcceptanceCheck(4, **kwargs), -# HPCGMonchAcceptanceCheck(6, **kwargs), -# HPCGMonchAcceptanceCheck(8, **kwargs)] diff --git a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py b/cscs-checks/analytics/hpcg/hpcg_check_mkl.py deleted file mode 100644 index c5ae000c82..0000000000 --- a/cscs-checks/analytics/hpcg/hpcg_check_mkl.py +++ /dev/null @@ -1,64 +0,0 @@ -import os - -import reframe as rfm -import reframe.utility.sanity as sn - -@rfm.simple_test -class HPCGCheck(rfm.RegressionTest): - def __init__(self, **kwargs): - #super().__init__('hpcg_check', - # os.path.dirname(__file__), **kwargs) - super().__init__() - - self.descr = 'HPCG check' - self.valid_systems = ['daint:mc'] - self.valid_prog_environs = ['PrgEnv-intel'] - self.modules = ['craype-hugepages8M'] - #self.sourcesdir = os.path.join(self.current_system.resourcesdir, - # 'HPCG') - self.build_system = 'Make' - #self.build_system.options = ['arch=MPI_GCC_OMP'] - self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', - 'mv Make.CrayXC setup', - './configure CrayXC'] - -
self.num_tasks_per_core = 2 - self.num_tasks_per_node = 4 - self.num_cpus_per_task = 18 - self.num_tasks = self.num_tasks_per_node * 1 - problem_size = 104 - - self.variables = { - 'HUGETLB_VERBOSE': '0', - 'MPICH_MAX_THREAD_SAFETY' : 'multiple', - 'MPICH_USE_DMAPP_COLL': '1', - 'PMI_NO_FORK': '1', - 'KMP_HW_SUBSET' : '9c,2t', - 'KMP_AFFINITY' : 'granularity=fine,compact' - } - - self.executable = 'bin/xhpcg_avx2' - self.executable_opts = ['--nx=%d' % problem_size, - '--ny=%d' % problem_size, - '--nz=%d' % problem_size, '-t2'] - output_file = sn.getitem(sn.glob('n%d-%dp-%dt-*.yaml' % - (problem_size, - self.num_tasks, - self.num_cpus_per_task)), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) - self.reference = { - #'daint:gpu': { - # 'perf': (12, -0.1, 0.1) - #}, - 'daint:mc': { - 'perf': (22.3, -0.1, 0.1) - }, - } - - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P<perf>\S+)', output_file, 'perf', float) - } - self.maintainers = ['SK'] From 694decbdd1303e6e08524d238f9f7387176b693a Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Thu, 29 Nov 2018 16:54:45 +0100 Subject: [PATCH 09/16] system/partition dependent number of tasks --- cscs-checks/analytics/hpcg/hpcg_benchmark.py | 39 ++++++++++++-------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cscs-checks/analytics/hpcg/hpcg_benchmark.py b/cscs-checks/analytics/hpcg/hpcg_benchmark.py index 667c0276c2..acb8bf0e54 100644 --- a/cscs-checks/analytics/hpcg/hpcg_benchmark.py +++ b/cscs-checks/analytics/hpcg/hpcg_benchmark.py @@ -3,7 +3,7 @@ @rfm.simple_test class HPCGCheckRef(rfm.RegressionTest): - def __init__(self, **kwargs): + def __init__(self): super().__init__() self.descr = 'HPCG reference benchmark' @@ -12,30 +12,34 @@ def __init__(self, **kwargs): self.modules = ['craype-hugepages8M'] self.build_system = 'Make' self.build_system.options = ['arch=MPI_GCC_OMP'] - self.sourcesdir = None -
self.prebuild_cmd = ['git clone https://github.com/hpcg-benchmark/hpcg.git', 'cd hpcg'] + self.sourcesdir = 'https://github.com/hpcg-benchmark/hpcg.git' - self.executable = 'hpcg/bin/xhpcg' + self.executable = 'bin/xhpcg' self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) self.sanity_patterns = sn.assert_eq(4, sn.count( sn.findall(r'PASSED', output_file))) - self.num_tasks = 12 - self.num_tasks_per_node = 12 + self.num_cpus_per_task = 1 + self.system_num_tasks = { + 'daint:mc': 36, + 'daint:gpu': 12, + 'dom:mc': 36, + 'dom:gpu': 12, + } self.reference = { 'daint:gpu': { - 'perf': (7.6, -0.1, 0.1) + 'perf': (7.6, -0.1, None) }, 'daint:mc': { - 'perf': (13.4, -0.1, 0.1) + 'perf': (13.4, -0.1, None) }, 'dom:gpu': { - 'perf': (7.6, -0.1, 0.1) + 'perf': (7.6, -0.1, None) }, 'dom:mc': { - 'perf': (13.4, -0.1, 0.1) + 'perf': (13.4, -0.1, None) }, } @@ -46,13 +50,18 @@ def __init__(self, **kwargs): } self.maintainers = ['SK'] + def setup(self, partition, environ, **job_opts): + self.num_tasks = self.system_num_tasks[self.current_system.name + + ":" + partition.name] + super().setup(partition, environ, **job_opts) + @rfm.simple_test class HPCGCheckMKL(rfm.RegressionTest): - def __init__(self, **kwargs): + def __init__(self): super().__init__() self.descr = 'HPCG benchmark Intel MKL implementation' - self.valid_systems = ['dom:mc'] + self.valid_systems = ['daint:mc', 'dom:mc'] self.valid_prog_environs = ['PrgEnv-intel'] self.modules = ['craype-hugepages8M'] #self.sourcesdir needed for "CrayXC" config file @@ -84,7 +93,7 @@ def __init__(self, **kwargs): sn.findall(r'PASSED', self.outfile_lazy))) self.reference = { 'dom:mc': { - 'perf': (22, -0.1, 0.1) + 'perf': (22, -0.1, None) }, } @@ -104,6 +113,6 @@ def num_tasks_assigned(self): @sn.sanity_function def outfile_lazy(self): pattern = 'n%d-%dp-%dt-*.yaml' % (self.problem_size, - self.job.num_tasks, - self.num_cpus_per_task) + self.job.num_tasks, + 
self.num_cpus_per_task) return sn.getitem(sn.glob(pattern), 0) From 70189b3214d55a254d03f939c3f5a299c64f4e3d Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Fri, 30 Nov 2018 16:35:58 +0100 Subject: [PATCH 10/16] extended HPCG-MKL to gpu partitions --- cscs-checks/analytics/hpcg/hpcg_benchmark.py | 36 +++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/cscs-checks/analytics/hpcg/hpcg_benchmark.py b/cscs-checks/analytics/hpcg/hpcg_benchmark.py index acb8bf0e54..801a4b8e6a 100644 --- a/cscs-checks/analytics/hpcg/hpcg_benchmark.py +++ b/cscs-checks/analytics/hpcg/hpcg_benchmark.py @@ -49,6 +49,7 @@ def __init__(self): r'(?P<perf>\S+)', output_file, 'perf', float) } self.maintainers = ['SK'] + self.tags = {'diagnostic'} def setup(self, partition, environ, **job_opts): self.num_tasks = self.system_num_tasks[self.current_system.name @@ -61,7 +62,7 @@ def __init__(self): super().__init__() self.descr = 'HPCG benchmark Intel MKL implementation' - self.valid_systems = ['daint:mc', 'dom:mc'] + self.valid_systems = ['daint:mc', 'dom:mc', 'daint:gpu', 'dom:gpu'] self.valid_prog_environs = ['PrgEnv-intel'] self.modules = ['craype-hugepages8M'] #self.sourcesdir needed for "CrayXC" config file @@ -72,8 +73,6 @@ def __init__(self): self.num_tasks = 0 self.num_tasks_per_core = 2 - self.num_tasks_per_node = 4 - self.num_cpus_per_task = 18 self.problem_size = 104 self.variables = { @@ -95,14 +94,19 @@ def __init__(self): 'dom:mc': { 'perf': (22, -0.1, None) }, + 'daint:mc': { + 'perf': (22, -0.1, None) + }, + 'dom:gpu': { + 'perf': (10.7, -0.1, None) + }, + 'daint:gpu': { + 'perf': (10.7, -0.1, None) + }, } - self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float) / (self.num_tasks_assigned/self.num_tasks_per_node) - } self.maintainers = ['SK'] + self.tags = {'diagnostic'} @property @sn.sanity_function @@ -116,3 +120,19 @@ def outfile_lazy(self):
self.job.num_tasks, self.num_cpus_per_task) return sn.getitem(sn.glob(pattern), 0) + + def setup(self, partition, environ, **job_opts): + if partition.name == 'gpu': + self.num_tasks_per_node = 2 + self.num_cpus_per_task = 12 + else: + self.num_tasks_per_node = 4 + self.num_cpus_per_task = 18 + + self.perf_patterns = { + 'perf': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float) / (self.num_tasks_assigned/self.num_tasks_per_node) + } + + super().setup(partition, environ, **job_opts) From 0ece896c0c8b7e59908099bb4a522bd2d265dda2 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 18 Dec 2018 18:49:00 +0100 Subject: [PATCH 11/16] moved hpcg-flexible to microbenchmarks --- cscs-checks/{analytics => microbenchmarks}/hpcg/hpcg_benchmark.py | 0 cscs-checks/{analytics => microbenchmarks}/hpcg/src/Make.CrayXC | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename cscs-checks/{analytics => microbenchmarks}/hpcg/hpcg_benchmark.py (100%) rename cscs-checks/{analytics => microbenchmarks}/hpcg/src/Make.CrayXC (100%) diff --git a/cscs-checks/analytics/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py similarity index 100% rename from cscs-checks/analytics/hpcg/hpcg_benchmark.py rename to cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py diff --git a/cscs-checks/analytics/hpcg/src/Make.CrayXC b/cscs-checks/microbenchmarks/hpcg/src/Make.CrayXC similarity index 100% rename from cscs-checks/analytics/hpcg/src/Make.CrayXC rename to cscs-checks/microbenchmarks/hpcg/src/Make.CrayXC From 5ae09b396e62cd6fe1f3d220d9f9a9871e240b6c Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 18 Dec 2018 20:07:38 +0100 Subject: [PATCH 12/16] address reviews --- .../microbenchmarks/hpcg/hpcg_benchmark.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py
b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index 801a4b8e6a..342267caf0 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -1,6 +1,7 @@ import reframe as rfm import reframe.utility.sanity as sn +@rfm.required_version('>=2.16-dev0') @rfm.simple_test class HPCGCheckRef(rfm.RegressionTest): def __init__(self): @@ -16,6 +17,7 @@ def __init__(self): self.executable = 'bin/xhpcg' self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] + # use glob to catch the output file suffix dependent on execution time output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) self.sanity_patterns = sn.assert_eq(4, sn.count( sn.findall(r'PASSED', output_file))) @@ -30,21 +32,21 @@ def __init__(self): self.reference = { 'daint:gpu': { - 'perf': (7.6, -0.1, None) + 'gflops': (7.6, -0.1, None, 'GFLOPs') }, 'daint:mc': { - 'perf': (13.4, -0.1, None) + 'gflops': (13.4, -0.1, None, 'GFLOPs') }, 'dom:gpu': { - 'perf': (7.6, -0.1, None) + 'gflops': (7.6, -0.1, None, 'GFLOPs') }, 'dom:mc': { - 'perf': (13.4, -0.1, None) + 'gflops': (13.4, -0.1, None, 'GFLOPs') }, } self.perf_patterns = { - 'perf': sn.extractsingle( + 'gflops': sn.extractsingle( r'HPCG result is VALID with a GFLOP\/s rating of=\s*' r'(?P<perf>\S+)', output_file, 'perf', float) } @@ -52,10 +54,11 @@ def __init__(self): self.tags = {'diagnostic'} def setup(self, partition, environ, **job_opts): - self.num_tasks = self.system_num_tasks[self.current_system.name - + ":" + partition.name] + self.num_tasks = self.system_num_tasks[partition.fullname] + super().setup(partition, environ, **job_opts) +@rfm.required_version('>=2.16-dev0') @rfm.simple_test class HPCGCheckMKL(rfm.RegressionTest): def __init__(self): @@ -92,16 +95,16 @@ def __init__(self): sn.findall(r'PASSED', self.outfile_lazy))) self.reference = { 'dom:mc': { - 'perf': (22, -0.1, None) + 'gflops': (22, -0.1, None, 'GFLOPs') }, 'daint:mc': { - 'perf': (22, -0.1, None) + 'gflops': (22, -0.1,
None, 'GFLOPs') }, 'dom:gpu': { - 'perf': (10.7, -0.1, None) + 'gflops': (10.7, -0.1, None, 'GFLOPs') }, 'daint:gpu': { - 'perf': (10.7, -0.1, None) + 'gflops': (10.7, -0.1, None, 'GFLOPs') }, } @@ -122,17 +125,22 @@ def outfile_lazy(self): return sn.getitem(sn.glob(pattern), 0) def setup(self, partition, environ, **job_opts): - if partition.name == 'gpu': + if partition.fullname in ['daint:gpu', 'dom:gpu']: self.num_tasks_per_node = 2 self.num_cpus_per_task = 12 else: self.num_tasks_per_node = 4 self.num_cpus_per_task = 18 + # since this is a flexible test, we divide the extracted + # performance by the number of nodes and compare + # against a single reference self.perf_patterns = { - 'perf': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float) / (self.num_tasks_assigned/self.num_tasks_per_node) + 'gflops': sn.extractsingle( + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', + self.outfile_lazy, 'perf', float) / + (self.num_tasks_assigned/self.num_tasks_per_node) } super().setup(partition, environ, **job_opts) From 0b0cd4d2d72d65650740a420d4b7132e10ab554e Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 18 Dec 2018 20:36:19 +0100 Subject: [PATCH 13/16] some minor PEP8 formatting changes --- cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index 342267caf0..cdbda127e7 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -1,6 +1,7 @@ import reframe as rfm import reframe.utility.sanity as sn + @rfm.required_version('>=2.16-dev0') @rfm.simple_test class HPCGCheckRef(rfm.RegressionTest): @@ -58,6 +59,7 @@ def setup(self, partition, environ, **job_opts): super().setup(partition, environ, **job_opts) +
@rfm.required_version('>=2.16-dev0') @rfm.simple_test class HPCGCheckMKL(rfm.RegressionTest): @@ -78,13 +80,13 @@ def __init__(self): self.num_tasks_per_core = 2 self.problem_size = 104 - self.variables = { + self.variables = { 'HUGETLB_VERBOSE': '0', 'MPICH_MAX_THREAD_SAFETY' : 'multiple', 'MPICH_USE_DMAPP_COLL': '1', 'PMI_NO_FORK': '1', - 'KMP_HW_SUBSET' : '9c,2t', - 'KMP_AFFINITY' : 'granularity=fine,compact' + 'KMP_HW_SUBSET': '9c,2t', + 'KMP_AFFINITY': 'granularity=fine,compact' } self.executable = 'bin/xhpcg_avx2' From 566bd5b08830ebc83c0d5375aec34985c9ca637e Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Tue, 8 Jan 2019 17:04:51 +0100 Subject: [PATCH 14/16] revised formatting --- .../microbenchmarks/hpcg/hpcg_benchmark.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index cdbda127e7..c0fe6a32ff 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -33,16 +33,16 @@ def __init__(self): self.reference = { 'daint:gpu': { - 'gflops': (7.6, -0.1, None, 'GFLOPs') + 'gflops': (7.6, -0.1, None, 'GFLOP/s') }, 'daint:mc': { - 'gflops': (13.4, -0.1, None, 'GFLOPs') + 'gflops': (13.4, -0.1, None, 'GFLOP/s') }, 'dom:gpu': { - 'gflops': (7.6, -0.1, None, 'GFLOPs') + 'gflops': (7.6, -0.1, None, 'GFLOP/s') }, 'dom:mc': { - 'gflops': (13.4, -0.1, None, 'GFLOPs') + 'gflops': (13.4, -0.1, None, 'GFLOP/s') }, } @@ -70,7 +70,6 @@ def __init__(self): self.valid_systems = ['daint:mc', 'dom:mc', 'daint:gpu', 'dom:gpu'] self.valid_prog_environs = ['PrgEnv-intel'] self.modules = ['craype-hugepages8M'] - #self.sourcesdir needed for "CrayXC" config file self.build_system = 'Make' self.prebuild_cmd = ['cp -r ${MKLROOT}/benchmarks/hpcg/* .', 'mv Make.CrayXC setup', @@ -97,16 +96,16 @@ def __init__(self): sn.findall(r'PASSED', self.outfile_lazy))) self.reference = { 
'dom:mc': { - 'gflops': (22, -0.1, None, 'GFLOPs') + 'gflops': (22, -0.1, None, 'GFLOP/s') }, 'daint:mc': { - 'gflops': (22, -0.1, None, 'GFLOPs') + 'gflops': (22, -0.1, None, 'GFLOP/s') }, 'dom:gpu': { - 'gflops': (10.7, -0.1, None, 'GFLOPs') + 'gflops': (10.7, -0.1, None, 'GFLOP/s') }, 'daint:gpu': { - 'gflops': (10.7, -0.1, None, 'GFLOPs') + 'gflops': (10.7, -0.1, None, 'GFLOP/s') }, } @@ -137,12 +136,12 @@ def setup(self, partition, environ, **job_opts): # since this is a flexible test, we divide the extracted # performance by the number of nodes and compare # against a single reference + num_nodes = self.num_tasks_assigned / self.num_tasks_per_node self.perf_patterns = { 'gflops': sn.extractsingle( - r'HPCG result is VALID with a GFLOP\/s rating of:\s*' - r'(?P<perf>\S+)', - self.outfile_lazy, 'perf', float) / - (self.num_tasks_assigned/self.num_tasks_per_node) + r'HPCG result is VALID with a GFLOP\/s rating of:\s*' + r'(?P<perf>\S+)', + self.outfile_lazy, 'perf', float) / num_nodes } super().setup(partition, environ, **job_opts) From 0ee3225bb59febd42d146a0dd5083d981172bc5e Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Wed, 9 Jan 2019 19:16:39 +0100 Subject: [PATCH 15/16] flexible HPCG reference test --- .../microbenchmarks/hpcg/hpcg_benchmark.py | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index c0fe6a32ff..f89c026b70 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -19,10 +19,9 @@ def __init__(self): self.executable = 'bin/xhpcg' self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2'] # use glob to catch the output file suffix dependent on execution time - output_file = sn.getitem(sn.glob('HPCG*.txt'), 0) - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', output_file))) + self.output_file =
sn.getitem(sn.glob('HPCG*.txt'), 0) + self.num_tasks = 0 self.num_cpus_per_task = 1 self.system_num_tasks = { 'daint:mc': 36, @@ -46,16 +45,30 @@ def __init__(self): }, } + self.maintainers = ['SK'] + self.tags = {'diagnostic'} + + @property + @sn.sanity_function + def num_tasks_assigned(self): + return self.job.num_tasks + + def setup(self, partition, environ, **job_opts): + self.num_tasks_per_node = self.system_num_tasks[partition.fullname] + + num_nodes = self.num_tasks_assigned / self.num_tasks_per_node self.perf_patterns = { 'gflops': sn.extractsingle( r'HPCG result is VALID with a GFLOP\/s rating of=\s*' - r'(?P<perf>\S+)', output_file, 'perf', float) + r'(?P<perf>\S+)', + self.output_file, 'perf', float) / num_nodes } - self.maintainers = ['SK'] - self.tags = {'diagnostic'} - def setup(self, partition, environ, **job_opts): - self.num_tasks = self.system_num_tasks[partition.fullname] + self.sanity_patterns = sn.all([ + sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', self.output_file))), + sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) + ]) super().setup(partition, environ, **job_opts) @@ -92,8 +105,7 @@ def __init__(self): self.executable_opts = ['--nx=%d' % self.problem_size, '--ny=%d' % self.problem_size, '--nz=%d' % self.problem_size, '-t2'] - self.sanity_patterns = sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', self.outfile_lazy))) + self.reference = { 'dom:mc': { 'gflops': (22, -0.1, None, 'GFLOP/s') @@ -144,4 +156,10 @@ def setup(self, partition, environ, **job_opts): self.outfile_lazy, 'perf', float) / num_nodes } + self.sanity_patterns = sn.all([ + sn.assert_eq(4, sn.count( + sn.findall(r'PASSED', self.outfile_lazy))), + sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) + ]) + super().setup(partition, environ, **job_opts) From 1050d8683ad96b6a693afac085316643dfd93137 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 11 Jan 2019 10:37:31 +0100 Subject: [PATCH 16/16] Rename 'GFLOP/s' to 'Gflop/s' + style
changes --- .../microbenchmarks/hpcg/hpcg_benchmark.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py index f89c026b70..9149aacef4 100644 --- a/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py +++ b/cscs-checks/microbenchmarks/hpcg/hpcg_benchmark.py @@ -32,16 +32,16 @@ def __init__(self): self.reference = { 'daint:gpu': { - 'gflops': (7.6, -0.1, None, 'GFLOP/s') + 'gflops': (7.6, -0.1, None, 'Gflop/s') }, 'daint:mc': { - 'gflops': (13.4, -0.1, None, 'GFLOP/s') + 'gflops': (13.4, -0.1, None, 'Gflop/s') }, 'dom:gpu': { - 'gflops': (7.6, -0.1, None, 'GFLOP/s') + 'gflops': (7.6, -0.1, None, 'Gflop/s') }, 'dom:mc': { - 'gflops': (13.4, -0.1, None, 'GFLOP/s') + 'gflops': (13.4, -0.1, None, 'Gflop/s') }, } @@ -66,7 +66,7 @@ def setup(self, partition, environ, **job_opts): self.sanity_patterns = sn.all([ sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', self.output_file))), + sn.findall(r'PASSED', self.output_file))), sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) ]) @@ -94,7 +94,7 @@ def __init__(self): self.variables = { 'HUGETLB_VERBOSE': '0', - 'MPICH_MAX_THREAD_SAFETY' : 'multiple', + 'MPICH_MAX_THREAD_SAFETY': 'multiple', 'MPICH_USE_DMAPP_COLL': '1', 'PMI_NO_FORK': '1', 'KMP_HW_SUBSET': '9c,2t', @@ -108,16 +108,16 @@ def __init__(self): self.reference = { 'dom:mc': { - 'gflops': (22, -0.1, None, 'GFLOP/s') + 'gflops': (22, -0.1, None, 'Gflop/s') }, 'daint:mc': { - 'gflops': (22, -0.1, None, 'GFLOP/s') + 'gflops': (22, -0.1, None, 'Gflop/s') }, 'dom:gpu': { - 'gflops': (10.7, -0.1, None, 'GFLOP/s') + 'gflops': (10.7, -0.1, None, 'Gflop/s') }, 'daint:gpu': { - 'gflops': (10.7, -0.1, None, 'GFLOP/s') + 'gflops': (10.7, -0.1, None, 'Gflop/s') }, } @@ -158,7 +158,7 @@ def setup(self, partition, environ, **job_opts): self.sanity_patterns = sn.all([ sn.assert_eq(4, sn.count( - sn.findall(r'PASSED', 
self.outfile_lazy))), + sn.findall(r'PASSED', self.outfile_lazy))), sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node) ])