From 0f1384eeeb8a9efd71a7e0cb5c958e1b4874b9b1 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Thu, 10 Jan 2019 18:10:39 +0100 Subject: [PATCH 1/8] likwid benchmarks for cpu-mem bandwidth --- cscs-checks/microbenchmarks/likwid/likwid.py | 171 +++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 cscs-checks/microbenchmarks/likwid/likwid.py diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py new file mode 100644 index 0000000000..1e2067eb8a --- /dev/null +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -0,0 +1,171 @@ +import reframe as rfm +import reframe.utility.sanity as sn + + +class BandwidthBase(rfm.RegressionTest): + def __init__(self): + super().__init__() + + self.valid_prog_environs = ['PrgEnv-gnu'] + self.build_system = 'Make' + self.build_system.flags_from_environ = False + self.sourcesdir = 'https://github.com/RRZE-HPC/likwid.git' + self.variables = { + 'LD_LIBRARY_PATH': './lib:$LD_LIBRARY_PATH', + 'PATH': './bin:./sbin:$PATH' + } + + self.executable = 'bin/likwid-bench' + + self.num_tasks = 1 + self.num_tasks_per_core = 2 + self.system_num_cpus = { + 'daint:mc': 72, + 'daint:gpu': 24, + 'dom:mc': 72, + 'dom:gpu': 24, + } + self.system_numa_domains = { + 'daint:mc': ['S0', 'S1'], + 'daint:gpu': ['S0'], + 'dom:mc': ['S0', 'S1'], + 'dom:gpu': ['S0'], + } + + self.maintainers = ['SK'] + self.tags = {'diagnostic'} + + bw_pattern = sn.extractsingle(r'MByte/s:\s*(?P<bw>\S+)', + self.stdout, 'bw', float) + + self.sanity_patterns = sn.assert_ge(bw_pattern, 0.0) + self.perf_patterns = { + 'bandwidth': bw_pattern + } + + def setup(self, partition, environ, **job_opts): + super().setup(partition, environ, **job_opts) + + self.postbuild_cmd = ['make install PREFIX=%s INSTALL_CHOWN=' + '\'-g csstaff -o sebkelle\'' % self.stagedir] + + +class CPUMemoryBandwidth(BandwidthBase): + def __init__(self): + super().__init__() + + self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc'] 
+ + def setup(self, partition, environ, **job_opts): + + self.num_cpus_per_task = self.system_num_cpus[partition.fullname] + numa_domains = self.system_numa_domains[partition.fullname] + + num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * + self.num_tasks_per_core) + # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' + # format: -w domain:data_size:nthreads:chunk_size:stride + # chunk_size and stride affect which cpus from are selected + workgroups = ' '.join(['-w %s:100MB:%d:1:2' % (dom, num_cpu_dom) + for dom in numa_domains]) + + self.executable_opts = ['-t %s' % self.kernel_name, workgroups] + + super().setup(partition, environ, **job_opts) + + +@rfm.required_version('>=2.16-dev0') +@rfm.simple_test +class CPUMemoryBandwidthRead(CPUMemoryBandwidth): + def __init__(self): + super().__init__() + + self.descr = 'CPU <- main memory read benchmark' + # the kernel to run in likwid + self.kernel_name = "load_avx" + + self.reference = { + 'daint:gpu': { + 'bandwidth': (65000, -0.05, None, 'MB/s') + }, + 'daint:mc': { + 'bandwidth': (130000, -0.05, None, 'MB/s') + }, + 'dom:gpu': { + 'bandwidth': (65000, -0.05, None, 'MB/s') + }, + 'dom:mc': { + 'bandwidth': (130000, -0.05, None, 'MB/s') + }, + } + + +@rfm.required_version('>=2.16-dev0') +@rfm.simple_test +class CPUMemoryBandwidthWrite(CPUMemoryBandwidth): + def __init__(self): + super().__init__() + + self.descr = 'CPU -> main memory write benchmark' + # the kernel to run in likwid + self.kernel_name = "store_mem_avx" + + self.reference = { + 'daint:gpu': { + 'bandwidth': (40000, -0.05, None, 'MB/s') + }, + 'daint:mc': { + 'bandwidth': (80000, -0.05, None, 'MB/s') + }, + 'dom:gpu': { + 'bandwidth': (40000, -0.05, None, 'MB/s') + }, + 'dom:mc': { + 'bandwidth': (80000, -0.05, None, 'MB/s') + }, + } + + +@rfm.required_version('>=2.16-dev0') +@rfm.simple_test +class CPUMemoryBandwidthCrossSocket(BandwidthBase): + def __init__(self): + super().__init__() + + self.descr = 'CPU S0 <- main memory S1 
read' + 'CPU S1 <- main memory S0 read' + + self.valid_systems = ['daint:mc', 'dom:mc'] + + self.kernel_name = "load_avx" + + self.reference = { + 'daint:mc': { + 'bandwidth': (56000, -0.05, None, 'MB/s') + }, + 'dom:mc': { + 'bandwidth': (56000, -0.05, None, 'MB/s') + }, + } + + def setup(self, partition, environ, **job_opts): + + self.num_cpus_per_task = self.system_num_cpus[partition.fullname] + numa_domains = self.system_numa_domains[partition.fullname] + + num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * + self.num_tasks_per_core) + + # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0' + # format: + # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain + # chunk_size and stride affect which cpus from are selected + workgroups = ' '.join(['-w %s:100MB:%d:1:2-0:%s' % + (dom_cpu, num_cpu_dom, dom_mem) + for dom_cpu, dom_mem in + zip(numa_domains[:2], + reversed(numa_domains[:2]))]) + + self.executable_opts = ['-t %s' % self.kernel_name, workgroups] + + super().setup(partition, environ, **job_opts) From e91bc47c461bcc420f1c206cf8c29388a4181753 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Sun, 13 Jan 2019 10:00:41 +0100 Subject: [PATCH 2/8] added parameterized test --- cscs-checks/microbenchmarks/likwid/likwid.py | 74 ++++++++++++++------ 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index 1e2067eb8a..d18e22887b 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -32,6 +32,18 @@ def __init__(self): 'dom:gpu': ['S0'], } + # Test each level at half capacity times nthreads per domain + self.system_cache_sizes = { + 'daint:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', + 'memory': '100MB'}, + 'daint:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', + 'memory': '100MB'}, + 'dom:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', + 'memory': '100MB'}, + 
'dom:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', + 'memory': '100MB'}, + } + self.maintainers = ['SK'] self.tags = {'diagnostic'} @@ -50,7 +62,7 @@ def setup(self, partition, environ, **job_opts): '\'-g csstaff -o sebkelle\'' % self.stagedir] -class CPUMemoryBandwidth(BandwidthBase): +class AllCores(BandwidthBase): def __init__(self): super().__init__() @@ -66,7 +78,8 @@ def setup(self, partition, environ, **job_opts): # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' # format: -w domain:data_size:nthreads:chunk_size:stride # chunk_size and stride affect which cpus from are selected - workgroups = ' '.join(['-w %s:100MB:%d:1:2' % (dom, num_cpu_dom) + workgroups = ' '.join(['-w %s:%s:%d:1:2' % + (dom, self.data_size, num_cpu_dom) for dom in numa_domains]) self.executable_opts = ['-t %s' % self.kernel_name, workgroups] @@ -76,75 +89,90 @@ def setup(self, partition, environ, **job_opts): @rfm.required_version('>=2.16-dev0') @rfm.simple_test -class CPUMemoryBandwidthRead(CPUMemoryBandwidth): +class CPUBandwidthWrite(AllCores): def __init__(self): super().__init__() - self.descr = 'CPU <- main memory read benchmark' + self.descr = 'CPU -> main memory write benchmark' # the kernel to run in likwid - self.kernel_name = "load_avx" + self.kernel_name = 'store_mem_avx' + self.data_size = '100MB' self.reference = { 'daint:gpu': { - 'bandwidth': (65000, -0.05, None, 'MB/s') + 'bandwidth': (40000, -0.1, None, 'MB/s') }, 'daint:mc': { - 'bandwidth': (130000, -0.05, None, 'MB/s') + 'bandwidth': (80000, -0.1, None, 'MB/s') }, 'dom:gpu': { - 'bandwidth': (65000, -0.05, None, 'MB/s') + 'bandwidth': (40000, -0.1, None, 'MB/s') }, 'dom:mc': { - 'bandwidth': (130000, -0.05, None, 'MB/s') + 'bandwidth': (80000, -0.1, None, 'MB/s') }, } @rfm.required_version('>=2.16-dev0') -@rfm.simple_test -class CPUMemoryBandwidthWrite(CPUMemoryBandwidth): - def __init__(self): +@rfm.parameterized_test(['L1'], ['L2'], ['L3'], ['memory']) +class CPUBandwidth(AllCores): + def 
__init__(self, mem_level): super().__init__() - self.descr = 'CPU -> main memory write benchmark' + self.descr = 'CPU <- %s read benchmark' % mem_level + # the kernel to run in likwid - self.kernel_name = "store_mem_avx" + self.kernel_name = 'load_avx' + self.ml = mem_level + + self.refs = { + 'mc': {'L1': 5100000, 'L2': 2100000, 'L3': 900000, + 'memory': 130000}, + 'gpu': {'L1': 2100000, 'L2': 900000, 'L3': 360000, + 'memory': 65000}, + } self.reference = { 'daint:gpu': { - 'bandwidth': (40000, -0.05, None, 'MB/s') + 'bandwidth': (self.refs['gpu'][mem_level], -0.1, None, 'MB/s') }, 'daint:mc': { - 'bandwidth': (80000, -0.05, None, 'MB/s') + 'bandwidth': (self.refs['mc'][mem_level], -0.1, None, 'MB/s') }, 'dom:gpu': { - 'bandwidth': (40000, -0.05, None, 'MB/s') + 'bandwidth': (self.refs['gpu'][mem_level], -0.1, None, 'MB/s') }, 'dom:mc': { - 'bandwidth': (80000, -0.05, None, 'MB/s') + 'bandwidth': (self.refs['mc'][mem_level], -0.1, None, 'MB/s') }, } + def setup(self, partition, environ, **job_opts): + self.data_size = self.system_cache_sizes[partition.fullname][self.ml] + + super().setup(partition, environ, **job_opts) + @rfm.required_version('>=2.16-dev0') @rfm.simple_test -class CPUMemoryBandwidthCrossSocket(BandwidthBase): +class CPUBandwidthCrossSocket(BandwidthBase): def __init__(self): super().__init__() self.descr = 'CPU S0 <- main memory S1 read' - 'CPU S1 <- main memory S0 read' + ' CPU S1 <- main memory S0 read' self.valid_systems = ['daint:mc', 'dom:mc'] - self.kernel_name = "load_avx" + self.kernel_name = 'load_avx' self.reference = { 'daint:mc': { - 'bandwidth': (56000, -0.05, None, 'MB/s') + 'bandwidth': (56000, -0.1, None, 'MB/s') }, 'dom:mc': { - 'bandwidth': (56000, -0.05, None, 'MB/s') + 'bandwidth': (56000, -0.1, None, 'MB/s') }, } From 63832804f1b27a154aad23f90b96aca5c2cc18cf Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Sun, 13 Jan 2019 13:51:19 +0100 Subject: [PATCH 3/8] extended parametrization to stores --- 
cscs-checks/microbenchmarks/likwid/likwid.py | 113 ++++++++----------- 1 file changed, 45 insertions(+), 68 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index d18e22887b..9747c1ecac 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -35,13 +35,13 @@ def __init__(self): # Test each level at half capacity times nthreads per domain self.system_cache_sizes = { 'daint:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', - 'memory': '100MB'}, + 'memory': '1800MB'}, 'daint:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', - 'memory': '100MB'}, + 'memory': '1200MB'}, 'dom:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', - 'memory': '100MB'}, + 'memory': '1800MB'}, 'dom:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', - 'memory': '100MB'}, + 'memory': '1200MB'}, } self.maintainers = ['SK'] @@ -62,95 +62,72 @@ def setup(self, partition, environ, **job_opts): '\'-g csstaff -o sebkelle\'' % self.stagedir] -class AllCores(BandwidthBase): - def __init__(self): - super().__init__() - - self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc'] - - def setup(self, partition, environ, **job_opts): - - self.num_cpus_per_task = self.system_num_cpus[partition.fullname] - numa_domains = self.system_numa_domains[partition.fullname] - - num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * - self.num_tasks_per_core) - # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' - # format: -w domain:data_size:nthreads:chunk_size:stride - # chunk_size and stride affect which cpus from are selected - workgroups = ' '.join(['-w %s:%s:%d:1:2' % - (dom, self.data_size, num_cpu_dom) - for dom in numa_domains]) - - self.executable_opts = ['-t %s' % self.kernel_name, workgroups] - - super().setup(partition, environ, **job_opts) - - @rfm.required_version('>=2.16-dev0') -@rfm.simple_test -class CPUBandwidthWrite(AllCores): - def __init__(self): 
+@rfm.parameterized_test(*[[l,k] for l in ['L1', 'L2', 'L3'] + for k in ['load_avx', 'store_avx']], + ['memory', 'load_avx'], + ['memory', 'store_mem_avx']) +class CPUBandwidth(BandwidthBase): + def __init__(self, mem_level, kernel_name): super().__init__() - self.descr = 'CPU -> main memory write benchmark' - # the kernel to run in likwid - self.kernel_name = 'store_mem_avx' - self.data_size = '100MB' + self.descr = 'CPU <- %s %s benchmark' % (mem_level, kernel_name) - self.reference = { - 'daint:gpu': { - 'bandwidth': (40000, -0.1, None, 'MB/s') - }, - 'daint:mc': { - 'bandwidth': (80000, -0.1, None, 'MB/s') - }, - 'dom:gpu': { - 'bandwidth': (40000, -0.1, None, 'MB/s') - }, - 'dom:mc': { - 'bandwidth': (80000, -0.1, None, 'MB/s') - }, - } - - -@rfm.required_version('>=2.16-dev0') -@rfm.parameterized_test(['L1'], ['L2'], ['L3'], ['memory']) -class CPUBandwidth(AllCores): - def __init__(self, mem_level): - super().__init__() - - self.descr = 'CPU <- %s read benchmark' % mem_level + self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc'] # the kernel to run in likwid - self.kernel_name = 'load_avx' + self.kernel_name = kernel_name self.ml = mem_level self.refs = { - 'mc': {'L1': 5100000, 'L2': 2100000, 'L3': 900000, - 'memory': 130000}, - 'gpu': {'L1': 2100000, 'L2': 900000, 'L3': 360000, - 'memory': 65000}, + 'mc': { + 'load_avx': {'L1': 5100000, 'L2': 2000000, 'L3': 900000, + 'memory': 130000}, + 'store_avx': {'L1': 2800000, 'L2': 900000, 'L3': 480000}, + 'store_mem_avx': {'memory': 85000}, + }, + 'gpu': { + 'load_avx': {'L1': 2100000, 'L2': 850000, 'L3': 360000, + 'memory': 65000}, + 'store_avx': {'L1': 1200000, 'L2': 340000, 'L3': 210000}, + 'store_mem_avx': {'memory': 42500}, + } } + ref_proxy = {part: self.refs[part][kernel_name][mem_level] + for part in self.refs.keys()} self.reference = { 'daint:gpu': { - 'bandwidth': (self.refs['gpu'][mem_level], -0.1, None, 'MB/s') + 'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s') }, 'daint:mc': { - 
'bandwidth': (self.refs['mc'][mem_level], -0.1, None, 'MB/s') + 'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s') }, 'dom:gpu': { - 'bandwidth': (self.refs['gpu'][mem_level], -0.1, None, 'MB/s') + 'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s') }, 'dom:mc': { - 'bandwidth': (self.refs['mc'][mem_level], -0.1, None, 'MB/s') + 'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s') }, } def setup(self, partition, environ, **job_opts): self.data_size = self.system_cache_sizes[partition.fullname][self.ml] + self.num_cpus_per_task = self.system_num_cpus[partition.fullname] + numa_domains = self.system_numa_domains[partition.fullname] + + num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * + self.num_tasks_per_core) + # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' + # format: -w domain:data_size:nthreads:chunk_size:stride + # chunk_size and stride affect which cpus from are selected + workgroups = ' '.join(['-w %s:%s:%d:1:2' % + (dom, self.data_size, num_cpu_dom) + for dom in numa_domains]) + + self.executable_opts = ['-t %s' % self.kernel_name, workgroups] + super().setup(partition, environ, **job_opts) From 37ca5f96399e7913522a9e7875600c47f0e88cd4 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Sun, 13 Jan 2019 14:35:07 +0100 Subject: [PATCH 4/8] string description formatted --- cscs-checks/microbenchmarks/likwid/likwid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index 9747c1ecac..a51fa6e0da 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -137,8 +137,8 @@ class CPUBandwidthCrossSocket(BandwidthBase): def __init__(self): super().__init__() - self.descr = 'CPU S0 <- main memory S1 read' - ' CPU S1 <- main memory S0 read' + self.descr = ("CPU S0 <- main memory S1 read " + + "CPU S1 <- main memory S0 read") self.valid_systems = ['daint:mc', 'dom:mc'] From 
c7344f92582995b4d2b374d51c03bc346e207d44 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Mon, 21 Jan 2019 15:45:06 +0100 Subject: [PATCH 5/8] use installed likwid rather than compile per test --- cscs-checks/microbenchmarks/likwid/likwid.py | 60 +++++++------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index a51fa6e0da..f10a5a47ed 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -2,22 +2,17 @@ import reframe.utility.sanity as sn -class BandwidthBase(rfm.RegressionTest): +class MemBandwidthTest(rfm.RunOnlyRegressionTest): def __init__(self): super().__init__() + self.modules = ['likwid'] self.valid_prog_environs = ['PrgEnv-gnu'] - self.build_system = 'Make' - self.build_system.flags_from_environ = False - self.sourcesdir = 'https://github.com/RRZE-HPC/likwid.git' - self.variables = { - 'LD_LIBRARY_PATH': './lib:$LD_LIBRARY_PATH', - 'PATH': './bin:./sbin:$PATH' - } + self.sourcesdir = None - self.executable = 'bin/likwid-bench' + self.executable = 'likwid-bench' - self.num_tasks = 1 + self.num_tasks = 0 self.num_tasks_per_core = 2 self.system_num_cpus = { 'daint:mc': 72, @@ -45,7 +40,7 @@ def __init__(self): } self.maintainers = ['SK'] - self.tags = {'diagnostic'} + self.tags = {'benchmark', 'diagnostic'} bw_pattern = sn.extractsingle(r'MByte/s:\s*(?P<bw>\S+)', self.stdout, 'bw', float) @@ -55,29 +50,21 @@ def __init__(self): 'bandwidth': bw_pattern } - def setup(self, partition, environ, **job_opts): - super().setup(partition, environ, **job_opts) - - self.postbuild_cmd = ['make install PREFIX=%s INSTALL_CHOWN=' - '\'-g csstaff -o sebkelle\'' % self.stagedir] - - @rfm.required_version('>=2.16-dev0') @rfm.parameterized_test(*[[l,k] for l in ['L1', 'L2', 'L3'] for k in ['load_avx', 'store_avx']], ['memory', 'load_avx'], ['memory', 'store_mem_avx']) -class CPUBandwidth(BandwidthBase): 
+class CPUBandwidth(MemBandwidthTest): def __init__(self, mem_level, kernel_name): super().__init__() self.descr = 'CPU <- %s %s benchmark' % (mem_level, kernel_name) - self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc'] # the kernel to run in likwid self.kernel_name = kernel_name - self.ml = mem_level + self.mem_level = mem_level self.refs = { 'mc': { @@ -112,28 +99,26 @@ def __init__(self, mem_level, kernel_name): } def setup(self, partition, environ, **job_opts): - self.data_size = self.system_cache_sizes[partition.fullname][self.ml] - + self.data_size = self.system_cache_sizes[partition.fullname][self.mem_level] self.num_cpus_per_task = self.system_num_cpus[partition.fullname] numa_domains = self.system_numa_domains[partition.fullname] - - num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * + num_cpu_domain = self.num_cpus_per_task / (len(numa_domains) * self.num_tasks_per_core) # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' # format: -w domain:data_size:nthreads:chunk_size:stride # chunk_size and stride affect which cpus from are selected - workgroups = ' '.join(['-w %s:%s:%d:1:2' % - (dom, self.data_size, num_cpu_dom) - for dom in numa_domains]) + workgroups = ['-w %s:%s:%d:1:2' % + (dom, self.data_size, num_cpu_domain) + for dom in numa_domains] - self.executable_opts = ['-t %s' % self.kernel_name, workgroups] + self.executable_opts = ['-t %s' % self.kernel_name] + workgroups super().setup(partition, environ, **job_opts) @rfm.required_version('>=2.16-dev0') @rfm.simple_test -class CPUBandwidthCrossSocket(BandwidthBase): +class CPUBandwidthCrossSocket(MemBandwidthTest): def __init__(self): super().__init__() @@ -141,9 +126,7 @@ def __init__(self): "CPU S1 <- main memory S0 read") self.valid_systems = ['daint:mc', 'dom:mc'] - self.kernel_name = 'load_avx' - self.reference = { 'daint:mc': { 'bandwidth': (56000, -0.1, None, 'MB/s') @@ -158,19 +141,18 @@ def setup(self, partition, environ, **job_opts): 
self.num_cpus_per_task = self.system_num_cpus[partition.fullname] numa_domains = self.system_numa_domains[partition.fullname] - num_cpu_dom = self.num_cpus_per_task / (len(numa_domains) * + num_cpu_domain = self.num_cpus_per_task / (len(numa_domains) * self.num_tasks_per_core) # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0' # format: # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain # chunk_size and stride affect which cpus from are selected - workgroups = ' '.join(['-w %s:100MB:%d:1:2-0:%s' % - (dom_cpu, num_cpu_dom, dom_mem) - for dom_cpu, dom_mem in - zip(numa_domains[:2], - reversed(numa_domains[:2]))]) + workgroups = ['-w %s:100MB:%d:1:2-0:%s' % + (dom_cpu, num_cpu_domain, dom_mem) + for dom_cpu, dom_mem in + zip(numa_domains[:2], reversed(numa_domains[:2]))] - self.executable_opts = ['-t %s' % self.kernel_name, workgroups] + self.executable_opts = ['-t %s' % self.kernel_name] + workgroups super().setup(partition, environ, **job_opts) From 7421fbcc08d06a9e69d2743faefc0f589f842a06 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Fri, 25 Jan 2019 11:42:00 +0100 Subject: [PATCH 6/8] pep8 speaks --- cscs-checks/microbenchmarks/likwid/likwid.py | 22 +++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index f10a5a47ed..5fc4d18f28 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -50,8 +50,9 @@ def __init__(self): 'bandwidth': bw_pattern } + @rfm.required_version('>=2.16-dev0') -@rfm.parameterized_test(*[[l,k] for l in ['L1', 'L2', 'L3'] +@rfm.parameterized_test(*[[l, k] for l in ['L1', 'L2', 'L3'] for k in ['load_avx', 'store_avx']], ['memory', 'load_avx'], ['memory', 'store_mem_avx']) @@ -99,17 +100,18 @@ def __init__(self, mem_level, kernel_name): } def setup(self, partition, environ, **job_opts): - self.data_size = 
self.system_cache_sizes[partition.fullname][self.mem_level] + pfn = partition.fullname + self.data_size = self.system_cache_sizes[pfn][self.mem_level] self.num_cpus_per_task = self.system_num_cpus[partition.fullname] numa_domains = self.system_numa_domains[partition.fullname] num_cpu_domain = self.num_cpus_per_task / (len(numa_domains) * - self.num_tasks_per_core) + self.num_tasks_per_core) # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2' # format: -w domain:data_size:nthreads:chunk_size:stride # chunk_size and stride affect which cpus from are selected workgroups = ['-w %s:%s:%d:1:2' % - (dom, self.data_size, num_cpu_domain) - for dom in numa_domains] + (dom, self.data_size, num_cpu_domain) + for dom in numa_domains] self.executable_opts = ['-t %s' % self.kernel_name] + workgroups @@ -141,17 +143,17 @@ def setup(self, partition, environ, **job_opts): self.num_cpus_per_task = self.system_num_cpus[partition.fullname] numa_domains = self.system_numa_domains[partition.fullname] - num_cpu_domain = self.num_cpus_per_task / (len(numa_domains) * - self.num_tasks_per_core) + num_cpu_domain = (self.num_cpus_per_task / (len(numa_domains) * + self.num_tasks_per_core)) # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0' # format: # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain # chunk_size and stride affect which cpus from are selected workgroups = ['-w %s:100MB:%d:1:2-0:%s' % - (dom_cpu, num_cpu_domain, dom_mem) - for dom_cpu, dom_mem in - zip(numa_domains[:2], reversed(numa_domains[:2]))] + (dom_cpu, num_cpu_domain, dom_mem) + for dom_cpu, dom_mem in + zip(numa_domains[:2], reversed(numa_domains[:2]))] self.executable_opts = ['-t %s' % self.kernel_name] + workgroups From ef3e03f4f7d8037743754023f52ea751a0d1db39 Mon Sep 17 00:00:00 2001 From: Sebastian Keller Date: Fri, 25 Jan 2019 11:43:54 +0100 Subject: [PATCH 7/8] pep8 drones on --- cscs-checks/microbenchmarks/likwid/likwid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index 5fc4d18f28..c759be5d66 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -110,8 +110,8 @@ def setup(self, partition, environ, **job_opts): # format: -w domain:data_size:nthreads:chunk_size:stride # chunk_size and stride affect which cpus from are selected workgroups = ['-w %s:%s:%d:1:2' % - (dom, self.data_size, num_cpu_domain) - for dom in numa_domains] + (dom, self.data_size, num_cpu_domain) + for dom in numa_domains] self.executable_opts = ['-t %s' % self.kernel_name] + workgroups From a57a88098b60bc59932654a1743930a4b5b80a17 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 25 Jan 2019 13:29:55 +0100 Subject: [PATCH 8/8] Fix coding style --- cscs-checks/microbenchmarks/likwid/likwid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cscs-checks/microbenchmarks/likwid/likwid.py b/cscs-checks/microbenchmarks/likwid/likwid.py index c759be5d66..19ea001dd9 100644 --- a/cscs-checks/microbenchmarks/likwid/likwid.py +++ b/cscs-checks/microbenchmarks/likwid/likwid.py @@ -53,7 +53,7 @@ def __init__(self): @rfm.required_version('>=2.16-dev0') @rfm.parameterized_test(*[[l, k] for l in ['L1', 'L2', 'L3'] - for k in ['load_avx', 'store_avx']], + for k in ['load_avx', 'store_avx']], ['memory', 'load_avx'], ['memory', 'store_mem_avx']) class CPUBandwidth(MemBandwidthTest): @@ -111,7 +111,7 @@ def setup(self, partition, environ, **job_opts): # chunk_size and stride affect which cpus from are selected workgroups = ['-w %s:%s:%d:1:2' % (dom, self.data_size, num_cpu_domain) - for dom in numa_domains] + for dom in numa_domains] self.executable_opts = ['-t %s' % self.kernel_name] + workgroups @@ -143,8 +143,8 @@ def setup(self, partition, environ, **job_opts): self.num_cpus_per_task = self.system_num_cpus[partition.fullname] numa_domains = 
self.system_numa_domains[partition.fullname] - num_cpu_domain = (self.num_cpus_per_task / (len(numa_domains) * - self.num_tasks_per_core)) + num_cpu_domain = (self.num_cpus_per_task / + (len(numa_domains) * self.num_tasks_per_core)) # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0' # format: @@ -152,8 +152,8 @@ def setup(self, partition, environ, **job_opts): # chunk_size and stride affect which cpus from are selected workgroups = ['-w %s:100MB:%d:1:2-0:%s' % (dom_cpu, num_cpu_domain, dom_mem) - for dom_cpu, dom_mem in - zip(numa_domains[:2], reversed(numa_domains[:2]))] + for dom_cpu, dom_mem in + zip(numa_domains[:2], reversed(numa_domains[:2]))] self.executable_opts = ['-t %s' % self.kernel_name] + workgroups