From 0c19758cd6ca3c809fe28980dd456c5595ab5f6c Mon Sep 17 00:00:00 2001 From: ajocksch Date: Fri, 7 Jun 2019 10:58:06 +0200 Subject: [PATCH 1/4] MCH halo cell exchange generic --- cscs-checks/mch/collectives_halo.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 9a2fd30d88..d89d400b38 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -63,7 +63,15 @@ def __init__(self, variant, bench_reference): else: sysname = self.current_system.name - ref = bench_reference[sysname][variant] + try: + ref = bench_reference[sysname][variant] + except: + ref = { + 'nocomm': 0, + 'nocomp': 0, + 'default': 0 + } + self.reference = { 'kesch:cn': { 'elapsed_time': (ref, None, 0.15) @@ -73,6 +81,9 @@ def __init__(self, variant, bench_reference): }, 'dom': { 'elapsed_time': (ref, None, 0.15) + }, + '*': { + 'elapsed_time': (0, None, None) } } From 4e8fc4f7fe4657b81dc0182b014ff29cbd3ae7f4 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Fri, 7 Jun 2019 11:31:20 +0200 Subject: [PATCH 2/4] improved code style --- cscs-checks/mch/collectives_halo.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index d89d400b38..9982086fbb 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -65,12 +65,8 @@ def __init__(self, variant, bench_reference): try: ref = bench_reference[sysname][variant] - except: - ref = { - 'nocomm': 0, - 'nocomp': 0, - 'default': 0 - } + except KeyError: + ref = {} self.reference = { 'kesch:cn': { From 5ef64616309895c117707accd2e0e60107294c62 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Tue, 11 Jun 2019 12:08:14 +0200 Subject: [PATCH 3/4] halo exchange more generic --- cscs-checks/mch/collectives_halo.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index 9982086fbb..e662f62a52 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -30,7 +30,7 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_37"' ] self.build_system.max_concurrency = 1 - else: + elif self.current_system.name in {'daint', 'dom'}: self.num_tasks = 4 self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 @@ -40,6 +40,16 @@ def __init__(self, variant, bench_reference): '-DCUDA_COMPUTE_CAPABILITY="sm_60"' ] self.build_system.max_concurrency = 8 + else: + self.num_tasks = 4 + self.num_gpus_per_node = 1 + self.num_tasks_per_node = 1 + self.variables['MPICH_RDMA_ENABLED_CUDA'] = '1' + self.variables['MV2_USE_CUDA'] = '1' + self.build_system.config_opts += [ + '-DCUDA_COMPUTE_CAPABILITY="sm_37"' + ] + self.build_system.max_concurrency = 1 self.sanity_patterns = sn.assert_found(r'ELAPSED TIME:', self.stdout) self.perf_patterns = { @@ -66,7 +76,7 @@ def __init__(self, variant, bench_reference): try: ref = bench_reference[sysname][variant] except KeyError: - ref = {} + ref = 0.0 self.reference = { 'kesch:cn': { @@ -79,7 +89,7 @@ def __init__(self, variant, bench_reference): 'elapsed_time': (ref, None, 0.15) }, '*': { - 'elapsed_time': (0, None, None) + 'elapsed_time': (ref, None, None) } } From 081d1abcfcf722dde238ef42678956aefc086745 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Wed, 12 Jun 2019 10:02:12 +0200 Subject: [PATCH 4/4] revised generic case --- cscs-checks/mch/collectives_halo.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cscs-checks/mch/collectives_halo.py b/cscs-checks/mch/collectives_halo.py index e662f62a52..018236660c 100644 --- a/cscs-checks/mch/collectives_halo.py +++ b/cscs-checks/mch/collectives_halo.py @@ -44,11 +44,6 @@ def __init__(self, variant, bench_reference): self.num_tasks = 4 self.num_gpus_per_node = 1 self.num_tasks_per_node = 1 - self.variables['MPICH_RDMA_ENABLED_CUDA'] = '1' - self.variables['MV2_USE_CUDA'] = '1' - self.build_system.config_opts += [ - '-DCUDA_COMPUTE_CAPABILITY="sm_37"' - ] self.build_system.max_concurrency = 1 self.sanity_patterns = sn.assert_found(r'ELAPSED TIME:', self.stdout)