From e971591d68325bff35c74e9557f3ad6a85ec0aed Mon Sep 17 00:00:00 2001
From: ajocksch <andreas.jocksch@cscs.ch>
Date: Mon, 26 Aug 2019 17:42:06 +0200
Subject: [PATCH 1/5] WIP: flexible alltoall OSU benchmark

---
 cscs-checks/microbenchmarks/osu/osu_tests.py | 118 ++++++++-----------
 1 file changed, 49 insertions(+), 69 deletions(-)
diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
index 04b414283f..dfcb51e2cd 100644
--- a/cscs-checks/microbenchmarks/osu/osu_tests.py
+++ b/cscs-checks/microbenchmarks/osu/osu_tests.py
@@ -3,9 +3,9 @@
 
 
 @rfm.required_version('>=2.16')
-@rfm.parameterized_test(['production'])
+@rfm.simple_test
 class AlltoallTest(rfm.RegressionTest):
-    def __init__(self, variant):
+    def __init__(self):
         super().__init__()
         self.strict_check = False
         self.valid_systems = ['daint:gpu', 'dom:gpu']
@@ -25,13 +25,13 @@ def __init__(self, variant):
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
                                         self.stdout, 'latency', float)
         }
-        self.tags = {variant, 'benchmark'}
+        self.tags = {'production', 'benchmark'}
         self.reference = {
             'dom:gpu': {
-                'latency': (8.23, None, 0.1, 'us')
+                'latency': (1.31, None, 0.1, 'us')
             },
             'daint:gpu': {
-                'latency': (20.73, None, 2.0, 'us')
+                'latency': (1.31, None, 2.0, 'us')
             },
             '*': {
                 'latency': (0, None, None, 'us')
@@ -39,10 +39,7 @@ def __init__(self, variant):
         }
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
-        if self.current_system.name == 'daint':
-            self.num_tasks = 16
-        else:
-            self.num_tasks = 6
+        self.num_tasks = 0
 
         self.extra_resources = {
             'switches': {
@@ -51,6 +48,21 @@ def __init__(self, variant):
         }
 
 
+    @property
+    @sn.sanity_function
+    def num_tasks_assigned(self):
+        return self.job.num_tasks
+
+    def setup(self, partition, environ, **job_opts):
+        num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
+        self.perf_patterns = {
+            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
+                self.stdout, 'latency', float) / num_nodes
+        }
+
+        super().setup(partition, environ, **job_opts)
+
+
 @rfm.simple_test
 class FlexAlltoallTest(rfm.RegressionTest):
     def __init__(self):
@@ -76,14 +88,12 @@ def __init__(self):
 
 
 @rfm.required_version('>=2.16')
-@rfm.parameterized_test(['small'], ['large'])
+@rfm.simple_test
 class AllreduceTest(rfm.RegressionTest):
-    def __init__(self, variant):
+    def __init__(self):
         super().__init__()
         self.strict_check = False
-        self.valid_systems = ['daint:gpu', 'daint:mc']
-        if variant == 'small':
-            self.valid_systems += ['dom:gpu', 'dom:mc']
+        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
 
         self.descr = 'Allreduce OSU microbenchmark'
         self.build_system = 'Make'
@@ -95,40 +105,22 @@ def __init__(self, variant):
         self.valid_prog_environs = ['PrgEnv-gnu']
         self.maintainers = ['RS', 'VK']
         self.sanity_patterns = sn.assert_found(r'^8', self.stdout)
-        self.perf_patterns = {
-            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
-                                        self.stdout, 'latency', float)
-        }
         self.tags = {'production', 'benchmark'}
-        if variant == 'small':
-            self.num_tasks = 6
-            self.reference = {
-                'dom:gpu': {
-                    'latency': (6.0, None, 0.10, 'us')
-                },
-                'daint:gpu': {
-                    'latency': (7.81, None, 0.25, 'us')
-                },
-                'daint:mc': {
-                    'latency': (8.79, None, 0.25, 'us')
-                },
-                '*': {
-                    'latency': (0, None, None, 'us')
-                }
-            }
-        else:
-            self.num_tasks = 16
-            self.reference = {
-                'daint:gpu': {
-                    'latency': (16.87, None, 0.40, 'us')
-                },
-                'daint:mc': {
-                    'latency': (10.85, None, 0.20, 'us')
-                },
-                '*': {
-                    'latency': (0, None, None, 'us')
-                }
+        self.num_tasks = 0
+        self.reference = {
+            'dom:gpu': {
+                'latency': (1.0, None, 0.10, 'us')
+            },
+            'daint:gpu': {
+                'latency': (1.302, None, 0.40, 'us')
+            },
+            'daint:mc': {
+                'latency': (1.456, None, 0.20, 'us')
+            },
+            '*': {
+                'latency': (0, None, None, 'us')
             }
+        }
 
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
@@ -138,32 +130,20 @@ def __init__(self, variant):
             }
         }
 
+    @property
+    @sn.sanity_function
+    def num_tasks_assigned(self):
+        return self.job.num_tasks
 
-# FIXME: This test is obsolete; it is kept only for reference.
-@rfm.parameterized_test(*({'num_tasks': i} for i in range(2, 10, 2)))
-class AlltoallMonchAcceptanceTest(AlltoallTest):
-    def __init__(self, num_tasks):
-        super().__init__('monch_acceptance')
-        self.valid_systems = ['monch:compute']
-        self.num_tasks = num_tasks
-        reference_by_node = {
-            2: {
-                'perf': (2.71, None, 0.1)
-            },
-            4: {
-                'perf': (3.75, None, 0.1)
-            },
-            6: {
-                'perf': (6.28, None, 0.1)
-            },
-            8: {
-                'perf': (8.15, None, 0.1)
-            },
-        }
-        self.reference = {
-            'monch:compute': reference_by_node[self.num_tasks]
+    def setup(self, partition, environ, **job_opts):
+        num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
+        self.perf_patterns = {
+            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
+                self.stdout, 'latency', float) / num_nodes
         }
 
+        super().setup(partition, environ, **job_opts)
+
 
 class P2PBaseTest(rfm.RegressionTest):
     def __init__(self):

From f98fe29b9f4847d6b2c563f46a90ee226bc08ab3 Mon Sep 17 00:00:00 2001
From: ajocksch <andreas.jocksch@cscs.ch>
Date: Mon, 26 Aug 2019 17:52:00 +0200
Subject: [PATCH 2/5] pep8

---
 cscs-checks/microbenchmarks/osu/osu_tests.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
index dfcb51e2cd..8da0ee9b14 100644
--- a/cscs-checks/microbenchmarks/osu/osu_tests.py
+++ b/cscs-checks/microbenchmarks/osu/osu_tests.py
@@ -47,7 +47,6 @@ def __init__(self):
             }
         }
 
-
     @property
     @sn.sanity_function
     def num_tasks_assigned(self):
@@ -57,7 +56,7 @@ def setup(self, partition, environ, **job_opts):
         num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
         self.perf_patterns = {
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
-                self.stdout, 'latency', float) / num_nodes
+                           self.stdout, 'latency', float) / num_nodes
         }
 
         super().setup(partition, environ, **job_opts)
@@ -139,7 +138,7 @@ def setup(self, partition, environ, **job_opts):
         num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
         self.perf_patterns = {
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
-                self.stdout, 'latency', float) / num_nodes
+                           self.stdout, 'latency', float) / num_nodes
         }
 
         super().setup(partition, environ, **job_opts)

From df57a82a5bfdec61660fb2ef2da43cebe6d0f830 Mon Sep 17 00:00:00 2001
From: ajocksch <andreas.jocksch@cscs.ch>
Date: Fri, 6 Sep 2019 11:22:27 +0200
Subject: [PATCH 3/5] perf_patterns in flexible alltoall

---
 cscs-checks/microbenchmarks/osu/osu_tests.py | 127 ++++++++++++-------
 1 file changed, 79 insertions(+), 48 deletions(-)

diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
index 8da0ee9b14..f0991fd627 100644
--- a/cscs-checks/microbenchmarks/osu/osu_tests.py
+++ b/cscs-checks/microbenchmarks/osu/osu_tests.py
@@ -3,9 +3,9 @@
 
 
 @rfm.required_version('>=2.16')
-@rfm.simple_test
+@rfm.parameterized_test(['production'])
 class AlltoallTest(rfm.RegressionTest):
-    def __init__(self):
+    def __init__(self, variant):
         super().__init__()
         self.strict_check = False
         self.valid_systems = ['daint:gpu', 'dom:gpu']
@@ -25,13 +25,13 @@ def __init__(self):
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
                                         self.stdout, 'latency', float)
         }
-        self.tags = {'production', 'benchmark'}
+        self.tags = {variant, 'benchmark'}
         self.reference = {
             'dom:gpu': {
-                'latency': (1.31, None, 0.1, 'us')
+                'latency': (8.23, None, 0.1, 'us')
             },
             'daint:gpu': {
-                'latency': (1.31, None, 2.0, 'us')
+                'latency': (20.73, None, 2.0, 'us')
             },
             '*': {
                 'latency': (0, None, None, 'us')
@@ -39,7 +39,10 @@ def __init__(self):
         }
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
-        self.num_tasks = 0
+        if self.current_system.name == 'daint':
+            self.num_tasks = 16
+        else:
+            self.num_tasks = 6
 
         self.extra_resources = {
             'switches': {
@@ -47,21 +50,8 @@ def __init__(self):
             }
         }
 
-    @property
-    @sn.sanity_function
-    def num_tasks_assigned(self):
-        return self.job.num_tasks
-
-    def setup(self, partition, environ, **job_opts):
-        num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
-        self.perf_patterns = {
-            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
-                           self.stdout, 'latency', float) / num_nodes
-        }
-
-        super().setup(partition, environ, **job_opts)
-
 
+@rfm.required_version('>=2.18')
 @rfm.simple_test
 class FlexAlltoallTest(rfm.RegressionTest):
     def __init__(self):
@@ -83,16 +73,27 @@ def __init__(self):
         self.num_tasks_per_node = 1
         self.num_tasks = 0
         self.sanity_patterns = sn.assert_found(r'^1048576', self.stdout)
+        self.perf_patterns = {
+            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
+                                        self.stdout, 'latency', float)
+        }
+        self.reference = {
+            '*': {
+                'latency': (0, None, None, 'us')
+            },
+        }
         self.tags = {'diagnostic', 'ops', 'benchmark'}
 
 
 @rfm.required_version('>=2.16')
-@rfm.simple_test
+@rfm.parameterized_test(['small'], ['large'])
 class AllreduceTest(rfm.RegressionTest):
-    def __init__(self):
+    def __init__(self, variant):
         super().__init__()
         self.strict_check = False
-        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
+        self.valid_systems = ['daint:gpu', 'daint:mc']
+        if variant == 'small':
+            self.valid_systems += ['dom:gpu', 'dom:mc']
 
         self.descr = 'Allreduce OSU microbenchmark'
         self.build_system = 'Make'
@@ -104,22 +105,40 @@ def __init__(self):
         self.valid_prog_environs = ['PrgEnv-gnu']
         self.maintainers = ['RS', 'VK']
         self.sanity_patterns = sn.assert_found(r'^8', self.stdout)
+        self.perf_patterns = {
+            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
+                                        self.stdout, 'latency', float)
+        }
         self.tags = {'production', 'benchmark'}
-        self.num_tasks = 0
-        self.reference = {
-            'dom:gpu': {
-                'latency': (1.0, None, 0.10, 'us')
-            },
-            'daint:gpu': {
-                'latency': (1.302, None, 0.40, 'us')
-            },
-            'daint:mc': {
-                'latency': (1.456, None, 0.20, 'us')
-            },
-            '*': {
-                'latency': (0, None, None, 'us')
+        if variant == 'small':
+            self.num_tasks = 6
+            self.reference = {
+                'dom:gpu': {
+                    'latency': (6.0, None, 0.10, 'us')
+                },
+                'daint:gpu': {
+                    'latency': (7.81, None, 0.25, 'us')
+                },
+                'daint:mc': {
+                    'latency': (8.79, None, 0.25, 'us')
+                },
+                '*': {
+                    'latency': (0, None, None, 'us')
+                }
+            }
+        else:
+            self.num_tasks = 16
+            self.reference = {
+                'daint:gpu': {
+                    'latency': (16.87, None, 0.40, 'us')
+                },
+                'daint:mc': {
+                    'latency': (10.85, None, 0.20, 'us')
+                },
+                '*': {
+                    'latency': (0, None, None, 'us')
+                }
             }
-        }
 
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
@@ -129,19 +148,31 @@ def __init__(self):
             }
         }
 
-    @property
-    @sn.sanity_function
-    def num_tasks_assigned(self):
-        return self.job.num_tasks
 
-    def setup(self, partition, environ, **job_opts):
-        num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
-        self.perf_patterns = {
-            'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
-                           self.stdout, 'latency', float) / num_nodes
+# FIXME: This test is obsolete; it is kept only for reference.
+@rfm.parameterized_test(*({'num_tasks': i} for i in range(2, 10, 2)))
+class AlltoallMonchAcceptanceTest(AlltoallTest):
+    def __init__(self, num_tasks):
+        super().__init__('monch_acceptance')
+        self.valid_systems = ['monch:compute']
+        self.num_tasks = num_tasks
+        reference_by_node = {
+            2: {
+                'perf': (2.71, None, 0.1)
+            },
+            4: {
+                'perf': (3.75, None, 0.1)
+            },
+            6: {
+                'perf': (6.28, None, 0.1)
+            },
+            8: {
+                'perf': (8.15, None, 0.1)
+            },
+        }
+        self.reference = {
+            'monch:compute': reference_by_node[self.num_tasks]
         }
-
-        super().setup(partition, environ, **job_opts)
 
 
 class P2PBaseTest(rfm.RegressionTest):

From d25e722cc9687c714ba0ec6b8b94fba6370792af Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Sat, 14 Sep 2019 13:47:13 +0200
Subject: [PATCH 4/5] Minor fixes

---
 cscs-checks/microbenchmarks/osu/osu_tests.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
index f0991fd627..f455671c3d 100644
--- a/cscs-checks/microbenchmarks/osu/osu_tests.py
+++ b/cscs-checks/microbenchmarks/osu/osu_tests.py
@@ -51,11 +51,10 @@ def __init__(self, variant):
         }
 
 
-@rfm.required_version('>=2.18')
+@rfm.required_version('>=2.19')
 @rfm.simple_test
 class FlexAlltoallTest(rfm.RegressionTest):
     def __init__(self):
-        super().__init__()
         self.valid_systems = ['daint:gpu', 'daint:mc',
                               'dom:gpu', 'dom:mc',
                               'kesch:cn', 'kesch:pn', 'leone:normal']

From c064c745cede7878d66171008ee4357ab59f51ad Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Sat, 14 Sep 2019 14:21:13 +0200
Subject: [PATCH 5/5] Update syntax of tests

---
 cscs-checks/microbenchmarks/osu/osu_tests.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
index f455671c3d..287353c3d5 100644
--- a/cscs-checks/microbenchmarks/osu/osu_tests.py
+++ b/cscs-checks/microbenchmarks/osu/osu_tests.py
@@ -2,11 +2,10 @@
 import reframe.utility.sanity as sn
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.parameterized_test(['production'])
 class AlltoallTest(rfm.RegressionTest):
     def __init__(self, variant):
-        super().__init__()
         self.strict_check = False
         self.valid_systems = ['daint:gpu', 'dom:gpu']
         self.descr = 'Alltoall OSU microbenchmark'
@@ -84,11 +83,10 @@ def __init__(self):
         self.tags = {'diagnostic', 'ops', 'benchmark'}
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.parameterized_test(['small'], ['large'])
 class AllreduceTest(rfm.RegressionTest):
     def __init__(self, variant):
-        super().__init__()
         self.strict_check = False
         self.valid_systems = ['daint:gpu', 'daint:mc']
         if variant == 'small':
@@ -176,7 +174,6 @@ def __init__(self, num_tasks):
 
 class P2PBaseTest(rfm.RegressionTest):
     def __init__(self):
-        super().__init__()
         self.exclusive_access = True
         self.strict_check = False
         self.num_tasks = 2
@@ -201,7 +198,7 @@ def __init__(self):
         }
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.simple_test
 class P2PCPUBandwidthTest(P2PBaseTest):
     def __init__(self):
@@ -241,7 +238,7 @@ def __init__(self):
         self.tags |= {'monch_acceptance'}
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.simple_test
 class P2PCPULatencyTest(P2PBaseTest):
     def __init__(self):
@@ -281,7 +278,7 @@ def __init__(self):
         self.tags |= {'monch_acceptance'}
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.simple_test
 class G2GBandwidthTest(P2PBaseTest):
     def __init__(self):
@@ -321,7 +318,7 @@ def __init__(self):
         self.build_system.cppflags = ['-D_ENABLE_CUDA_']
 
 
-@rfm.required_version('>=2.16')
+@rfm.required_version('>=2.19')
 @rfm.simple_test
 class G2GLatencyTest(P2PBaseTest):
     def __init__(self):