Promote DGEMM check to multinode

Victor Holanda · Victor Holanda · commit 1863bbd99418 · 2018-10-18T15:14:52.000+02:00
diff --git a/cscs-checks/libraries/math/dgemm.py b/cscs-checks/libraries/math/dgemm.py
@@ -2,48 +2,98 @@
 import reframe.utility.sanity as sn
 
 
+@rfm.required_version('>=2.14')
+@rfm.simple_test
 class DGEMMTest(rfm.RegressionTest):
     def __init__(self):
         super().__init__()
         self.descr = 'DGEMM performance test'
         self.sourcepath = 'dgemm.c'
-        self.executable_opts = ['5000', '5000', '5000']
-        self.sanity_patterns = sn.assert_found(
-            r'Time for \d+ DGEMM operations', self.stdout)
-        self.maintainers = ['AJ']
-        self.tags = {'production'}
 
+        self.sanity_patterns = self.eval_sanity()
+        # the perf patterns are automatically generated inside sanity
+        self.perf_patterns = {}
 
-@rfm.required_version('>=2.14')
-@rfm.simple_test
-class DGEMMTestMonch(DGEMMTest):
-    def __init__(self):
-        super().__init__()
-        self.tags = {'monch_acceptance'}
-        self.valid_systems = ['monch:compute']
-        self.valid_prog_environs = ['PrgEnv-gnu']
-        self.num_tasks = 1
+        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
+                              'monch:compute']
+        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel']
+
+        # FIXME: set the num_tasks to zero.
+        self.num_tasks = 2
         self.num_tasks_per_node = 1
         self.num_tasks_per_core = 1
-        self.num_cpus_per_task = 20
-        self.num_tasks_per_socket = 10
+        self.num_tasks_per_socket = 1
         self.use_multithreading = False
+
+        self.build_system = 'SingleSource'
+        self.build_system.cflags = ['-O3']
+
+        self.my_reference = {
+            'daint:gpu': (430, -0.1, None),
+            'daint:mc': (430, -0.1, None),
+            'monch:compute': (350, -0.1, None),
+        }
+
+        self.maintainers = ['AJ', 'VH', 'VK']
+        self.tags = {'production'}
+
+
+    def setup(self, partition, environ, **job_opts):
+        if partition.fullname in ['daint:gpu', 'dom:gpu']:
+            self.num_cpus_per_task = 12
+            self.executable_opts = ['6000', '6000', '6000']
+
+        elif partition.fullname in ['daint:mc', 'dom:mc']:
+            self.num_cpus_per_task = 36
+            self.executable_opts = ['6000', '6000', '6000']
+
+        elif partition.fullname in ['monch:compute']:
+            self.num_cpus_per_task = 20
+            self.executable_opts = ['5000', '5000', '5000']
+            self.build_system.cflags += ['-I$EBROOTOPENBLAS/include']
+            self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas',
+                                         '-lpthread', '-lgfortran']
+
         self.variables = {
             'OMP_NUM_THREADS': str(self.num_cpus_per_task),
             'MV2_ENABLE_AFFINITY': '0'
         }
-        self.build_system = 'SingleSource'
-        self.build_system.cflags = ['-O3', '-I$EBROOTOPENBLAS/include']
-        self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas',
-                                     '-lpthread', '-lgfortran']
-        self.perf_patterns = {
-            'perf': sn.max(
-                sn.extractall(r'Run\s\d\s+:\s+(?P<gflops>\S+)\s\S+',
-                              self.stdout, "gflops", float)
-            )
-        }
-        self.reference = {
-            'monch:compute': {
-                'perf': (350, -0.1, None)
-            }
-        }
+
+        if environ.name.startswith('PrgEnv-cray'):
+            self.build_system.cflags += ['-hnoomp']
+
+        super().setup(partition, environ, **job_opts)
+
+
+    @sn.sanity_function
+    def eval_sanity(self):
+        failures = []
+
+        all_tested_nodes = sn.evaluate(sn.findall(
+            r'(?P<name>.*):\s+Time for \d+ DGEMM operations',
+            self.stdout
+        ))
+        number_of_tested_nodes = len(all_tested_nodes)
+
+        if number_of_tested_nodes != self.num_tasks:
+            failures.append('Requested %s nodes, but found %s nodes)' %
+                            (self.num_tasks, number_of_tested_nodes))
+            # FIXME: list detected nodes in error message
+            sn.assert_false(failures, msg=', '.join(failures))
+
+        update_reference = False
+        if self.my_reference[self.current_partition.fullname]:
+            update_reference = True
+
+        for node in all_tested_nodes:
+            nodename  = node.group('name')
+
+            if update_reference:
+                partition_name = self.current_partition.fullname
+                ref_name = '%s:%s' % (partition_name, nodename)
+                self.reference[ref_name] = self.my_reference[partition_name]
+                self.perf_patterns[nodename] = sn.extractsingle(
+                    '%s:\\s+Flops based on.*:\\s+(?P<gflops>.*)\\sGFlops\\/sec'
+                    % nodename, self.stdout, "gflops", float)
+
+        return sn.assert_false(failures, msg=', '.join(failures))
diff --git a/cscs-checks/libraries/math/src/dgemm.c b/cscs-checks/libraries/math/src/dgemm.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <unistd.h>
 
 extern void dgemm_(char*, char*, int*, int*, int*, double*, double*,
                    int*, double*, int*, double*, double*, int*);
@@ -22,7 +23,15 @@ int main(int argc, char* argv[])
     char tb='N';
 
     struct timeval start_time, end_time, duration[LOOP_COUNT];
-    
+
+
+#ifndef HOST_NAME_MAX
+#define HOST_NAME_MAX sysconf (_SC_HOST_NAME_MAX)
+#endif
+
+    char hostname[HOST_NAME_MAX];
+    gethostname(hostname, sizeof(hostname));
+
     if (argc >= 2) m = atoi(argv[1]);
     if (argc >= 3) n = atoi(argv[2]);
     if (argc >= 4) k = atoi(argv[3]);
@@ -34,10 +43,10 @@ int main(int argc, char* argv[])
     double* B = (double*)malloc(sizeof(double)*k*n);
     double* C = (double*)malloc(sizeof(double)*m*n);
 
-    printf("Size of Matrix A(mxk)\t\t:\t%d x %d\n", m, k);
-    printf("Size of Matrix B(kxn)\t\t:\t%d x %d\n", k, n);
-    printf("Size of Matrix C(mxn)\t\t:\t%d x %d\n", m, n);
-    printf("LOOP COUNT\t\t\t:\t%d \n", LOOP_COUNT);
+    printf("%s: Size of Matrix A(mxk)\t\t:\t%d x %d\n", hostname, m, k);
+    printf("%s: Size of Matrix B(kxn)\t\t:\t%d x %d\n", hostname, k, n);
+    printf("%s: Size of Matrix C(mxn)\t\t:\t%d x %d\n", hostname, m, n);
+    printf("%s: LOOP COUNT\t\t\t:\t%d \n", hostname, LOOP_COUNT);
     printf("\n");
 
     for (i=0; i<m*k ; ++i) A[i] = i%3+1;
@@ -48,7 +57,7 @@ int main(int argc, char* argv[])
 
     /* CALL DGEMM ONCE TO INITIALIZE THREAD/BUFFER */
     dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
-    
+
     /* LOOP OVER DGEMM IN ORDER TO SMOOTHEN THE RESULTS */
     for (i=0; i<LOOP_COUNT; ++i)
     {
@@ -57,20 +66,21 @@ int main(int argc, char* argv[])
         gettimeofday(&end_time,NULL);
         timersub(&end_time, &start_time, &duration[i]);
     }
-    
+
+    time_avg = 0.0;
     for (i=0; i<LOOP_COUNT; ++i)
     {
-        time[i] = (duration[i].tv_sec * 1.e3 + 
+        time[i] = (duration[i].tv_sec * 1.e3 +
                    duration[i].tv_usec * 1.e-3) * 1.e-3;
         perf[i] = gflop / time[i];
         time_avg += time[i];
-        printf("Run %d \t\t\t\t:\t%.5f GFlops/sec\n", i, perf[i]);
+        printf("%s: Run %d \t\t\t\t:\t%.5f GFlops/sec\n", hostname, i, perf[i]);
     }
 
     printf("\n");
-    printf("Flops based on given dimensions\t:\t%.5f GFlops/sec\n", gflop);
-    printf("Avg. time / DGEMM operation\t:\t%f secs \n", time_avg/LOOP_COUNT);
-    printf("Time for %d DGEMM operations\t:\t%f secs \n", LOOP_COUNT, time_avg);
+    printf("%s: Flops based on given dimensions\t:\t%.5f GFlops/sec\n", hostname, gflop);
+    printf("%s: Avg. time / DGEMM operation\t:\t%f secs \n", hostname, time_avg/LOOP_COUNT);
+    printf("%s: Time for %d DGEMM operations\t:\t%f secs \n", hostname, LOOP_COUNT, time_avg);
     printf("\n");
 
     return 0;