Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 58 additions & 32 deletions cscs-checks/libraries/math/dgemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,75 @@
import reframe.utility.sanity as sn


@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class DGEMMTest(rfm.RegressionTest):
def __init__(self):
super().__init__()
self.descr = 'DGEMM performance test'
self.sourcepath = 'dgemm.c'
self.executable_opts = ['5000', '5000', '5000']
self.sanity_patterns = sn.assert_found(
r'Time for \d+ DGEMM operations', self.stdout)
self.maintainers = ['AJ']
self.tags = {'production'}

self.sanity_patterns = self.eval_sanity()
# the perf patterns are automatically generated inside sanity
self.perf_patterns = {}

# FIXME: This test is obsolete; it is kept only for reference.
@rfm.required_version('>=2.14')
@rfm.simple_test
class DGEMMTestMonch(DGEMMTest):
def __init__(self):
super().__init__()
self.tags = {'monch_acceptance'}
self.valid_systems = ['monch:compute']
self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
self.valid_prog_environs = ['PrgEnv-gnu']
self.num_tasks = 1
self.num_tasks = 0
self.num_tasks_per_node = 1
self.num_tasks_per_core = 1
self.num_cpus_per_task = 20
self.num_tasks_per_socket = 10
self.num_tasks_per_socket = 1
self.use_multithreading = False
self.variables = {
'OMP_NUM_THREADS': str(self.num_cpus_per_task),
'MV2_ENABLE_AFFINITY': '0'
}
self.build_system = 'SingleSource'
self.build_system.cflags = ['-O3', '-I$EBROOTOPENBLAS/include']
self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas',
'-lpthread', '-lgfortran']
self.perf_patterns = {
'perf': sn.max(
sn.extractall(r'Run\s\d\s+:\s+(?P<gflops>\S+)\s\S+',
self.stdout, "gflops", float)
)
self.build_system.cflags = ['-O3', '-fopenmp']
self.sys_reference = {
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
'daint:mc': (860.0, -0.15, None, 'Gflop/s'),
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
'dom:mc': (860.0, -0.15, None, 'Gflop/s'),
# FIXME update the values for monch
'monch:compute': (350, -0.1, None, 'Gflop/s'),
}
self.reference = {
'monch:compute': {
'perf': (350, -0.1, None)
}

self.maintainers = ['AJ', 'VH', 'VK']
self.tags = {'diagnostic'}

def setup(self, partition, environ, **job_opts):
    """Apply partition-specific settings, then run the base-class setup.

    Depending on ``partition.fullname`` this picks the number of CPUs per
    task and the three matrix dimensions passed to the executable. For
    ``monch:compute`` it additionally points the build at OpenBLAS.
    ``OMP_NUM_THREADS`` is then set from ``num_cpus_per_task`` and the
    call is forwarded unchanged to the parent ``setup``.
    """
    system = partition.fullname
    on_gpu_nodes = system in ['daint:gpu', 'dom:gpu']
    on_mc_nodes = system in ['daint:mc', 'dom:mc']

    if on_gpu_nodes:
        self.num_cpus_per_task = 12
        self.executable_opts = ['6144', '12288', '3072']
    elif on_mc_nodes:
        self.num_cpus_per_task = 36
        self.executable_opts = ['6144', '12288', '3072']
    elif system in ['monch:compute']:
        self.num_cpus_per_task = 20
        self.executable_opts = ['5000', '5000', '5000']
        # NOTE(review): the OpenBLAS flags are assumed to belong to the
        # monch branch only -- confirm against the original indentation.
        self.build_system.cflags += ['-I$EBROOTOPENBLAS/include']
        self.build_system.ldflags = [
            '-L$EBROOTOPENBLAS/lib',
            '-lopenblas',
            '-lpthread',
            '-lgfortran',
        ]

    # One OpenMP thread per CPU assigned to the task.
    omp_threads = str(self.num_cpus_per_task)
    self.variables = {'OMP_NUM_THREADS': omp_threads}
    super().setup(partition, environ, **job_opts)

@sn.sanity_function
def eval_sanity(self):
    """Verify that every requested node reported and register perf checks.

    The hostnames are extracted from the ``<hostname>: Time for N DGEMM
    operations`` lines in stdout. Their count must equal
    ``self.job.num_tasks``. For each reporting host a reference entry
    (keyed ``<partition>:<hostname>``) and a performance pattern (keyed by
    hostname) are registered, provided the current partition has an entry
    in ``self.sys_reference``.

    Returns:
        True, once the node-count assertion has passed.
    """
    all_tested_nodes = sn.evaluate(sn.extractall(
        r'(?P<hostname>\S+):\s+Time for \d+ DGEMM operations',
        self.stdout, 'hostname'))
    num_tested_nodes = len(all_tested_nodes)
    failure_msg = ('Requested %s node(s), but found %s node(s)' %
                   (self.job.num_tasks, num_tested_nodes))
    # sn.assert_eq() only builds a deferred expression; without an explicit
    # evaluation the result is discarded and a node-count mismatch would
    # never fail the test.
    sn.evaluate(sn.assert_eq(num_tested_nodes, self.job.num_tasks,
                             msg=failure_msg))

    # The partition is the same for every host, so look it up once. Using
    # .get() keeps a partition without a reference from raising KeyError.
    partition_name = self.current_partition.fullname
    partition_ref = self.sys_reference.get(partition_name)
    if partition_ref:
        for hostname in all_tested_nodes:
            ref_name = '%s:%s' % (partition_name, hostname)
            self.reference[ref_name] = partition_ref
            # NOTE(review): the perf pattern is assumed to be registered
            # only when a reference exists -- confirm against the original
            # indentation.
            self.perf_patterns[hostname] = sn.extractsingle(
                r'%s:\s+Avg\. performance\s+:\s+(?P<gflops>\S+)'
                r'\sGflop/s' % hostname, self.stdout, 'gflops', float)

    return True
36 changes: 24 additions & 12 deletions cscs-checks/libraries/math/src/dgemm.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

extern void dgemm_(char*, char*, int*, int*, int*, double*, double*,
int*, double*, int*, double*, double*, int*);
Expand All @@ -22,7 +23,15 @@ int main(int argc, char* argv[])
char tb='N';

struct timeval start_time, end_time, duration[LOOP_COUNT];



#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX sysconf (_SC_HOST_NAME_MAX)
#endif

char hostname[HOST_NAME_MAX];
gethostname(hostname, sizeof(hostname));

if (argc >= 2) m = atoi(argv[1]);
if (argc >= 3) n = atoi(argv[2]);
if (argc >= 4) k = atoi(argv[3]);
Expand All @@ -34,10 +43,10 @@ int main(int argc, char* argv[])
double* B = (double*)malloc(sizeof(double)*k*n);
double* C = (double*)malloc(sizeof(double)*m*n);

printf("Size of Matrix A(mxk)\t\t:\t%d x %d\n", m, k);
printf("Size of Matrix B(kxn)\t\t:\t%d x %d\n", k, n);
printf("Size of Matrix C(mxn)\t\t:\t%d x %d\n", m, n);
printf("LOOP COUNT\t\t\t:\t%d \n", LOOP_COUNT);
printf("%s: Size of Matrix A(mxk)\t\t:\t%d x %d\n", hostname, m, k);
printf("%s: Size of Matrix B(kxn)\t\t:\t%d x %d\n", hostname, k, n);
printf("%s: Size of Matrix C(mxn)\t\t:\t%d x %d\n", hostname, m, n);
printf("%s: LOOP COUNT\t\t\t:\t%d \n", hostname, LOOP_COUNT);
printf("\n");

for (i=0; i<m*k ; ++i) A[i] = i%3+1;
Expand All @@ -48,7 +57,7 @@ int main(int argc, char* argv[])

/* CALL DGEMM ONCE TO INITIALIZE THREAD/BUFFER */
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);

/* LOOP OVER DGEMM IN ORDER TO SMOOTHEN THE RESULTS */
for (i=0; i<LOOP_COUNT; ++i)
{
Expand All @@ -57,20 +66,23 @@ int main(int argc, char* argv[])
gettimeofday(&end_time,NULL);
timersub(&end_time, &start_time, &duration[i]);
}


time_avg = 0.0;
for (i=0; i<LOOP_COUNT; ++i)
{
time[i] = (duration[i].tv_sec * 1.e3 +
time[i] = (duration[i].tv_sec * 1.e3 +
duration[i].tv_usec * 1.e-3) * 1.e-3;
perf[i] = gflop / time[i];
time_avg += time[i];
printf("Run %d \t\t\t\t:\t%.5f GFlops/sec\n", i, perf[i]);
printf("%s: Run %d \t\t\t\t:\t%.5f GFlops/sec\n", hostname, i, perf[i]);
}


printf("\n");
printf("Flops based on given dimensions\t:\t%.5f GFlops/sec\n", gflop);
printf("Avg. time / DGEMM operation\t:\t%f secs \n", time_avg/LOOP_COUNT);
printf("Time for %d DGEMM operations\t:\t%f secs \n", LOOP_COUNT, time_avg);
printf("%s: Flops based on given dimensions\t:\t%.5f Gflops\n", hostname, gflop);
printf("%s: Avg. performance \t:\t%.5f Gflop/s\n", hostname, gflop * LOOP_COUNT / time_avg);
printf("%s: Avg. time / DGEMM operation\t:\t%f secs \n", hostname, time_avg / LOOP_COUNT);
printf("%s: Time for %d DGEMM operations\t:\t%f secs \n", hostname, LOOP_COUNT, time_avg);
printf("\n");

return 0;
Expand Down