From 765ed4fed14f55ae13a7bac88ce887597662447f Mon Sep 17 00:00:00 2001 From: ajocksch Date: Thu, 9 May 2019 15:03:57 +0200 Subject: [PATCH 1/5] WIP: first version of fftw benchmark --- .../microbenchmarks/fft/fftw_benchmark.py | 58 +++++++++ .../microbenchmarks/fft/src/fftw_benchmark.c | 116 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 cscs-checks/microbenchmarks/fft/fftw_benchmark.py create mode 100644 cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py new file mode 100644 index 0000000000..59d22aa34e --- /dev/null +++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py @@ -0,0 +1,58 @@ +import reframe as rfm +import reframe.utility.sanity as sn + + +@rfm.required_version('>=2.16-dev0') +@rfm.parameterized_test([False], [True]) +class FFTWTest(rfm.RegressionTest): + def __init__(self, with_mpi): + super().__init__() + self.sourcepath = 'fftw_benchmark.c' + self.build_system = 'SingleSource' + self.build_system.cflags = ['-O2'] + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-gnu'] + self.modules = ['cray-fftw'] + self.num_tasks_per_node = 12 + self.num_gpus_per_node = 0 + self.sanity_patterns = sn.assert_eq( + sn.count(sn.findall(r'execution time', self.stdout)), 1) + self.perf_patterns = { + 'fftw_exec_time': sn.extractsingle( + r'execution time:\s+(?P<exec_time>\S+)', self.stdout, + 'exec_time', float), + } + + if not with_mpi: + self.num_tasks = 12 + self.executable_opts = ['72 12 1000 0'] + self.sys_reference = { + 'dom:gpu': { + 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + }, + 'daint:gpu': { + 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + }, + 'kesch:cn': { + 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + } + } + else: + self.num_tasks = 72 + self.executable_opts = ['144 72 200 1'] + self.sys_reference = { + 'dom:gpu': { + 'fftw_exec_time': 
(4.7e-01, None, 0.50, 's'), + }, + 'daint:gpu': { + 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), + }, + 'kesch:cn': { + 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), + } + } + + self.reference = self.sys_reference + self.maintainers = ['AJ'] + self.tags = {'benchmark'} diff --git a/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c new file mode 100644 index 0000000000..a5fb021e63 --- /dev/null +++ b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include + +fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z; +fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z; + +void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){ + int nnn; + deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex)); + deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex)); + deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex)); + nnn = npx; + plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE+FFTW_UNALIGNED); + nnn = npy; + plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE+FFTW_UNALIGNED); + nnn = npz2; + plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE+FFTW_UNALIGNED); + nnn = npx; + plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE+FFTW_UNALIGNED); + nnn = npy; + plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE+FFTW_UNALIGNED); + nnn = npz2; + plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, 
&nnn, npx*npy2, 1, FFTW_MEASURE+FFTW_UNALIGNED); +} + +void done_derivatives(){ + fftw_destroy_plan(plan_backward_z); + fftw_destroy_plan(plan_backward_y); + fftw_destroy_plan(plan_backward_x); + fftw_destroy_plan(plan_forward_z); + fftw_destroy_plan(plan_forward_y); + fftw_destroy_plan(plan_forward_x); + free(deri_temp_z); + free(deri_temp_y); + free(deri_temp_x); +} + +void derivative_x1(double *func, double *deri, int npx, int npy, int npz){ + int i, jk; + fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x); + fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri); +} + +void derivative_y1(double *func, double *deri, int npx, int npy, int npz){ + int i, j, k; + for (k = 0; k Date: Wed, 15 May 2019 15:33:40 +0200 Subject: [PATCH 2/5] implemented changes requested --- .../microbenchmarks/fft/fftw_benchmark.py | 23 +++++++++++-------- .../microbenchmarks/fft/src/fftw_benchmark.c | 11 ++++++++- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py index 59d22aa34e..e7f647737f 100644 --- a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py +++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py @@ -3,13 +3,12 @@ @rfm.required_version('>=2.16-dev0') -@rfm.parameterized_test([False], [True]) +@rfm.parameterized_test(['withoutmpi'], ['withmpi']) class FFTWTest(rfm.RegressionTest): - def __init__(self, with_mpi): + def __init__(self, exec_mode): super().__init__() self.sourcepath = 'fftw_benchmark.c' self.build_system = 'SingleSource' - self.build_system.cflags = ['-O2'] self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'] @@ -18,16 +17,21 @@ def __init__(self, with_mpi): self.num_gpus_per_node = 0 self.sanity_patterns = sn.assert_eq( sn.count(sn.findall(r'execution time', self.stdout)), 1) + if self.current_system.name == 'kesch': + self.build_system.cflags = ['-O2 -I$FFTW_INC 
-L$FFTW_DIR -lfftw3'] + else: + self.build_system.cflags = ['-O2'] + self.perf_patterns = { 'fftw_exec_time': sn.extractsingle( r'execution time:\s+(?P<exec_time>\S+)', self.stdout, 'exec_time', float), } - if not with_mpi: + if exec_mode == 'withoutmpi': self.num_tasks = 12 self.executable_opts = ['72 12 1000 0'] - self.sys_reference = { + self.reference = { 'dom:gpu': { 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), }, @@ -35,13 +39,13 @@ def __init__(self, with_mpi): 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), }, 'kesch:cn': { - 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + 'fftw_exec_time': (6.1e-01, None, 0.05, 's'), } } else: self.num_tasks = 72 self.executable_opts = ['144 72 200 1'] - self.sys_reference = { + self.reference = { 'dom:gpu': { 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), }, @@ -49,10 +53,9 @@ def __init__(self, with_mpi): 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), }, 'kesch:cn': { - 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), + 'fftw_exec_time': (1.58, None, 0.50, 's'), } } - self.reference = self.sys_reference self.maintainers = ['AJ'] - self.tags = {'benchmark'} + self.tags = {'benchmark', 'scs'} diff --git a/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c index a5fb021e63..839b98f26a 100644 --- a/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c +++ b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c @@ -71,18 +71,27 @@ int main(int argc, char *argv[]){ MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); if (argc != 5){ if (mpi_rank == 0){ - printf("npoints nproc iter\n"); + printf("Usage: %s npoints nproc niter withmpi\n", argv[0]); } + MPI_Finalize(); exit(1); } npoints = atoi(argv[1]); nproc = atoi(argv[2]); iter = atoi(argv[3]); withmpi = atoi(argv[4]); + if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){ + if (mpi_rank == 0){ + printf("%s: invalid input arguments\n", argv[0]); + } + MPI_Finalize(); + exit(1); + } if (mpi_size != nproc){ if (mpi_rank == 0){ 
printf("number of MPI processes must be %d\n", nproc); } + MPI_Finalize(); exit(1); } npx = npy = npz2 = npoints; From 0477b8b7d955a09523695db37cd79e91beaceff5 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Mon, 20 May 2019 15:07:44 +0200 Subject: [PATCH 3/5] no prgenv-gnu on kesch --- .../microbenchmarks/fft/fftw_benchmark.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py index e7f647737f..8c4fa9ccdc 100644 --- a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py +++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py @@ -3,23 +3,24 @@ @rfm.required_version('>=2.16-dev0') -@rfm.parameterized_test(['withoutmpi'], ['withmpi']) +@rfm.parameterized_test(['nompi'], ['mpi']) class FFTWTest(rfm.RegressionTest): def __init__(self, exec_mode): super().__init__() self.sourcepath = 'fftw_benchmark.c' self.build_system = 'SingleSource' self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] - self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', - 'PrgEnv-gnu'] self.modules = ['cray-fftw'] self.num_tasks_per_node = 12 self.num_gpus_per_node = 0 self.sanity_patterns = sn.assert_eq( sn.count(sn.findall(r'execution time', self.stdout)), 1) if self.current_system.name == 'kesch': + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] self.build_system.cflags = ['-O2 -I$FFTW_INC -L$FFTW_DIR -lfftw3'] else: + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-gnu'] self.build_system.cflags = ['-O2'] self.perf_patterns = { @@ -28,18 +29,18 @@ def __init__(self, exec_mode): 'exec_time', float), } - if exec_mode == 'withoutmpi': + if exec_mode == 'nompi': self.num_tasks = 12 self.executable_opts = ['72 12 1000 0'] self.reference = { 'dom:gpu': { - 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + 'fftw_exec_time': (0.55, None, 0.05, 's'), }, 'daint:gpu': { - 'fftw_exec_time': (5.5e-01, None, 0.05, 's'), + 'fftw_exec_time': 
(0.55, None, 0.05, 's'), }, 'kesch:cn': { - 'fftw_exec_time': (6.1e-01, None, 0.05, 's'), + 'fftw_exec_time': (0.61, None, 0.05, 's'), } } else: @@ -47,10 +48,10 @@ def __init__(self, exec_mode): self.executable_opts = ['144 72 200 1'] self.reference = { 'dom:gpu': { - 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), + 'fftw_exec_time': (0.47, None, 0.50, 's'), }, 'daint:gpu': { - 'fftw_exec_time': (4.7e-01, None, 0.50, 's'), + 'fftw_exec_time': (0.47, None, 0.50, 's'), }, 'kesch:cn': { 'fftw_exec_time': (1.58, None, 0.50, 's'), From a6432ebdd4364e36a64c1a086d5337a29e387d35 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Fri, 24 May 2019 16:51:52 +0200 Subject: [PATCH 4/5] Make FFTW check generic --- cscs-checks/microbenchmarks/fft/fftw_benchmark.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py index 8c4fa9ccdc..aba96775c3 100644 --- a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py +++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py @@ -15,13 +15,14 @@ def __init__(self, exec_mode): self.num_gpus_per_node = 0 self.sanity_patterns = sn.assert_eq( sn.count(sn.findall(r'execution time', self.stdout)), 1) + self.build_system.cflags = ['-O2'] if self.current_system.name == 'kesch': self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi'] - self.build_system.cflags = ['-O2 -I$FFTW_INC -L$FFTW_DIR -lfftw3'] - else: + self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR', + '-lfftw3'] + elif self.current_system.name in {'daint', 'dom'}: self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'] - self.build_system.cflags = ['-O2'] self.perf_patterns = { 'fftw_exec_time': sn.extractsingle( @@ -41,6 +42,9 @@ def __init__(self, exec_mode): }, 'kesch:cn': { 'fftw_exec_time': (0.61, None, 0.05, 's'), + }, + '*': { + 'fftw_exec_time': (0, None, None, 's'), } } else: @@ -56,6 +60,9 @@ def __init__(self, exec_mode): 'kesch:cn': 
{ 'fftw_exec_time': (1.58, None, 0.50, 's'), } + '*': { + 'fftw_exec_time': (0, None, None, 's'), + } } self.maintainers = ['AJ'] From 744fa2035de88a6bc74d83d79c5647ba3757979b Mon Sep 17 00:00:00 2001 From: ajocksch Date: Fri, 24 May 2019 16:56:42 +0200 Subject: [PATCH 5/5] Fix syntax error in FFTW check --- cscs-checks/microbenchmarks/fft/fftw_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py index aba96775c3..7d10704590 100644 --- a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py +++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py @@ -59,7 +59,7 @@ def __init__(self, exec_mode): }, 'kesch:cn': { 'fftw_exec_time': (1.58, None, 0.50, 's'), - } + }, '*': { 'fftw_exec_time': (0, None, None, 's'), }