diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py
new file mode 100644
index 0000000000..7d10704590
--- /dev/null
+++ b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py
@@ -0,0 +1,76 @@
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+@rfm.required_version('>=2.16-dev0')
+@rfm.parameterized_test(['nompi'], ['mpi'])
+class FFTWTest(rfm.RegressionTest):
+    """Time the FFTW micro-benchmark in its 'nompi' and 'mpi' variants.
+
+    The check passes when 'execution time' occurs exactly once in the
+    program output; the value that follows it is reported as
+    'fftw_exec_time'.
+    """
+
+    def __init__(self, exec_mode):
+        super().__init__()
+        self.sourcepath = 'fftw_benchmark.c'
+        self.build_system = 'SingleSource'
+        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
+        self.modules = ['cray-fftw']
+        self.num_tasks_per_node = 12
+        self.num_gpus_per_node = 0
+        self.sanity_patterns = sn.assert_eq(
+            sn.count(sn.findall(r'execution time', self.stdout)), 1)
+        self.build_system.cflags = ['-O2']
+        if self.current_system.name == 'kesch':
+            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
+            self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
+                                         '-lfftw3']
+        elif self.current_system.name in {'daint', 'dom'}:
+            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
+                                        'PrgEnv-gnu']
+
+        # The named group must match the tag handed to extractsingle().
+        self.perf_patterns = {
+            'fftw_exec_time': sn.extractsingle(
+                r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
+                'exec_time', float),
+        }
+
+        if exec_mode == 'nompi':
+            self.num_tasks = 12
+            self.executable_opts = ['72 12 1000 0']
+            self.reference = {
+                'dom:gpu': {
+                    'fftw_exec_time': (0.55, None, 0.05, 's'),
+                },
+                'daint:gpu': {
+                    'fftw_exec_time': (0.55, None, 0.05, 's'),
+                },
+                'kesch:cn': {
+                    'fftw_exec_time': (0.61, None, 0.05, 's'),
+                },
+                '*': {
+                    'fftw_exec_time': (0, None, None, 's'),
+                }
+            }
+        else:
+            self.num_tasks = 72
+            self.executable_opts = ['144 72 200 1']
+            self.reference = {
+                'dom:gpu': {
+                    'fftw_exec_time': (0.47, None, 0.50, 's'),
+                },
+                'daint:gpu': {
+                    'fftw_exec_time': (0.47, None, 0.50, 's'),
+                },
+                'kesch:cn': {
+                    'fftw_exec_time': (1.58, None, 0.50, 's'),
+                },
+                '*': {
+                    'fftw_exec_time': (0, None, None, 's'),
+                }
+            }
+
+        self.maintainers = ['AJ']
+        self.tags = {'benchmark', 'scs'}
diff --git a/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c
new file mode 100644
index 0000000000..839b98f26a
--- /dev/null
+++ b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c
@@ -0,0 +1,139 @@
+/*
+ * NOTE(review): the header names on the six #include lines below were lost
+ * when this patch was extracted. stdlib.h (malloc/free) and fftw3.h
+ * (fftw_* types and calls) are required by the code visible here; the other
+ * four are assumptions -- confirm against the original file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <sys/time.h>
+#include <fftw3.h>
+#include <mpi.h>
+
+/* Complex scratch buffers (one per axis) and forward/backward FFTW plans. */
+fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
+fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
+
+/*
+ * Allocate the scratch buffers and create the forward (r2c) and backward
+ * (c2r) batched 1-D plans along x, y and z. `func` is the real input array
+ * and `deri` the real output array the plans are created for.
+ */
+void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
+    int nnn;  /* transform length handed to the planner; reset per axis */
+    deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex));
+    deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex));
+    deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
+    nnn = npx;
+    plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE|FFTW_UNALIGNED);
+    nnn = npy;
+    plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE|FFTW_UNALIGNED);
+    nnn = npz2;
+    plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE|FFTW_UNALIGNED);
+    nnn = npx;
+    plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE|FFTW_UNALIGNED);
+    nnn = npy;
+    plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE|FFTW_UNALIGNED);
+    nnn = npz2;
+    plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, FFTW_MEASURE|FFTW_UNALIGNED);
+}
+
+/* Destroy every plan and free the scratch buffers set up by init_derivatives(). */
+void done_derivatives(){
+    fftw_destroy_plan(plan_backward_z);
+    fftw_destroy_plan(plan_backward_y);
+    fftw_destroy_plan(plan_backward_x);
+    fftw_destroy_plan(plan_forward_z);
+    fftw_destroy_plan(plan_forward_y);
+    fftw_destroy_plan(plan_forward_x);
+    free(deri_temp_z);
+    free(deri_temp_y);
+    free(deri_temp_x);
+}
+
+/* Run the forward x transform of `func` into deri_temp_x, then the backward
+ * x transform of deri_temp_x into `deri`. npx, npy and npz are not read here. */
+void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
+    fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
+    fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
+}
+
+void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
+    int i, j, k;
+    for (k = 0; k