From 65fef78497cd0a1d59172176889b96ab9204b750 Mon Sep 17 00:00:00 2001 From: ajocksch Date: Fri, 1 Feb 2019 11:21:01 +0100 Subject: [PATCH 1/6] benchmark halo cell exchange --- .../microbenchmarks/mpi/halo_cell_exchange.py | 91 ++++++++++ .../mpi/src/halo_cell_exchange.c | 162 ++++++++++++++++++ cscs-checks/microbenchmarks/mpi/src/input.txt | 9 + 3 files changed, 262 insertions(+) create mode 100644 cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py create mode 100644 cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c create mode 100644 cscs-checks/microbenchmarks/mpi/src/input.txt diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py new file mode 100644 index 0000000000..e0caf03a00 --- /dev/null +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -0,0 +1,91 @@ +import reframe as rfm +import reframe.utility.sanity as sn + + +@rfm.required_version('>=2.16-dev0') +@rfm.simple_test +class HaloCellExchangeTest(rfm.RegressionTest): + def __init__(self): + super().__init__() + self.sourcepath = 'halo_cell_exchange.c' + self.build_system = 'SingleSource' + self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] + self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', + 'PrgEnv-gnu'] + self.num_tasks = 6 + self.num_tasks_per_node = 1 + self.num_gpus_per_node = 0 + + self.build_system.cflags = ['-O2'] + + self.executable_opts = ['< input.txt'] + + self.sanity_patterns = sn.all([ + sn.assert_eq( + sn.count(sn.findall(r'halo_cell_exchange', + self.stdout)), 9) + ]) + + # the (?P\S+) should be replaced + self.perf_patterns = { + 'time_2_10': sn.extractsingle( + r'halo_cell_exchange 6 2 1 1 10 10 10' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_2_10000': sn.extractsingle( + r'halo_cell_exchange 6 2 1 1 10000 10000 10000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_2_1000000': sn.extractsingle( + r'halo_cell_exchange 6 2 1 1 1000000 1000000 1000000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_4_10': sn.extractsingle( + r'halo_cell_exchange 6 2 2 1 10 10 10' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_4_10000': sn.extractsingle( + r'halo_cell_exchange 6 2 2 1 10000 10000 10000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_4_1000000': sn.extractsingle( + r'halo_cell_exchange 6 2 2 1 1000000 1000000 1000000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_6_10': sn.extractsingle( + r'halo_cell_exchange 6 3 2 1 10 10 10' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_6_10000': sn.extractsingle( + r'halo_cell_exchange 6 3 2 1 10000 10000 10000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + 'time_6_1000000': sn.extractsingle( + r'halo_cell_exchange 6 3 2 1 1000000 1000000 1000000' + r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float) + } + + # the kesch values need to be added + self.sys_reference = { + 'dom:gpu': { + 'time_2_10': (3.925395e-02, None, 0.50, 's'), + 'time_2_10000': (9.721279e-02, None, 0.50, 's'), + 'time_2_1000000': (4.934530e+00, None, 0.50, 's'), + 'time_4_10': (5.878997e-02, None, 0.50, 's'), + 'time_4_10000': (1.495080e-01, None, 0.50, 's'), + 'time_4_1000000': (6.791397e+00, None, 0.50, 's'), + 'time_6_10': (5.428815e-02, None, 0.50, 's'), + 'time_6_10000': (1.540580e-01, None, 0.50, 's'), + 'time_6_1000000': (9.179296e+00, None, 0.50, 's') + }, + 'daint:gpu': { + 'time_2_10': (3.925395e-02, None, 0.50, 's'), + 'time_2_10000': (9.721279e-02, None, 0.50, 's'), + 'time_2_1000000': (4.934530e+00, None, 0.50, 's'), + 'time_4_10': (5.878997e-02, None, 0.50, 's'), + 'time_4_10000': (1.495080e-01, None, 0.50, 's'), + 'time_4_1000000': (6.791397e+00, None, 0.50, 's'), + 'time_6_10': (5.428815e-02, None, 0.50, 's'), + 'time_6_10000': (1.540580e-01, None, 0.50, 's'), + 'time_6_1000000': (9.179296e+00, None, 0.50, 's') + }, + 'kesch:cn': { + } + } + + self.reference = self.sys_reference + + self.maintainers = ['AJ'] + self.tags = {'benchmark', 'diagnostic'} diff --git a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c new file mode 100644 index 0000000000..244b6bcb46 --- /dev/null +++ b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c @@ -0,0 +1,162 @@ +/* there is still somewhere a bug in the code */ + +#include +#include +#include +#include + +int main(int argc, const char* argv[]){ + MPI_Comm cart_comm, red_comm; + MPI_Request *request; + MPI_Status *status; + int ndims, reorder, color, end, ierr; + int *dim_size, *periods, *halosize; + int comm_size, comm_rank, comm_size_cart, comm_rank_cart; + char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; + double start, stop, deltatmin, deltatmax, ttt; + int rank, i, j; + + if (argc>1){ + printf("%s < ndims dim1 dim2 ... halosize1 halosize2 ...\n", argv[0]); + exit(0); + } + ierr = MPI_Init(NULL, NULL); + if (ierr!=0) exit(1); + ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + if (ierr!=0) exit(1); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank); + if (ierr!=0) exit(1); + end = 0; + while (end == 0){ + if (comm_rank==0){ + end = (fgets(inputbuf, sizeof(inputbuf), stdin) == NULL); + if (end == 0){ + pinputbuf = inputbuf; + while (pinputbuf[0]==' ') pinputbuf++; + end = !((pinputbuf[0]>='0')&&(pinputbuf[0]<='9')); + pinputbuf = inputbuf; + } + } + MPI_Bcast(&end, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (end == 0){ + if (comm_rank==0){ + sscanf(pinputbuf, "%d", &ndims); + } + MPI_Bcast(&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD); + dim_size = (int*) malloc(ndims*sizeof(int)); + periods = (int*) malloc(ndims*sizeof(int)); + halosize = (int*) malloc(ndims*sizeof(int)); + + for (i=0; icomm_size){ + if (comm_rank==0){ + printf("Please run with at least %d MPI ranks.\n", j); + } + ierr = MPI_Finalize(); + if (ierr!=0) exit(1); + exit(0); + } + + color = (comm_rank < j); + if (color == 0){ + ierr = MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, &red_comm); + }else{ + ierr = MPI_Comm_split(MPI_COMM_WORLD, color, comm_rank, &red_comm); + ierr = MPI_Cart_create(red_comm, ndims, dim_size, periods, reorder, &cart_comm); + if (ierr!=0) exit(1); + ierr = MPI_Comm_size(cart_comm, &comm_size_cart); + if (ierr!=0) exit(1); + ierr = MPI_Comm_rank(cart_comm, &comm_rank_cart); + if (ierr!=0) exit(1); + + j=0; + for (i=0; ij){ + j=halosize[i]; + } + } + sendbuf = (char*) malloc(ndims*2*j*sizeof(char)); + recvbuf = (char*) malloc(ndims*2*j*sizeof(char)); + request = (MPI_Request*) malloc(ndims*2*2*sizeof(MPI_Request)); + status = (MPI_Status*) malloc(ndims*2*2*sizeof(MPI_Status)); + + start = MPI_Wtime (); + + for (j=0; j<10000; j++){ + for (i=0; i Date: Fri, 1 Feb 2019 11:30:59 +0100 Subject: [PATCH 2/6] shortened lines --- .../microbenchmarks/mpi/halo_cell_exchange.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index e0caf03a00..43a683ad55 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -30,31 +30,40 @@ def __init__(self): self.perf_patterns = { 'time_2_10': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_2_10000': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_2_1000000': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_4_10': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_4_10000': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_4_1000000': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_6_10': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_6_10000': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float), + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float), 'time_6_1000000': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, 'time_mpi', float) + r' (?P\S+) (?P\S+)', self.stdout, + 'time_mpi', float) } # the kesch values need to be added From 6aa83fb41cca66d52ae004c16bbcc467d94c8e2a Mon Sep 17 00:00:00 2001 From: ajocksch Date: Thu, 14 Feb 2019 11:24:38 +0100 Subject: [PATCH 3/6] bug in halo_cell_exchange fixed --- .../mpi/src/halo_cell_exchange.c | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c index 244b6bcb46..93622b20f6 100644 --- a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c +++ b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c @@ -1,5 +1,3 @@ -/* there is still somewhere a bug in the code */ - #include #include #include @@ -14,7 +12,7 @@ int main(int argc, const char* argv[]){ int comm_size, comm_rank, comm_size_cart, comm_rank_cart; char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; double start, stop, deltatmin, deltatmax, ttt; - int rank, i, j; + int rank_source, rank_dest, i, j; if (argc>1){ printf("%s < ndims dim1 dim2 ... halosize1 halosize2 ...\n", argv[0]); @@ -108,23 +106,19 @@ int main(int argc, const char* argv[]){ for (j=0; j<10000; j++){ for (i=0; i Date: Sun, 3 Mar 2019 23:51:22 +0100 Subject: [PATCH 4/6] almost all required changes in --- .../microbenchmarks/mpi/halo_cell_exchange.py | 69 ++- .../mpi/src/halo_cell_exchange.c | 444 ++++++++++++------ 2 files changed, 336 insertions(+), 177 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index 43a683ad55..a9e24cace7 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -9,6 +9,7 @@ def __init__(self): super().__init__() self.sourcepath = 'halo_cell_exchange.c' self.build_system = 'SingleSource' + self.build_system.cflags = ['-O2'] self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn'] self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'] @@ -16,79 +17,73 @@ def __init__(self): self.num_tasks_per_node = 1 self.num_gpus_per_node = 0 - self.build_system.cflags = ['-O2'] - - self.executable_opts = ['< input.txt'] + self.executable_opts = ['input.txt'] - self.sanity_patterns = sn.all([ - sn.assert_eq( - sn.count(sn.findall(r'halo_cell_exchange', - self.stdout)), 9) - ]) + self.sanity_patterns = sn.assert_eq( + sn.count(sn.findall(r'halo_cell_exchange', self.stdout)), 9) - # the (?P\S+) should be replaced self.perf_patterns = { 'time_2_10': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_2_10000': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_2_1000000': sn.extractsingle( r'halo_cell_exchange 6 2 1 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_4_10': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_4_10000': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_4_1000000': sn.extractsingle( r'halo_cell_exchange 6 2 2 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_6_10': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 10 10 10' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_6_10000': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 10000 10000 10000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float), 'time_6_1000000': sn.extractsingle( r'halo_cell_exchange 6 3 2 1 1000000 1000000 1000000' - r' (?P\S+) (?P\S+)', self.stdout, + r' \S+ (?P\S+)', self.stdout, 'time_mpi', float) } # the kesch values need to be added self.sys_reference = { 'dom:gpu': { - 'time_2_10': (3.925395e-02, None, 0.50, 's'), - 'time_2_10000': (9.721279e-02, None, 0.50, 's'), - 'time_2_1000000': (4.934530e+00, None, 0.50, 's'), - 'time_4_10': (5.878997e-02, None, 0.50, 's'), - 'time_4_10000': (1.495080e-01, None, 0.50, 's'), - 'time_4_1000000': (6.791397e+00, None, 0.50, 's'), - 'time_6_10': (5.428815e-02, None, 0.50, 's'), - 'time_6_10000': (1.540580e-01, None, 0.50, 's'), - 'time_6_1000000': (9.179296e+00, None, 0.50, 's') + 'time_2_10': (3.925395e-06, None, 0.50, 's'), + 'time_2_10000': (9.721279e-06, None, 0.50, 's'), + 'time_2_1000000': (4.934530e-04, None, 0.50, 's'), + 'time_4_10': (5.878997e-06, None, 0.50, 's'), + 'time_4_10000': (1.495080e-05, None, 0.50, 's'), + 'time_4_1000000': (6.791397e-04, None, 0.50, 's'), + 'time_6_10': (5.428815e-06, None, 0.50, 's'), + 'time_6_10000': (1.540580e-05, None, 0.50, 's'), + 'time_6_1000000': (9.179296e-04, None, 0.50, 's') }, 'daint:gpu': { - 'time_2_10': (3.925395e-02, None, 0.50, 's'), - 'time_2_10000': (9.721279e-02, None, 0.50, 's'), - 'time_2_1000000': (4.934530e+00, None, 0.50, 's'), - 'time_4_10': (5.878997e-02, None, 0.50, 's'), - 'time_4_10000': (1.495080e-01, None, 0.50, 's'), - 'time_4_1000000': (6.791397e+00, None, 0.50, 's'), - 'time_6_10': (5.428815e-02, None, 0.50, 's'), - 'time_6_10000': (1.540580e-01, None, 0.50, 's'), - 'time_6_1000000': (9.179296e+00, None, 0.50, 's') + 'time_2_10': (3.925395e-06, None, 0.50, 's'), + 'time_2_10000': (9.721279e-06, None, 0.50, 's'), + 'time_2_1000000': (4.934530e-04, None, 0.50, 's'), + 'time_4_10': (5.878997e-06, None, 0.50, 's'), + 'time_4_10000': (1.495080e-05, None, 0.50, 's'), + 'time_4_1000000': (6.791397e-04, None, 0.50, 's'), + 'time_6_10': (5.428815e-06, None, 0.50, 's'), + 'time_6_10000': (1.540580e-05, None, 0.50, 's'), + 'time_6_1000000': (9.179296e-04, None, 0.50, 's') }, 'kesch:cn': { } @@ -97,4 +92,4 @@ def __init__(self): self.reference = self.sys_reference self.maintainers = ['AJ'] - self.tags = {'benchmark', 'diagnostic'} + self.tags = {'benchmark'} diff --git a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c index 93622b20f6..b581b4a121 100644 --- a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c +++ b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c @@ -3,154 +3,318 @@ #include #include -int main(int argc, const char* argv[]){ - MPI_Comm cart_comm, red_comm; - MPI_Request *request; - MPI_Status *status; - int ndims, reorder, color, end, ierr; - int *dim_size, *periods, *halosize; - int comm_size, comm_rank, comm_size_cart, comm_rank_cart; - char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; - double start, stop, deltatmin, deltatmax, ttt; - int rank_source, rank_dest, i, j; +#define NCALLS 10000 - if (argc>1){ - printf("%s < ndims dim1 dim2 ... halosize1 halosize2 ...\n", argv[0]); - exit(0); +int +main (int argc, const char *argv[]) +{ + MPI_Comm cart_comm, red_comm; + MPI_Request *request; + MPI_Status *status; + FILE *pFile; + int ndims, reorder, color, end; + int *dim_size, *periods, *halosize; + int comm_size, comm_rank, comm_size_cart, comm_rank_cart; + char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; + double start, stop, deltatmin, deltatmax, elapsed_time; + int rank_source, rank_dest, i, j; + + if (MPI_Init (NULL, NULL) != 0) + { + fprintf (stderr, "MPI_Init() failed\n"); + exit (1); + } + if (MPI_Comm_size (MPI_COMM_WORLD, &comm_size) != 0) + { + fprintf (stderr, "MPI_Comm_size() failed\n"); + exit (1); + } + if (MPI_Comm_rank (MPI_COMM_WORLD, &comm_rank) != 0) + { + fprintf (stderr, "MPI_Comm_rank() failed\n"); + exit (1); } - ierr = MPI_Init(NULL, NULL); - if (ierr!=0) exit(1); - ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); - if (ierr!=0) exit(1); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank); - if (ierr!=0) exit(1); - end = 0; - while (end == 0){ - if (comm_rank==0){ - end = (fgets(inputbuf, sizeof(inputbuf), stdin) == NULL); - if (end == 0){ - pinputbuf = inputbuf; - while (pinputbuf[0]==' ') pinputbuf++; - end = !((pinputbuf[0]>='0')&&(pinputbuf[0]<='9')); - pinputbuf = inputbuf; - } - } - MPI_Bcast(&end, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (end == 0){ - if (comm_rank==0){ - sscanf(pinputbuf, "%d", &ndims); - } - MPI_Bcast(&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD); - dim_size = (int*) malloc(ndims*sizeof(int)); - periods = (int*) malloc(ndims*sizeof(int)); - halosize = (int*) malloc(ndims*sizeof(int)); + if (argc == 1) + { + if (comm_rank == 0) + { + printf ("%s inputfile\n", argv[0]); + printf ("ndims dim1 dim2 ... halosize1 halosize2 ...\n"); + } + exit (0); + } + if (comm_rank == 0) + { + if (strcmp (argv[1], "-") == 0) + { + pFile = stdin; + } + else + { + pFile = fopen (argv[1], "r"); + } + } + end = 0; + while (end == 0) + { + if (comm_rank == 0) + { + end = (fgets (inputbuf, sizeof (inputbuf) - 1, pFile) == NULL); + if (end == 0) + { + pinputbuf = inputbuf; + while (*pinputbuf == ' ') + pinputbuf++; + end = !((*pinputbuf >= '0') && (*pinputbuf <= '9')); + pinputbuf = inputbuf; + } + } + if (MPI_Bcast (&end, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) + { + fprintf (stderr, "MPI_Bcast() failed\n"); + exit (1); + } + if (end == 0) + { + if (comm_rank == 0) + { + sscanf (pinputbuf, "%d", &ndims); + } + if (MPI_Bcast (&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) + { + fprintf (stderr, "MPI_Bcast() failed\n"); + exit (1); + } + dim_size = (int *) malloc (ndims * sizeof (*dim_size)); + periods = (int *) malloc (ndims * sizeof (*periods)); + halosize = (int *) malloc (ndims * sizeof (*halosize)); - for (i=0; icomm_size){ - if (comm_rank==0){ - printf("Please run with at least %d MPI ranks.\n", j); - } - ierr = MPI_Finalize(); - if (ierr!=0) exit(1); - exit(0); - } + j = 1; + for (i = 0; i < ndims; i++) + { + if (comm_rank == 0) + { + while (*pinputbuf == ' ') + pinputbuf++; + while (*pinputbuf != ' ') + pinputbuf++; + while (*pinputbuf == ' ') + pinputbuf++; + sscanf (pinputbuf, "%d", &dim_size[i]); + } + if (MPI_Bcast (&dim_size[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != + 0) + { + fprintf (stderr, "MPI_Bcast() failed\n"); + exit (1); + } + j *= dim_size[i]; + } + for (i = 0; i < ndims; i++) + { + if (comm_rank == 0) + { + while (*pinputbuf == ' ') + pinputbuf++; + while (*pinputbuf != ' ') + pinputbuf++; + while (*pinputbuf == ' ') + pinputbuf++; + sscanf (pinputbuf, "%d", &halosize[i]); + } + if (MPI_Bcast (&halosize[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != + 0) + { + fprintf (stderr, "MPI_Bcast() failed\n"); + exit (1); + } + } + if (j > comm_size) + { + if (comm_rank == 0) + { + printf ("Please run with at least %d MPI ranks.\n", j); + } + if (MPI_Finalize () != 0) + { + fprintf (stderr, "MPI_Finalize() failed\n"); + exit (1); + } + exit (0); + } - color = (comm_rank < j); - if (color == 0){ - ierr = MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, &red_comm); - }else{ - ierr = MPI_Comm_split(MPI_COMM_WORLD, color, comm_rank, &red_comm); - ierr = MPI_Cart_create(red_comm, ndims, dim_size, periods, reorder, &cart_comm); - if (ierr!=0) exit(1); - ierr = MPI_Comm_size(cart_comm, &comm_size_cart); - if (ierr!=0) exit(1); - ierr = MPI_Comm_rank(cart_comm, &comm_rank_cart); - if (ierr!=0) exit(1); + color = (comm_rank < j); + if (color == 0) + { + if (MPI_Comm_split + (MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, &red_comm) != 0) + { + fprintf (stderr, "MPI_Comm_split() failed\n"); + exit (1); + } + } + else + { + if (MPI_Comm_split (MPI_COMM_WORLD, color, comm_rank, &red_comm) + != 0) + { + fprintf (stderr, "MPI_Comm_split() failed\n"); + exit (1); + } + if (MPI_Cart_create + (red_comm, ndims, dim_size, periods, reorder, + &cart_comm) != 0) + { + fprintf (stderr, "MPI_Comm_create() failed\n"); + exit (1); + } + if (MPI_Comm_size (cart_comm, &comm_size_cart) != 0) + { + fprintf (stderr, "MPI_Comm_size() failed\n"); + exit (1); + } + if (MPI_Comm_rank (cart_comm, &comm_rank_cart) != 0) + { + fprintf (stderr, "MPI_Comm_rank() failed\n"); + exit (1); + } - j=0; - for (i=0; ij){ - j=halosize[i]; - } - } - sendbuf = (char*) malloc(ndims*2*j*sizeof(char)); - recvbuf = (char*) malloc(ndims*2*j*sizeof(char)); - request = (MPI_Request*) malloc(ndims*2*2*sizeof(MPI_Request)); - status = (MPI_Status*) malloc(ndims*2*2*sizeof(MPI_Status)); + j = 0; + for (i = 0; i < ndims; i++) + { + if (halosize[i] > j) + { + j = halosize[i]; + } + } + sendbuf = (char *) malloc (ndims * 2 * j * sizeof (char)); + recvbuf = (char *) malloc (ndims * 2 * j * sizeof (char)); + request = + (MPI_Request *) malloc (ndims * 2 * 2 * sizeof (MPI_Request)); + status = + (MPI_Status *) malloc (ndims * 2 * 2 * sizeof (MPI_Status)); - start = MPI_Wtime (); + start = MPI_Wtime (); - for (j=0; j<10000; j++){ - for (i=0; i Date: Mon, 4 Mar 2019 09:16:40 +0100 Subject: [PATCH 5/6] implemented changes required --- .../microbenchmarks/mpi/halo_cell_exchange.py | 9 +++++++++ .../microbenchmarks/mpi/src/halo_cell_exchange.c | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py index a9e24cace7..b25271cb8d 100644 --- a/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py +++ b/cscs-checks/microbenchmarks/mpi/halo_cell_exchange.py @@ -86,6 +86,15 @@ def __init__(self): 'time_6_1000000': (9.179296e-04, None, 0.50, 's') }, 'kesch:cn': { + 'time_2_10': (2.280450e-06, None, 0.50, 's'), + 'time_2_10000': (8.059907e-06, None, 0.50, 's'), + 'time_2_1000000': (5.959686e-04, None, 0.50, 's'), + 'time_4_10': (2.951527e-06, None, 0.50, 's'), + 'time_4_10000': (1.258132e-05, None, 0.50, 's'), + 'time_4_1000000': (8.539153e-04, None, 0.50, 's'), + 'time_6_10': (3.740311e-06, None, 0.50, 's'), + 'time_6_10000': (1.448979e-05, None, 0.50, 's'), + 'time_6_1000000': (8.432294e-04, None, 0.50, 's') } } diff --git a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c index b581b4a121..e879405d68 100644 --- a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c +++ b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c @@ -1,3 +1,5 @@ +/* This benchmark emulates a halo cell exchange in n dimensions. The pure + communication is considered without any stencil computation.*/ #include #include #include @@ -59,6 +61,7 @@ main (int argc, const char *argv[]) { if (comm_rank == 0) { + /* read parameters for every single benchmark line by line */ end = (fgets (inputbuf, sizeof (inputbuf) - 1, pFile) == NULL); if (end == 0) { @@ -78,6 +81,7 @@ main (int argc, const char *argv[]) { if (comm_rank == 0) { + /* read number of dimensions */ sscanf (pinputbuf, "%d", &ndims); } if (MPI_Bcast (&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) @@ -106,6 +110,7 @@ main (int argc, const char *argv[]) pinputbuf++; while (*pinputbuf == ' ') pinputbuf++; + /* read number of ranks in every dimension */ sscanf (pinputbuf, "%d", &dim_size[i]); } if (MPI_Bcast (&dim_size[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != @@ -126,6 +131,8 @@ main (int argc, const char *argv[]) pinputbuf++; while (*pinputbuf == ' ') pinputbuf++; + /* read halo cell size to be communicated in every + dimension */ sscanf (pinputbuf, "%d", &halosize[i]); } if (MPI_Bcast (&halosize[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != @@ -149,6 +156,7 @@ main (int argc, const char *argv[]) exit (0); } + /* use only the number of ranks required */ color = (comm_rank < j); if (color == 0) { @@ -167,6 +175,7 @@ main (int argc, const char *argv[]) fprintf (stderr, "MPI_Comm_split() failed\n"); exit (1); } + /* cartesian grid communicator */ if (MPI_Cart_create (red_comm, ndims, dim_size, periods, reorder, &cart_comm) != 0) @@ -206,6 +215,7 @@ main (int argc, const char *argv[]) { for (i = 0; i < ndims; i++) { + /* receive data in every direction */ if (MPI_Cart_shift (cart_comm, i, 1, &rank_source, &rank_dest) != 0) { @@ -232,6 +242,7 @@ main (int argc, const char *argv[]) } for (i = 0; i < ndims; i++) { + /* send data in every direction */ if (MPI_Cart_shift (cart_comm, i, 1, &rank_source, &rank_dest) != 0) { @@ -290,6 +301,7 @@ main (int argc, const char *argv[]) { printf (" %d", halosize[i]); } + /* print minimum and maximum time per exchange and test */ printf (" %e %e\n", deltatmin / NCALLS, deltatmax / NCALLS); } free (status); From 6a75df373b88bc5e98ce60b08b5de2d7b5d90eed Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sun, 10 Mar 2019 01:04:50 +0100 Subject: [PATCH 6/6] Fix C coding style The previous one (perhaps the default) had some problems with nested-if indentation. Code is now formatted as follows: indent -linux -bap -i4 -nut -as --- .../mpi/src/halo_cell_exchange.c | 565 ++++++++---------- 1 file changed, 259 insertions(+), 306 deletions(-) diff --git a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c index e879405d68..820c713dea 100644 --- a/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c +++ b/cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c @@ -7,326 +7,279 @@ #define NCALLS 10000 -int -main (int argc, const char *argv[]) +int main(int argc, const char *argv[]) { - MPI_Comm cart_comm, red_comm; - MPI_Request *request; - MPI_Status *status; - FILE *pFile; - int ndims, reorder, color, end; - int *dim_size, *periods, *halosize; - int comm_size, comm_rank, comm_size_cart, comm_rank_cart; - char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; - double start, stop, deltatmin, deltatmax, elapsed_time; - int rank_source, rank_dest, i, j; + MPI_Comm cart_comm, red_comm; + MPI_Request *request; + MPI_Status *status; + FILE *pFile; + int ndims, reorder, color, end; + int *dim_size, *periods, *halosize; + int comm_size, comm_rank, comm_size_cart, comm_rank_cart; + char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; + double start, stop, deltatmin, deltatmax, elapsed_time; + int rank_source, rank_dest, i, j; - if (MPI_Init (NULL, NULL) != 0) - { - fprintf (stderr, "MPI_Init() failed\n"); - exit (1); + if (MPI_Init(NULL, NULL) != 0) { + fprintf(stderr, "MPI_Init() failed\n"); + exit(1); } - if (MPI_Comm_size (MPI_COMM_WORLD, &comm_size) != 0) - { - fprintf (stderr, "MPI_Comm_size() failed\n"); - exit (1); + if (MPI_Comm_size(MPI_COMM_WORLD, &comm_size) != 0) { + fprintf(stderr, "MPI_Comm_size() failed\n"); + exit(1); } - if (MPI_Comm_rank (MPI_COMM_WORLD, &comm_rank) != 0) - { - fprintf (stderr, "MPI_Comm_rank() failed\n"); - exit (1); + if (MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank) != 0) { + fprintf(stderr, "MPI_Comm_rank() failed\n"); + exit(1); } - if (argc == 1) - { - if (comm_rank == 0) - { - printf ("%s inputfile\n", argv[0]); - printf ("ndims dim1 dim2 ... halosize1 halosize2 ...\n"); - } - exit (0); + if (argc == 1) { + if (comm_rank == 0) { + printf("%s inputfile\n", argv[0]); + printf("ndims dim1 dim2 ... halosize1 halosize2 ...\n"); + } + exit(0); } - if (comm_rank == 0) - { - if (strcmp (argv[1], "-") == 0) - { - pFile = stdin; - } - else - { - pFile = fopen (argv[1], "r"); - } + if (comm_rank == 0) { + if (strcmp(argv[1], "-") == 0) { + pFile = stdin; + } else { + pFile = fopen(argv[1], "r"); + } } - end = 0; - while (end == 0) - { - if (comm_rank == 0) - { - /* read parameters for every single benchmark line by line */ - end = (fgets (inputbuf, sizeof (inputbuf) - 1, pFile) == NULL); - if (end == 0) - { - pinputbuf = inputbuf; - while (*pinputbuf == ' ') - pinputbuf++; - end = !((*pinputbuf >= '0') && (*pinputbuf <= '9')); - pinputbuf = inputbuf; - } - } - if (MPI_Bcast (&end, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) - { - fprintf (stderr, "MPI_Bcast() failed\n"); - exit (1); - } - if (end == 0) - { - if (comm_rank == 0) - { - /* read number of dimensions */ - sscanf (pinputbuf, "%d", &ndims); - } - if (MPI_Bcast (&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) - { - fprintf (stderr, "MPI_Bcast() failed\n"); - exit (1); - } - dim_size = (int *) malloc (ndims * sizeof (*dim_size)); - periods = (int *) malloc (ndims * sizeof (*periods)); - halosize = (int *) malloc (ndims * sizeof (*halosize)); + end = 0; + while (end == 0) { + if (comm_rank == 0) { + /* read parameters for every single benchmark line by line */ + end = (fgets(inputbuf, sizeof(inputbuf) - 1, pFile) == NULL); + if (end == 0) { + pinputbuf = inputbuf; + while (*pinputbuf == ' ') + pinputbuf++; + end = !((*pinputbuf >= '0') + && (*pinputbuf <= '9')); + pinputbuf = inputbuf; + } + } + if (MPI_Bcast(&end, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { + fprintf(stderr, "MPI_Bcast() failed\n"); + exit(1); + } + if (end == 0) { + if (comm_rank == 0) { + /* read number of dimensions */ + sscanf(pinputbuf, "%d", &ndims); + } + if (MPI_Bcast(&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { + fprintf(stderr, "MPI_Bcast() failed\n"); + exit(1); + } + dim_size = (int *)malloc(ndims * sizeof(*dim_size)); + periods = (int *)malloc(ndims * sizeof(*periods)); + halosize = (int *)malloc(ndims * sizeof(*halosize)); - for (i = 0; i < ndims; i++) - { - periods[i] = 1; - } - reorder = 1; + for (i = 0; i < ndims; i++) { + periods[i] = 1; + } + reorder = 1; - j = 1; - for (i = 0; i < ndims; i++) - { - if (comm_rank == 0) - { - while (*pinputbuf == ' ') - pinputbuf++; - while (*pinputbuf != ' ') - pinputbuf++; - while (*pinputbuf == ' ') - pinputbuf++; - /* read number of ranks in every dimension */ - sscanf (pinputbuf, "%d", &dim_size[i]); - } - if (MPI_Bcast (&dim_size[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != - 0) - { - fprintf (stderr, "MPI_Bcast() failed\n"); - exit (1); - } - j *= dim_size[i]; - } - for (i = 0; i < ndims; i++) - { - if (comm_rank == 0) - { - while (*pinputbuf == ' ') - pinputbuf++; - while (*pinputbuf != ' ') - pinputbuf++; - while (*pinputbuf == ' ') - pinputbuf++; - /* read halo cell size to be communicated in every - dimension */ - sscanf (pinputbuf, "%d", &halosize[i]); - } - if (MPI_Bcast (&halosize[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != - 0) - { - fprintf (stderr, "MPI_Bcast() failed\n"); - exit (1); - } - } - if (j > comm_size) - { - if (comm_rank == 0) - { - printf ("Please run with at least %d MPI ranks.\n", j); - } - if (MPI_Finalize () != 0) - { - fprintf (stderr, "MPI_Finalize() failed\n"); - exit (1); - } - exit (0); - } + j = 1; + for (i = 0; i < ndims; i++) { + if (comm_rank == 0) { + while (*pinputbuf == ' ') + pinputbuf++; + while (*pinputbuf != ' ') + pinputbuf++; + while (*pinputbuf == ' ') + pinputbuf++; + /* read number of ranks in every dimension */ + sscanf(pinputbuf, "%d", &dim_size[i]); + } + if (MPI_Bcast(&dim_size[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { + fprintf(stderr, "MPI_Bcast() failed\n"); + exit(1); + } + j *= dim_size[i]; + } + for (i = 0; i < ndims; i++) { + if (comm_rank == 0) { + while (*pinputbuf == ' ') + pinputbuf++; + while (*pinputbuf != ' ') + pinputbuf++; + while (*pinputbuf == ' ') + pinputbuf++; + /* read halo cell size to be communicated in every + dimension */ + sscanf(pinputbuf, "%d", &halosize[i]); + } + if (MPI_Bcast(&halosize[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { + fprintf(stderr, "MPI_Bcast() failed\n"); + exit(1); + } + } + if (j > comm_size) { + if (comm_rank == 0) { + printf("Please run with at least %d MPI ranks.\n", j); + } + if (MPI_Finalize() != 0) { + fprintf(stderr, "MPI_Finalize() failed\n"); + exit(1); + } + exit(0); + } - /* use only the number of ranks required */ - color = (comm_rank < j); - if (color == 0) - { - if (MPI_Comm_split - (MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, &red_comm) != 0) - { - fprintf (stderr, "MPI_Comm_split() failed\n"); - exit (1); - } - } - else - { - if (MPI_Comm_split (MPI_COMM_WORLD, color, comm_rank, &red_comm) - != 0) - { - fprintf (stderr, "MPI_Comm_split() failed\n"); - exit (1); - } - /* cartesian grid communicator */ - if (MPI_Cart_create - (red_comm, ndims, dim_size, periods, reorder, - &cart_comm) != 0) - { - fprintf (stderr, "MPI_Comm_create() failed\n"); - exit (1); - } - if (MPI_Comm_size (cart_comm, &comm_size_cart) != 0) - { - fprintf (stderr, "MPI_Comm_size() failed\n"); - exit (1); - } - if (MPI_Comm_rank (cart_comm, &comm_rank_cart) != 0) - { - fprintf (stderr, "MPI_Comm_rank() failed\n"); - exit (1); - } + /* use only the number of ranks required */ + color = (comm_rank < j); + if (color == 0) { + if (MPI_Comm_split + (MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, + &red_comm) != 0) { + fprintf(stderr, "MPI_Comm_split() failed\n"); + exit(1); + } + } else { + if (MPI_Comm_split(MPI_COMM_WORLD, color, comm_rank, &red_comm) + != 0) { + fprintf(stderr, "MPI_Comm_split() failed\n"); + exit(1); + } + /* cartesian grid communicator */ + if (MPI_Cart_create + (red_comm, ndims, dim_size, periods, + reorder, &cart_comm) != 0) { + fprintf(stderr, "MPI_Comm_create() failed\n"); + exit(1); + } + if (MPI_Comm_size(cart_comm, &comm_size_cart) != 0) { + fprintf(stderr, "MPI_Comm_size() failed\n"); + exit(1); + } + if (MPI_Comm_rank(cart_comm, &comm_rank_cart) != 0) { + fprintf(stderr, "MPI_Comm_rank() failed\n"); + exit(1); + } - j = 0; - for (i = 0; i < ndims; i++) - { - if (halosize[i] > j) - { - j = halosize[i]; - } - } - sendbuf = (char *) malloc (ndims * 2 * j * sizeof (char)); - recvbuf = (char *) malloc (ndims * 2 * j * sizeof (char)); - request = - (MPI_Request *) malloc (ndims * 2 * 2 * sizeof (MPI_Request)); - status = - (MPI_Status *) malloc (ndims * 2 * 2 * sizeof (MPI_Status)); + j = 0; + for (i = 0; i < ndims; i++) { + if (halosize[i] > j) { + j = halosize[i]; + } + } + sendbuf = (char *)malloc(ndims * 2 * j * sizeof(char)); + recvbuf = (char *)malloc(ndims * 2 * j * sizeof(char)); + request = + (MPI_Request *) malloc(ndims * 2 * 2 * sizeof(MPI_Request)); + status = + (MPI_Status *) malloc(ndims * 2 * 2 * sizeof(MPI_Status)); - start = MPI_Wtime (); + start = MPI_Wtime(); - for (j = 0; j < NCALLS; j++) - { - for (i = 0; i < ndims; i++) - { - /* receive data in every direction */ - if (MPI_Cart_shift - (cart_comm, i, 1, &rank_source, &rank_dest) != 0) - { - fprintf (stderr, "MPI_Cart_shift() failed\n"); - exit (1); - } - if (MPI_Irecv - (recvbuf + i * 2 * halosize[i] * sizeof (char), - halosize[i], MPI_CHAR, rank_source, 1, cart_comm, - request + i * 2) != 0) - { - fprintf (stderr, "MPI_Irecv() failed\n"); - exit (1); - } - if (MPI_Irecv - (recvbuf + - (i * 2 + 1) * halosize[i] * sizeof (char), - halosize[i], MPI_CHAR, rank_dest, 1, cart_comm, - request + i * 2 + 1) != 0) - { - fprintf (stderr, "MPI_Irecv() failed\n"); - exit (1); - } - } - for (i = 0; i < ndims; i++) - { - /* send data in every direction */ - if (MPI_Cart_shift - (cart_comm, i, 1, &rank_source, &rank_dest) != 0) - { - fprintf (stderr, "MPI_Cart_shift() failed\n"); - exit (1); - } - if (MPI_Isend - (sendbuf + i * 2 * halosize[i] * sizeof (char), - halosize[i], MPI_CHAR, rank_source, 1, cart_comm, - request + i * 2 + ndims * 2) != 0) - { - fprintf (stderr, "MPI_Irecv() failed\n"); - exit (1); - } - if (MPI_Isend - (sendbuf + - (i * 2 + 1) * halosize[i] * sizeof (char), - halosize[i], MPI_CHAR, rank_dest, 1, cart_comm, - request + i * 2 + 1 + ndims * 2) != 0) - { - fprintf (stderr, "MPI_Irecv() failed\n"); - exit (1); - } - } - if (MPI_Waitall (ndims * 2 * 2, request, status) != 0) - { - fprintf (stderr, "MPI_Waitall() failed\n"); - exit (1); - } - } + for (j = 0; j < NCALLS; j++) { + for (i = 0; i < ndims; i++) { + /* receive data in every direction */ + if (MPI_Cart_shift + (cart_comm, i, 1, &rank_source, &rank_dest) != 0) { + fprintf(stderr, "MPI_Cart_shift() failed\n"); + exit(1); + } + if (MPI_Irecv + (recvbuf + + i * 2 * halosize[i] * + sizeof(char), halosize[i], + MPI_CHAR, rank_source, 1, + cart_comm, request + i * 2) != 0) { + fprintf(stderr, "MPI_Irecv() failed\n"); + exit(1); + } + if (MPI_Irecv + (recvbuf + + (i * 2 + + 1) * halosize[i] * + sizeof(char), halosize[i], + MPI_CHAR, rank_dest, 1, + cart_comm, request + i * 2 + 1) != 0) { + fprintf(stderr, "MPI_Irecv() failed\n"); + exit(1); + } + } + for (i = 0; i < ndims; i++) { + /* send data in every direction */ + if (MPI_Cart_shift + (cart_comm, i, 1, &rank_source, &rank_dest) != 0) { + fprintf(stderr, "MPI_Cart_shift() failed\n"); + exit(1); + } + if (MPI_Isend + (sendbuf + + i * 2 * halosize[i] * + sizeof(char), halosize[i], + MPI_CHAR, rank_source, 1, + cart_comm, request + i * 2 + ndims * 2) != 0) { + fprintf(stderr, "MPI_Irecv() failed\n"); + exit(1); + } + if (MPI_Isend + (sendbuf + + (i * 2 + + 1) * halosize[i] * + sizeof(char), halosize[i], + MPI_CHAR, rank_dest, 1, + cart_comm, request + i * 2 + 1 + ndims * 2) != 0) { + fprintf(stderr, "MPI_Irecv() failed\n"); + exit(1); + } + } + if (MPI_Waitall(ndims * 2 * 2, request, status) != 0) { + fprintf(stderr, "MPI_Waitall() failed\n"); + exit(1); + } + } - stop = MPI_Wtime (); - elapsed_time = stop - start; - if (MPI_Reduce - (&elapsed_time, &deltatmin, 1, MPI_DOUBLE, MPI_MIN, 0, - cart_comm) != 0) - { - fprintf (stderr, "MPI_Reduce() failed\n"); - exit (1); - } - if (MPI_Reduce - (&elapsed_time, &deltatmax, 1, MPI_DOUBLE, MPI_MAX, 0, - cart_comm) != 0) - { - fprintf (stderr, "MPI_Reduce() failed\n"); - exit (1); - } - if (comm_rank_cart == 0) - { - printf ("halo_cell_exchange %d", comm_size); - for (i = 0; i < ndims; i++) - { - printf (" %d", dim_size[i]); - } - for (i = 0; i < ndims; i++) - { - printf (" %d", halosize[i]); - } - /* print minimum and maximum time per exchange and test */ - printf (" %e %e\n", deltatmin / NCALLS, deltatmax / NCALLS); - } - free (status); - free (request); - free (recvbuf); - free (sendbuf); - if (MPI_Comm_free (&cart_comm) != 0) - { - fprintf (stderr, "MPI_Comm_free() failed\n"); - exit (1); - } - if (MPI_Comm_free (&red_comm) != 0) - { - fprintf (stderr, "MPI_Comm_free() failed\n"); - exit (1); - } - } - free (halosize); - free (periods); - free (dim_size); - } + stop = MPI_Wtime(); + elapsed_time = stop - start; + if (MPI_Reduce + (&elapsed_time, &deltatmin, 1, MPI_DOUBLE, + MPI_MIN, 0, cart_comm) != 0) { + fprintf(stderr, "MPI_Reduce() failed\n"); + exit(1); + } + if (MPI_Reduce + (&elapsed_time, &deltatmax, 1, MPI_DOUBLE, + MPI_MAX, 0, cart_comm) != 0) { + fprintf(stderr, "MPI_Reduce() failed\n"); + exit(1); + } + if (comm_rank_cart == 0) { + printf("halo_cell_exchange %d", comm_size); + for (i = 0; i < ndims; i++) { + printf(" %d", dim_size[i]); + } + for (i = 0; i < ndims; i++) { + printf(" %d", halosize[i]); + } + /* print minimum and maximum time per exchange and test */ + printf(" %e %e\n", deltatmin / NCALLS, deltatmax / NCALLS); + } + free(status); + free(request); + free(recvbuf); + free(sendbuf); + if (MPI_Comm_free(&cart_comm) != 0) { + fprintf(stderr, "MPI_Comm_free() failed\n"); + exit(1); + } + if (MPI_Comm_free(&red_comm) != 0) { + fprintf(stderr, "MPI_Comm_free() failed\n"); + exit(1); + } + } + free(halosize); + free(periods); + free(dim_size); + } } - if (MPI_Finalize () != 0) - { - fprintf (stderr, "MPI_Finalize() failed\n"); - exit (1); + if (MPI_Finalize() != 0) { + fprintf(stderr, "MPI_Finalize() failed\n"); + exit(1); } }