-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #666 from ajocksch/benchmarks/halo_cell_exchange
[test] Add MPI halo cell exchange benchmark
- Loading branch information
Showing
3 changed files
with
398 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import reframe as rfm | ||
import reframe.utility.sanity as sn | ||
|
||
|
||
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class HaloCellExchangeTest(rfm.RegressionTest):
    """Performance check built from ``halo_cell_exchange.c``.

    The benchmark is run on 6 tasks (one per node) with ``input.txt`` as
    its only argument. The sanity check expects exactly nine result lines
    (each starting with ``halo_cell_exchange``) on stdout. Every
    performance variable takes the second of the two timings printed at
    the end of the matching result line.
    """

    def __init__(self):
        super().__init__()
        self.sourcepath = 'halo_cell_exchange.c'
        self.build_system = 'SingleSource'
        self.build_system.cflags = ['-O2']
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
                                    'PrgEnv-gnu']
        self.num_tasks = 6
        self.num_tasks_per_node = 1
        self.num_gpus_per_node = 0

        self.executable_opts = ['input.txt']

        self.sanity_patterns = sn.assert_eq(
            sn.count(sn.findall(r'halo_cell_exchange', self.stdout)), 9)

        # (label, grid extents) pairs: the label is the number used in the
        # performance variable name, the extents are the three per-dimension
        # rank counts as they appear in the benchmark's result line.
        grids = [(2, '2 1 1'), (4, '2 2 1'), (6, '3 2 1')]
        # Halo size, printed once per dimension in the result line.
        halo_sizes = [10, 10000, 1000000]

        # Result line layout matched below:
        #   halo_cell_exchange 6 <grid> <halo> <halo> <halo> <t1> <t2>
        # where <t2> is the value captured as 'time_mpi'.
        self.perf_patterns = {
            'time_%d_%d' % (label, halo): sn.extractsingle(
                r'halo_cell_exchange 6 %s %d %d %d'
                r' \S+ (?P<time_mpi>\S+)' % (grid, halo, halo, halo),
                self.stdout, 'time_mpi', float)
            for label, grid in grids
            for halo in halo_sizes
        }

        # Reference tuples are (value, lower threshold, upper threshold,
        # unit); only the upper bound is set.
        self.sys_reference = {
            'dom:gpu': {
                'time_2_10': (3.925395e-06, None, 0.50, 's'),
                'time_2_10000': (9.721279e-06, None, 0.50, 's'),
                'time_2_1000000': (4.934530e-04, None, 0.50, 's'),
                'time_4_10': (5.878997e-06, None, 0.50, 's'),
                'time_4_10000': (1.495080e-05, None, 0.50, 's'),
                'time_4_1000000': (6.791397e-04, None, 0.50, 's'),
                'time_6_10': (5.428815e-06, None, 0.50, 's'),
                'time_6_10000': (1.540580e-05, None, 0.50, 's'),
                'time_6_1000000': (9.179296e-04, None, 0.50, 's')
            },
            'daint:gpu': {
                'time_2_10': (3.925395e-06, None, 0.50, 's'),
                'time_2_10000': (9.721279e-06, None, 0.50, 's'),
                'time_2_1000000': (4.934530e-04, None, 0.50, 's'),
                'time_4_10': (5.878997e-06, None, 0.50, 's'),
                'time_4_10000': (1.495080e-05, None, 0.50, 's'),
                'time_4_1000000': (6.791397e-04, None, 0.50, 's'),
                'time_6_10': (5.428815e-06, None, 0.50, 's'),
                'time_6_10000': (1.540580e-05, None, 0.50, 's'),
                'time_6_1000000': (9.179296e-04, None, 0.50, 's')
            },
            'kesch:cn': {
                'time_2_10': (2.280450e-06, None, 0.50, 's'),
                'time_2_10000': (8.059907e-06, None, 0.50, 's'),
                'time_2_1000000': (5.959686e-04, None, 0.50, 's'),
                'time_4_10': (2.951527e-06, None, 0.50, 's'),
                'time_4_10000': (1.258132e-05, None, 0.50, 's'),
                'time_4_1000000': (8.539153e-04, None, 0.50, 's'),
                'time_6_10': (3.740311e-06, None, 0.50, 's'),
                'time_6_10000': (1.448979e-05, None, 0.50, 's'),
                'time_6_1000000': (8.432294e-04, None, 0.50, 's')
            }
        }

        self.reference = self.sys_reference

        self.maintainers = ['AJ']
        self.tags = {'benchmark'}
285 changes: 285 additions & 0 deletions
285
cscs-checks/microbenchmarks/mpi/src/halo_cell_exchange.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
/* This benchmark emulates a halo cell exchange in n dimensions. Only the pure
   communication is measured, without any stencil computation. */
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include <string.h> | ||
#include <mpi.h> | ||
|
||
#define NCALLS 10000 | ||
|
||
int main(int argc, const char *argv[]) | ||
{ | ||
MPI_Comm cart_comm, red_comm; | ||
MPI_Request *request; | ||
MPI_Status *status; | ||
FILE *pFile; | ||
int ndims, reorder, color, end; | ||
int *dim_size, *periods, *halosize; | ||
int comm_size, comm_rank, comm_size_cart, comm_rank_cart; | ||
char *sendbuf, *recvbuf, inputbuf[1000], *pinputbuf; | ||
double start, stop, deltatmin, deltatmax, elapsed_time; | ||
int rank_source, rank_dest, i, j; | ||
|
||
if (MPI_Init(NULL, NULL) != 0) { | ||
fprintf(stderr, "MPI_Init() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Comm_size(MPI_COMM_WORLD, &comm_size) != 0) { | ||
fprintf(stderr, "MPI_Comm_size() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank) != 0) { | ||
fprintf(stderr, "MPI_Comm_rank() failed\n"); | ||
exit(1); | ||
} | ||
if (argc == 1) { | ||
if (comm_rank == 0) { | ||
printf("%s inputfile\n", argv[0]); | ||
printf("ndims dim1 dim2 ... halosize1 halosize2 ...\n"); | ||
} | ||
exit(0); | ||
} | ||
if (comm_rank == 0) { | ||
if (strcmp(argv[1], "-") == 0) { | ||
pFile = stdin; | ||
} else { | ||
pFile = fopen(argv[1], "r"); | ||
} | ||
} | ||
end = 0; | ||
while (end == 0) { | ||
if (comm_rank == 0) { | ||
/* read parameters for every single benchmark line by line */ | ||
end = (fgets(inputbuf, sizeof(inputbuf) - 1, pFile) == NULL); | ||
if (end == 0) { | ||
pinputbuf = inputbuf; | ||
while (*pinputbuf == ' ') | ||
pinputbuf++; | ||
end = !((*pinputbuf >= '0') | ||
&& (*pinputbuf <= '9')); | ||
pinputbuf = inputbuf; | ||
} | ||
} | ||
if (MPI_Bcast(&end, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { | ||
fprintf(stderr, "MPI_Bcast() failed\n"); | ||
exit(1); | ||
} | ||
if (end == 0) { | ||
if (comm_rank == 0) { | ||
/* read number of dimensions */ | ||
sscanf(pinputbuf, "%d", &ndims); | ||
} | ||
if (MPI_Bcast(&ndims, 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { | ||
fprintf(stderr, "MPI_Bcast() failed\n"); | ||
exit(1); | ||
} | ||
dim_size = (int *)malloc(ndims * sizeof(*dim_size)); | ||
periods = (int *)malloc(ndims * sizeof(*periods)); | ||
halosize = (int *)malloc(ndims * sizeof(*halosize)); | ||
|
||
for (i = 0; i < ndims; i++) { | ||
periods[i] = 1; | ||
} | ||
reorder = 1; | ||
|
||
j = 1; | ||
for (i = 0; i < ndims; i++) { | ||
if (comm_rank == 0) { | ||
while (*pinputbuf == ' ') | ||
pinputbuf++; | ||
while (*pinputbuf != ' ') | ||
pinputbuf++; | ||
while (*pinputbuf == ' ') | ||
pinputbuf++; | ||
/* read number of ranks in every dimension */ | ||
sscanf(pinputbuf, "%d", &dim_size[i]); | ||
} | ||
if (MPI_Bcast(&dim_size[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { | ||
fprintf(stderr, "MPI_Bcast() failed\n"); | ||
exit(1); | ||
} | ||
j *= dim_size[i]; | ||
} | ||
for (i = 0; i < ndims; i++) { | ||
if (comm_rank == 0) { | ||
while (*pinputbuf == ' ') | ||
pinputbuf++; | ||
while (*pinputbuf != ' ') | ||
pinputbuf++; | ||
while (*pinputbuf == ' ') | ||
pinputbuf++; | ||
/* read halo cell size to be communicated in every | ||
dimension */ | ||
sscanf(pinputbuf, "%d", &halosize[i]); | ||
} | ||
if (MPI_Bcast(&halosize[i], 1, MPI_INT, 0, MPI_COMM_WORLD) != 0) { | ||
fprintf(stderr, "MPI_Bcast() failed\n"); | ||
exit(1); | ||
} | ||
} | ||
if (j > comm_size) { | ||
if (comm_rank == 0) { | ||
printf("Please run with at least %d MPI ranks.\n", j); | ||
} | ||
if (MPI_Finalize() != 0) { | ||
fprintf(stderr, "MPI_Finalize() failed\n"); | ||
exit(1); | ||
} | ||
exit(0); | ||
} | ||
|
||
/* use only the number of ranks required */ | ||
color = (comm_rank < j); | ||
if (color == 0) { | ||
if (MPI_Comm_split | ||
(MPI_COMM_WORLD, MPI_UNDEFINED, comm_rank, | ||
&red_comm) != 0) { | ||
fprintf(stderr, "MPI_Comm_split() failed\n"); | ||
exit(1); | ||
} | ||
} else { | ||
if (MPI_Comm_split(MPI_COMM_WORLD, color, comm_rank, &red_comm) | ||
!= 0) { | ||
fprintf(stderr, "MPI_Comm_split() failed\n"); | ||
exit(1); | ||
} | ||
/* cartesian grid communicator */ | ||
if (MPI_Cart_create | ||
(red_comm, ndims, dim_size, periods, | ||
reorder, &cart_comm) != 0) { | ||
fprintf(stderr, "MPI_Comm_create() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Comm_size(cart_comm, &comm_size_cart) != 0) { | ||
fprintf(stderr, "MPI_Comm_size() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Comm_rank(cart_comm, &comm_rank_cart) != 0) { | ||
fprintf(stderr, "MPI_Comm_rank() failed\n"); | ||
exit(1); | ||
} | ||
|
||
j = 0; | ||
for (i = 0; i < ndims; i++) { | ||
if (halosize[i] > j) { | ||
j = halosize[i]; | ||
} | ||
} | ||
sendbuf = (char *)malloc(ndims * 2 * j * sizeof(char)); | ||
recvbuf = (char *)malloc(ndims * 2 * j * sizeof(char)); | ||
request = | ||
(MPI_Request *) malloc(ndims * 2 * 2 * sizeof(MPI_Request)); | ||
status = | ||
(MPI_Status *) malloc(ndims * 2 * 2 * sizeof(MPI_Status)); | ||
|
||
start = MPI_Wtime(); | ||
|
||
for (j = 0; j < NCALLS; j++) { | ||
for (i = 0; i < ndims; i++) { | ||
/* receive data in every direction */ | ||
if (MPI_Cart_shift | ||
(cart_comm, i, 1, &rank_source, &rank_dest) != 0) { | ||
fprintf(stderr, "MPI_Cart_shift() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Irecv | ||
(recvbuf + | ||
i * 2 * halosize[i] * | ||
sizeof(char), halosize[i], | ||
MPI_CHAR, rank_source, 1, | ||
cart_comm, request + i * 2) != 0) { | ||
fprintf(stderr, "MPI_Irecv() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Irecv | ||
(recvbuf + | ||
(i * 2 + | ||
1) * halosize[i] * | ||
sizeof(char), halosize[i], | ||
MPI_CHAR, rank_dest, 1, | ||
cart_comm, request + i * 2 + 1) != 0) { | ||
fprintf(stderr, "MPI_Irecv() failed\n"); | ||
exit(1); | ||
} | ||
} | ||
for (i = 0; i < ndims; i++) { | ||
/* send data in every direction */ | ||
if (MPI_Cart_shift | ||
(cart_comm, i, 1, &rank_source, &rank_dest) != 0) { | ||
fprintf(stderr, "MPI_Cart_shift() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Isend | ||
(sendbuf + | ||
i * 2 * halosize[i] * | ||
sizeof(char), halosize[i], | ||
MPI_CHAR, rank_source, 1, | ||
cart_comm, request + i * 2 + ndims * 2) != 0) { | ||
fprintf(stderr, "MPI_Irecv() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Isend | ||
(sendbuf + | ||
(i * 2 + | ||
1) * halosize[i] * | ||
sizeof(char), halosize[i], | ||
MPI_CHAR, rank_dest, 1, | ||
cart_comm, request + i * 2 + 1 + ndims * 2) != 0) { | ||
fprintf(stderr, "MPI_Irecv() failed\n"); | ||
exit(1); | ||
} | ||
} | ||
if (MPI_Waitall(ndims * 2 * 2, request, status) != 0) { | ||
fprintf(stderr, "MPI_Waitall() failed\n"); | ||
exit(1); | ||
} | ||
} | ||
|
||
stop = MPI_Wtime(); | ||
elapsed_time = stop - start; | ||
if (MPI_Reduce | ||
(&elapsed_time, &deltatmin, 1, MPI_DOUBLE, | ||
MPI_MIN, 0, cart_comm) != 0) { | ||
fprintf(stderr, "MPI_Reduce() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Reduce | ||
(&elapsed_time, &deltatmax, 1, MPI_DOUBLE, | ||
MPI_MAX, 0, cart_comm) != 0) { | ||
fprintf(stderr, "MPI_Reduce() failed\n"); | ||
exit(1); | ||
} | ||
if (comm_rank_cart == 0) { | ||
printf("halo_cell_exchange %d", comm_size); | ||
for (i = 0; i < ndims; i++) { | ||
printf(" %d", dim_size[i]); | ||
} | ||
for (i = 0; i < ndims; i++) { | ||
printf(" %d", halosize[i]); | ||
} | ||
/* print minimum and maximum time per exchange and test */ | ||
printf(" %e %e\n", deltatmin / NCALLS, deltatmax / NCALLS); | ||
} | ||
free(status); | ||
free(request); | ||
free(recvbuf); | ||
free(sendbuf); | ||
if (MPI_Comm_free(&cart_comm) != 0) { | ||
fprintf(stderr, "MPI_Comm_free() failed\n"); | ||
exit(1); | ||
} | ||
if (MPI_Comm_free(&red_comm) != 0) { | ||
fprintf(stderr, "MPI_Comm_free() failed\n"); | ||
exit(1); | ||
} | ||
} | ||
free(halosize); | ||
free(periods); | ||
free(dim_size); | ||
} | ||
} | ||
if (MPI_Finalize() != 0) { | ||
fprintf(stderr, "MPI_Finalize() failed\n"); | ||
exit(1); | ||
} | ||
} |
Oops, something went wrong.