From ec4223666d5d19ca00fef432e137b86407dbc820 Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Mon, 25 Dec 2023 06:09:42 +0000 Subject: [PATCH 1/4] coll/base/alltoallv: skip send/recv 0-byte data The previous change c1a98f19ea108db1f6e895c5805f47ca318c70ff should have checked for total data size in bytes instead of count. This patch fixes that. Signed-off-by: Wenduo Wang (cherry picked from commit a7b4d3ebaea7ba2d1825d662de789722416c6ce1) --- ompi/mca/coll/base/coll_base_alltoallv.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 43c5008a381..1fe5be6b3ee 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -199,6 +199,7 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co mca_coll_base_module_t *module) { int line = -1, err = 0, rank, size, step = 0, sendto, recvfrom; + size_t sdtype_size, rdtype_size; void *psnd, *prcv; ptrdiff_t sext, rext; @@ -213,6 +214,14 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:alltoallv_intra_pairwise rank %d", rank)); + ompi_datatype_type_size(sdtype, &sdtype_size); + ompi_datatype_type_size(rdtype, &rdtype_size); + + if (0 == sdtype_size || 0 == rdtype_size) { + /* Nothing to exchange */ + return MPI_SUCCESS; + } + ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); @@ -263,6 +272,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts mca_coll_base_module_t *module) { int i, size, rank, err, nreqs; + size_t sdtype_size = 0, rdtype_size = 0; char *psnd, *prcv; ptrdiff_t sext, rext; ompi_request_t **preq, **reqs; @@ -280,13 +290,21 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:alltoallv_intra_basic_linear rank %d", rank)); + ompi_datatype_type_size(rdtype, &rdtype_size); + ompi_datatype_type_size(sdtype, &sdtype_size); + + if (0 == rdtype_size || 0 == sdtype_size) { + /* Nothing to exchange */ + return MPI_SUCCESS; + } + ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); /* Simple optimization - handle send to self first */ psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext; prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext; - if (0 != scounts[rank]) { + if (0 < scounts[rank]) { err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype, prcv, rcounts[rank], rdtype); if (MPI_SUCCESS != err) { @@ -310,7 +328,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts continue; } - if (rcounts[i] > 0) { + if (0 < rcounts[i]) { ++nreqs; prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[i] * rext; err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype, @@ -326,7 +344,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts continue; } - if (scounts[i] > 0) { + if (0 < scounts[i]) { ++nreqs; psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[i] * sext; err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype, From d9f0c26c46f81312473e3ca5eddab60db8e758af Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Mon, 25 Dec 2023 17:53:04 +0000 Subject: [PATCH 2/4] coll/libnbc/ialltoallv: skip send/recv 0-byte data As per MPI specification the amount of data sent must be equal to the amount of data received for each communication pair, and therefore both count and datatype size should be accounted for to determine if the data is 0-byte and therefore skippable. Signed-off-by: Wenduo Wang (cherry picked from commit a3a478750b64cbb8368a4fbfbe94eea57b9d996c) --- ompi/mca/coll/libnbc/nbc_ialltoallv.c | 59 ++++++++++++++++++--------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index a52c2675acf..134fddf4f53 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -48,6 +48,7 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const mca_coll_base_module_t *module, bool persistent) { int rank, p, res; + size_t sdtype_size, rdtype_size; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; @@ -60,6 +61,7 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); + ompi_datatype_type_size(recvtype, &rdtype_size); res = ompi_datatype_type_extent (recvtype, &rcvext); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); @@ -92,7 +94,9 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const sendcounts = recvcounts; sdispls = rdispls; sndext = rcvext; + sdtype_size = rdtype_size; } else { + ompi_datatype_type_size(sendtype, &sdtype_size); res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); @@ -100,14 +104,19 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const } } + if (0 == sdtype_size || 0 == rdtype_size) { + /* Nothing to exchange */ + ompi_coll_base_nbc_reserve_tags(comm, 1); + return nbc_get_noop_request(persistent, request); + } + schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - - if (!inplace && sendcounts[rank] != 0) { + if (!inplace && 0 < sendcounts[rank]) { rbuf = (char *) recvbuf + rdispls[rank] * rcvext; sbuf = (char *) sendbuf + sdispls[rank] * sndext; res = NBC_Sched_copy (sbuf, false, sendcounts[rank], sendtype, @@ -177,10 +186,18 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, mca_coll_base_module_t *module, bool persistent) { int res, rsize; + size_t sdtype_size, rdtype_size; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ompi_datatype_type_size(sendtype, &sdtype_size); + ompi_datatype_type_size(recvtype, &rdtype_size); + if (0 == sdtype_size || 0 == rdtype_size) { + /* Nothing to exchange */ + ompi_coll_base_nbc_reserve_tags(comm, 1); + return nbc_get_noop_request(persistent, request); + } res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { @@ -203,7 +220,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, for (int i = 0; i < rsize; i++) { /* post all sends */ - if (sendcounts[i] != 0) { + if (0 < sendcounts[i]) { char *sbuf = (char *) sendbuf + sdispls[i] * sndext; res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -212,7 +229,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, } } /* post all receives */ - if (recvcounts[i] != 0) { + if (0 < recvcounts[i]) { char *rbuf = (char *) recvbuf + rdispls[i] * rcvext; res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -272,7 +289,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post send */ - if (sendcounts[i] != 0) { + if (0 < sendcounts[i]) { char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -281,7 +298,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (recvcounts[i] != 0) { + if (0 < recvcounts[i]) { char *rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -306,7 +323,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, int rcvpeer = (rank + p - i) %p; /* post send */ - if (sendcounts[sndpeer] != 0) { + if (0 < sendcounts[sndpeer]) { char *sbuf = ((char *) sendbuf) + (sdispls[sndpeer] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[sndpeer], sendtype, sndpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -315,7 +332,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (recvcounts[rcvpeer] != 0) { + if (0 < recvcounts[rcvpeer]) { char *rbuf = ((char *) recvbuf) + (rdispls[rcvpeer] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[rcvpeer], recvtype, rcvpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -338,7 +355,7 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, char *sbuf = (char *) buf + displs[speer] * ext; char *rbuf = (char *) buf + displs[rpeer] * ext; - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_copy (rbuf, false, counts[rpeer], type, (void *)(-gap), true, counts[rpeer], type, schedule, true); @@ -346,26 +363,26 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, return res; } } - if (0 != counts[speer]) { + if (0 < counts[speer]) { res = NBC_Sched_send (sbuf, false , counts[speer], type, speer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_recv (rbuf, false , counts[rpeer], type, rpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], type, rpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[speer]) { + if (0 < counts[speer]) { res = NBC_Sched_recv (sbuf, false, counts[speer], type, speer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; @@ -374,15 +391,17 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, } if (0 == (p%2)) { int peer = (rank + p/2) % p; - char *tbuf = (char *) buf + displs[peer] * ext; - res = NBC_Sched_copy (tbuf, false, counts[peer], type, - (void *)(-gap), true, counts[peer], type, - schedule, true); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; + + if (0 < counts[peer]) { + res = NBC_Sched_copy(tbuf, false, counts[peer], type, (void *) (-gap), true, counts[peer], + type, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - if (0 != counts[peer]) { + + if (0 < counts[peer]) { res = NBC_Sched_send ((void *)(-gap), true , counts[peer], type, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; From 08d372cbdc6351dbf0e6f37d48d34cb9cd4950ef Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Mon, 29 Jan 2024 19:25:44 +0000 Subject: [PATCH 3/4] alltoallv: check send and recv datatype sizes separately This is a followup to #12198. The previous change introduced a bug that skips send and recv if either datatype is of 0-size. This is wrong because MPI_Alltoallv allows each process to use a unique datatype. The correct solution is to check send and recv datatype size separately and skip send and recv accordingly. This includes the pairwise algorithm which should not send/recv 0-count data. Signed-off-by: Wenduo Wang (cherry picked from commit f242d0d860ac8145c9fc29b85323a75941e54ece) --- ompi/mca/coll/base/coll_base_alltoallv.c | 49 +++++++++++++++--------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 1fe5be6b3ee..bf12732d3f2 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -201,6 +201,7 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co int line = -1, err = 0, rank, size, step = 0, sendto, recvfrom; size_t sdtype_size, rdtype_size; void *psnd, *prcv; + ompi_request_t *req; ptrdiff_t sext, rext; if (MPI_IN_PLACE == sbuf) { @@ -217,16 +218,12 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co ompi_datatype_type_size(sdtype, &sdtype_size); ompi_datatype_type_size(rdtype, &rdtype_size); - if (0 == sdtype_size || 0 == rdtype_size) { - /* Nothing to exchange */ - return MPI_SUCCESS; - } - ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); /* Perform pairwise exchange starting from 1 since local exchange is done */ for (step = 0; step < size; step++) { + req = MPI_REQUEST_NULL; /* Determine sender and receiver for this step. */ sendto = (rank + step) % size; @@ -237,12 +234,31 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext; /* send and receive */ - err = ompi_coll_base_sendrecv( psnd, scounts[sendto], sdtype, sendto, - MCA_COLL_BASE_TAG_ALLTOALLV, - prcv, rcounts[recvfrom], rdtype, recvfrom, - MCA_COLL_BASE_TAG_ALLTOALLV, - comm, MPI_STATUS_IGNORE, rank); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + if (0 < rcounts[recvfrom] && 0 < rdtype_size) { + err = MCA_PML_CALL(irecv(prcv, rcounts[recvfrom], rdtype, recvfrom, + MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } + + if (0 < scounts[sendto] && 0 < sdtype_size) { + err = MCA_PML_CALL(send(psnd, scounts[sendto], sdtype, sendto, + MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } + + if (MPI_REQUEST_NULL != req) { + err = ompi_request_wait(&req, MPI_STATUS_IGNORE); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } } return MPI_SUCCESS; @@ -293,18 +309,13 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts ompi_datatype_type_size(rdtype, &rdtype_size); ompi_datatype_type_size(sdtype, &sdtype_size); - if (0 == rdtype_size || 0 == sdtype_size) { - /* Nothing to exchange */ - return MPI_SUCCESS; - } - ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); /* Simple optimization - handle send to self first */ psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext; prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext; - if (0 < scounts[rank]) { + if (0 < scounts[rank] && 0 < sdtype_size) { err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype, prcv, rcounts[rank], rdtype); if (MPI_SUCCESS != err) { @@ -328,7 +339,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts continue; } - if (0 < rcounts[i]) { + if (0 < rcounts[i] && 0 < rdtype_size) { ++nreqs; prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[i] * rext; err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype, @@ -344,7 +355,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts continue; } - if (0 < scounts[i]) { + if (0 < scounts[i] && 0 < sdtype_size) { ++nreqs; psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[i] * sext; err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype, From 4d135f8d46e9c28019dcab20b6189365f4acddc3 Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Mon, 29 Jan 2024 19:35:38 +0000 Subject: [PATCH 4/4] ialltoallv: check send and recv datatype sizes separately This is a followup to #12198. The previous change introduced a bug that skips send and recv if either datatype is of 0-size. This is wrong because MPI_Ialltoallv allows each process to use a unique datatype. The correct solution is to check send and recv datatype size separately and skip send and recv accordingly. Signed-off-by: Wenduo Wang (cherry picked from commit 737eefd57c5ba7e8e68a3389595a72bc6aae0db6) --- ompi/mca/coll/libnbc/nbc_ialltoallv.c | 58 ++++++++++++--------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index 134fddf4f53..80833a37c2c 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -23,19 +23,19 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, const void *sendbuf, const int *sendcounts, - const int *sdispls, MPI_Aint sndext, MPI_Datatype sendtype, + const int *sdispls, MPI_Aint sndext, MPI_Datatype sendtype, const size_t sdtype_size, void *recvbuf, const int *recvcounts, - const int *rdispls, MPI_Aint rcvext, MPI_Datatype recvtype); + const int *rdispls, MPI_Aint rcvext, MPI_Datatype recvtype, const size_t rdtype_size); static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, const void *sendbuf, const int *sendcounts, const int *sdispls, - MPI_Aint sndext, MPI_Datatype sendtype, + MPI_Aint sndext, MPI_Datatype sendtype, const size_t sdtype_size, void *recvbuf, const int *recvcounts, const int *rdispls, - MPI_Aint rcvext, MPI_Datatype recvtype); + MPI_Aint rcvext, MPI_Datatype recvtype, const size_t rdtype_size); static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, void *buf, const int *counts, const int *displs, - MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap); + MPI_Aint ext, MPI_Datatype type, const size_t dtype_size, ptrdiff_t gap); /* an alltoallv schedule can not be cached easily because the contents * of the recvcounts array may change, so a comparison of the address @@ -104,19 +104,13 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const } } - if (0 == sdtype_size || 0 == rdtype_size) { - /* Nothing to exchange */ - ompi_coll_base_nbc_reserve_tags(comm, 1); - return nbc_get_noop_request(persistent, request); - } - schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - if (!inplace && 0 < sendcounts[rank]) { + if (!inplace && 0 < sendcounts[rank] && 0 < sdtype_size) { rbuf = (char *) recvbuf + rdispls[rank] * rcvext; sbuf = (char *) sendbuf + sdispls[rank] * sndext; res = NBC_Sched_copy (sbuf, false, sendcounts[rank], sendtype, @@ -128,12 +122,12 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const } if (inplace) { - res = a2av_sched_inplace(rank, p, schedule, recvbuf, recvcounts, - rdispls, rcvext, recvtype, gap); + res = a2av_sched_inplace(rank, p, schedule, recvbuf, recvcounts, rdispls, rcvext, recvtype, + rdtype_size, gap); } else { res = a2av_sched_linear(rank, p, schedule, - sendbuf, sendcounts, sdispls, sndext, sendtype, - recvbuf, recvcounts, rdispls, rcvext, recvtype); + sendbuf, sendcounts, sdispls, sndext, sendtype, sdtype_size, + recvbuf, recvcounts, rdispls, rcvext, recvtype, rdtype_size); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); @@ -193,11 +187,6 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, ompi_datatype_type_size(sendtype, &sdtype_size); ompi_datatype_type_size(recvtype, &rdtype_size); - if (0 == sdtype_size || 0 == rdtype_size) { - /* Nothing to exchange */ - ompi_coll_base_nbc_reserve_tags(comm, 1); - return nbc_get_noop_request(persistent, request); - } res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { @@ -220,7 +209,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, for (int i = 0; i < rsize; i++) { /* post all sends */ - if (0 < sendcounts[i]) { + if (0 < sendcounts[i] && 0 < sdtype_size) { char *sbuf = (char *) sendbuf + sdispls[i] * sndext; res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -229,7 +218,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, } } /* post all receives */ - if (0 < recvcounts[i]) { + if (0 < recvcounts[i] && 0 < rdtype_size) { char *rbuf = (char *) recvbuf + rdispls[i] * rcvext; res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -278,9 +267,9 @@ int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcount __opal_attribute_unused__ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, const void *sendbuf, const int *sendcounts, const int *sdispls, - MPI_Aint sndext, MPI_Datatype sendtype, + MPI_Aint sndext, MPI_Datatype sendtype, const size_t sdtype_size, void *recvbuf, const int *recvcounts, const int *rdispls, - MPI_Aint rcvext, MPI_Datatype recvtype) { + MPI_Aint rcvext, MPI_Datatype recvtype, const size_t rdtype_size) { int res; for (int i = 0 ; i < p ; ++i) { @@ -289,7 +278,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post send */ - if (0 < sendcounts[i]) { + if (0 < sendcounts[i] && 0 < sdtype_size) { char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -298,7 +287,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (0 < recvcounts[i]) { + if (0 < recvcounts[i] && 0 < rdtype_size) { char *rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -313,9 +302,9 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, __opal_attribute_unused__ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, const void *sendbuf, const int *sendcounts, const int *sdispls, - MPI_Aint sndext, MPI_Datatype sendtype, + MPI_Aint sndext, MPI_Datatype sendtype, const size_t sdtype_size, void *recvbuf, const int *recvcounts, const int *rdispls, - MPI_Aint rcvext, MPI_Datatype recvtype) { + MPI_Aint rcvext, MPI_Datatype recvtype, const size_t rdtype_size) { int res; for (int i = 1 ; i < p ; ++i) { @@ -323,7 +312,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, int rcvpeer = (rank + p - i) %p; /* post send */ - if (0 < sendcounts[sndpeer]) { + if (0 < sendcounts[sndpeer] && 0 < sdtype_size) { char *sbuf = ((char *) sendbuf) + (sdispls[sndpeer] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[sndpeer], sendtype, sndpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -332,7 +321,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (0 < recvcounts[rcvpeer]) { + if (0 < recvcounts[rcvpeer] && 0 < rdtype_size) { char *rbuf = ((char *) recvbuf) + (rdispls[rcvpeer] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[rcvpeer], recvtype, rcvpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -346,7 +335,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, void *buf, const int *counts, const int *displs, - MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap) { + MPI_Aint ext, MPI_Datatype type, const size_t dtype_size, ptrdiff_t gap) { int res; for (int i = 1; i < (p+1)/2; i++) { @@ -355,6 +344,11 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, char *sbuf = (char *) buf + displs[speer] * ext; char *rbuf = (char *) buf + displs[rpeer] * ext; + if (0 == dtype_size) { + /* Nothing to exchange */ + return OMPI_SUCCESS; + } + if (0 < counts[rpeer]) { res = NBC_Sched_copy (rbuf, false, counts[rpeer], type, (void *)(-gap), true, counts[rpeer], type,