From 21221eb70a5281070e3300d2716bd4c90158787b Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 8 Oct 2019 21:10:49 -0700 Subject: [PATCH 1/2] coll/basic: fix neighbor alltoall message ordering This commit updates the coll/basic component to correctly order sends and receives for cartesian communicators with cyclic boundaries. This addresses an issue identified by mpi-forum/mpi-issues#153. This issue occurs when the size in any dimension is 1. This gives the same neighbor in the positive and negative directions. The old code was sending and receiving in the same order so the -1 buffer contained the +1 result and vise-versa. The problem is addressed by using unique tags for each send. This should cover both the case where overtaking is allowed and is not allowed. The former case will be possible is a MPI_Cart_create_with_info() call is added to the standard. Signed-off-by: Nathan Hjelm (cherry picked from commit 196a91e604885d7aae9ac9dfbd9b2e846b3015b7) --- ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index 70fdf9dc1b6..c5103860e01 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -15,6 +15,7 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,7 +68,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, + MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } @@ -77,7 +78,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, + MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } @@ -104,7 +105,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ * a const for the send buffer. */ nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, + MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; @@ -115,7 +116,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, + MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; From be58cf7982e26a87b66713bb4591e2a0972366d2 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 11 Dec 2019 12:40:38 -0500 Subject: [PATCH 2/2] Fix the communication ordering for all cartesian neighbor collectives. This work is rooted in the [MPI Forum issue 153](https://github.com/mpi-forum/mpi-issues/issues/153). Signed-off-by: George Bosilca (cherry picked from commit 86acdee4606c1ac3b38070d1b7973a00a991f1d6) --- ompi/mca/coll/base/coll_tags.h | 2 ++ .../mca/coll/basic/coll_basic_neighbor_allgather.c | 10 +++++----- .../coll/basic/coll_basic_neighbor_allgatherv.c | 12 +++++++----- ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c | 14 +++++++++----- .../mca/coll/basic/coll_basic_neighbor_alltoallv.c | 10 +++++----- .../mca/coll/basic/coll_basic_neighbor_alltoallw.c | 10 +++++----- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/ompi/mca/coll/base/coll_tags.h b/ompi/mca/coll/base/coll_tags.h index f40f029fbbc..2bcf2a6cc95 100644 --- a/ompi/mca/coll/base/coll_tags.h +++ b/ompi/mca/coll/base/coll_tags.h @@ -43,6 +43,8 @@ #define MCA_COLL_BASE_TAG_SCATTERV -26 #define MCA_COLL_BASE_TAG_NONBLOCKING_BASE -27 #define MCA_COLL_BASE_TAG_NONBLOCKING_END ((-1 * INT_MAX/2) + 1) +#define MCA_COLL_BASE_TAG_NEIGHBOR_BASE (MCA_COLL_BASE_TAG_NONBLOCKING_END - 1) +#define MCA_COLL_BASE_TAG_NEIGHBOR_END (MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 1024) #define MCA_COLL_BASE_TAG_HCOLL_BASE (-1 * INT_MAX/2) #define MCA_COLL_BASE_TAG_HCOLL_END (-1 * INT_MAX) #endif /* MCA_COLL_BASE_TAGS_H */ diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c index 8f79b43d870..9bb1e360fe3 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -70,7 +70,7 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank, - MCA_COLL_BASE_TAG_ALLGATHER, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; @@ -78,7 +78,7 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, /* remove cast from const when the pml layer is updated to take * a const for the send buffer. */ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, - MCA_COLL_BASE_TAG_ALLGATHER, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; @@ -89,13 +89,13 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank, - MCA_COLL_BASE_TAG_ALLGATHER, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, - MCA_COLL_BASE_TAG_ALLGATHER, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c index f837109f908..37d97970a30 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -69,13 +69,14 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i] * extent, rcounts[i], rdtype, srank, - MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; /* remove cast from const when the pml layer is updated to take * a const for the send buffer. */ nreqs++; - rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER, + rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } @@ -83,11 +84,12 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i+1] * extent, rcounts[i+1], rdtype, drank, - MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; nreqs++; - rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER, + rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index c5103860e01..6d6468174ff 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -37,6 +37,10 @@ #include "coll_basic.h" #include "ompi/mca/topo/base/base.h" +/** + * We only have 1024 tags for the neighbor collective, so for now we only support + * 512 dimensions. + */ static int mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, @@ -68,7 +72,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank, - MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } @@ -78,7 +82,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank, - MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } @@ -105,7 +109,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ * a const for the send buffer. */ nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, - MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; @@ -116,7 +120,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, - MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c index 8449778140f..e963dc25412 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -68,14 +68,14 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i] * rdextent, rcounts[i], rdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1] * rdextent, rcounts[i+1], rdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } } @@ -98,14 +98,14 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co nreqs++; /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i] * sdextent, scounts[i], sdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1] * sdextent, scounts[i+1], sdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } } diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c index 9060c82c106..29abb456252 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -65,14 +65,14 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co if (MPI_PROC_NULL != srank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], srank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1], rcounts[i+1], rdtypes[i+1], drank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } } @@ -95,14 +95,14 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co nreqs++; /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i], scounts[i], sdtypes[i], srank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (MPI_PROC_NULL != drank) { nreqs++; rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1], scounts[i+1], sdtypes[i+1], drank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } }