From b920c4a6b5aa95e0ca1309f65ea462d65ba1be9c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 20 Oct 2016 10:19:02 -0700 Subject: [PATCH] Fix concurrency issue with MPI_Comm_accept When hammered with many connecting clients, a server running the function to assemble one big communicator would sometimes crash with `dpm_orte.c: Unpack beyond buffer size` (or something along those lines). The accept function executes 2 receive operations from the remote peer. This function does receives on the wildcard address, which means that these two receives could be fulfilled by different clients! This change simply records the sender of the first receive and uses that to filter the second receive. The conversion from `WAIT_FOR_COMPLETION` to `LAZY_WAIT_FOR_COMPLETION` is done to prevent unconnected clients (waiting in line to connect to the server) from looping on `poll(2)` without timeout (at 100% CPU). --- ompi/mca/dpm/orte/dpm_orte.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c index a4f9a174c04..95ee26f433d 100644 --- a/ompi/mca/dpm/orte/dpm_orte.c +++ b/ompi/mca/dpm/orte/dpm_orte.c @@ -206,7 +206,7 @@ static int connect_accept(ompi_communicator_t *comm, int root, ORTE_RML_NON_PERSISTENT, orte_rml_recv_callback, &xfer); /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); + OMPI_LAZY_WAIT_FOR_COMPLETION(xfer.active); i=1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &id, &i, ORTE_GRPCOMM_COLL_ID_T))) { ORTE_ERROR_LOG(rc); @@ -231,7 +231,9 @@ static int connect_accept(ompi_communicator_t *comm, int root, ORTE_RML_NON_PERSISTENT, orte_rml_recv_callback, &xfer); /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); + OMPI_LAZY_WAIT_FOR_COMPLETION(xfer.active); + carport.jobid = xfer.name.jobid; + carport.vpid = xfer.name.vpid; i=1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &id, &i, ORTE_GRPCOMM_COLL_ID_T))) { ORTE_ERROR_LOG(rc); @@ -321,7 +323,7 @@ static int connect_accept(ompi_communicator_t *comm, int root, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* setup to recv */ xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, + orte_rml.recv_buffer_nb(&carport, tag, ORTE_RML_NON_PERSISTENT, orte_rml_recv_callback, &xfer); /* wait for response */