ompi/runtime/ompi_mpi_finalize.c (32 changes: 29 additions & 3 deletions)
@@ -10,7 +10,7 @@
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
- * Copyright (c) 2006-2013 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2006-2016 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2006-2015 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2006      University of Houston.  All rights reserved.
@@ -84,12 +84,19 @@
extern bool ompi_enable_timing;
extern bool ompi_enable_timing_ext;

+static void fence_cbfunc(int status, void *cbdata)
+{
+    volatile bool *active = (volatile bool*)cbdata;
+    *active = false;
+}
+
int ompi_mpi_finalize(void)
{
    int ret = MPI_SUCCESS;
    opal_list_item_t *item;
    ompi_proc_t** procs;
    size_t nprocs;
+    volatile bool active;
    OPAL_TIMING_DECLARE(tm);
    OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY);

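The pattern that the new fence_cbfunc/active pair implements is worth spelling out: the non-blocking call is handed a callback plus an opaque pointer to a completion flag; the callback flips the flag, and the initiating thread polls the flag while driving the progress engine. Below is a minimal, self-contained sketch of that pattern in plain C. It is illustrative only, not OMPI code: fake_fence_nb, progress_one, pending, saved_cb, and saved_cbdata are invented stand-ins for opal_pmix.fence_nb and opal_progress().

#include <stdbool.h>
#include <stdio.h>

typedef void (*cbfunc_t)(int status, void *cbdata);

static int pending = 3;            /* fake outstanding work items */
static cbfunc_t saved_cb = NULL;
static void *saved_cbdata = NULL;

/* Hypothetical stand-in for opal_progress(): retire one unit of pending
   work; once everything has drained, fire the registered callback. */
static void progress_one(void)
{
    if (pending > 0 && 0 == --pending && NULL != saved_cb) {
        saved_cb(0, saved_cbdata);
    }
}

/* Hypothetical stand-in for opal_pmix.fence_nb(): register the callback
   and return immediately without waiting. */
static void fake_fence_nb(cbfunc_t cbfunc, void *cbdata)
{
    saved_cb = cbfunc;
    saved_cbdata = cbdata;
}

/* Same shape as fence_cbfunc in this patch: flip the caller's flag. */
static void fence_cbfunc(int status, void *cbdata)
{
    volatile bool *active = (volatile bool *)cbdata;
    *active = false;
    (void)status;
}

int main(void)
{
    volatile bool active = true;

    fake_fence_nb(fence_cbfunc, (void *)&active);

    /* This loop is what the wait-for-completion step amounts to: poll
       the flag while driving the progress engine so other pending
       communication can complete in the meantime. */
    while (active) {
        progress_one();
    }

    printf("fence complete\n");
    return 0;
}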
@@ -213,7 +220,7 @@ int ompi_mpi_finalize(void)
       have many other, much higher priority issues to handle that deal
       with non-erroneous cases. */

-    /* Wait for everyone to reach this point.  This is a grpcomm
+    /* Wait for everyone to reach this point.  This is a PMIx
       barrier instead of an MPI barrier for (at least) two reasons:

       1. An MPI barrier doesn't ensure that all messages have been
@@ -233,7 +240,26 @@ int ompi_mpi_finalize(void)
       del_procs behavior around May of 2014 (see
       https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
       more details). */
-    opal_pmix.fence(NULL, 0);
+    if (NULL != opal_pmix.fence_nb) {
+        active = true;
+        /* Note that the non-blocking PMIx fence will cycle calling
+           opal_progress(), which will allow any other pending
+           communications/actions to complete.  See
+           https://github.com/open-mpi/ompi/issues/1576 for the
+           original bug report. */
+        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+        OMPI_WAIT_FOR_COMPLETION(active);
+    } else {
+        /* However, we cannot guarantee that the provided PMIx has
+           fence_nb.  If it doesn't, then do the best we can: an MPI
+           barrier on COMM_WORLD (which isn't the best because of the
+           reasons cited above), followed by a blocking PMIx fence
+           (which may not necessarily call opal_progress()). */
+        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+
+        opal_pmix.fence(NULL, 0);
+    }

    /* check for timing request - get stop time and report elapsed
       time if so */
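OMPI_WAIT_FOR_COMPLETION itself is not shown in this diff. To the best of my understanding it amounts to a spin loop that keeps calling opal_progress() until the flag clears; the sketch below is an assumption, not the verbatim OMPI macro.

/* Sketch only -- not the verbatim OMPI macro.  The point is that the
   wait loop itself drives opal_progress(), which is exactly why the
   fence_nb path can drain pending communication while it waits. */
#define WAIT_FOR_COMPLETION_SKETCH(flag)   \
    do {                                   \
        while ((flag)) {                   \
            opal_progress();               \
        }                                  \
    } while (0)

This is also why the fallback branch runs the MPI barrier before the blocking fence: a blocking opal_pmix.fence() may not call opal_progress() at all, so the barrier on COMM_WORLD is used first to progress any outstanding MPI traffic before processes block in the fence.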