Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 9 additions & 43 deletions ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c
Original file line number Diff line number Diff line change
Expand Up @@ -2954,6 +2954,15 @@ int mca_coll_ftagree_era_finalize(void)
"%s ftagree:agreement (ERA) GC: %lu passed agreements remain in the passed agreements hash table\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
opal_hash_table_get_size(&era_passed_agreements)));
/* Some agreements can remain in the era_passed_agreements table until
* finalize; notably, the last agreement in a communicator that has been
* freed.
*
* The commit that added this comment also removed the (unused) function
* mca_coll_ftagree_era_free_comm that could enforce purging that table
* during comm_free, at the cost of making comm_free hard synchronizing;
* this was deemed too disruptive for the small memory usage gain.
*/
for( rc = opal_hash_table_get_first_key_uint64(&era_passed_agreements, &key64, &value, &node);
OPAL_SUCCESS == rc;
rc = opal_hash_table_get_next_key_uint64(&era_passed_agreements, &key64, &value, node, &node) ) {
Expand Down Expand Up @@ -3366,46 +3375,3 @@ int mca_coll_ftagree_iera_intra(void *contrib,
return OMPI_SUCCESS;
}

#if 0
// Per @bosilca and @jsquyres discussion 29 Apr 2021: there is
// probably a memory leak in MPI_FINALIZE right now, because this
// function does not appear to be being called from anywhere.
// @bosilca's team is looking into it.
int mca_coll_ftagree_era_free_comm(ompi_communicator_t* comm,
mca_coll_base_module_t *module)
{
ompi_group_t* acked;
era_identifier_t aid;
int rc;

OPAL_OUTPUT_VERBOSE((4, ompi_ftmpi_output_handle,
"%s ftagree:agreement (ERA) Freeing Communicator (%d.%d).\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
comm->c_contextid,
comm->c_epoch));

opal_mutex_lock(&ompi_group_afp_mutex);
ompi_group_intersection(comm->c_remote_group, ompi_group_all_failed_procs, &acked);
opal_mutex_unlock(&ompi_group_afp_mutex);
do {
rc = mca_coll_ftagree_era_intra(NULL,
0,
&ompi_mpi_int.dt,
&ompi_mpi_op_band.op,
&acked, true,
comm,
comm->c_coll->coll_agree_module);
} while(rc != MPI_SUCCESS);
OBJ_RELEASE(acked);

aid.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64;
aid.ERAID_FIELDS.epoch = comm->c_epoch;

opal_mutex_lock(&era_mutex);
/** We don't need to set aid.ERAID_FIELDS.agreementid to collect all of them */
era_collect_passed_agreements(aid, 0, (uint16_t)-1);
opal_mutex_unlock(&era_mutex);

return OMPI_SUCCESS;
}
#endif