diff --git a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c index ea363b2ef53..b34affe4b9a 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c @@ -2954,6 +2954,15 @@ int mca_coll_ftagree_era_finalize(void) "%s ftagree:agreement (ERA) GC: %lu passed agreements remain in the passed agreements hash table\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), opal_hash_table_get_size(&era_passed_agreements))); + /* Some agreements can remain in the era_passed_agreements table until + * finalize; notably, the last agreement in a communicator that has been + * freed. + * + * The commit that added this comment also removed the (unused) function + * mca_coll_ftagree_era_free_comm that could enforce purging that table + * during comm_free, at the cost of making comm_free hard synchronizing; + * this was deemed too disruptive for the small memory usage gain. + */ for( rc = opal_hash_table_get_first_key_uint64(&era_passed_agreements, &key64, &value, &node); OPAL_SUCCESS == rc; rc = opal_hash_table_get_next_key_uint64(&era_passed_agreements, &key64, &value, node, &node) ) { @@ -3366,46 +3375,3 @@ int mca_coll_ftagree_iera_intra(void *contrib, return OMPI_SUCCESS; } -#if 0 -// Per @bosilca and @jsquyres discussion 29 Apr 2021: there is -// probably a memory leak in MPI_FINALIZE right now, because this -// function does not appear to be being called from anywhere. -// @bosilca's team is looking into it. -int mca_coll_ftagree_era_free_comm(ompi_communicator_t* comm, - mca_coll_base_module_t *module) -{ - ompi_group_t* acked; - era_identifier_t aid; - int rc; - - OPAL_OUTPUT_VERBOSE((4, ompi_ftmpi_output_handle, - "%s ftagree:agreement (ERA) Freeing Communicator (%d.%d).\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - comm->c_contextid, - comm->c_epoch)); - - opal_mutex_lock(&ompi_group_afp_mutex); - ompi_group_intersection(comm->c_remote_group, ompi_group_all_failed_procs, &acked); - opal_mutex_unlock(&ompi_group_afp_mutex); - do { - rc = mca_coll_ftagree_era_intra(NULL, - 0, - &ompi_mpi_int.dt, - &ompi_mpi_op_band.op, - &acked, true, - comm, - comm->c_coll->coll_agree_module); - } while(rc != MPI_SUCCESS); - OBJ_RELEASE(acked); - - aid.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; - aid.ERAID_FIELDS.epoch = comm->c_epoch; - - opal_mutex_lock(&era_mutex); - /** We don't need to set aid.ERAID_FIELDS.agreementid to collect all of them */ - era_collect_passed_agreements(aid, 0, (uint16_t)-1); - opal_mutex_unlock(&era_mutex); - - return OMPI_SUCCESS; -} -#endif