You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
For reference, UCG parameters require the following:
typedef struct ucg_params {
/* Callback functions for address lookup, used at connection establishment */
struct {
int (*lookup_f)(void *cb_group_context,
ucg_group_member_index_t index,
ucp_address_t **addr,
size_t *addr_len);
void (*release_f)(ucp_address_t *addr);
} address;
Currently, the OMPI-based implementation satisfies this requirement as follows:
/**
 * Address-lookup callback with the signature UCG expects for
 * ucg_params.address.lookup_f: resolves the UCP address of a peer rank.
 *
 * The resolved address is cached in the peer's
 * proc_endpoints[OMPI_PROC_ENDPOINT_TAG_COLL] slot, so later calls for the
 * same rank return the stored pointer without another exchange.
 *
 * @param cb_group_obj  Group context; used here as an ompi_communicator_t*.
 * @param rank          Peer index within that communicator. Must differ from
 *                      the caller's own rank (comm->c_my_rank).
 * @param addr          [out] Receives the peer's address pointer.
 * @param addr_len      [out] Set to 0 when the address comes from the cache;
 *                      otherwise it is passed to
 *                      mca_coll_ucx_recv_worker_address(), which may update it.
 *
 * @return 0 on success, 1 on any failure (loopback request, peer lookup
 *         failure, or failure to receive the remote worker address).
 */
int mca_coll_ucx_resolve_address(void *cb_group_obj,
                                 ucg_group_member_index_t rank,
                                 ucp_address_t **addr,
                                 size_t *addr_len)
{
    ompi_communicator_t *comm = (ompi_communicator_t*)cb_group_obj;

    /* Sanity check: resolving our own rank (loopback) is not supported here */
    if (rank == (ucg_group_member_index_t)comm->c_my_rank) {
        COLL_UCX_ERROR("mca_coll_ucx_resolve_address(rank=%lu) "
                       "shouldn't be called on its own rank (loopback)", rank);
        return 1;
    }

    /* Find the process object behind this rank; bail out instead of
     * dereferencing a NULL pointer if the lookup fails. */
    ompi_proc_t *proc_peer =
        (struct ompi_proc_t*)ompi_comm_peer_lookup(comm, rank);
    if (!proc_peer) {
        COLL_UCX_ERROR("mca_coll_ucx_resolve_address(rank=%lu) "
                       "could not find the peer process", rank);
        return 1;
    }

    /* Check the cache for a previously resolved address of that rank */
    *addr     = proc_peer->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_COLL];
    *addr_len = 0; /* UCX doesn't need the length to unpack the address */
    if (*addr) {
        return 0;
    }

    /* Cache miss: obtain the UCP address of the remote */
    int ret = mca_coll_ucx_recv_worker_address(proc_peer, addr, addr_len);
    if (ret < 0) {
        COLL_UCX_ERROR("mca_coll_ucx_recv_worker_address(proc=%d rank=%lu) failed",
                       proc_peer->super.proc_name.vpid, rank);
        return 1;
    }

    /* Cache the address for future invocations with this rank */
    proc_peer->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_COLL] = *addr;
    return 0;
}
/**
 * Release callback with the signature UCG expects for
 * ucg_params.address.release_f.
 *
 * Deliberately a no-op: the address handed out by
 * mca_coll_ucx_resolve_address() stays stored in the peer's
 * proc_peer->proc_endpoints slot, so it must not be freed here.
 *
 * @param addr  Address previously returned by the lookup callback (unused).
 */
void mca_coll_ucx_release_address(ucp_address_t *addr)
{
    (void)addr; /* intentionally unused - nothing to free */
}
The text was updated successfully, but these errors were encountered:
For reference, UCG parameters require the following:
Currently, the OMPI-based implementation satisfies this requirement as follows:
The text was updated successfully, but these errors were encountered: