Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions ompi/mca/mtl/ofi/mtl_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,22 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
char *ep_names = NULL;
fi_addr_t *fi_addrs = NULL;
mca_mtl_ofi_endpoint_t *endpoint = NULL;
int num_peers_limit = (1 << ompi_mtl_ofi.num_bits_source_rank) - 1;

namelen = ompi_mtl_ofi.epnamelen;

/* Without FI_REMOTE_CQ_DATA, we cannot add more ranks than the source-rank bits of the tag can represent */
if ((false == ompi_mtl_ofi.fi_cq_data) &&
OPAL_UNLIKELY(((int) (nprocs + ompi_mtl_ofi.num_peers) > num_peers_limit))) {
opal_output(0, "%s:%d: OFI provider: %s does not have enough bits for source rank in its tag.\n"
"Adding more ranks will result in undefined behaviour. Please enable\n"
"FI_REMOTE_CQ_DATA feature in the provider. For more info refer fi_cq(3).\n",
__FILE__, __LINE__, ompi_mtl_ofi.provider_name);
fflush(stderr);
ret = OMPI_ERROR;
goto bail;
}

/**
* Create array of EP names.
*/
Expand Down Expand Up @@ -126,6 +139,9 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
}

/* Update the global count of procs this rank has added so far */
ompi_mtl_ofi.num_peers += nprocs;

ret = OMPI_SUCCESS;

bail:
Expand Down
6 changes: 5 additions & 1 deletion ompi/mca/mtl/ofi/mtl_ofi_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include "mtl_ofi.h"
#include "opal/util/argv.h"
#include "opal/util/show_help.h"
#include "opal/util/printf.h"

static int ompi_mtl_ofi_component_open(void);
static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority);
Expand Down Expand Up @@ -576,6 +576,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
ompi_mtl_ofi_define_tag_mode(ofi_tag_mode);
}

ompi_mtl_ofi.num_peers = 0;

/**
* Open fabric
* The getinfo struct returns a fabric attribute struct that can be used to
Expand Down Expand Up @@ -709,6 +711,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
goto error;
}

ompi_mtl_ofi.provider_name = strdup(prov->fabric_attr->prov_name);

/**
* Free providers info since it's not needed anymore.
*/
Expand Down
8 changes: 7 additions & 1 deletion ompi/mca/mtl/ofi/mtl_ofi_endpoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t;
static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc))) {
/* Adding the peer failed; this is fatal, so report it and exit() */
opal_output(0, "%s:%d: *** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
__FILE__, __LINE__);
fflush(stderr);
exit(1);
}
}

return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/mtl/ofi/mtl_ofi_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ typedef struct mca_mtl_ofi_module_t {
/** "Any source" address */
fi_addr_t any_addr;

/** Optional user-specified OFI provider name */
/** OFI provider name */
char *provider_name;

/** Maximum inject size */
Expand All @@ -64,6 +64,7 @@ typedef struct mca_mtl_ofi_module_t {
unsigned long long source_rank_mask;
unsigned long long mpi_tag_mask;
int num_bits_mpi_tag;
int num_peers;

/** Synchronous protocol tag bits */
unsigned long long sync_send;
Expand Down