Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions opal/mca/btl/usnic/btl_usnic_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,9 @@ static int create_ep(opal_btl_usnic_module_t* module,
struct opal_btl_usnic_channel_t *channel)
{
int rc;
struct sockaddr *sa;
struct sockaddr_in *sin;
size_t addrlen;
struct fi_info *hint;

hint = fi_dupinfo(module->fabric_info);
Expand Down Expand Up @@ -1436,6 +1438,21 @@ static int create_ep(opal_btl_usnic_module_t* module,
channel->info->caps &= ~(1ULL << 63);
}

/* all of the OMPI code assumes IPv4, but some versions of libfabric will
* return FI_SOCKADDR instead of FI_SOCKADDR_IN, so we need to do a little
* bit of sanity checking */
assert(FI_SOCKADDR_IN == channel->info->addr_format ||
FI_SOCKADDR == channel->info->addr_format);
if (FI_SOCKADDR == channel->info->addr_format) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per the assert right above it, does this "if" need to check for FI_SOCKADDR or FI_SOCKADDR_IN?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Disregard my previous comment... I see that the purpose of this "if" is mainly just another assert.

However, it'll generate a CID / compiler warning in non-debug builds, because sa will be assigned and not used, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, it'll generate a CID / compiler warning in non-debug builds, because sa will be assigned and not used, right?

I suppose. What's the usual fix for that specific set of warnings? I rather hate sticking more code in just to make various static analysis tools happy when there's no actual problem.

I could also do it a bit less cleanly and exploit the fact that I know struct sockaddr and struct sockaddr_in must use the same layout for sa_family/sin_family. Then I don't need sa at all and I just make the assertion on sin->sin_family instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also just surround the if block with #if OPAL_ENABLE_DEBUG...?

sa = (struct sockaddr *)channel->info->src_addr;
assert(AF_INET == sa->sa_family);
}
sin = (struct sockaddr_in *)channel->info->src_addr;
assert(sizeof(struct sockaddr_in) == channel->info->src_addrlen);

/* no matter the version of libfabric, this should hold */
assert(0 == sin->sin_port);

rc = fi_endpoint(module->domain, channel->info, &channel->ep, NULL);
if (0 != rc || NULL == channel->ep) {
opal_show_help("help-mpi-btl-usnic.txt",
Expand Down Expand Up @@ -1496,6 +1513,30 @@ static int create_ep(opal_btl_usnic_module_t* module,
return OPAL_ERR_OUT_OF_RESOURCE;
}

/* Immediately after libfabric v1.0 was released, we implemented support
* for fi_getname and changed the behavior of fi_endpoint w.r.t. setting
* the src_addr field of the fi_info struct passed in. Before the change
* fi_endpoint would set the src_addr field, including the sin_port field
* but calling fi_getname would return -FI_ENOSYS. Afterwards the address
* would not be touched relative to whatever was set by fi_getinfo. So we
* must call fi_getname in that case.
*/
if (0 == sin->sin_port) {
addrlen = sizeof(struct sockaddr_in);
rc = fi_getname(&channel->ep->fid, channel->info->src_addr, &addrlen);
if (0 != rc) {
opal_show_help("help-mpi-btl-usnic.txt",
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
"fi_getname() failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
}
assert(0 != sin->sin_port);
}

/* actual sizes */
channel->chan_rd_num = channel->info->rx_attr->size;
channel->chan_sd_num = channel->info->tx_attr->size;
Expand Down
1 change: 1 addition & 0 deletions opal/mca/btl/usnic/btl_usnic_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define OPAL_BTL_USNIC_MODULE_H

#include <rdma/fabric.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_errno.h>
Expand Down