diff --git a/opal/mca/btl/usnic/btl_usnic.h b/opal/mca/btl/usnic/btl_usnic.h index cc094ce38f4..afe13d38cbf 100644 --- a/opal/mca/btl/usnic/btl_usnic.h +++ b/opal/mca/btl/usnic/btl_usnic.h @@ -217,13 +217,6 @@ typedef struct opal_btl_usnic_component_t { the prefix is non-NULL) */ char *connectivity_map_prefix; - /** Expected return value from fi_cq_readerr() upon success. In - libfabric v1.0.0 / API v1.0, the usnic provider returned - sizeof(fi_cq_err_entry) upon success. In libfabric >=v1.1 / - API >=v1.1, the usnic provider returned 1 upon success. */ - ssize_t cq_readerr_success_value; - ssize_t cq_readerr_try_again_value; - /** Offset into the send buffer where the payload will go. For libfabric v1.0.0 / API v1.0, this is 0. For libfabric >=v1.1 / API >=v1.1, this is the endpoint.msg_prefix_size (i.e., diff --git a/opal/mca/btl/usnic/btl_usnic_cclient.c b/opal/mca/btl/usnic/btl_usnic_cclient.c index d76b3b8ca9b..b30ea538740 100644 --- a/opal/mca/btl/usnic/btl_usnic_cclient.c +++ b/opal/mca/btl/usnic/btl_usnic_cclient.c @@ -197,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module) /* Ensure to NULL-terminate the passed strings */ strncpy(cmd.nodename, opal_process_info.nodename, CONNECTIVITY_NODENAME_LEN - 1); - strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name, + strncpy(cmd.usnic_name, module->linux_device_name, CONNECTIVITY_IFNAME_LEN - 1); if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) { diff --git a/opal/mca/btl/usnic/btl_usnic_compat.c b/opal/mca/btl/usnic/btl_usnic_compat.c index 1289093ac10..e245e2b1e36 100644 --- a/opal/mca/btl/usnic/btl_usnic_compat.c +++ b/opal/mca/btl/usnic/btl_usnic_compat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -535,7 +535,7 @@ opal_btl_usnic_prepare_src( #if MSGDEBUG2 opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n", - module->fabric_info->fabric_attr->name, + module->linux_device_name, (reserve + *size) <= module->max_frag_payload?"small":"large", (void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize, (void *)convertor); @@ -721,7 +721,7 @@ opal_btl_usnic_prepare_src(struct mca_btl_base_module_t *base_module, #if MSGDEBUG2 opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n", - module->fabric_info->fabric_attr->name, + module->linux_device_name, (reserve + *size) <= module->max_frag_payload?"small":"large", (void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize, (void *)convertor); diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index 07803ce9ee2..abf13a6f94f 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -322,9 +322,7 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, char str[128]; unsigned unlp; struct fi_usnic_info *uip; - struct fi_info *info; - info = module->fabric_info; uip = &module->usnic_info; /* Note: we add one to num_local_procs to account for *this* @@ -373,7 +371,7 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, "not enough usnic resources", true, opal_process_info.nodename, - info->fabric_attr->name, + module->linux_device_name, str); return OPAL_ERROR; } @@ -538,10 +536,12 @@ static bool filter_module(opal_btl_usnic_module_t *module, struct fi_usnic_info *uip; struct fi_info *info; bool match; + const char *linux_device_name; info = module->fabric_info; uip = &module->usnic_info; src = info->src_addr; + linux_device_name = module->linux_device_name; module_mask = src->sin_addr.s_addr & uip->ui.v1.ui_netmask_be; match = false; for (i = 0; i < filter->n_elt; ++i) { @@ -554,7 +554,7 @@ static bool filter_module(opal_btl_usnic_module_t *module, 
} } else { - if (strcmp(filter->elts[i].if_name, info->fabric_attr->name) == 0) { + if (strcmp(filter->elts[i].if_name, linux_device_name) == 0) { match = true; break; } @@ -606,9 +606,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, int min_distance, num_local_procs; struct fi_info *info_list; struct fi_info *info; - struct fi_info hints = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_fabric_attr fabric_attr = {0}; struct fid_fabric *fabric; struct fid_domain *domain; int ret; @@ -622,7 +619,80 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, return NULL; } - /* We only want providers named "usnic that are of type EP_DGRAM */ + /* There are multiple dimensions to consider when requesting an + API version number from libfabric: + + 1. This code understands libfabric API versions v1.3 through + v1.4. + + 2. Open MPI may be *compiled* against one version of libfabric, + but may be *running* with another. + + 3. There were usnic-specific bugs in Libfabric prior to + libfabric v1.3.0 (where "v1.3.0" is the tarball/package + version, not the API version; but happily, the API version + was also 1.3 in Libfabric v1.3.0): + + - In libfabric v1.0.0 (i.e., API v1.0), the usnic provider + did not check the value of the "version" parameter passed + into fi_getinfo() + - If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API + v1.1), the usnic provider will disable FI_MSG_PREFIX + support (on the assumption that the application will not + handle FI_MSG_PREFIX properly). This can happen if you + compile OMPI against libfabric v1.0.0 (i.e., API v1.0) and + run OMPI against libfabric v1.1.0 (i.e., API v1.1). + - Some critical AV bug fixes were included in libfabric + v1.3.0; prior versions can fail in fi_av_* operations in + unexpected ways (libnl: you win again!). + + So always request a minimum API version of v1.3. 
+ + Note that the FI_MAJOR_VERSION and FI_MINOR_VERSION in + <rdma/fabric.h> represent the API version, not the Libfabric + package (i.e., tarball) version. As of Libfabric v1.3, there + is currently no way to know a) what package version of + Libfabric you were compiled against, and b) what package + version of Libfabric you are running with. + + Also note that the usnic provider changed the strings in the + fabric and domain names in API v1.4. With API <= v1.3: + + - fabric name is "usnic_X" (device name) + - domain name is NULL + + With libfabric API >= v1.4, all Libfabric IP-based providers + (including usnic) follow the same convention: + + - fabric name is "a.b.c.d/e" (CIDR notation of network) + - domain name is "usnic_X" (device name) + + NOTE: The configure.m4 in this component will require libfabric + >= v1.1.0 (i.e., it won't accept v1.0.0) because it needs + access to the usNIC extension header structures that only + became available in v1.1.0.*/ + + /* First, check to see if the libfabric we are running with is < + libfabric v1.3. If so, don't bother going further. */ + uint32_t libfabric_api; + libfabric_api = fi_version(); + if (libfabric_api < FI_VERSION(1, 3)) { + opal_output_verbose(5, USNIC_OUT, + "btl:usnic: disqualifiying myself because Libfabric does not support v1.3 of the API (v1.3 is *required* for correct usNIC functionality)."); + return NULL; + } + + /* Libfabric API 1.3 is fine. Above that, we know that Open MPI + works with libfabric API v1.4, so just use that. 
*/ + if (libfabric_api > FI_VERSION(1, 3)) { + libfabric_api = FI_VERSION(1, 4); + } + + struct fi_info hints = {0}; + struct fi_ep_attr ep_attr = {0}; + struct fi_fabric_attr fabric_attr = {0}; + + /* We only want providers named "usnic" that are of type EP_DGRAM */ fabric_attr.prov_name = "usnic"; ep_attr.type = FI_EP_DGRAM; @@ -632,43 +702,10 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, hints.ep_attr = &ep_attr; hints.fabric_attr = &fabric_attr; - /* This code understands libfabric API v1.0 and v1.1. Even if we - were compiled with libfabric API v1.0, we still want to request - v1.1 -- here's why: - - - In libfabric v1.0.0 (i.e., API v1.0), the usnic provider did - not check the value of the "version" parameter passed into - fi_getinfo() - - - If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API - v1.1), the usnic provider will disable FI_MSG_PREFIX support - (on the assumption that the application will not handle - FI_MSG_PREFIX properly). This can happen if you compile OMPI - against libfabric v1.0.0 (i.e., API v1.0) and run OMPI - against libfabric v1.1.0 (i.e., API v1.1). - - So never request API v1.0 -- always request a minimum of - v1.1. - - NOTE: The configure.m4 in this component will require libfabric - >= v1.1.0 (i.e., it won't accept v1.0.0) because of a critical - bug in the usnic provider in libfabric v1.0.0. However, the - compatibility code with libfabric v1.0.0 in the usNIC BTL has - been retained, for two reasons: - - 1. It's not harmful, nor overly complicated. So the - compatibility code was not ripped out. - 2. At least some versions of Cisco Open MPI are shipping with - an embedded (libfabric v1.0.0+critical bug fix). - - Someday, #2 may no longer be true, and we may therefore rip out - the libfabric v1.0.0 compatibility code. 
*/ - uint32_t libfabric_api; - libfabric_api = FI_VERSION(1, 1); ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list); if (0 != ret) { opal_output_verbose(5, USNIC_OUT, - "btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)", strerror(-ret), ret); + "btl:usnic: disqualifiying myself due to fi_getinfo(3) failure: %s (%d)", strerror(-ret), ret); return NULL; } @@ -699,30 +736,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: usNIC fabrics found"); - /* Due to ambiguities in documentation, in libfabric v1.0.0 (i.e., - API v1.0) the usnic provider returned sizeof(struct - fi_cq_err_entry) from fi_cq_readerr() upon success. - - The ambiguities were clarified in libfabric v1.1.0 (i.e., API - v1.1); the usnic provider returned 1 from fi_cq_readerr() upon - success. - - So query to see what version of the libfabric API we are - running with, and adapt accordingly. */ - libfabric_api = fi_version(); - if (1 == FI_MAJOR(libfabric_api) && - 0 == FI_MINOR(libfabric_api)) { - // Old fi_cq_readerr() behavior: success=sizeof(...), try again=0 - mca_btl_usnic_component.cq_readerr_success_value = - sizeof(struct fi_cq_err_entry); - mca_btl_usnic_component.cq_readerr_try_again_value = 0; - } else { - // New fi_cq_readerr() behavior: success=1, try again=-FI_EAGAIN - mca_btl_usnic_component.cq_readerr_success_value = 1; - mca_btl_usnic_component.cq_readerr_try_again_value = -FI_EAGAIN; - } - - /* libnl initialization */ opal_proc_t *me = opal_proc_local_get(); opal_process_name_t *name = &(me->proc_name); mca_btl_usnic_component.my_hashed_rte_name = @@ -786,13 +799,21 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, i < mca_btl_usnic_component.max_modules); ++i, info = info->next) { + // The fabric/domain names changed at libfabric API v1.4 (see above). 
+ char *linux_device_name; + if (libfabric_api <= FI_VERSION(1, 3)) { + linux_device_name = info->fabric_attr->name; + } else { + linux_device_name = info->domain_attr->name; + } + ret = fi_fabric(info->fabric_attr, &fabric, NULL); if (0 != ret) { opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - info->fabric_attr->name, + linux_device_name, "fi_fabric()", __FILE__, __LINE__, ret, strerror(-ret)); @@ -806,7 +827,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, "libfabric API failed", true, opal_process_info.nodename, - info->fabric_attr->name, + linux_device_name, "fi_domain()", __FILE__, __LINE__, ret, strerror(-ret)); @@ -815,8 +836,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_memchecker_base_mem_defined(&domain, sizeof(domain)); opal_output_verbose(5, USNIC_OUT, - "btl:usnic: found: usNIC direct device %s", - info->fabric_attr->name); + "btl:usnic: found: usNIC device %s", + linux_device_name); /* Save a little info on the module that we have already gathered. The rest of the module will be filled in @@ -827,6 +848,12 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, module->fabric = fabric; module->domain = domain; module->fabric_info = info; + module->libfabric_api = libfabric_api; + module->linux_device_name = strdup(linux_device_name); + if (NULL == module->linux_device_name) { + OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + goto error; + } /* Obtain usnic-specific device info (e.g., netmask) that doesn't come in the normal fi_getinfo(). 
This allows us to @@ -836,7 +863,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, if (ret != 0) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s fabric_open_ops failed %d (%s)", - info->fabric_attr->name, ret, fi_strerror(-ret)); + module->linux_device_name, ret, fi_strerror(-ret)); fi_close(&domain->fid); fi_close(&fabric->fid); continue; @@ -849,14 +876,14 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, if (ret != 0) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s usnic_getinfo failed %d (%s)", - info->fabric_attr->name, ret, fi_strerror(-ret)); + module->linux_device_name, ret, fi_strerror(-ret)); fi_close(&domain->fid); fi_close(&fabric->fid); continue; } opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s usnic_info: link speed=%d, netmask=0x%x, ifname=%s, num_vf=%d, qp/vf=%d, cq/vf=%d", - info->fabric_attr->name, + module->linux_device_name, (unsigned int) module->usnic_info.ui.v1.ui_link_speed, (unsigned int) module->usnic_info.ui.v1.ui_netmask_be, module->usnic_info.ui.v1.ui_ifname, @@ -870,7 +897,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s %s due to %s", (keep_module ? "keeping" : "skipping"), - info->fabric_attr->name, + module->linux_device_name, (filter_incl ? 
"if_include" : "if_exclude")); if (!keep_module) { fi_close(&domain->fid); @@ -888,7 +915,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s is not provisioned with enough resources -- skipping", - info->fabric_attr->name); + module->linux_device_name); fi_close(&domain->fid); fi_close(&fabric->fid); @@ -902,7 +929,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s looks good!", - info->fabric_attr->name); + module->linux_device_name); /* Let this module advance to the next round! */ btls[j++] = &(module->super); @@ -952,7 +979,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, btls[num_final_modules++] = &(module->super); /* Output all of this module's values. */ - const char *devname = module->fabric_info->fabric_attr->name; + const char *devname = module->linux_device_name; opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d", devname, @@ -1194,18 +1221,17 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module, if (cq_ret != -FI_EAVAIL) { BTL_ERROR(("%s: cq_read ret = %d (%s)", - module->fabric_info->fabric_attr->name, cq_ret, + module->linux_device_name, cq_ret, fi_strerror(-cq_ret))); channel->chan_error = true; } rc = fi_cq_readerr(channel->cq, &err_entry, 0); - if (rc == mca_btl_usnic_component.cq_readerr_try_again_value) { + if (rc == -FI_EAGAIN) { return; - } else if (rc != mca_btl_usnic_component.cq_readerr_success_value) { - BTL_ERROR(("%s: cq_readerr ret = %d (expected %d)", - module->fabric_info->fabric_attr->name, rc, - (int) mca_btl_usnic_component.cq_readerr_success_value)); + } else if (rc != 1) { + BTL_ERROR(("%s: cq_readerr ret = %d (expected 1)", + module->linux_device_name, rc)); channel->chan_error = true; } @@ -1217,7 +1243,7 @@ 
usnic_handle_cq_error(opal_btl_usnic_module_t* module, static int once = 0; if (once++ == 0) { BTL_ERROR(("%s: Channel %d, %s", - module->fabric_info->fabric_attr->name, + module->linux_device_name, channel->chan_index, FI_ECRC == err_entry.prov_errno ? "CRC error" : "message truncation")); @@ -1238,7 +1264,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module, } } else { BTL_ERROR(("%s: CQ[%d] prov_err = %d", - module->fabric_info->fabric_attr->name, channel->chan_index, + module->linux_device_name, channel->chan_index, err_entry.prov_errno)); channel->chan_error = true; } @@ -1451,7 +1477,7 @@ void opal_btl_usnic_component_debug(void) module = mca_btl_usnic_component.usnic_active_modules[i]; opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n", - i, (void *)module, module->fabric_info->fabric_attr->name, + i, (void *)module, module->linux_device_name, (unsigned long long)module->max_frag_payload, (unsigned long long)module->max_chunk_payload, (unsigned long long)module->max_tiny_payload); diff --git a/opal/mca/btl/usnic/btl_usnic_hwloc.c b/opal/mca/btl/usnic/btl_usnic_hwloc.c index ff9442eef36..78ef4c3abcb 100644 --- a/opal/mca/btl/usnic/btl_usnic_hwloc.c +++ b/opal/mca/btl/usnic/btl_usnic_hwloc.c @@ -162,7 +162,7 @@ static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module) if (obj->type != HWLOC_OBJ_NODE) { opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: could not find NUMA node for %s; filtering by NUMA distance not possible", - module->fabric_info->fabric_attr->name); + module->linux_device_name); return NULL; } @@ -218,7 +218,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module) opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: %s is distance %d from me", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->numa_distance); } diff --git a/opal/mca/btl/usnic/btl_usnic_map.c b/opal/mca/btl/usnic/btl_usnic_map.c index ce2aca6abea..c9cbd8a83c2 100644 --- 
a/opal/mca/btl/usnic/btl_usnic_map.c +++ b/opal/mca/btl/usnic/btl_usnic_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved * $COPYRIGHT$ * @@ -30,8 +30,8 @@ static int map_compare_modules(const void *aa, const void *bb) opal_btl_usnic_module_t *a = *((opal_btl_usnic_module_t**) aa); opal_btl_usnic_module_t *b = *((opal_btl_usnic_module_t**) bb); - return strcmp(a->fabric_info->fabric_attr->name, - b->fabric_info->fabric_attr->name); + return strcmp(a->linux_device_name, + b->linux_device_name); } /* @@ -74,7 +74,7 @@ static int map_output_modules(FILE *fp) prefix_len); fprintf(fp, "device=%s,ip=%s,mss=%" PRIsize_t "\n", - modules[i]->fabric_info->fabric_attr->name, + modules[i]->linux_device_name, ipv4, modules[i]->fabric_info->ep_attr->max_msg_size); } @@ -102,8 +102,8 @@ static int map_compare_endpoints(const void *aa, const void *bb) return -1; } - return strcmp(a->endpoint_module->fabric_info->fabric_attr->name, - b->endpoint_module->fabric_info->fabric_attr->name); + return strcmp(a->endpoint_module->linux_device_name, + b->endpoint_module->linux_device_name); } /* @@ -148,7 +148,7 @@ static int map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc) eps[i]->endpoint_remote_modex.netmask); fprintf(fp, "device=%s@peer_ip=%s", - eps[i]->endpoint_module->fabric_info->fabric_attr->name, + eps[i]->endpoint_module->linux_device_name, ipv4); ++num_output; } diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 713c3f44eb2..601b87b0076 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -67,6 +67,30 @@ static void finalize_one_channel(opal_btl_usnic_module_t *module, struct opal_btl_usnic_channel_t *channel); +static int channel_addr2str(opal_btl_usnic_module_t *module, int channel, + char *str, size_t len_param) 
+{ + size_t len; + + len = len_param; + fi_av_straddr(module->av, module->mod_channels[channel].info->src_addr, + str, &len); + if (len > len_param) { + opal_show_help("help-mpi-btl-usnic.txt", + "libfabric API failed", + true, + opal_process_info.nodename, + module->linux_device_name, + "fi_av_straddr", __FILE__, __LINE__, + FI_ENODATA, + "Failed to convert address to string: buffer too short"); + + return OPAL_ERR_OUT_OF_RESOURCE; + } + + return OPAL_SUCCESS; +} + /* * Loop over a block of procs sent to us in add_procs and see if we @@ -100,7 +124,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, if (opal_proc == my_proc) { opal_output_verbose(75, USNIC_OUT, "btl:usnic:add_procs:%s: not connecting to self", - module->fabric_info->fabric_attr->name); + module->linux_device_name); continue; } @@ -108,7 +132,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { opal_output_verbose(75, USNIC_OUT, "btl:usnic:add_procs:%s: not connecting to %s on same server", - module->fabric_info->fabric_attr->name, + module->linux_device_name, usnic_compat_proc_name_print(&opal_proc->proc_name)); continue; } @@ -124,7 +148,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, skip it */ opal_output_verbose(75, USNIC_OUT, "btl:usnic:add_procs:%s: peer %s on %s does not have usnic modex info; skipping", - module->fabric_info->fabric_attr->name, + module->linux_device_name, usnic_compat_proc_name_print(&opal_proc->proc_name), opal_get_proc_hostname(opal_proc)); continue; @@ -140,7 +164,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, if (OPAL_SUCCESS != rc) { opal_output_verbose(5, USNIC_OUT, "btl:usnic:add_procs:%s: unable to create endpoint to peer %s on %s", - module->fabric_info->fabric_attr->name, + module->linux_device_name, usnic_compat_proc_name_print(&opal_proc->proc_name), 
opal_get_proc_hostname(opal_proc)); OBJ_RELEASE(usnic_proc); @@ -157,12 +181,29 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, modex->ipv4_addr, modex->netmask); + char local_pri_addr[64] = {0}; + rc = channel_addr2str(module, USNIC_PRIORITY_CHANNEL, + local_pri_addr, sizeof(local_pri_addr)); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(usnic_proc); + continue; + } + + char local_data_addr[64] = {0}; + rc = channel_addr2str(module, USNIC_DATA_CHANNEL, + local_data_addr, sizeof(local_data_addr)); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(usnic_proc); + continue; + } + opal_output_verbose(5, USNIC_OUT, - "btl:usnic:add_procs:%s: new usnic peer endpoint: %s, proirity port %d, data port %d", - module->fabric_info->fabric_attr->name, - str, - modex->ports[USNIC_PRIORITY_CHANNEL], - modex->ports[USNIC_DATA_CHANNEL]); + "btl:usnic:add_procs:%s: new usnic peer endpoint: pri=%s:%d, data=%s:%d (local: pri=%s, data=%s)", + module->linux_device_name, + str, modex->ports[USNIC_PRIORITY_CHANNEL], + str, modex->ports[USNIC_DATA_CHANNEL], + local_pri_addr, + local_data_addr); endpoints[i] = usnic_endpoint; ++num_created; @@ -195,14 +236,14 @@ static void add_procs_warn_unreachable(opal_btl_usnic_module_t *module, opal_output_verbose(15, USNIC_OUT, "btl:usnic: %s (which is %s) couldn't reach peer %s", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->if_ipv4_addr_str, remote); opal_show_help("help-mpi-btl-usnic.txt", "unreachable peer IP", true, opal_process_info.nodename, module->if_ipv4_addr_str, - module->fabric_info->fabric_attr->name, + module->linux_device_name, opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), remote); } @@ -301,7 +342,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "async insertion result", __FILE__, __LINE__, err_entry.err, "Failed to 
insert address to AV"); @@ -325,7 +366,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_readerr()", __FILE__, __LINE__, ret, "Returned != sizeof(err_entry)"); @@ -346,7 +387,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_sread()", __FILE__, __LINE__, ret, "Returned != (sizeof(entry) or -FI_EAVAIL)"); @@ -898,6 +939,8 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl) fi_close(&module->domain->fid); fi_close(&module->fabric->fid); + free(module->linux_device_name); + return OPAL_SUCCESS; } @@ -1413,7 +1456,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_read()", __FILE__, __LINE__, ret, "Failed to get domain event"); @@ -1432,7 +1475,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) ignore it. 
*/ opal_output_verbose(10, USNIC_OUT, "btl:usnic: got LINK_UP on %s", - module->fabric_info->fabric_attr->name); + module->linux_device_name); break; case 1: // USD_EVENT_LINK_DOWN: @@ -1451,7 +1494,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) opal_show_help("help-mpi-btl-usnic.txt", "async event", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, str, entry.data); fatal = true; } @@ -1482,7 +1525,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_dupinfo() failed", __FILE__, __LINE__, -1, "Unknown"); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1500,14 +1543,14 @@ static int create_ep(opal_btl_usnic_module_t* module, opal_process_info.my_local_rank); } - rc = fi_getinfo(FI_VERSION(1, 1), NULL, 0, 0, hint, &channel->info); + rc = fi_getinfo(module->libfabric_api, NULL, 0, 0, hint, &channel->info); fi_freeinfo(hint); if (0 != rc) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_getinfo() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1543,7 +1586,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_endpoint() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1556,7 +1599,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() SCQ to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1567,7 +1610,7 @@ 
static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() RCQ to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1578,7 +1621,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() AV to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1591,7 +1634,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_enable() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1613,7 +1656,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_getname() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1621,6 +1664,21 @@ static int create_ep(opal_btl_usnic_module_t* module, assert(0 != sin->sin_port); } + char *str; + if (USNIC_PRIORITY_CHANNEL == channel->chan_index) { + str = "priority"; + } else if (USNIC_DATA_CHANNEL == channel->chan_index) { + str = "data"; + } else { + str = "UNKNOWN"; + } + opal_output_verbose(15, USNIC_OUT, + "btl:usnic:create_ep:%s: new usnic local endpoint channel %s: %s:%d", + module->linux_device_name, + str, + inet_ntoa(sin->sin_addr), + ntohs(sin->sin_port)); + /* actual sizes */ channel->chan_rd_num = channel->info->rx_attr->size; channel->chan_sd_num = channel->info->tx_attr->size; @@ -1704,7 +1762,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, "internal error during init", true, 
opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "failed to create CQ", __FILE__, __LINE__); goto error; } @@ -1760,7 +1818,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "Failed to get receive buffer from freelist", __FILE__, __LINE__); goto error; @@ -1776,7 +1834,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "Failed to post receive buffer", __FILE__, __LINE__); goto error; @@ -1843,7 +1901,7 @@ static void init_local_modex_part1(opal_btl_usnic_module_t *module) opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s IP charactertics: %s, %u Mbps", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->if_ipv4_addr_str, modex->link_speed_mbps); } @@ -2055,7 +2113,7 @@ static int init_mpool(opal_btl_usnic_module_t *module) mpool_resources.register_mem = usnic_reg_mr; mpool_resources.deregister_mem = usnic_dereg_mr; asprintf(&mpool_resources.pool_name, "%s", - module->fabric_info->fabric_attr->name); + module->linux_device_name); module->super.btl_mpool = mca_mpool_base_module_create(mca_btl_usnic_component.usnic_mpool_name, &module->super, &mpool_resources); @@ -2064,7 +2122,7 @@ static int init_mpool(opal_btl_usnic_module_t *module) "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "create mpool", __FILE__, __LINE__); return OPAL_ERROR; } @@ -2177,7 +2235,7 @@ static void init_async_event(opal_btl_usnic_module_t *module) "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_control(eq, FI_GETWAIT)", __FILE__, __LINE__, ret, 
fi_strerror(-ret)); diff --git a/opal/mca/btl/usnic/btl_usnic_module.h b/opal/mca/btl/usnic/btl_usnic_module.h index b4f5d0c7390..b7d5d6fc0c2 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.h +++ b/opal/mca/btl/usnic/btl_usnic_module.h @@ -100,8 +100,10 @@ typedef struct opal_btl_usnic_module_t { /* Cache for use during component_init to associate a module with the libfabric device that it came from. */ + uint32_t libfabric_api; struct fid_fabric *fabric; struct fid_domain *domain; + char *linux_device_name; struct fi_info *fabric_info; struct fi_usnic_ops_fabric *usnic_fabric_ops; struct fi_usnic_ops_av *usnic_av_ops; diff --git a/opal/mca/btl/usnic/btl_usnic_proc.c b/opal/mca/btl/usnic/btl_usnic_proc.c index 9d71a6ed9d5..f0fefbff964 100644 --- a/opal/mca/btl/usnic/btl_usnic_proc.c +++ b/opal/mca/btl/usnic/btl_usnic_proc.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * $COPYRIGHT$ * @@ -643,7 +643,7 @@ static int match_modex(opal_btl_usnic_module_t *module, opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->fabric_info->ep_attr->max_msg_size, (NULL == proc->proc_opal->proc_hostname) ? 
"unknown" : proc->proc_opal->proc_hostname, @@ -700,7 +700,7 @@ static int start_av_insert(opal_btl_usnic_module_t *module, opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_av_insert()", __FILE__, __LINE__, ret, "Failed to initiate AV insert"); diff --git a/opal/mca/btl/usnic/btl_usnic_send.h b/opal/mca/btl/usnic/btl_usnic_send.h index 2020544f205..86676a35b93 100644 --- a/opal/mca/btl/usnic/btl_usnic_send.h +++ b/opal/mca/btl/usnic/btl_usnic_send.h @@ -216,7 +216,7 @@ opal_btl_usnic_endpoint_send_segment( "CHUNK" : "FRAG", sseg->ss_base.us_btl_header->pkt_seq, sseg->ss_base.us_btl_header->sender, - endpoint->endpoint_module->fabric_info->fabric_attr->name, + endpoint->endpoint_module->linux_device_name, local_ip, module->local_modex.ports[sseg->ss_channel], (void*)sseg, diff --git a/opal/mca/btl/usnic/btl_usnic_stats.c b/opal/mca/btl/usnic/btl_usnic_stats.c index 9c3acac868c..a0c3393cc78 100644 --- a/opal/mca/btl/usnic/btl_usnic_stats.c +++ b/opal/mca/btl/usnic/btl_usnic_stats.c @@ -86,7 +86,7 @@ void opal_btl_usnic_print_stats( prefix, opal_proc_local_get()->proc_name.vpid, - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->stats.num_total_sends, module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends, @@ -394,7 +394,7 @@ static void setup_mpit_pvars_enum(void) devices[i].value = i; rc = asprintf(&str, "%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32, - m->fabric_info->fabric_attr->name, + m->linux_device_name, c[0], c[1], c[2], c[3], usnic_netmask_to_cidrlen(sin->sin_addr.s_addr)); assert(rc > 0); diff --git a/opal/mca/btl/usnic/btl_usnic_util.c b/opal/mca/btl/usnic/btl_usnic_util.c index 9c1db480cd4..17eeb7650db 100644 --- a/opal/mca/btl/usnic/btl_usnic_util.c +++ b/opal/mca/btl/usnic/btl_usnic_util.c @@ -34,6 +34,8 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module) } /* If we didn't find a PML error 
callback, just exit. */ if (NULL == module) { + fprintf(stderr, "*** The Open MPI usnic BTL is aborting the MPI job (via exit(3)).\n"); + fflush(stderr); exit(1); } } @@ -47,7 +49,7 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module) module->pml_error_callback(&module->super, MCA_BTL_ERROR_FLAGS_FATAL, (opal_proc_t*) opal_proc_local_get(), - "usnic"); + "The usnic BTL is aborting the MPI job (via PML error callback)."); } /* If the PML error callback returns (or if there wasn't one),