diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 1c5abaffcdf..f0b6814556c 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -1581,7 +1581,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) /* Open up the device */ dev_context = ibv_open_device(ib_dev); if (NULL == dev_context) { - return OPAL_ERR_OUT_OF_RESOURCE; + return OPAL_ERR_NOT_SUPPORTED; } /* Find out if this device supports RC QPs */ @@ -2532,11 +2532,6 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } - /* Init CPC components */ - if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { - goto no_btls; - } - /* If we are using ptmalloc2 and there are no posix threads available, this will cause memory corruption. Refuse to run. Right now, ptmalloc2 is the only memory manager that we have on @@ -2749,10 +2744,12 @@ btl_openib_component_init(int *num_btl_modules, found = true; ret = init_one_device(&btl_list, dev_sorted[i].ib_dev); - if (OPAL_SUCCESS != ret && OPAL_ERR_NOT_SUPPORTED != ret) { + if (OPAL_ERR_NOT_SUPPORTED == ret) { + ++num_devices_intentionally_ignored; + continue; + } else if (OPAL_SUCCESS != ret) { free(dev_sorted); goto no_btls; - } } free(dev_sorted); @@ -2792,6 +2789,12 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } + /* Now that we know we have devices and ports that we want to use, + init CPC components */ + if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { + goto no_btls; + } + /* Setup the BSRQ QP's based on the final value of mca_btl_openib_component.receive_queues. */ if (OPAL_SUCCESS != setup_qps()) { diff --git a/opal/mca/common/verbs/Makefile.am b/opal/mca/common/verbs/Makefile.am index e410343c9ee..b03abce0160 100644 --- a/opal/mca/common/verbs/Makefile.am +++ b/opal/mca/common/verbs/Makefile.am @@ -1,7 +1,7 @@ # # Copyright (c) 2009-2012 Mellanox Technologies. 
All rights reserved. # Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -17,6 +17,7 @@ headers = \ sources = \ common_verbs_basics.c \ common_verbs_devlist.c \ + common_verbs_fake.c \ common_verbs_find_max_inline.c \ common_verbs_find_ports.c \ common_verbs_mca.c \ diff --git a/opal/mca/common/verbs/common_verbs.h b/opal/mca/common/verbs/common_verbs.h index b306de4a7a0..795f89a6637 100644 --- a/opal/mca/common/verbs/common_verbs.h +++ b/opal/mca/common/verbs/common_verbs.h @@ -2,7 +2,7 @@ * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. * All rights reserved. * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. @@ -180,6 +180,11 @@ OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context, */ int opal_common_verbs_fork_test(void); +/* + * Register fake verbs drivers + */ +void opal_common_verbs_register_fake_drivers(void); + END_C_DECLS #endif diff --git a/opal/mca/common/verbs/common_verbs_basics.c b/opal/mca/common/verbs/common_verbs_basics.c index 461f51ad8c5..e79a16d6a8c 100644 --- a/opal/mca/common/verbs/common_verbs_basics.c +++ b/opal/mca/common/verbs/common_verbs_basics.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -66,29 +66,34 @@ bool opal_common_verbs_check_basics(void) int opal_common_verbs_fork_test(void) { - /* Make sure that ibv_fork_init is called before the calls to other memory registering verbs, - * which will be called after this function */ + int ret = OPAL_SUCCESS; + + /* Make sure that ibv_fork_init() is the first ibv_* function to + be invoked in this process. */ #ifdef HAVE_IBV_FORK_INIT if (0 != opal_common_verbs_want_fork_support) { /* Check if fork support is requested by the user */ if (0 != ibv_fork_init()) { - /* If the opal_common_verbs_want_fork_support MCA parameter is >0 but - * the call to ibv_fork_init() failed, then return an error code. + /* If the opal_common_verbs_want_fork_support MCA + * parameter is >0 but the call to ibv_fork_init() failed, + * then return an error code. */ if (opal_common_verbs_want_fork_support > 0) { opal_show_help("help-opal-common-verbs.txt", "ibv_fork_init fail", true, opal_proc_local_get()->proc_hostname, errno, strerror(errno)); - return OPAL_ERROR; + ret = OPAL_ERROR; } - } else { - return OPAL_SUCCESS; } - } else { - return OPAL_SUCCESS; } #endif - return OPAL_SUCCESS; + + /* Now register any necessary fake libibverbs drivers. We + piggyback loading these fake drivers on the fork test because + they must be loaded before ibv_get_device_list() is invoked. */ + opal_common_verbs_register_fake_drivers(); + + return ret; } diff --git a/opal/mca/common/verbs/common_verbs_fake.c b/opal/mca/common/verbs/common_verbs_fake.c new file mode 100644 index 00000000000..093a9ddd458 --- /dev/null +++ b/opal/mca/common/verbs/common_verbs_fake.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * The code in this file prevents spurious libibverbs warnings on + * stderr about devices that it doesn't recognize.
+ *
+ * Specifically, Cisco usNIC devices are exposed through the Linux
+ * InfiniBand kernel interface (i.e., they show up in
+ * /sys/class/infiniband). However, the userspace side of these
+ * drivers is not exposed through libibverbs (i.e., there is no
+ * libibverbs provider/plugin for usNIC). Therefore, when
+ * ibv_get_device_list() is invoked, libibverbs cannot find a plugin
+ * for usnic devices. This causes libibverbs to emit a spurious
+ * warning message on stderr.
+ *
+ * To avoid these extra stderr warnings, we insert a fake usnic verbs
+ * libibverbs provider that safely squelches these warnings.
+ *
+ * More specifically: the userspace side of usNIC is exposed through
+ * libfabric; we don't need libibverbs warnings about not being able
+ * to find a usnic driver.
+ */
+
+#include "opal_config.h"
+
+/* NOTE(review): the header names on the next two #include lines were
+   missing from the patch text and have been reconstructed from the
+   HAVE_INFINIBAND_DRIVER_H guard -- confirm against the tree. */
+#include <infiniband/verbs.h>
+#ifdef HAVE_INFINIBAND_DRIVER_H
+#include <infiniband/driver.h>
+#endif
+
+#include "common_verbs.h"
+
+/***********************************************************************/
+
+#define PCI_VENDOR_ID_CISCO (0x1137)
+
+/*
+ * Context-allocation hook for the fake device.  There is no real
+ * provider behind it, so it ignores both arguments and always
+ * returns NULL.
+ */
+static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev,
+                                              int cmd_fd)
+{
+    /* Nothing to do here */
+    return NULL;
+}
+
+/*
+ * Context-release hook for the fake device.  fake_alloc_context()
+ * never hands out a context, so there is nothing to release.
+ */
+static void fake_free_context(struct ibv_context *ibctx)
+{
+    /* Nothing to do here */
+}
+
+/* Put just enough in here to convince libibverbs that this is a valid
+   device, and a little extra just in case someone looks at this
+   struct in a debugger. */
+static struct ibv_device fake_dev = {
+    .ops = {
+        .alloc_context = fake_alloc_context,
+        .free_context = fake_free_context
+    },
+    .name = "fake ibv_device inserted by Open MPI for non-verbs devices"
+};
+
+/*
+ * Driver-init callback handed to ibv_register_driver().
+ *
+ * uverbs_sys_path: sysfs directory of the device being probed; the
+ *                  "device/vendor" file underneath it is read.
+ * abi_version:     unused.
+ *
+ * Returns &fake_dev when the vendor ID read from sysfs is Cisco's PCI
+ * vendor ID, and NULL in every other case (file unreadable, contents
+ * not parsable as an integer, or a different vendor).
+ */
+static struct ibv_device *fake_driver_init(const char *uverbs_sys_path,
+                                           int abi_version)
+{
+    char value[8];
+    int vendor = 0;
+    int len;
+
+    /* This function should only be invoked for
+       /sys/class/infiniband/usnic_X devices, but double check just to
+       be absolutely sure: read the vendor ID and ensure that it is
+       Cisco.  One byte of the buffer is held back so that the text
+       can always be NUL-terminated before it is parsed.
+       NOTE(review): relies on ibv_read_sysfs_file() returning a byte
+       count no larger than the size it was given -- confirm against
+       the libibverbs in use. */
+    len = ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
+                              value, sizeof(value) - 1);
+    if (len < 0) {
+        return NULL;
+    }
+    value[len] = '\0';
+
+    /* "%i" accepts the "0x"-prefixed hex form.  If nothing could be
+       converted, vendor holds no meaningful value, so treat the
+       device as not ours instead of comparing garbage. */
+    if (1 != sscanf(value, "%i", &vendor)) {
+        return NULL;
+    }
+
+    if (vendor == PCI_VENDOR_ID_CISCO) {
+        return &fake_dev;
+    }
+
+    /* We didn't find a device that we want to support */
+    return NULL;
+}
+
+
+/*
+ * Register the fake libibverbs driver(s) defined in this file.
+ * Takes no arguments and returns nothing; the result of
+ * ibv_register_driver() is not inspected.
+ */
+void opal_common_verbs_register_fake_drivers(void)
+{
+    /* Register a fake driver for "usnic_verbs" devices */
+    ibv_register_driver("usnic_verbs", fake_driver_init);
+}