Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions opal/mca/btl/openib/btl_openib_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -1581,7 +1581,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
/* Open up the device */
dev_context = ibv_open_device(ib_dev);
if (NULL == dev_context) {
return OPAL_ERR_OUT_OF_RESOURCE;
return OPAL_ERR_NOT_SUPPORTED;
}

/* Find out if this device supports RC QPs */
Expand Down Expand Up @@ -2532,11 +2532,6 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}

/* Init CPC components */
if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) {
goto no_btls;
}

/* If we are using ptmalloc2 and there are no posix threads
available, this will cause memory corruption. Refuse to run.
Right now, ptmalloc2 is the only memory manager that we have on
Expand Down Expand Up @@ -2749,10 +2744,12 @@ btl_openib_component_init(int *num_btl_modules,

found = true;
ret = init_one_device(&btl_list, dev_sorted[i].ib_dev);
if (OPAL_SUCCESS != ret && OPAL_ERR_NOT_SUPPORTED != ret) {
if (OPAL_ERR_NOT_SUPPORTED == ret) {
++num_devices_intentionally_ignored;
continue;
} else if (OPAL_SUCCESS != ret) {
free(dev_sorted);
goto no_btls;

}
}
free(dev_sorted);
Expand Down Expand Up @@ -2792,6 +2789,12 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}

/* Now that we know we have devices and ports that we want to use,
init CPC components */
if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) {
goto no_btls;
}

/* Setup the BSRQ QP's based on the final value of
mca_btl_openib_component.receive_queues. */
if (OPAL_SUCCESS != setup_qps()) {
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/common/verbs/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand All @@ -17,6 +17,7 @@ headers = \
sources = \
common_verbs_basics.c \
common_verbs_devlist.c \
common_verbs_fake.c \
common_verbs_find_max_inline.c \
common_verbs_find_ports.c \
common_verbs_mca.c \
Expand Down
7 changes: 6 additions & 1 deletion opal/mca/common/verbs/common_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* All rights reserved.
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -180,6 +180,11 @@ OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context,
*/
int opal_common_verbs_fork_test(void);

/*
* Register fake verbs drivers
*/
void opal_common_verbs_register_fake_drivers(void);

END_C_DECLS

#endif
Expand Down
27 changes: 16 additions & 11 deletions opal/mca/common/verbs/common_verbs_basics.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -66,29 +66,34 @@ bool opal_common_verbs_check_basics(void)

int opal_common_verbs_fork_test(void)
{
/* Make sure that ibv_fork_init is called before the calls to other memory registering verbs,
* which will be called after this function */
int ret = OPAL_SUCCESS;

/* Make sure that ibv_fork_init() is the first ibv_* function to
be invoked in this process. */
#ifdef HAVE_IBV_FORK_INIT
if (0 != opal_common_verbs_want_fork_support) {
/* Check if fork support is requested by the user */
if (0 != ibv_fork_init()) {
/* If the opal_common_verbs_want_fork_support MCA parameter is >0 but
* the call to ibv_fork_init() failed, then return an error code.
/* If the opal_common_verbs_want_fork_support MCA
* parameter is >0 but the call to ibv_fork_init() failed,
* then return an error code.
*/
if (opal_common_verbs_want_fork_support > 0) {
opal_show_help("help-opal-common-verbs.txt",
"ibv_fork_init fail", true,
opal_proc_local_get()->proc_hostname, errno,
strerror(errno));
return OPAL_ERROR;
ret = OPAL_ERROR;
}
} else {
return OPAL_SUCCESS;
}
} else {
return OPAL_SUCCESS;
}
#endif
return OPAL_SUCCESS;

/* Now register any necessary fake libibverbs drivers. We
piggyback loading these fake drivers on the fork test because
they must be loaded before ibv_get_device_list() is invoked. */
opal_common_verbs_register_fake_drivers();

return ret;
}

97 changes: 97 additions & 0 deletions opal/mca/common/verbs/common_verbs_fake.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

/*
* The code in this file prevents spurious libibverbs warnings on
* stderr about devices that it doesn't recognize.
*
* Specifically, Cisco usNIC devices are exposed through the Linux
* InfiniBand kernel interface (i.e., they show up in
* /sys/class/infiniband). However, the userspace side of these
* drivers is not exposed through libibverbs (i.e., there is no
* libibverbs provider/plugin for usNIC). Therefore, when
* ibv_get_device_list() is invoked, libibverbs cannot find a plugin
* for usnic devices. This causes libibverbs to emit a spurious
* warning message on stderr.
*
* To avoid these extra stderr warnings, we insert a fake usnic verbs
* libibverbs provider that safely squelches these warnings.
*
* More specifically: the userspace side of usNIC is exposed through
* libfabric; we don't need libibverbs warnings about not being able
* to find a usnic driver.
*/

#include "opal_config.h"

#include <infiniband/verbs.h>
#ifdef HAVE_INFINIBAND_DRIVER_H
#include <infiniband/driver.h>
#endif

#include "common_verbs.h"

/***********************************************************************/

#define PCI_VENDOR_ID_CISCO (0x1137)

/*
 * Context-allocation hook installed in the fake device's ops table.
 * It never builds a context: both arguments are ignored and the
 * result is always NULL.
 */
static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev,
                                              int cmd_fd)
{
    struct ibv_context *no_context = NULL;

    /* Deliberately unused */
    (void) ibdev;
    (void) cmd_fd;

    return no_context;
}

/*
 * Context-release hook installed in the fake device's ops table.
 * Intentionally a no-op: nothing is done with the context passed in.
 */
static void fake_free_context(struct ibv_context *ibctx)
{
    /* Deliberately unused */
    (void) ibctx;
    return;
}

/* The single device object returned by the fake driver.  Only the
   context alloc/free hooks are populated -- the bare minimum intended
   to make libibverbs accept it as a valid device -- plus a
   descriptive name for the benefit of anyone who inspects this
   struct in a debugger. */
static struct ibv_device fake_dev = {
    .name = "fake ibv_device inserted by Open MPI for non-verbs devices",
    .ops.alloc_context = fake_alloc_context,
    .ops.free_context = fake_free_context
};

/*
 * Driver init hook registered with libibverbs for "usnic_verbs"
 * devices.
 *
 * Reads the "device/vendor" attribute underneath uverbs_sys_path and
 * hands back the shared fake device only when that attribute parses
 * to the Cisco PCI vendor ID.
 *
 * @param uverbs_sys_path  sysfs directory of the device being probed
 * @param abi_version      unused
 *
 * @return pointer to fake_dev when the vendor is Cisco; NULL when the
 *         vendor attribute cannot be read, cannot be parsed as an
 *         integer, or identifies some other vendor.
 */
static struct ibv_device *fake_driver_init(const char *uverbs_sys_path,
                                           int abi_version)
{
    /* Zero-filled, and the read below is never allowed to touch the
       final byte, so the buffer is always NUL-terminated for sscanf()
       -- even if the read yields zero bytes or data with no trailing
       newline. */
    char value[8] = { 0 };
    int vendor = 0;

    (void) abi_version;

    /* This function should only be invoked for
       /sys/class/infiniband/usnic_X devices, but double check just to
       be absolutely sure: read the vendor ID and ensure that it is
       Cisco. */
    if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
                            value, sizeof(value) - 1) < 0) {
        return NULL;
    }

    /* "%i" auto-detects the base, so a 0x-prefixed value is parsed as
       hex.  If nothing could be converted, vendor holds no meaningful
       value: treat the device as one we do not support rather than
       comparing against garbage. */
    if (1 != sscanf(value, "%i", &vendor)) {
        return NULL;
    }

    if (PCI_VENDOR_ID_CISCO == vendor) {
        return &fake_dev;
    }

    /* We didn't find a device that we want to support */
    return NULL;
}


/*
 * Register the fake driver init hook with libibverbs under the
 * "usnic_verbs" driver name.
 */
void opal_common_verbs_register_fake_drivers(void)
{
    /* Driver name the fake init hook is registered under */
    static const char usnic_driver_name[] = "usnic_verbs";

    ibv_register_driver(usnic_driver_name, fake_driver_init);
}