Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 36 additions & 27 deletions opal/mca/btl/openib/btl_openib.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
Expand Down Expand Up @@ -399,14 +399,16 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
}
}

openib_btl->srqs_created = true;

return OPAL_SUCCESS;
}

static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
{
int rc = OPAL_SUCCESS;
opal_mutex_lock(&openib_btl->ib_lock);
if (0 == openib_btl->num_peers &&
if (!openib_btl->srqs_created &&
(mca_btl_openib_component.num_srq_qps > 0 ||
mca_btl_openib_component.num_xrc_qps > 0)) {
rc = create_srq(openib_btl);
Expand All @@ -416,17 +418,12 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
}


static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
{
uint32_t send_cqes, recv_cqes;
int rc = OPAL_SUCCESS, qp;
mca_btl_openib_device_t *device = openib_btl->device;

if( 0 == nprocs){
/* nothing to do */
return OPAL_SUCCESS;
}

opal_mutex_lock(&openib_btl->ib_lock);
/* figure out reasonable sizes for completion queues */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
Expand All @@ -435,7 +432,7 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num;
} else {
send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num +
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs;
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * openib_btl->num_peers;
recv_cqes = send_cqes;
}

Expand All @@ -455,7 +452,6 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
goto out;
}

openib_btl->num_peers += nprocs;
out:
opal_mutex_unlock(&openib_btl->ib_lock);
return rc;
Expand Down Expand Up @@ -1028,10 +1024,14 @@ int mca_btl_openib_add_procs(
return rc;
}

rc = openib_btl_prepare(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("could not prepare openib btl structure for usel"));
return rc;
if (0 == openib_btl->num_peers) {
/* ensure completion queues are created before attempting to
* make a loop-back queue pair */
rc = openib_btl_size_queues(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
return rc;
}
}

/* prepare all proc's and account them properly */
Expand Down Expand Up @@ -1080,10 +1080,20 @@ int mca_btl_openib_add_procs(
}
}

/* account this procs if need */
rc = openib_btl_size_queues(openib_btl, nprocs_new);
if (nprocs_new) {
OPAL_THREAD_ADD32(&openib_btl->num_peers, nprocs_new);

/* adjust cq sizes given the new procs */
rc = openib_btl_size_queues (openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
return rc;
}
}

rc = openib_btl_prepare (openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
BTL_ERROR(("could not prepare openib btl module for use"));
return rc;
}

Expand Down Expand Up @@ -1156,23 +1166,15 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
{
mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl;
volatile mca_btl_base_endpoint_t *endpoint = NULL;
int local_port_cnt = 0, btl_rank, rc;
mca_btl_openib_proc_t *ib_proc;
int rc;
int local_port_cnt = 0, btl_rank;
size_t nprocs_new = 0;

rc = prepare_device_for_use (openib_btl->device);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("could not prepare openib device for use"));
return NULL;
}

rc = openib_btl_prepare(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("could not prepare openib btl structure for use"));
return NULL;
}

if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) {
/* if we don't have connection info for this process, it's
* okay because some other method might be able to reach it,
Expand All @@ -1189,7 +1191,8 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul

/* this process is new to this openib btl;
* account for it if needed */
rc = openib_btl_size_queues(openib_btl, nprocs_new);
OPAL_THREAD_ADD32(&openib_btl->num_peers, 1);
rc = openib_btl_size_queues(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
return NULL;
Expand All @@ -1214,6 +1217,12 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
return NULL;
}

rc = openib_btl_prepare(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("could not prepare openib btl structure for use"));
goto exit;
}

for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
endpoint = ib_proc->proc_endpoints[j];
if (endpoint->endpoint_btl == openib_btl) {
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/openib/btl_openib.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
Expand Down Expand Up @@ -465,6 +465,7 @@ struct mca_btl_openib_module_t {
mca_btl_base_module_t super;

bool btl_inited;
bool srqs_created;

/** Common information about all ports */
mca_btl_openib_modex_message_t port_info;
Expand Down
13 changes: 2 additions & 11 deletions opal/mca/btl/openib/btl_openib_atomic.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -73,16 +73,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st

frag->sr_desc.wr.atomic.rkey = rkey;

#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
frag->sr_desc.qp_type.xrc.remote_srqn = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
#else
frag->sr_desc.xrc_remote_srq_num = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
#endif

}
#endif
/* NTH: the remote SRQ number is set in mca_btl_openib_get_internal */

if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
Expand Down
6 changes: 3 additions & 3 deletions opal/mca/btl/openib/btl_openib_endpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
Expand Down Expand Up @@ -579,7 +579,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)

opal_output(-1, "Now we are CONNECTED");
if (MCA_BTL_XRC_ENABLED) {
OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
opal_mutex_lock (&endpoint->ib_addr->addr_lock);
if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
/* We are not xrc master */
/* set our qp pointer to master qp */
Expand Down Expand Up @@ -622,7 +622,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
}
}
}
OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
opal_mutex_unlock (&endpoint->ib_addr->addr_lock);
}


Expand Down
25 changes: 14 additions & 11 deletions opal/mca/btl/openib/btl_openib_get.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
Expand Down Expand Up @@ -92,16 +92,6 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
frag->sr_desc.wr.rdma.rkey = remote_handle->rkey;
}

#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
}
#endif

if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags);
Expand Down Expand Up @@ -138,6 +128,19 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
int qp = to_base_frag(frag)->base.order;
struct ibv_send_wr *bad_wr;

#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
/* NTH: the remote SRQ number is only available once the endpoint is connected. By
* setting the value here instead of mca_btl_openib_get we guarantee the rem_srqs
* array is initialized. */
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
}
#endif

/* check for a send wqe */
if (qp_get_wqe(ep, qp) < 0) {
qp_put_wqe(ep, qp);
Expand Down
34 changes: 18 additions & 16 deletions opal/mca/btl/openib/btl_openib_put.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
Expand Down Expand Up @@ -49,7 +49,7 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
qp = mca_btl_openib_component.rdma_qp;
}

if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle ||
if (OPAL_UNLIKELY((btl->btl_put_local_registration_threshold < size && !local_handle) || !remote_handle ||
size > btl->btl_put_limit)) {
return OPAL_ERR_BAD_PARAM;
}
Expand Down Expand Up @@ -101,19 +101,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey;
}

#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {

#if OPAL_HAVE_CONNECTX_XRC
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS
to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
#error "that should never happen"
#endif
}
#endif

if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags);
Expand Down Expand Up @@ -153,6 +140,21 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base
struct ibv_send_wr *bad_wr;
int rc;

#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
/* NTH: the remote SRQ number is only available once the endpoint is connected. By
* setting the value here instead of mca_btl_openib_put we guarantee the rem_srqs
* array is initialized. */
#if OPAL_HAVE_CONNECTX_XRC
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS
to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
#error "that should never happen"
#endif
}
#endif

/* check for a send wqe */
if (qp_get_wqe(ep, qp) < 0) {
qp_put_wqe(ep, qp);
Expand All @@ -164,7 +166,7 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base

if (0 != (rc = ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))) {
qp_put_wqe(ep, qp);
return OPAL_ERROR;;
return OPAL_ERROR;
}

return OPAL_SUCCESS;
Expand Down
8 changes: 7 additions & 1 deletion opal/mca/btl/openib/btl_openib_xrc.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -122,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr)
ib_addr->lid = 0;
ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
ib_addr->qp = NULL;
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t);
/* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call
* into the CPC with the lock held. The alternative would be to drop the lock but the
* lock is never obtained in a critical path. */
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t);
OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t);
}

Expand Down
Loading