diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 2722f928e4..e5c4343efd 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -399,6 +399,8 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } } + openib_btl->srqs_created = true; + return OPAL_SUCCESS; } @@ -406,7 +408,7 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl) { int rc = OPAL_SUCCESS; opal_mutex_lock(&openib_btl->ib_lock); - if (0 == openib_btl->num_peers && + if (!openib_btl->srqs_created && (mca_btl_openib_component.num_srq_qps > 0 || mca_btl_openib_component.num_xrc_qps > 0)) { rc = create_srq(openib_btl); @@ -416,17 +418,12 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl) } -static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs) +static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl) { uint32_t send_cqes, recv_cqes; int rc = OPAL_SUCCESS, qp; mca_btl_openib_device_t *device = openib_btl->device; - if( 0 == nprocs){ - /* nothing to do */ - return OPAL_SUCCESS; - } - opal_mutex_lock(&openib_btl->ib_lock); /* figure out reasonable sizes for completion queues */ for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { @@ -435,7 +432,7 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num; } else { send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num + - mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs; + mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * openib_btl->num_peers; recv_cqes = send_cqes; } @@ -455,7 +452,6 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si goto out; } - openib_btl->num_peers += nprocs; out: opal_mutex_unlock(&openib_btl->ib_lock); return rc; @@ -1028,10 +1024,14 @@ int mca_btl_openib_add_procs( return rc; } - rc = openib_btl_prepare(openib_btl); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("could not prepare openib btl structure for usel")); - return rc; + if (0 == openib_btl->num_peers) { + /* ensure completion queues are created before attempting to + * make a loop-back queue pair */ + rc = openib_btl_size_queues(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; + } } /* prepare all proc's and account them properly */ @@ -1080,10 +1080,20 @@ int mca_btl_openib_add_procs( } } - /* account this procs if need */ - rc = openib_btl_size_queues(openib_btl, nprocs_new); + if (nprocs_new) { + OPAL_THREAD_ADD32(&openib_btl->num_peers, nprocs_new); + + /* adjust cq sizes given the new procs */ + rc = openib_btl_size_queues (openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; + } + } + + rc = openib_btl_prepare (openib_btl); if (OPAL_SUCCESS != rc) { - BTL_ERROR(("error creating cqs")); + BTL_ERROR(("could not prepare openib btl module for use")); return rc; } @@ -1156,10 +1166,8 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul { mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl; volatile mca_btl_base_endpoint_t *endpoint = NULL; + int local_port_cnt = 0, btl_rank, rc; mca_btl_openib_proc_t *ib_proc; - int rc; - int local_port_cnt = 0, btl_rank; - size_t nprocs_new = 0; rc = prepare_device_for_use (openib_btl->device); if (OPAL_SUCCESS != rc) { @@ -1167,12 +1175,6 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul return NULL; } - rc = openib_btl_prepare(openib_btl); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("could not prepare openib btl structure for use")); - return NULL; - } - if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) { /* if we don't have connection info for this process, it's * okay because some other method might be able to reach it, @@ -1189,7 +1191,8 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul /* this is a new process to this openib btl * account this procs if need */ - rc = openib_btl_size_queues(openib_btl, nprocs_new); + OPAL_THREAD_ADD32(&openib_btl->num_peers, 1); + rc = openib_btl_size_queues(openib_btl); if (OPAL_SUCCESS != rc) { BTL_ERROR(("error creating cqs")); return NULL; @@ -1214,6 +1217,12 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul return NULL; } + rc = openib_btl_prepare(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl structure for use")); + goto exit; + } + for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) { endpoint = ib_proc->proc_endpoints[j]; if (endpoint->endpoint_btl == openib_btl) { diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index c0dd643e56..9b6464b272 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. @@ -465,6 +465,7 @@ struct mca_btl_openib_module_t { mca_btl_base_module_t super; bool btl_inited; + bool srqs_created; /** Common information about all ports */ mca_btl_openib_modex_message_t port_info; diff --git a/opal/mca/btl/openib/btl_openib_atomic.c b/opal/mca/btl/openib/btl_openib_atomic.c index 8d7ac86f73..0c6460f2cf 100644 --- a/opal/mca/btl/openib/btl_openib_atomic.c +++ b/opal/mca/btl/openib/btl_openib_atomic.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -73,16 +73,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st frag->sr_desc.wr.atomic.rkey = rkey; -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - frag->sr_desc.qp_type.xrc.remote_srqn = endpoint->rem_info.rem_srqs[qp].rem_srq_num; -#else - frag->sr_desc.xrc_remote_srq_num = endpoint->rem_info.rem_srqs[qp].rem_srq_num; -#endif - - } -#endif + /* NTH: the SRQ# is set in mca_btl_get_internal */ if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&endpoint->endpoint_lock); diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index a4f84e0892..0186f8d5e2 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved. @@ -579,7 +579,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) opal_output(-1, "Now we are CONNECTED"); if (MCA_BTL_XRC_ENABLED) { - OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_lock (&endpoint->ib_addr->addr_lock); if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) { /* We are not xrc master */ /* set our qp pointer to master qp */ @@ -622,7 +622,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) } } } - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_unlock (&endpoint->ib_addr->addr_lock); } diff --git a/opal/mca/btl/openib/btl_openib_get.c b/opal/mca/btl/openib/btl_openib_get.c index 2d335619c1..c8bc78105d 100644 --- a/opal/mca/btl/openib/btl_openib_get.c +++ b/opal/mca/btl/openib/btl_openib_get.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -92,16 +92,6 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint frag->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else - frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags); @@ -138,6 +128,19 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base int qp = to_base_frag(frag)->base.order; struct ibv_send_wr *bad_wr; +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { + /* NTH: the remote SRQ number is only available once the endpoint is connected. By + * setting the value here instead of mca_btl_openib_get we guarantee the rem_srqs + * array is initialized. */ +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else + frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); diff --git a/opal/mca/btl/openib/btl_openib_put.c b/opal/mca/btl/openib/btl_openib_put.c index 25b5d3f532..2a9ee2ddd6 100644 --- a/opal/mca/btl/openib/btl_openib_put.c +++ b/opal/mca/btl/openib/btl_openib_put.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -49,7 +49,7 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint qp = mca_btl_openib_component.rdma_qp; } - if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle || + if (OPAL_UNLIKELY((btl->btl_put_local_registration_threshold < size && !local_handle) || !remote_handle || size > btl->btl_put_limit)) { return OPAL_ERR_BAD_PARAM; } @@ -101,19 +101,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { - -#if OPAL_HAVE_CONNECTX_XRC - to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS - to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else -#error "that should never happen" -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags); @@ -153,6 +140,21 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base struct ibv_send_wr *bad_wr; int rc; +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { + /* NTH: the remote SRQ number is only available once the endpoint is connected. By + * setting the value here instead of mca_btl_openib_put we guarantee the rem_srqs + * array is initialized. */ +#if OPAL_HAVE_CONNECTX_XRC + to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS + to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else +#error "that should never happen" +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); @@ -164,7 +166,7 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base if (0 != (rc = ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))) { qp_put_wqe(ep, qp); - return OPAL_ERROR;; + return OPAL_ERROR; } return OPAL_SUCCESS; diff --git a/opal/mca/btl/openib/btl_openib_xrc.c b/opal/mca/btl/openib/btl_openib_xrc.c index 3fc0e32c29..1952c31b12 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.c +++ b/opal/mca/btl/openib/btl_openib_xrc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. @@ -5,6 +6,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -122,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr) ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; ib_addr->qp = NULL; - OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t); + /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call + * into the CPC with the lock held. The alternative would be to drop the lock but the + * lock is never obtained in a critical path. */ + OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t); OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t); } diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 947ca371f0..ff735f9b72 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -3,7 +3,7 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -240,6 +240,7 @@ typedef struct udcm_msg_hdr { #if HAVE_XRC /* UDCM_MESSAGE_XCONNECT, UDCM_MESSAGE_XCONNECT2 */ struct msg_xrc_connect { + opal_process_name_t rem_name; int32_t rem_ep_index; uint8_t rem_port_num; uint32_t rem_qp_num; @@ -343,11 +344,7 @@ static int udcm_xrc_start_connect (opal_btl_openib_connect_base_module_t *cpc, static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep); static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep); -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num); -#else -static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep); -#endif static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep, uint8_t msg_type); @@ -529,27 +526,24 @@ static int udcm_component_finalize(void) static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); + int32_t recv_qpn; int rc; opal_mutex_lock (&udep->udep_lock); do { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - rc = udcm_xrc_recv_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num); -#else - lcl_ep->xrc_recv_qp_num = lcl_ep->qps[0].qp->lcl_qp->qp_num; - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error connecting loopback XRC receive queue pair")); + if (OPAL_SUCCESS != (rc = udcm_endpoint_init_data (lcl_ep))) { + BTL_VERBOSE(("error initializing loopback endpoint cpc data")); break; } - rc = mca_btl_openib_endpoint_post_recvs (lcl_ep); + rc = udcm_xrc_send_qp_create (lcl_ep); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error posting receives for loopback queue pair")); + BTL_VERBOSE(("error creating send queue pair for loopback endpoint")); break; } + lcl_ep->rem_info.rem_index = lcl_ep->index; + rc = udcm_xrc_recv_qp_create (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, lcl_ep->qps[0].qp->lcl_psn); if (OPAL_SUCCESS != rc) { @@ -557,14 +551,35 @@ static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) break; } - rc = udcm_xrc_send_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, - lcl_ep->qps[0].qp->lcl_psn); + for (int i = 0 ; i < mca_btl_openib_component.num_xrc_qps ; ++i) { + uint32_t srq_num; +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) { + BTL_ERROR(("BTL openib UDCM internal error: can't get srq num")); + } +#else + srq_num = lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num; +#endif + lcl_ep->rem_info.rem_srqs[i].rem_srq_num = srq_num; + } + +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + recv_qpn = lcl_ep->xrc_recv_qp->qp_num; +#else + recv_qpn = lcl_ep->xrc_recv_qp_num; +#endif + + lcl_ep->ib_addr->remote_xrc_rcv_qp_num = recv_qpn; + lcl_ep->rem_info.rem_qps[0].rem_psn = lcl_ep->xrc_recv_psn; + lcl_ep->rem_info.rem_qps[0].rem_qp_num = recv_qpn; + + rc = udcm_xrc_send_qp_connect (lcl_ep, recv_qpn, lcl_ep->xrc_recv_psn); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error creating loopback XRC send queue pair")); + BTL_VERBOSE(("error connecting loopback XRC send queue pair")); break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; + BTL_VERBOSE(("successfully created loopback queue pair")); /* need to hold the endpoint lock before calling udcm_finish_connection */ OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); @@ -606,8 +621,6 @@ static int udcm_endpoint_init_self (struct mca_btl_base_endpoint_t *lcl_ep) break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; - /* need to hold the endpoint lock before calling udcm_finish_connection */ OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); rc = udcm_finish_connection (lcl_ep); @@ -2607,11 +2620,7 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep) /* mark: xrc receive qp */ /* Recv qp connect */ -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num) -#else -static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) -#endif { mca_btl_openib_module_t *openib_btl = lcl_ep->endpoint_btl; @@ -2625,9 +2634,9 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) BTL_VERBOSE(("Connecting Recv QP\n")); lcl_ep->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr); if (NULL == lcl_ep->xrc_recv_qp) { /* failed to regester the qp, so it is already die and we should create new one */ - /* Return NOT READY !!!*/ - BTL_ERROR(("Failed to register qp_num: %d, get error: %s (%d)\n. Replying with RNR", - qp_num, strerror(errno), errno)); + /* Return NOT READY !!!*/ + BTL_VERBOSE(("Failed to register qp_num: %d, get error: %s (%d)\n. Replying with RNR", + qp_num, strerror(errno), errno)); return OPAL_ERROR; } else { BTL_VERBOSE(("Connected to XRC Recv qp [%d]", lcl_ep->xrc_recv_qp->qp_num)); @@ -2635,13 +2644,16 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) } #else int ret; + /* silence unused variable warning */ + (void) qp_num; + BTL_VERBOSE(("Connecting receive qp: %d", lcl_ep->xrc_recv_qp_num)); ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, lcl_ep->xrc_recv_qp_num); if (ret) { /* failed to regester the qp, so it is already die and we should create new one */ /* Return NOT READY !!!*/ lcl_ep->xrc_recv_qp_num = 0; - BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d). Replying with RNR", - lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); + BTL_VERBOSE(("Failed to register qp_num: %d , get error: %s (%d). Replying with RNR", + lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); return OPAL_ERROR; } #endif @@ -2817,9 +2829,9 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_ return rc; } - msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); - msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; - msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME; + msg->data->hdr.data.xreq.rem_ep_index = htonl(lcl_ep->index); + msg->data->hdr.data.xreq.rem_port_num = m->modex.mm_port_num; + msg->data->hdr.data.xreq.rem_name = OPAL_PROC_MY_NAME; if (UDCM_MESSAGE_XCONNECT == msg_type) { BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num, @@ -2923,11 +2935,7 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg if (UDCM_MESSAGE_XCONNECT2 == msg_hdr->type) { response_type = UDCM_MESSAGE_XRESPONSE2; -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS rc = udcm_xrc_recv_qp_connect (lcl_ep, msg_hdr->data.xreq.rem_qp_num); -#else - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif if (OPAL_SUCCESS != rc) { /* return not ready. remote side will retry */ rej_reason = UDCM_REJ_NOT_READY;