Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions opal/mca/btl/openib/btl_openib.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,20 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
{
uint32_t send_cqes, recv_cqes;
int rc = OPAL_SUCCESS, qp;
int rc = OPAL_SUCCESS;
mca_btl_openib_device_t *device = openib_btl->device;
uint32_t requested[BTL_OPENIB_MAX_CQ];
bool need_resize = false;

opal_mutex_lock(&openib_btl->ib_lock);

for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++cq) {
requested[cq] = 0;
}

/* figure out reasonable sizes for completion queues */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
for (int qp = 0 ; qp < mca_btl_openib_component.num_qps ; qp++) {
if (BTL_OPENIB_QP_TYPE_SRQ(qp)) {
send_cqes = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num;
} else {
Expand All @@ -440,24 +447,30 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
recv_cqes = send_cqes;
}

opal_mutex_lock(&openib_btl->device->device_lock);
openib_btl->device->cq_size[qp_cq_prio(qp)] += recv_cqes;
openib_btl->device->cq_size[BTL_OPENIB_LP_CQ] += send_cqes;
opal_mutex_unlock(&openib_btl->device->device_lock);
requested[qp_cq_prio(qp)] += recv_cqes;
requested[BTL_OPENIB_LP_CQ] += send_cqes;
}

rc = adjust_cq(device, BTL_OPENIB_HP_CQ);
if (OPAL_SUCCESS != rc) {
goto out;
}
opal_mutex_lock (&openib_btl->device->device_lock);
for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++cq) {
if (requested[cq] < mca_btl_openib_component.ib_cq_size[cq]) {
requested[cq] = mca_btl_openib_component.ib_cq_size[cq];
} else if (requested[cq] > openib_btl->device->ib_dev_attr.max_cqe) {
requested[cq] = openib_btl->device->ib_dev_attr.max_cqe;
}

rc = adjust_cq(device, BTL_OPENIB_LP_CQ);
if (OPAL_SUCCESS != rc) {
goto out;
}
if (openib_btl->device->cq_size[cq] < requested[cq]) {
openib_btl->device->cq_size[cq] = requested[cq];

out:
rc = adjust_cq (device, cq);
if (OPAL_SUCCESS != rc) {
break;
}
}
}
opal_mutex_unlock (&openib_btl->device->device_lock);
opal_mutex_unlock(&openib_btl->ib_lock);

return rc;
}

Expand Down Expand Up @@ -1107,7 +1120,7 @@ int mca_btl_openib_add_procs(
}

if (nprocs_new) {
OPAL_THREAD_ADD32(&openib_btl->num_peers, nprocs_new);
opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new);

/* adjust cq sizes given the new procs */
rc = openib_btl_size_queues (openib_btl);
Expand Down Expand Up @@ -1217,7 +1230,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul

/* this is a new process to this openib btl
* account this procs if need */
OPAL_THREAD_ADD32(&openib_btl->num_peers, 1);
opal_atomic_add_32 (&openib_btl->num_peers, 1);
rc = openib_btl_size_queues(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
Expand Down
17 changes: 9 additions & 8 deletions opal/mca/btl/openib/btl_openib.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ BEGIN_C_DECLS
* Infiniband (IB) BTL component.
*/

/*
 * Completion-queue indices.  These values subscript the per-CQ arrays
 * (ib_cq[], cq_size[], ib_cq_size[]), so they must stay dense and start
 * at zero.  NOTE(review): HP/LP presumably stand for high/low priority --
 * confirm against the rest of the component.
 */
enum {
    BTL_OPENIB_HP_CQ = 0,   /* first CQ slot */
    BTL_OPENIB_LP_CQ,       /* second CQ slot */
    BTL_OPENIB_MAX_CQ       /* number of CQ slots; used as the array dimension */
};

typedef enum {
MCA_BTL_OPENIB_TRANSPORT_IB,
MCA_BTL_OPENIB_TRANSPORT_IWARP,
Expand Down Expand Up @@ -206,7 +212,7 @@ struct mca_btl_openib_component_t {
uint32_t reg_mru_len; /**< Length of the registration cache most recently used list */
uint32_t use_srq; /**< Use the Shared Receive Queue (SRQ mode) */

uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */
uint32_t ib_cq_size[BTL_OPENIB_MAX_CQ]; /**< Max outstanding CQE on the CQ */

int ib_max_inline_data; /**< Max size of inline data */
unsigned int ib_pkey_val;
Expand Down Expand Up @@ -379,8 +385,8 @@ typedef struct mca_btl_openib_device_t {
#endif
struct ibv_device_attr ib_dev_attr;
struct ibv_pd *ib_pd;
struct ibv_cq *ib_cq[2];
uint32_t cq_size[2];
struct ibv_cq *ib_cq[BTL_OPENIB_MAX_CQ];
uint32_t cq_size[BTL_OPENIB_MAX_CQ];
mca_mpool_base_module_t *mpool;
mca_rcache_base_module_t *rcache;
/* MTU for this device */
Expand Down Expand Up @@ -863,11 +869,6 @@ extern int mca_btl_openib_ft_event(int state);
*/
void mca_btl_openib_show_init_error(const char *file, int line,
const char *func, const char *dev);

#define BTL_OPENIB_HP_CQ 0
#define BTL_OPENIB_LP_CQ 1


/**
* Post to Shared Receive Queue with certain priority
*
Expand Down