Skip to content

Commit

Permalink
efa: Respect maximum TX doorbell batch
Browse files Browse the repository at this point in the history
The max TX batch attribute reports the maximum number of cachelines that may
be written before the WQ doorbell must be rung.
Respect the max batch size in both post send APIs (efa_post_send() and efa_send_wr_complete()).

Signed-off-by: Gal Pressman <galpress@amazon.com>
  • Loading branch information
gal-pressman committed Aug 2, 2020
1 parent b9ffd6e commit 7aad28d
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 14 deletions.
6 changes: 3 additions & 3 deletions providers/efa/efa-abi.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#ifndef __EFA_ABI_H__
Expand All @@ -12,8 +12,8 @@

#define EFA_ABI_VERSION 1

DECLARE_DRV_CMD(efa_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, empty,
efa_ibv_alloc_ucontext_resp);
DECLARE_DRV_CMD(efa_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
efa_ibv_alloc_ucontext_cmd, efa_ibv_alloc_ucontext_resp);
DECLARE_DRV_CMD(efa_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
efa_ibv_alloc_pd_resp);
DECLARE_DRV_CMD(efa_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, efa_ibv_create_cq,
Expand Down
9 changes: 6 additions & 3 deletions providers/efa/efa.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#include <stdio.h>
Expand Down Expand Up @@ -51,18 +51,20 @@ static struct verbs_context *efa_alloc_context(struct ibv_device *vdev,
void *private_data)
{
struct efa_alloc_ucontext_resp resp = {};
struct efa_alloc_ucontext cmd = {};
struct ibv_device_attr_ex attr;
struct ibv_get_context cmd;
unsigned int qp_table_sz;
struct efa_context *ctx;
int err;

cmd.comp_mask |= EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH;

ctx = verbs_init_and_alloc_context(vdev, cmd_fd, ctx, ibvctx,
RDMA_DRIVER_EFA);
if (!ctx)
return NULL;

if (ibv_cmd_get_context(&ctx->ibvctx, &cmd, sizeof(cmd),
if (ibv_cmd_get_context(&ctx->ibvctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free_ctx;

Expand All @@ -71,6 +73,7 @@ static struct verbs_context *efa_alloc_context(struct ibv_device *vdev,
ctx->cqe_size = sizeof(struct efa_io_rx_cdesc);
ctx->inline_buf_size = resp.inline_buf_size;
ctx->max_llq_size = resp.max_llq_size;
ctx->max_tx_batch = resp.max_tx_batch;
pthread_spin_init(&ctx->qp_table_lock, PTHREAD_PROCESS_PRIVATE);

/* ah udata is mandatory for ah number retrieval */
Expand Down
2 changes: 2 additions & 0 deletions providers/efa/efa.h
Expand Up @@ -29,6 +29,7 @@ struct efa_context {
uint16_t max_rq_sge;
uint32_t max_rdma_size;
uint16_t max_wr_rdma_sge;
uint16_t max_tx_batch;
size_t cqe_size;
struct efa_qp **qp_table;
unsigned int qp_table_sz_m1;
Expand Down Expand Up @@ -98,6 +99,7 @@ struct efa_sq {
size_t desc_ring_mmap_size;
size_t max_inline_data;
size_t max_wr_rdma_sge;
uint16_t max_batch_wr;

/* Buffer for pending WR entries in the current session */
uint8_t *local_queue;
Expand Down
45 changes: 37 additions & 8 deletions providers/efa/verbs.c
Expand Up @@ -34,6 +34,12 @@ static bool is_buf_cleared(void *buf, size_t len)
return true;
}

/*
 * Smallest of three values.
 * The minimum of the first two arguments is stored in a temporary that has
 * the type of 'a', and that temporary is then compared against 'c'. Each
 * argument is handed to min() exactly once.
 */
#define min3(a, b, c) \
	({ \
		typeof(a) _min_ab = min(a, b); \
		min(_min_ab, c); \
	})

/*
 * Run is_buf_cleared() on the extension area that follows *ptr: the bytes
 * from offset sizeof(*ptr) up to inlen, where inlen is the total size in
 * bytes of the buffer ptr points to.
 *
 * ptr is cast to a byte pointer before sizeof(*ptr) is added. Without the
 * cast the addition is scaled by the pointee size, so the check would start
 * far past the end of the structure instead of right after it.
 *
 * The caller must make sure inlen >= sizeof(*ptr); otherwise the unsigned
 * length computation wraps around.
 */
#define is_ext_cleared(ptr, inlen) \
	is_buf_cleared((uint8_t *)(ptr) + sizeof(*(ptr)), \
		       (inlen) - sizeof(*(ptr)))

Expand Down Expand Up @@ -612,6 +618,9 @@ static int efa_sq_initialize(struct efa_qp *qp,
sq->desc += sq->desc_offset;
sq->max_wr_rdma_sge = min_t(uint16_t, ctx->max_wr_rdma_sge,
EFA_IO_TX_DESC_NUM_RDMA_BUFS);
sq->max_batch_wr = ctx->max_tx_batch ?
(ctx->max_tx_batch * 64) / sizeof(struct efa_io_tx_wqe) :
UINT16_MAX;

return 0;

Expand Down Expand Up @@ -1217,6 +1226,7 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
struct efa_qp *qp = to_efa_qp(ibvqp);
struct efa_io_tx_wqe tx_wqe;
uint32_t sq_desc_offset;
uint32_t curbatch = 0;
struct efa_ah *ah;
int err = 0;

Expand Down Expand Up @@ -1261,12 +1271,19 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,

/* advance index and change phase */
efa_sq_advance_post_idx(qp);
curbatch++;

if (curbatch == qp->sq.max_batch_wr) {
curbatch = 0;
efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
}

wr = wr->next;
}

ring_db:
efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
if (curbatch)
efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);

/*
* Not using mmio_wc_spinunlock as the doorbell write should be done
Expand Down Expand Up @@ -1502,25 +1519,29 @@ static inline void efa_sq_roll_back(struct efa_qp *qp)
static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)
{
struct efa_qp *qp = to_efa_qp_ex(ibvqpx);
uint32_t max_txbatch = qp->sq.max_batch_wr;
uint32_t num_wqe_to_copy;
uint16_t local_idx = 0;
uint16_t curbatch = 0;
uint16_t sq_desc_idx;
uint16_t pc;

if (unlikely(qp->wr_session_err)) {
efa_sq_roll_back(qp);
goto out;
}

/*
* Copy local queue to device in chunks, as the descriptor index
* might have wrapped around the submission queue.
* Copy local queue to device in chunks, handling wraparound and max
* doorbell batch.
*/
sq_desc_idx = (qp->sq.wq.pc - qp->sq.num_wqe_pending) &
qp->sq.wq.desc_mask;
pc = qp->sq.wq.pc - qp->sq.num_wqe_pending;
sq_desc_idx = pc & qp->sq.wq.desc_mask;

while (qp->sq.num_wqe_pending) {
num_wqe_to_copy = min(qp->sq.num_wqe_pending,
qp->sq.wq.wqe_cnt - sq_desc_idx);
num_wqe_to_copy = min3(qp->sq.num_wqe_pending,
qp->sq.wq.wqe_cnt - sq_desc_idx,
max_txbatch - curbatch);
mmio_memcpy_x64((struct efa_io_tx_wqe *)qp->sq.desc +
sq_desc_idx,
(struct efa_io_tx_wqe *)qp->sq.local_queue +
Expand All @@ -1529,11 +1550,19 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)

qp->sq.num_wqe_pending -= num_wqe_to_copy;
local_idx += num_wqe_to_copy;
curbatch += num_wqe_to_copy;
pc += num_wqe_to_copy;
sq_desc_idx = (sq_desc_idx + num_wqe_to_copy) &
qp->sq.wq.desc_mask;

if (curbatch == max_txbatch) {
efa_sq_ring_doorbell(&qp->sq, pc);
curbatch = 0;
}
}

efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
if (curbatch)
efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
out:
/*
* Not using mmio_wc_spinunlock as the doorbell write should be done
Expand Down

0 comments on commit 7aad28d

Please sign in to comment.