Skip to content

Commit

Permalink
efa: Add RDMA write support
Browse files Browse the repository at this point in the history
Add RDMA write functions (with and without immediate data) to the QP
extended API and expose the capability bit through the query device direct verb.

Signed-off-by: Yonatan Nachum <ynachum@amazon.com>
  • Loading branch information
YonatanNachum committed Mar 16, 2023
1 parent 9051af8 commit f85d3d2
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 13 deletions.
11 changes: 7 additions & 4 deletions providers/efa/efa_io_defs.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#ifndef _EFA_IO_H_
Expand All @@ -23,6 +23,8 @@ enum efa_io_send_op_type {
EFA_IO_SEND = 0,
/* RDMA read */
EFA_IO_RDMA_READ = 1,
/* RDMA write */
EFA_IO_RDMA_WRITE = 2,
};

enum efa_io_comp_status {
Expand Down Expand Up @@ -62,8 +64,7 @@ struct efa_io_tx_meta_desc {

/*
* control flags
* 3:0 : op_type - operation type: send/rdma/fast mem
* ops/etc
* 3:0 : op_type - enum efa_io_send_op_type
* 4 : has_imm - immediate_data field carries valid
* data.
* 5 : inline_msg - inline mode - inline message data
Expand Down Expand Up @@ -219,7 +220,8 @@ struct efa_io_cdesc_common {
* 2:1 : q_type - enum efa_io_queue_type: send/recv
* 3 : has_imm - indicates that immediate data is
* present - for RX completions only
* 7:4 : reserved28 - MBZ
* 6:4 : op_type - enum efa_io_send_op_type
* 7 : reserved31 - MBZ
*/
uint8_t flags;

Expand Down Expand Up @@ -285,5 +287,6 @@ struct efa_io_rx_cdesc_ex {
#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)

#endif /* _EFA_IO_H_ */
3 changes: 2 additions & 1 deletion providers/efa/efadv.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2019-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2019-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#ifndef __EFADV_H__
Expand Down Expand Up @@ -39,6 +39,7 @@ enum {
EFADV_DEVICE_ATTR_CAPS_RDMA_READ = 1 << 0,
EFADV_DEVICE_ATTR_CAPS_RNR_RETRY = 1 << 1,
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2,
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3,
};

struct efadv_device_attr {
Expand Down
3 changes: 3 additions & 0 deletions providers/efa/man/efadv_query_device.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ struct efadv_device_attr {
Reading source address (SGID) from receive completion descriptors is supported.
Valid only for unknown AH.

EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE:
RDMA write is supported.

*max_rdma_size*
: Maximum RDMA transfer size in bytes.

Expand Down
72 changes: 66 additions & 6 deletions providers/efa/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ int efadv_query_device(struct ibv_context *ibvctx,

if (EFA_DEV_CAP(ctx, RDMA_READ))
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_RDMA_READ;

if (EFA_DEV_CAP(ctx, RDMA_WRITE))
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE;
}

attr->comp_mask = comp_mask_out;
Expand Down Expand Up @@ -419,24 +422,34 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
{
struct efa_io_cdesc_common *cqe = cq->cur_cqe;
uint32_t wrid_idx;
enum efa_io_send_op_type op_type;

wc->status = to_ibv_status(cqe->status);
wc->vendor_err = cqe->status;
wc->wc_flags = 0;
wc->qp_num = cqe->qp_num;

op_type = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_OP_TYPE);

if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) ==
EFA_IO_SEND_QUEUE) {
cq->cur_wq = &qp->sq.wq;
wc->opcode = IBV_WC_SEND;
if (op_type == EFA_IO_RDMA_WRITE)
wc->opcode = IBV_WC_RDMA_WRITE;
else
wc->opcode = IBV_WC_SEND;
} else {
struct efa_io_rx_cdesc *rcqe =
container_of(cqe, struct efa_io_rx_cdesc, common);

cq->cur_wq = &qp->rq.wq;

wc->byte_len = cqe->length;
wc->opcode = IBV_WC_RECV;
if (op_type == EFA_IO_RDMA_WRITE)
wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
else
wc->opcode = IBV_WC_RECV;

wc->src_qp = rcqe->src_qp_num;
wc->sl = 0;
wc->slid = rcqe->ah;
Expand Down Expand Up @@ -614,10 +627,20 @@ static enum ibv_wc_opcode efa_wc_read_opcode(struct ibv_cq_ex *ibvcqx)
{
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
struct efa_io_cdesc_common *cqe = cq->cur_cqe;
enum efa_io_send_op_type op_type;

op_type = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_OP_TYPE);

if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) ==
EFA_IO_SEND_QUEUE)
EFA_IO_SEND_QUEUE) {
if (op_type == EFA_IO_RDMA_WRITE)
return IBV_WC_RDMA_WRITE;

return IBV_WC_SEND;
}

if (op_type == EFA_IO_RDMA_WRITE)
return IBV_WC_RECV_RDMA_WITH_IMM;

return IBV_WC_RECV;
}
Expand Down Expand Up @@ -1211,9 +1234,13 @@ static int efa_check_qp_attr(struct efa_context *ctx,
uint64_t supp_send_ops_mask;
uint64_t supp_ud_send_ops_mask = IBV_QP_EX_WITH_SEND |
IBV_QP_EX_WITH_SEND_WITH_IMM;
uint64_t supp_srd_send_ops_mask =
IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
(EFA_DEV_CAP(ctx, RDMA_READ) ? IBV_QP_EX_WITH_RDMA_READ : 0);
uint64_t supp_srd_send_ops_mask = IBV_QP_EX_WITH_SEND |
IBV_QP_EX_WITH_SEND_WITH_IMM;
if (EFA_DEV_CAP(ctx, RDMA_READ))
supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_READ;
if (EFA_DEV_CAP(ctx, RDMA_WRITE))
supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_WRITE |
IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM;

#define EFA_CREATE_QP_SUPP_ATTR_MASK \
(IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)
Expand Down Expand Up @@ -1896,6 +1923,31 @@ static void efa_send_wr_rdma_read(struct ibv_qp_ex *ibvqpx, uint32_t rkey,
efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr);
}

/*
 * Begin an RDMA write work request on an extended QP.
 *
 * Requests a TX WQE from efa_send_wr_common() using the EFA_IO_RDMA_WRITE
 * op type and, when one is returned, hands rkey and remote_addr to
 * efa_send_wr_set_rdma_addr(). If no WQE is returned the function exits
 * without doing anything else.
 */
static void efa_send_wr_rdma_write(struct ibv_qp_ex *ibvqpx, uint32_t rkey,
				   uint64_t remote_addr)
{
	struct efa_io_tx_wqe *wqe = efa_send_wr_common(ibvqpx,
						       EFA_IO_RDMA_WRITE);

	/* No WQE available: nothing to fill in. */
	if (unlikely(!wqe))
		return;

	efa_send_wr_set_rdma_addr(wqe, rkey, remote_addr);
}

/*
 * Begin an RDMA write-with-immediate work request on an extended QP.
 *
 * Requests a TX WQE from efa_send_wr_common() using the EFA_IO_RDMA_WRITE
 * op type. When one is returned, rkey and remote_addr are passed to
 * efa_send_wr_set_rdma_addr() and imm_data to efa_send_wr_set_imm_data(),
 * in that order. If no WQE is returned the function exits without doing
 * anything else.
 */
static void efa_send_wr_rdma_write_imm(struct ibv_qp_ex *ibvqpx, uint32_t rkey,
				       uint64_t remote_addr, __be32 imm_data)
{
	struct efa_io_tx_wqe *wqe = efa_send_wr_common(ibvqpx,
						       EFA_IO_RDMA_WRITE);

	/* No WQE available: nothing to fill in. */
	if (unlikely(!wqe))
		return;

	efa_send_wr_set_rdma_addr(wqe, rkey, remote_addr);
	efa_send_wr_set_imm_data(wqe, imm_data);
}

static void efa_send_wr_set_sge(struct ibv_qp_ex *ibvqpx, uint32_t lkey,
uint64_t addr, uint32_t length)
{
Expand All @@ -1916,6 +1968,7 @@ static void efa_send_wr_set_sge(struct ibv_qp_ex *ibvqpx, uint32_t lkey,
buf = &tx_wqe->data.sgl[0];
break;
case EFA_IO_RDMA_READ:
case EFA_IO_RDMA_WRITE:
tx_wqe->data.rdma_req.remote_mem.length = length;
buf = &tx_wqe->data.rdma_req.local_mem[0];
break;
Expand Down Expand Up @@ -1953,6 +2006,7 @@ static void efa_send_wr_set_sge_list(struct ibv_qp_ex *ibvqpx, size_t num_sge,
efa_post_send_sgl(tx_wqe->data.sgl, sg_list, num_sge);
break;
case EFA_IO_RDMA_READ:
case EFA_IO_RDMA_WRITE:
if (unlikely(num_sge > sq->max_wr_rdma_sge)) {
verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
"SQ[%u] num_sge[%zu] > max_rdma_sge[%zu]\n",
Expand Down Expand Up @@ -2161,6 +2215,12 @@ static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx,
if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_READ)
ibvqpx->wr_rdma_read = efa_send_wr_rdma_read;

if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE)
ibvqpx->wr_rdma_write = efa_send_wr_rdma_write;

if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM)
ibvqpx->wr_rdma_write_imm = efa_send_wr_rdma_write_imm;

ibvqpx->wr_set_inline_data = efa_send_wr_set_inline_data;
ibvqpx->wr_set_inline_data_list = efa_send_wr_set_inline_data_list;
ibvqpx->wr_set_sge = efa_send_wr_set_sge;
Expand Down
3 changes: 2 additions & 1 deletion pyverbs/providers/efa/efa_enums.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
# Copyright 2020-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
# Copyright 2020-2023 Amazon.com, Inc. or its affiliates. All rights reserved.

#cython: language_level=3

Expand All @@ -8,6 +8,7 @@ cdef extern from 'infiniband/efadv.h':
cpdef enum:
EFADV_DEVICE_ATTR_CAPS_RDMA_READ
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE

cpdef enum:
EFADV_QP_DRIVER_TYPE_SRD
Expand Down
3 changes: 2 additions & 1 deletion pyverbs/providers/efa/efadv.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
# Copyright 2020-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
# Copyright 2020-2023 Amazon.com, Inc. or its affiliates. All rights reserved.

cimport pyverbs.providers.efa.efadv_enums as dve
cimport pyverbs.providers.efa.libefa as dv
Expand All @@ -18,6 +18,7 @@ def dev_cap_to_str(flags):
dve.EFADV_DEVICE_ATTR_CAPS_RDMA_READ: 'RDMA Read',
dve.EFADV_DEVICE_ATTR_CAPS_RNR_RETRY: 'RNR Retry',
dve.EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID: 'CQ entries with source GID',
dve.EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE: 'RDMA Write',
}
return bitmask_to_str(flags, l)

Expand Down
1 change: 1 addition & 0 deletions pyverbs/providers/efa/efadv_enums.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ cdef extern from 'infiniband/efadv.h':
EFADV_DEVICE_ATTR_CAPS_RDMA_READ
EFADV_DEVICE_ATTR_CAPS_RNR_RETRY
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE

cpdef enum:
EFADV_QP_DRIVER_TYPE_SRD
Expand Down

0 comments on commit f85d3d2

Please sign in to comment.