diff --git a/providers/efa/efa_io_defs.h b/providers/efa/efa_io_defs.h index 1b2dfc4fd..9633a7371 100644 --- a/providers/efa/efa_io_defs.h +++ b/providers/efa/efa_io_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_IO_H_ @@ -23,6 +23,8 @@ enum efa_io_send_op_type { EFA_IO_SEND = 0, /* RDMA read */ EFA_IO_RDMA_READ = 1, + /* RDMA write */ + EFA_IO_RDMA_WRITE = 2, }; enum efa_io_comp_status { @@ -62,8 +64,7 @@ struct efa_io_tx_meta_desc { /* * control flags - * 3:0 : op_type - operation type: send/rdma/fast mem - * ops/etc + * 3:0 : op_type - enum efa_io_send_op_type * 4 : has_imm - immediate_data field carries valid * data. * 5 : inline_msg - inline mode - inline message data @@ -219,7 +220,8 @@ struct efa_io_cdesc_common { * 2:1 : q_type - enum efa_io_queue_type: send/recv * 3 : has_imm - indicates that immediate data is * present - for RX completions only - * 7:4 : reserved28 - MBZ + * 6:4 : op_type - enum efa_io_send_op_type + * 7 : reserved31 - MBZ */ uint8_t flags; @@ -285,5 +287,6 @@ struct efa_io_rx_cdesc_ex { #define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) #define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) #define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) +#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4) #endif /* _EFA_IO_H_ */ diff --git a/providers/efa/efadv.h b/providers/efa/efadv.h index d62c5bec1..845822a37 100644 --- a/providers/efa/efadv.h +++ b/providers/efa/efadv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2019-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019-2023 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef __EFADV_H__ @@ -39,6 +39,7 @@ enum { EFADV_DEVICE_ATTR_CAPS_RDMA_READ = 1 << 0, EFADV_DEVICE_ATTR_CAPS_RNR_RETRY = 1 << 1, EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2, + EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3, }; struct efadv_device_attr { diff --git a/providers/efa/man/efadv_query_device.3.md b/providers/efa/man/efadv_query_device.3.md index 1b689813e..d447ab2b7 100644 --- a/providers/efa/man/efadv_query_device.3.md +++ b/providers/efa/man/efadv_query_device.3.md @@ -76,6 +76,9 @@ struct efadv_device_attr { Reading source address (SGID) from receive completion descriptors is supported. Valid only for unknown AH. + EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE: + RDMA write is supported + *max_rdma_size* : Maximum RDMA transfer size in bytes. diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c index b3476d7a2..3a830fbfc 100644 --- a/providers/efa/verbs.c +++ b/providers/efa/verbs.c @@ -178,6 +178,9 @@ int efadv_query_device(struct ibv_context *ibvctx, if (EFA_DEV_CAP(ctx, RDMA_READ)) attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_RDMA_READ; + + if (EFA_DEV_CAP(ctx, RDMA_WRITE)) + attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE; } attr->comp_mask = comp_mask_out; @@ -419,16 +422,22 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc, { struct efa_io_cdesc_common *cqe = cq->cur_cqe; uint32_t wrid_idx; + enum efa_io_send_op_type op_type; wc->status = to_ibv_status(cqe->status); wc->vendor_err = cqe->status; wc->wc_flags = 0; wc->qp_num = cqe->qp_num; + op_type = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_OP_TYPE); + if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) == EFA_IO_SEND_QUEUE) { cq->cur_wq = &qp->sq.wq; - wc->opcode = IBV_WC_SEND; + if (op_type == EFA_IO_RDMA_WRITE) + wc->opcode = IBV_WC_RDMA_WRITE; + else + wc->opcode = IBV_WC_SEND; } else { struct efa_io_rx_cdesc *rcqe = container_of(cqe, struct efa_io_rx_cdesc, common); @@ -436,7 +445,11 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc, cq->cur_wq = &qp->rq.wq; wc->byte_len = cqe->length; - wc->opcode = IBV_WC_RECV; + if (op_type == EFA_IO_RDMA_WRITE) + wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + else + wc->opcode = IBV_WC_RECV; + wc->src_qp = rcqe->src_qp_num; wc->sl = 0; wc->slid = rcqe->ah; @@ -614,10 +627,20 @@ static enum ibv_wc_opcode efa_wc_read_opcode(struct ibv_cq_ex *ibvcqx) { struct efa_cq *cq = to_efa_cq_ex(ibvcqx); struct efa_io_cdesc_common *cqe = cq->cur_cqe; + enum efa_io_send_op_type op_type; + + op_type = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_OP_TYPE); if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) == - EFA_IO_SEND_QUEUE) + EFA_IO_SEND_QUEUE) { + if (op_type == EFA_IO_RDMA_WRITE) + return IBV_WC_RDMA_WRITE; + return IBV_WC_SEND; + } + + if (op_type == EFA_IO_RDMA_WRITE) + return IBV_WC_RECV_RDMA_WITH_IMM; return IBV_WC_RECV; } @@ -1211,9 +1234,13 @@ static int efa_check_qp_attr(struct efa_context *ctx, uint64_t supp_send_ops_mask; uint64_t supp_ud_send_ops_mask = IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM; - uint64_t supp_srd_send_ops_mask = - IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM | - (EFA_DEV_CAP(ctx, RDMA_READ) ? IBV_QP_EX_WITH_RDMA_READ : 0); + uint64_t supp_srd_send_ops_mask = IBV_QP_EX_WITH_SEND | + IBV_QP_EX_WITH_SEND_WITH_IMM; + if (EFA_DEV_CAP(ctx, RDMA_READ)) + supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_READ; + if (EFA_DEV_CAP(ctx, RDMA_WRITE)) + supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_WRITE | + IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM; #define EFA_CREATE_QP_SUPP_ATTR_MASK \ (IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) @@ -1896,6 +1923,31 @@ static void efa_send_wr_rdma_read(struct ibv_qp_ex *ibvqpx, uint32_t rkey, efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); } +static void efa_send_wr_rdma_write(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr) +{ + struct efa_io_tx_wqe *tx_wqe; + + tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_RDMA_WRITE); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); +} + +static void efa_send_wr_rdma_write_imm(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr, __be32 imm_data) +{ + struct efa_io_tx_wqe *tx_wqe; + + tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_RDMA_WRITE); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); + efa_send_wr_set_imm_data(tx_wqe, imm_data); +} + static void efa_send_wr_set_sge(struct ibv_qp_ex *ibvqpx, uint32_t lkey, uint64_t addr, uint32_t length) { @@ -1916,6 +1968,7 @@ static void efa_send_wr_set_sge(struct ibv_qp_ex *ibvqpx, uint32_t lkey, buf = &tx_wqe->data.sgl[0]; break; case EFA_IO_RDMA_READ: + case EFA_IO_RDMA_WRITE: tx_wqe->data.rdma_req.remote_mem.length = length; buf = &tx_wqe->data.rdma_req.local_mem[0]; break; @@ -1953,6 +2006,7 @@ static void efa_send_wr_set_sge_list(struct ibv_qp_ex *ibvqpx, size_t num_sge, efa_post_send_sgl(tx_wqe->data.sgl, sg_list, num_sge); break; case EFA_IO_RDMA_READ: + case EFA_IO_RDMA_WRITE: if (unlikely(num_sge > sq->max_wr_rdma_sge)) { verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), "SQ[%u] num_sge[%zu] > max_rdma_sge[%zu]\n", @@ -2161,6 +2215,12 @@ static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx, if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_READ) ibvqpx->wr_rdma_read = efa_send_wr_rdma_read; + if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE) + ibvqpx->wr_rdma_write = efa_send_wr_rdma_write; + + if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM) + ibvqpx->wr_rdma_write_imm = efa_send_wr_rdma_write_imm; + ibvqpx->wr_set_inline_data = efa_send_wr_set_inline_data; ibvqpx->wr_set_inline_data_list = efa_send_wr_set_inline_data_list; ibvqpx->wr_set_sge = efa_send_wr_set_sge; diff --git a/pyverbs/providers/efa/efa_enums.pyx b/pyverbs/providers/efa/efa_enums.pyx index 6b26baa95..9fe32a95f 100644 --- a/pyverbs/providers/efa/efa_enums.pyx +++ b/pyverbs/providers/efa/efa_enums.pyx @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) -# Copyright 2020-2022 Amazon.com, Inc. or its affiliates. All rights reserved. +# Copyright 2020-2023 Amazon.com, Inc. or its affiliates. All rights reserved. #cython: language_level=3 @@ -8,6 +8,7 @@ cdef extern from 'infiniband/efadv.h': cpdef enum: EFADV_DEVICE_ATTR_CAPS_RDMA_READ EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID + EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE cpdef enum: EFADV_QP_DRIVER_TYPE_SRD diff --git a/pyverbs/providers/efa/efadv.pyx b/pyverbs/providers/efa/efadv.pyx index 568bde6da..1cd8c6ced 100644 --- a/pyverbs/providers/efa/efadv.pyx +++ b/pyverbs/providers/efa/efadv.pyx @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) -# Copyright 2020-2022 Amazon.com, Inc. or its affiliates. All rights reserved. +# Copyright 2020-2023 Amazon.com, Inc. or its affiliates. All rights reserved. cimport pyverbs.providers.efa.efadv_enums as dve cimport pyverbs.providers.efa.libefa as dv @@ -18,6 +18,7 @@ def dev_cap_to_str(flags): dve.EFADV_DEVICE_ATTR_CAPS_RDMA_READ: 'RDMA Read', dve.EFADV_DEVICE_ATTR_CAPS_RNR_RETRY: 'RNR Retry', dve.EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID: 'CQ entries with source GID', + dve.EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE: 'RDMA Write', } return bitmask_to_str(flags, l) diff --git a/pyverbs/providers/efa/efadv_enums.pxd b/pyverbs/providers/efa/efadv_enums.pxd index a649bc9fb..9d65ade30 100644 --- a/pyverbs/providers/efa/efadv_enums.pxd +++ b/pyverbs/providers/efa/efadv_enums.pxd @@ -9,6 +9,7 @@ cdef extern from 'infiniband/efadv.h': EFADV_DEVICE_ATTR_CAPS_RDMA_READ EFADV_DEVICE_ATTR_CAPS_RNR_RETRY EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID + EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE cpdef enum: EFADV_QP_DRIVER_TYPE_SRD