From f06067e668ab5fdd34f686f79ecb5ae11131e5e2 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Fri, 5 Apr 2024 07:29:55 +0200 Subject: [PATCH 1/3] prov/verbs: Remove assert(cq) from vrb_find_max_inline() It's better to return from vrb_find_max_inline() if the CQ cannot be created. Signed-off-by: Sylvain Didelot --- prov/verbs/src/verbs_init.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c index 05183d0f9d4..65f88a8bc4d 100644 --- a/prov/verbs/src/verbs_init.c +++ b/prov/verbs/src/verbs_init.c @@ -479,7 +479,8 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, } cq = ibv_create_cq(context, 1, NULL, NULL, 0); - assert(cq); + if (!cq) + goto out; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.send_cq = cq; @@ -540,10 +541,8 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, ibv_destroy_qp(qp); } - if (cq) { - ibv_destroy_cq(cq); - } - + ibv_destroy_cq(cq); +out: return rst; } From 99c220e1f37e42bb3c12d3b28eaa380b24a8ff94 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 25 Mar 2024 10:34:43 +0100 Subject: [PATCH 2/3] prov/verbs: Allow for large TX queues with limited (or no) inline data Using large TX queues with the verbs provider would cause fi_getinfo() to return an empty list of verbs adapters because the call to ibv_create_qp() executed as part of fi_getinfo() would fail with EINVAL. The failure happens because the code allocates the QP with the maximum amount of inline data supported by the adapter, which is empirically determined by vrb_find_max_inline(). The problem is that using inline data limits the TX queue size that can be allocated. The patch removes vrb_get_qp_cap(), whose the sole purpose is to set the maximum inline data size returned by vrb_find_max_inline(). This operation can be done in vrb_get_device_attrs() directly. Signed-off-by: Sylvain Didelot --- prov/verbs/src/verbs_info.c | 80 ++++--------------------------------- prov/verbs/src/verbs_init.c | 13 ++++-- prov/verbs/src/verbs_ofi.h | 4 +- 3 files changed, 19 insertions(+), 78 deletions(-) diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index e4def4c6d5d..e4885192aff 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2013-2015 Intel Corporation, Inc. All rights reserved. * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2024 DataDirect Networks, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -426,76 +427,6 @@ static int vrb_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi) return FI_SUCCESS; } -static inline int vrb_get_qp_cap(struct ibv_context *ctx, - struct fi_info *info, uint32_t protocol) -{ - struct ibv_pd *pd; - struct ibv_cq *cq; - struct ibv_qp *qp; - struct ibv_qp_init_attr init_attr; - enum ibv_qp_type qp_type; - int ret = 0; - - pd = ibv_alloc_pd(ctx); - if (!pd) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd"); - return -errno; - } - - cq = ibv_create_cq(ctx, 1, NULL, NULL, 0); - if (!cq) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_cq"); - ret = -errno; - goto err1; - } - - if (protocol == FI_PROTO_RDMA_CM_IB_XRC) - qp_type = IBV_QPT_XRC_SEND; - else - qp_type = (info->ep_attr->type != FI_EP_DGRAM) ? - IBV_QPT_RC : IBV_QPT_UD; - - memset(&init_attr, 0, sizeof init_attr); - init_attr.send_cq = cq; - - assert(info->tx_attr->size && - info->tx_attr->iov_limit && - info->rx_attr->size && - info->rx_attr->iov_limit); - - init_attr.cap.max_send_wr = MIN(vrb_gl_data.def_tx_size, - info->tx_attr->size); - init_attr.cap.max_send_sge = MIN(vrb_gl_data.def_tx_iov_limit, - info->tx_attr->iov_limit); - - if (qp_type != IBV_QPT_XRC_SEND) { - init_attr.recv_cq = cq; - init_attr.cap.max_recv_wr = MIN(vrb_gl_data.def_rx_size, - info->rx_attr->size); - init_attr.cap.max_recv_sge = MIN(vrb_gl_data.def_rx_iov_limit, - info->rx_attr->iov_limit); - } - init_attr.cap.max_inline_data = vrb_find_max_inline(pd, ctx, qp_type); - init_attr.qp_type = qp_type; - - qp = ibv_create_qp(pd, &init_attr); - if (!qp) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_qp"); - ret = -errno; - goto err2; - } - - info->tx_attr->inject_size = init_attr.cap.max_inline_data; - - ibv_destroy_qp(qp); -err2: - ibv_destroy_cq(cq); -err1: - ibv_dealloc_pd(pd); - - return ret; -} - static int vrb_mtu_type_to_len(enum ibv_mtu mtu_type) { switch (mtu_type) { @@ -552,6 +483,7 @@ static int vrb_get_device_attrs(struct ibv_context *ctx, enum fi_log_level level = vrb_gl_data.msg.prefer_xrc ? FI_LOG_WARN : FI_LOG_INFO; const char *dev_name = ibv_get_device_name(ctx->device); + enum ibv_qp_type qp_type; ret = ibv_query_device(ctx, &device_attr); if (ret) { @@ -595,11 +527,13 @@ static int vrb_get_device_attrs(struct ibv_context *ctx, if (protocol == FI_PROTO_RDMA_CM_IB_XRC) { info->rx_attr->iov_limit = MIN(info->rx_attr->iov_limit, 1); info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT; + qp_type = IBV_QPT_XRC_SEND; + } else { + qp_type = (info->ep_attr->type != FI_EP_DGRAM) ? + IBV_QPT_RC : IBV_QPT_UD; } - ret = vrb_get_qp_cap(ctx, info, protocol); - if (ret) - return ret; + info->tx_attr->inject_size = vrb_find_max_inline(ctx, qp_type); for (port_num = 1; port_num < device_attr.phys_port_cnt + 1; port_num++) { ret = ibv_query_port(ctx, port_num, &port_attr); diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c index 65f88a8bc4d..493a8363f58 100644 --- a/prov/verbs/src/verbs_init.c +++ b/prov/verbs/src/verbs_init.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2021 Intel Corporation, Inc. All rights reserved. + * Copyright (c) 2024 DataDirect Networks, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -461,10 +462,10 @@ void vrb_set_rnr_timer(struct ibv_qp *qp) vrb_dbg_query_qp_attr(qp); } -int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, - enum ibv_qp_type qp_type) +int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type) { struct ibv_qp_init_attr qp_attr; + struct ibv_pd *pd; struct ibv_qp *qp = NULL; struct ibv_cq *cq; int max_inline = 2; @@ -478,9 +479,13 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, return verbs_dev_presets[i].max_inline_data; } + pd = ibv_alloc_pd(context); + if (!pd) + goto out; + cq = ibv_create_cq(context, 1, NULL, NULL, 0); if (!cq) - goto out; + goto destroy_pd; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.send_cq = cq; @@ -542,6 +547,8 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, } ibv_destroy_cq(cq); +destroy_pd: + ibv_dealloc_pd(pd); out: return rst; } diff --git a/prov/verbs/src/verbs_ofi.h b/prov/verbs/src/verbs_ofi.h index 82454763efe..c49fef90453 100644 --- a/prov/verbs/src/verbs_ofi.h +++ b/prov/verbs/src/verbs_ofi.h @@ -4,6 +4,7 @@ * Copyright (c) 2018-2019 Cray Inc. All rights reserved. * Copyright (c) 2018-2019 System Fabric Works, Inc. All rights reserved. * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2024 DataDirect Networks, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -878,8 +879,7 @@ int vrb_query_atomic(struct fid_domain *domain_fid, enum fi_datatype datatype, uint64_t flags); void vrb_set_rnr_timer(struct ibv_qp *qp); void vrb_cleanup_cq(struct vrb_ep *cur_ep); -int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, - enum ibv_qp_type qp_type); +int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type); struct vrb_dgram_av { struct util_av util_av; From 3403a1f91a2d5a96cd5f2a5696336a861a033d6a Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Thu, 4 Apr 2024 21:57:07 +0200 Subject: [PATCH 3/3] prov/verbs: Remove the word "maximum" from the env variable labels These environment variables define the default values when no hints are provided. Remove the word "maximum" to avoid confusion over whether these are a default value or a maximum value. Signed-off-by: Sylvain Didelot --- man/fi_verbs.7.md | 10 +++++----- prov/verbs/src/verbs_init.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/man/fi_verbs.7.md b/man/fi_verbs.7.md index 14ebc1faf33..7d4ec9cc2f4 100644 --- a/man/fi_verbs.7.md +++ b/man/fi_verbs.7.md @@ -163,21 +163,21 @@ The verbs provider checks for the following environment variables. ### Common variables: *FI_VERBS_TX_SIZE* -: Default maximum tx context size (default: 384) +: Default tx context size (default: 384) *FI_VERBS_RX_SIZE* -: Default maximum rx context size (default: 384) +: Default rx context size (default: 384) *FI_VERBS_TX_IOV_LIMIT* -: Default maximum tx iov_limit (default: 4). Note: RDM (internal - +: Default tx iov_limit (default: 4). Note: RDM (internal - deprecated) EP type supports only 1 *FI_VERBS_RX_IOV_LIMIT* -: Default maximum rx iov_limit (default: 4). Note: RDM (internal - +: Default rx iov_limit (default: 4). Note: RDM (internal - deprecated) EP type supports only 1 *FI_VERBS_INLINE_SIZE* -: Default maximum inline size. Actual inject size returned in fi_info +: Default inline size. Actual inject size returned in fi_info may be greater (default: 64) *FI_VERBS_MIN_RNR_TIMER* diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c index 493a8363f58..828aee73c5f 100644 --- a/prov/verbs/src/verbs_init.c +++ b/prov/verbs/src/verbs_init.c @@ -614,31 +614,31 @@ static int vrb_get_param_str(const char *param_name, int vrb_read_params(void) { /* Common parameters */ - if (vrb_get_param_int("tx_size", "Default maximum tx context size", + if (vrb_get_param_int("tx_size", "Default tx context size", &vrb_gl_data.def_tx_size) || (vrb_gl_data.def_tx_size < 0)) { VRB_WARN(FI_LOG_CORE, "Invalid value of tx_size\n"); return -FI_EINVAL; } - if (vrb_get_param_int("rx_size", "Default maximum rx context size", + if (vrb_get_param_int("rx_size", "Default rx context size", &vrb_gl_data.def_rx_size) || (vrb_gl_data.def_rx_size < 0)) { VRB_WARN(FI_LOG_CORE, "Invalid value of rx_size\n"); return -FI_EINVAL; } - if (vrb_get_param_int("tx_iov_limit", "Default maximum tx iov_limit", + if (vrb_get_param_int("tx_iov_limit", "Default tx iov_limit", &vrb_gl_data.def_tx_iov_limit) || (vrb_gl_data.def_tx_iov_limit < 0)) { VRB_WARN(FI_LOG_CORE, "Invalid value of tx_iov_limit\n"); return -FI_EINVAL; } - if (vrb_get_param_int("rx_iov_limit", "Default maximum rx iov_limit", + if (vrb_get_param_int("rx_iov_limit", "Default rx iov_limit", &vrb_gl_data.def_rx_iov_limit) || (vrb_gl_data.def_rx_iov_limit < 0)) { VRB_WARN(FI_LOG_CORE, "Invalid value of rx_iov_limit\n"); return -FI_EINVAL; } - if (vrb_get_param_int("inline_size", "Default maximum inline size. " + if (vrb_get_param_int("inline_size", "Default inline size. " "Actual inject size returned in fi_info may be " "greater", &vrb_gl_data.def_inline_size) || (vrb_gl_data.def_inline_size < 0)) {