From 69056ae940e0b5ac7b191825cb35f452a96cf681 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 1 Dec 2022 16:00:38 -0800 Subject: [PATCH 1/2] Update kernel headers To commit 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter"). Signed-off-by: Long Li --- kernel-headers/CMakeLists.txt | 2 + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + kernel-headers/rdma/mana-abi.h | 66 +++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 kernel-headers/rdma/mana-abi.h diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt index 436dfb4d2..82c191cad 100644 --- a/kernel-headers/CMakeLists.txt +++ b/kernel-headers/CMakeLists.txt @@ -10,6 +10,7 @@ publish_internal_headers(rdma rdma/ib_user_sa.h rdma/ib_user_verbs.h rdma/irdma-abi.h + rdma/mana-abi.h rdma/mlx4-abi.h rdma/mlx5-abi.h rdma/mlx5_user_ioctl_cmds.h @@ -70,6 +71,7 @@ rdma_kernel_provider_abi( rdma/hns-abi.h rdma/ib_user_verbs.h rdma/irdma-abi.h + rdma/mana-abi.h rdma/mlx4-abi.h rdma/mlx5-abi.h rdma/mthca-abi.h diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h index 7dd562102..e0c25537f 100644 --- a/kernel-headers/rdma/ib_user_ioctl_verbs.h +++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h @@ -251,6 +251,7 @@ enum rdma_driver_id { RDMA_DRIVER_EFA, RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, + RDMA_DRIVER_MANA, }; enum ib_uverbs_gid_type { diff --git a/kernel-headers/rdma/mana-abi.h b/kernel-headers/rdma/mana-abi.h new file mode 100644 index 000000000..5fcb31b37 --- /dev/null +++ b/kernel-headers/rdma/mana-abi.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#ifndef MANA_ABI_USER_H +#define MANA_ABI_USER_H + +#include +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. 
+ */ + +#define MANA_IB_UVERBS_ABI_VERSION 1 + +struct mana_ib_create_cq { + __aligned_u64 buf_addr; +}; + +struct mana_ib_create_qp { + __aligned_u64 sq_buf_addr; + __u32 sq_buf_size; + __u32 port; +}; + +struct mana_ib_create_qp_resp { + __u32 sqid; + __u32 cqid; + __u32 tx_vp_offset; + __u32 reserved; +}; + +struct mana_ib_create_wq { + __aligned_u64 wq_buf_addr; + __u32 wq_buf_size; + __u32 reserved; +}; + +/* RX Hash function flags */ +enum mana_ib_rx_hash_function_flags { + MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +struct mana_ib_create_qp_rss { + __aligned_u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 reserved[7]; + __u32 rx_hash_key_len; + __u8 rx_hash_key[40]; + __u32 port; +}; + +struct rss_resp_entry { + __u32 cqid; + __u32 wqid; +}; + +struct mana_ib_create_qp_rss_resp { + __aligned_u64 num_entries; + struct rss_resp_entry entries[64]; +}; + +#endif From 443f196deee00b7a7fb4305c1d6a3be42db4f2f8 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 2 Nov 2021 21:40:46 +0000 Subject: [PATCH 2/2] mana: Microsoft Azure Network Adapter (MANA) RDMA provider Introduce a provider that exposes MANA devices to user applications. 
Signed-off-by: Long Li --- CMakeLists.txt | 2 + MAINTAINERS | 5 + README.md | 1 + debian/control | 3 +- debian/copyright | 4 + debian/ibverbs-providers.install | 1 + debian/ibverbs-providers.lintian-overrides | 4 +- debian/ibverbs-providers.symbols | 5 + debian/libibverbs-dev.install | 6 + debian/rules | 2 +- libibverbs/verbs.h | 7 +- providers/mana/CMakeLists.txt | 13 + providers/mana/libmana.map | 8 + providers/mana/man/CMakeLists.txt | 5 + providers/mana/man/manadv.7.md | 47 ++ providers/mana/man/manadv_init_obj.3.md | 83 ++++ .../mana/man/manadv_set_context_attr.3.md | 65 +++ providers/mana/mana.c | 415 ++++++++++++++++++ providers/mana/mana.h | 160 +++++++ providers/mana/manadv.c | 88 ++++ providers/mana/manadv.h | 84 ++++ providers/mana/qp.c | 262 +++++++++++ providers/mana/wq.c | 189 ++++++++ redhat/rdma-core.spec | 6 + suse/rdma-core.spec | 22 + 25 files changed, 1480 insertions(+), 7 deletions(-) create mode 100644 providers/mana/CMakeLists.txt create mode 100644 providers/mana/libmana.map create mode 100644 providers/mana/man/CMakeLists.txt create mode 100644 providers/mana/man/manadv.7.md create mode 100644 providers/mana/man/manadv_init_obj.3.md create mode 100644 providers/mana/man/manadv_set_context_attr.3.md create mode 100644 providers/mana/mana.c create mode 100644 providers/mana/mana.h create mode 100644 providers/mana/manadv.c create mode 100644 providers/mana/manadv.h create mode 100644 providers/mana/qp.c create mode 100644 providers/mana/wq.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 296c30544..fbabd0302 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,6 +713,8 @@ add_subdirectory(providers/efa/man) add_subdirectory(providers/erdma) add_subdirectory(providers/hns) add_subdirectory(providers/irdma) +add_subdirectory(providers/mana) +add_subdirectory(providers/mana/man) add_subdirectory(providers/mlx4) add_subdirectory(providers/mlx4/man) add_subdirectory(providers/mlx5) diff --git a/MAINTAINERS b/MAINTAINERS index 
466f876f2..e7dce61c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -124,6 +124,11 @@ M: Sean Hefty S: Supported F: librdmacm/ +MANA USERSPACE PROVIDER (for mana_ib.ko) +M: Long Li +S: Supported +F: providers/mana/ + MLX4 USERSPACE PROVIDER (for mlx4_ib.ko) M: Yishai Hadas H: Roland Dreier diff --git a/README.md b/README.md index ebb941e21..e2983a95d 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ is included: - hns-roce.ko - irdma.ko - ib_qib.ko + - mana_ib.ko - mlx4_ib.ko - mlx5_ib.ko - ib_mthca.ko diff --git a/debian/control b/debian/control index d5f97757c..e1cc28275 100644 --- a/debian/control +++ b/debian/control @@ -68,7 +68,7 @@ Package: ibverbs-providers Architecture: linux-any Multi-Arch: same Depends: ${misc:Depends}, ${shlibs:Depends} -Provides: libefa1, libipathverbs1, libmlx4-1, libmlx5-1, libmthca1 +Provides: libefa1, libipathverbs1, libmana1, libmlx4-1, libmlx5-1, libmthca1 Replaces: libipathverbs1 (<< 15), libmlx4-1 (<< 15), libmlx5-1 (<< 15), @@ -98,6 +98,7 @@ Description: User space provider drivers for libibverbs - hns: HiSilicon Hip06 SoC - ipathverbs: QLogic InfiniPath HCAs - irdma: Intel Ethernet Connection RDMA + - mana: Microsoft Azure Network Adapter - mlx4: Mellanox ConnectX-3 InfiniBand HCAs - mlx5: Mellanox Connect-IB/X-4+ InfiniBand HCAs - mthca: Mellanox InfiniBand HCAs diff --git a/debian/copyright b/debian/copyright index 066177dc4..e75b38046 100644 --- a/debian/copyright +++ b/debian/copyright @@ -180,6 +180,10 @@ Files: providers/irdma/* Copyright: 2015-2021, Intel Corporation. License: BSD-MIT or GPL-2 +Files: providers/mana/* +Copyright: 2022, Microsoft Corporation. +License: BSD-MIT or GPL-2 + Files: providers/mlx4/* Copyright: 2004-2005, Topspin Communications. 2005-2007, Cisco, Inc. 
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install index 4f971fbf3..f69498d96 100644 --- a/debian/ibverbs-providers.install +++ b/debian/ibverbs-providers.install @@ -1,5 +1,6 @@ etc/libibverbs.d/ usr/lib/*/libefa.so.* usr/lib/*/libibverbs/lib*-rdmav*.so +usr/lib/*/libmana.so* usr/lib/*/libmlx4.so.* usr/lib/*/libmlx5.so.* diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides index 8a44d54fb..5815058ff 100644 --- a/debian/ibverbs-providers.lintian-overrides +++ b/debian/ibverbs-providers.lintian-overrides @@ -1,2 +1,2 @@ -# libefa, libmlx4 and libmlx5 are ibverbs provider that provides more functions. -ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmlx4-1 libmlx5-1 +# libefa, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions. +ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmana1 libmlx4-1 libmlx5-1 diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols index c4fd91cdb..cacf6442f 100644 --- a/debian/ibverbs-providers.symbols +++ b/debian/ibverbs-providers.symbols @@ -172,3 +172,8 @@ libefa.so.1 ibverbs-providers #MINVER# efadv_query_ah@EFA_1.1 26 efadv_cq_from_ibv_cq_ex@EFA_1.2 43 efadv_create_cq@EFA_1.2 43 +libmana.so.1 ibverbs-providers #MINVER# +* Build-Depends-Package: libibverbs-dev + MANA_1.0@MANA_1.0 41 + manadv_init_obj@MANA_1.0 41 + manadv_set_context_attr@MANA_1.0 41 diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install index bc8caa538..5f2ffd55b 100644 --- a/debian/libibverbs-dev.install +++ b/debian/libibverbs-dev.install @@ -1,6 +1,7 @@ usr/include/infiniband/arch.h usr/include/infiniband/efadv.h usr/include/infiniband/ib_user_ioctl_verbs.h +usr/include/infiniband/manadv.h usr/include/infiniband/mlx4dv.h usr/include/infiniband/mlx5_api.h usr/include/infiniband/mlx5_user_ioctl_verbs.h @@ -16,20 +17,25 @@ usr/lib/*/libefa.a usr/lib/*/libefa.so usr/lib/*/libibverbs*.so 
usr/lib/*/libibverbs.a +usr/lib/*/libmana.a +usr/lib/*/libmana.so usr/lib/*/libmlx4.a usr/lib/*/libmlx4.so usr/lib/*/libmlx5.a usr/lib/*/libmlx5.so usr/lib/*/pkgconfig/libefa.pc usr/lib/*/pkgconfig/libibverbs.pc +usr/lib/*/pkgconfig/libmana.pc usr/lib/*/pkgconfig/libmlx4.pc usr/lib/*/pkgconfig/libmlx5.pc usr/share/man/man3/efadv_*.3 usr/share/man/man3/ibv_* usr/share/man/man3/mbps_to_ibv_rate.3 +usr/share/man/man3/manadv_*.3 usr/share/man/man3/mlx4dv_*.3 usr/share/man/man3/mlx5dv_*.3 usr/share/man/man3/mult_to_ibv_rate.3 usr/share/man/man7/efadv.7 +usr/share/man/man7/manadv.7 usr/share/man/man7/mlx4dv.7 usr/share/man/man7/mlx5dv.7 diff --git a/debian/rules b/debian/rules index 35915e552..9627a4073 100755 --- a/debian/rules +++ b/debian/rules @@ -62,7 +62,7 @@ ifneq (,$(filter-out $(COHERENT_DMA_ARCHS),$(DEB_HOST_ARCH))) for package in ibverbs-providers libibverbs-dev rdma-core; do \ test -e debian/$$package.install.backup || cp debian/$$package.install debian/$$package.install.backup; \ done - sed -i '/efa\|mlx[45]/d' debian/ibverbs-providers.install debian/libibverbs-dev.install debian/rdma-core.install + sed -i '/efa\|mana\|mlx[45]/d' debian/ibverbs-providers.install debian/libibverbs-dev.install debian/rdma-core.install endif DESTDIR=$(CURDIR)/debian/tmp ninja -C build-deb install diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 29fc83b37..ebdfca7c5 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -2193,7 +2193,7 @@ struct ibv_device **ibv_get_device_list(int *num_devices); */ #ifdef RDMA_STATIC_PROVIDERS #define _RDMA_STATIC_PREFIX_(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ - _12, _13, _14, _15, _16, _17, _18, ...) \ + _12, _13, _14, _15, _16, _17, _18, _19, ...) 
\ &verbs_provider_##_1, &verbs_provider_##_2, &verbs_provider_##_3, \ &verbs_provider_##_4, &verbs_provider_##_5, \ &verbs_provider_##_6, &verbs_provider_##_7, \ @@ -2202,11 +2202,11 @@ struct ibv_device **ibv_get_device_list(int *num_devices); &verbs_provider_##_12, &verbs_provider_##_13, \ &verbs_provider_##_14, &verbs_provider_##_15, \ &verbs_provider_##_16, &verbs_provider_##_17, \ - &verbs_provider_##_18 + &verbs_provider_##_18, &verbs_provider_##_19 #define _RDMA_STATIC_PREFIX(arg) \ _RDMA_STATIC_PREFIX_(arg, none, none, none, none, none, none, none, \ none, none, none, none, none, none, none, none, \ - none, none) + none, none, none) struct verbs_devices_ops; extern const struct verbs_device_ops verbs_provider_bnxt_re; @@ -2217,6 +2217,7 @@ extern const struct verbs_device_ops verbs_provider_hfi1verbs; extern const struct verbs_device_ops verbs_provider_hns; extern const struct verbs_device_ops verbs_provider_ipathverbs; extern const struct verbs_device_ops verbs_provider_irdma; +extern const struct verbs_device_ops verbs_provider_mana; extern const struct verbs_device_ops verbs_provider_mlx4; extern const struct verbs_device_ops verbs_provider_mlx5; extern const struct verbs_device_ops verbs_provider_mthca; diff --git a/providers/mana/CMakeLists.txt b/providers/mana/CMakeLists.txt new file mode 100644 index 000000000..7219ee2ec --- /dev/null +++ b/providers/mana/CMakeLists.txt @@ -0,0 +1,13 @@ +rdma_shared_provider(mana libmana.map + 1 1.0.${PACKAGE_VERSION} + mana.c + manadv.c + qp.c + wq.c +) + +publish_headers(infiniband + manadv.h +) + +rdma_pkg_config("mana" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") diff --git a/providers/mana/libmana.map b/providers/mana/libmana.map new file mode 100644 index 000000000..ab66295d6 --- /dev/null +++ b/providers/mana/libmana.map @@ -0,0 +1,8 @@ +/* Export symbols should be added below according to + Documentation/versioning.md document. 
*/ +MANA_1.0 { + global: + manadv_set_context_attr; + manadv_init_obj; + local: *; +}; diff --git a/providers/mana/man/CMakeLists.txt b/providers/mana/man/CMakeLists.txt new file mode 100644 index 000000000..24f185969 --- /dev/null +++ b/providers/mana/man/CMakeLists.txt @@ -0,0 +1,5 @@ +rdma_man_pages( + manadv.7.md + manadv_init_obj.3.md + manadv_set_context_attr.3.md +) diff --git a/providers/mana/man/manadv.7.md b/providers/mana/man/manadv.7.md new file mode 100644 index 000000000..8c7ec88fb --- /dev/null +++ b/providers/mana/man/manadv.7.md @@ -0,0 +1,47 @@ +--- +layout: page +title: MANADV +section: 7 +tagline: Verbs +date: 2022-05-16 +header: "MANA Direct Verbs Manual" +footer: mana +--- + +# NAME +manadv - Direct verbs for mana devices + +This provides low level access to mana devices to perform direct operations, +without general branching performed by libibverbs. + +# DESCRIPTION +The libibverbs API is an abstract one. It is agnostic to any underlying +provider specific implementation. While this abstraction has the advantage +of user applications portability, it has a performance penalty. For some +applications optimizing performance is more important than portability. + +The mana direct verbs API is intended for such applications. +It exposes mana specific low level operations, allowing the application +to bypass the libibverbs API. + +This version of the driver supports one QP type: IBV_QPT_RAW_PACKET. To use +this QP type, the application is required to use manadv_set_context_attr() +to set external buffer allocators for allocating queues, and use +manadv_init_obj() to obtain all the queue information. The application +implements its own queue operations, bypassing libibverbs API for +sending/receiving traffic over the queues. At hardware layer, IBV_QPT_RAW_PACKET +QP shares the same hardware resource as the Ethernet port used in the kernel. 
+The software checks for exclusive use of the hardware Ethernet port, and will +fail the QP creation if the port is already in use. To create an +IBV_QPT_RAW_PACKET QP on a specified port, the user needs to configure the system +in such a way that this port is not used by any other software (including the +kernel). If the port is used, ibv_create_qp() will fail with errno set to EBUSY. + +The direct include of manadv.h together with linkage to the mana library will +allow usage of this new interface. + +# SEE ALSO +**verbs**(7) + +# AUTHORS +Long Li diff --git a/providers/mana/man/manadv_init_obj.3.md b/providers/mana/man/manadv_init_obj.3.md new file mode 100644 index 000000000..575ea34f7 --- /dev/null +++ b/providers/mana/man/manadv_init_obj.3.md @@ -0,0 +1,83 @@ +--- +layout: page +title: manadv_init_obj +section: 3 +tagline: Verbs +--- + +# NAME +manadv_init_obj \- Initialize mana direct verbs object from ibv_xxx structures + +# SYNOPSIS +```c +#include <infiniband/manadv.h> + +int manadv_init_obj(struct manadv_obj *obj, uint64_t obj_type); +``` + +# DESCRIPTION +manadv_init_obj() +This function will initialize manadv_xxx structs based on supplied type. The information +for initialization is taken from ibv_xxx structs supplied as part of input. + +# ARGUMENTS +*obj* +: The manadv_xxx structs to be returned. + +```c +struct manadv_qp { + void *sq_buf; + uint32_t sq_count; + uint32_t sq_size; + uint32_t sq_id; + uint32_t tx_vp_offset; + void *db_page; +}; + +struct manadv_cq { + void *buf; + uint32_t count; + uint32_t cq_id; +}; + +struct manadv_rwq { + void *buf; + uint32_t count; + uint32_t size; + uint32_t wq_id; + void *db_page; +}; + +struct manadv_obj { + struct { + struct ibv_qp *in; + struct manadv_qp *out; + } qp; + + struct { + struct ibv_cq *in; + struct manadv_cq *out; + } cq; + + struct { + struct ibv_wq *in; + struct manadv_rwq *out; + } rwq; +}; +``` + +*obj_type* +: The types of the manadv_xxx structs to be returned. 
+ +```c +enum manadv_obj_type { + MANADV_OBJ_QP = 1 << 0, + MANADV_OBJ_CQ = 1 << 1, + MANADV_OBJ_RWQ = 1 << 2, +}; +``` +# RETURN VALUE +0 on success or the value of errno on failure (which indicates the failure reason). + +# AUTHORS +Long Li diff --git a/providers/mana/man/manadv_set_context_attr.3.md b/providers/mana/man/manadv_set_context_attr.3.md new file mode 100644 index 000000000..0a96e2331 --- /dev/null +++ b/providers/mana/man/manadv_set_context_attr.3.md @@ -0,0 +1,65 @@ +--- +layout: page +title: manadv_set_context_attr +section: 3 +tagline: Verbs +--- + +# NAME +manadv_set_context_attr - Set context attributes + +# SYNOPSIS +```c +#include <infiniband/manadv.h> + +int manadv_set_context_attr(struct ibv_context *context, + enum manadv_set_ctx_attr_type attr_type, + void *attr); +``` + +# DESCRIPTION +manadv_set_context_attr gives the ability to set vendor specific attributes on +the RDMA context. + +# ARGUMENTS +*context* +: RDMA device context to work on. + +*attr_type* +: The type of the provided attribute. + +*attr* +: Pointer to the attribute to be set. + +## attr_type +```c +enum manadv_set_ctx_attr_type { + /* Attribute type struct manadv_ctx_allocators */ + MANADV_CTX_ATTR_BUF_ALLOCATORS = 0, +}; +``` +*MANADV_CTX_ATTR_BUF_ALLOCATORS* +: Provide an external buffer allocator + +```c +struct manadv_ctx_allocators { + void *(*alloc)(size_t size, void *priv_data); + void (*free)(void *ptr, void *priv_data); + void *data; +}; +``` +*alloc* +: Function used for buffer allocation instead of libmana internal method + +*free* +: Function used to free buffers allocated by alloc function + +*data* +: Metadata that can be used by alloc and free functions + +# RETURN VALUE +Returns 0 on success, or the value of errno on failure +(which indicates the failure reason). 
+ +# AUTHOR +Long Li diff --git a/providers/mana/mana.c b/providers/mana/mana.c new file mode 100644 index 000000000..b2dd82e9d --- /dev/null +++ b/providers/mana/mana.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "mana.h" + +DECLARE_DRV_CMD(mana_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, empty, + empty); + +DECLARE_DRV_CMD(mana_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty, empty); + +DECLARE_DRV_CMD(mana_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, mana_ib_create_cq, + empty); + +static const struct verbs_match_ent hca_table[] = { + VERBS_DRIVER_ID(RDMA_DRIVER_MANA), + {}, +}; + +struct mana_context *to_mctx(struct ibv_context *ibctx) +{ + return container_of(ibctx, struct mana_context, ibv_ctx.context); +} + +int mana_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size) +{ + struct ib_uverbs_ex_query_device_resp resp; + size_t resp_size = sizeof(resp); + int ret; + + ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp, + &resp_size); + + verbs_debug(verbs_get_ctx(context), + "device attr max_qp %d max_qp_wr %d max_cqe %d\n", + attr->orig_attr.max_qp, attr->orig_attr.max_qp_wr, + attr->orig_attr.max_cqe); + + return ret; +} + +int mana_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); +} + +struct ibv_pd *mana_alloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct mana_alloc_pd_resp resp; + struct mana_pd *pd; + int ret; + + pd = calloc(1, sizeof(*pd)); + if (!pd) + return NULL; + + ret = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), + &resp.ibv_resp, 
sizeof(resp)); + if (ret) { + verbs_err(verbs_get_ctx(context), "Failed to allocate PD\n"); + errno = ret; + free(pd); + return NULL; + } + + return &pd->ibv_pd; +} + +struct ibv_pd * +mana_alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr) +{ + struct mana_parent_domain *mparent_domain; + + if (ibv_check_alloc_parent_domain(attr)) { + errno = EINVAL; + return NULL; + } + + if (!check_comp_mask(attr->comp_mask, + IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT)) { + verbs_err( + verbs_get_ctx(context), + "This driver supports IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT only\n"); + errno = EINVAL; + return NULL; + } + + mparent_domain = calloc(1, sizeof(*mparent_domain)); + if (!mparent_domain) { + errno = ENOMEM; + return NULL; + } + + mparent_domain->mpd.mprotection_domain = + container_of(attr->pd, struct mana_pd, ibv_pd); + ibv_initialize_parent_domain(&mparent_domain->mpd.ibv_pd, attr->pd); + + if (attr->comp_mask & IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT) + mparent_domain->pd_context = attr->pd_context; + + return &mparent_domain->mpd.ibv_pd; +} + +int mana_dealloc_pd(struct ibv_pd *ibpd) +{ + int ret; + struct mana_pd *pd = container_of(ibpd, struct mana_pd, ibv_pd); + + if (pd->mprotection_domain) { + struct mana_parent_domain *parent_domain = + container_of(pd, struct mana_parent_domain, mpd); + + free(parent_domain); + return 0; + } + + ret = ibv_cmd_dealloc_pd(ibpd); + if (ret) { + verbs_err(verbs_get_ctx(ibpd->context), + "Failed to deallocate PD\n"); + return ret; + } + + free(pd); + + return 0; +} + +struct ibv_mr *mana_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access) +{ + struct verbs_mr *vmr; + struct ibv_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + int ret; + + vmr = malloc(sizeof(*vmr)); + if (!vmr) + return NULL; + + ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd, + sizeof(cmd), &resp, sizeof(resp)); + if (ret) { + verbs_err(verbs_get_ctx(pd->context), + "Failed 
to register MR\n"); + errno = ret; + free(vmr); + return NULL; + } + + return &vmr->ibv_mr; +} + +int mana_dereg_mr(struct verbs_mr *vmr) +{ + int ret; + + ret = ibv_cmd_dereg_mr(vmr); + if (ret) { + verbs_err(verbs_get_ctx(vmr->ibv_mr.context), + "Failed to deregister MR\n"); + return ret; + } + + free(vmr); + return 0; +} + +struct ibv_cq *mana_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, int comp_vector) +{ + struct mana_context *ctx = to_mctx(context); + struct mana_cq *cq; + struct mana_create_cq cmd = {}; + struct mana_create_cq_resp resp = {}; + struct mana_ib_create_cq *cmd_drv; + int cq_size; + int ret; + + if (cqe > MAX_SEND_BUFFERS_PER_QUEUE) { + verbs_err(verbs_get_ctx(context), "CQE %d exceeding limit\n", + cqe); + errno = EINVAL; + return NULL; + } + + if (!ctx->extern_alloc.alloc || !ctx->extern_alloc.free) { + /* + * This version of driver doesn't support allocating buffers + * in rdma-core. + */ + verbs_err(verbs_get_ctx(context), + "Allocating core buffers for CQ is not supported\n"); + errno = EINVAL; + return NULL; + } + + cq = calloc(1, sizeof(*cq)); + if (!cq) + return NULL; + + cq_size = cqe * COMP_ENTRY_SIZE; + cq_size = roundup_pow_of_two(cq_size); + cq_size = align(cq_size, MANA_PAGE_SIZE); + + cq->buf = ctx->extern_alloc.alloc(cq_size, ctx->extern_alloc.data); + if (!cq->buf) { + errno = ENOMEM; + goto free_cq; + } + cq->cqe = cqe; + + cmd_drv = &cmd.drv_payload; + cmd_drv->buf_addr = (uintptr_t)cq->buf; + + ret = ibv_cmd_create_cq(context, cq->cqe, channel, comp_vector, + &cq->ibcq, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + + if (ret) { + verbs_err(verbs_get_ctx(context), "Failed to Create CQ\n"); + ctx->extern_alloc.free(cq->buf, ctx->extern_alloc.data); + errno = ret; + goto free_cq; + } + + return &cq->ibcq; + +free_cq: + free(cq); + return NULL; +} + +int mana_destroy_cq(struct ibv_cq *ibcq) +{ + int ret; + struct mana_cq *cq = container_of(ibcq, struct mana_cq, ibcq); + 
struct mana_context *ctx = to_mctx(ibcq->context); + + if (!ctx->extern_alloc.free) { + /* + * This version of driver doesn't support allocating buffers + * in rdma-core. It's not possible to reach the code here. + */ + verbs_err(verbs_get_ctx(ibcq->context), + "Invalid external context in destroy CQ\n"); + return -EINVAL; + } + + ret = ibv_cmd_destroy_cq(ibcq); + if (ret) { + verbs_err(verbs_get_ctx(ibcq->context), + "Failed to Destroy CQ\n"); + return ret; + } + + ctx->extern_alloc.free(cq->buf, ctx->extern_alloc.data); + free(cq); + + return ret; +} + +static int mana_poll_cq(struct ibv_cq *ibcq, int nwc, struct ibv_wc *wc) +{ + /* This version of driver supports RAW QP only. + * Polling CQ is done directly in the application. + */ + return EOPNOTSUPP; +} + +static int mana_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad) +{ + /* This version of driver supports RAW QP only. + * Posting WR is done directly in the application. + */ + return EOPNOTSUPP; +} + +static int mana_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad) +{ + /* This version of driver supports RAW QP only. + * Posting WR is done directly in the application. 
+ */ + return EOPNOTSUPP; +} + +static void mana_free_context(struct ibv_context *ibctx) +{ + struct mana_context *context = to_mctx(ibctx); + + munmap(context->db_page, DOORBELL_PAGE_SIZE); + verbs_uninit_context(&context->ibv_ctx); + free(context); +} + +static const struct verbs_context_ops mana_ctx_ops = { + .alloc_pd = mana_alloc_pd, + .alloc_parent_domain = mana_alloc_parent_domain, + .create_cq = mana_create_cq, + .create_qp = mana_create_qp, + .create_qp_ex = mana_create_qp_ex, + .create_rwq_ind_table = mana_create_rwq_ind_table, + .create_wq = mana_create_wq, + .dealloc_pd = mana_dealloc_pd, + .dereg_mr = mana_dereg_mr, + .destroy_cq = mana_destroy_cq, + .destroy_qp = mana_destroy_qp, + .destroy_rwq_ind_table = mana_destroy_rwq_ind_table, + .destroy_wq = mana_destroy_wq, + .free_context = mana_free_context, + .modify_wq = mana_modify_wq, + .modify_qp = mana_modify_qp, + .poll_cq = mana_poll_cq, + .post_recv = mana_post_recv, + .post_send = mana_post_send, + .query_device_ex = mana_query_device_ex, + .query_port = mana_query_port, + .reg_mr = mana_reg_mr, +}; + +static struct verbs_device *mana_device_alloc(struct verbs_sysfs_dev *sysfs_dev) +{ + struct mana_device *dev; + + dev = calloc(1, sizeof(*dev)); + if (!dev) + return NULL; + + return &dev->verbs_dev; +} + +static void mana_uninit_device(struct verbs_device *verbs_device) +{ + struct mana_device *dev = + container_of(verbs_device, struct mana_device, verbs_dev); + + free(dev); +} + +static struct verbs_context *mana_alloc_context(struct ibv_device *ibdev, + int cmd_fd, void *private_data) +{ + int ret; + struct mana_context *context; + struct mana_alloc_ucontext_resp resp; + struct ibv_get_context cmd; + + context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, + RDMA_DRIVER_MANA); + if (!context) + return NULL; + + ret = ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + if (ret) { + verbs_err(&context->ibv_ctx, "Failed to get ucontext\n"); 
+ errno = ret; + goto free_ctx; + } + + verbs_set_ops(&context->ibv_ctx, &mana_ctx_ops); + + context->db_page = mmap(NULL, DOORBELL_PAGE_SIZE, PROT_WRITE, + MAP_SHARED, context->ibv_ctx.context.cmd_fd, 0); + if (context->db_page == MAP_FAILED) { + verbs_err(&context->ibv_ctx, "Failed to map doorbell page\n"); + errno = ENOENT; + goto free_ctx; + } + verbs_debug(&context->ibv_ctx, "Mapped db_page=%p\n", context->db_page); + + return &context->ibv_ctx; + +free_ctx: + verbs_uninit_context(&context->ibv_ctx); + free(context); + return NULL; +} + +static const struct verbs_device_ops mana_dev_ops = { + .name = "mana", + .match_min_abi_version = MANA_IB_UVERBS_ABI_VERSION, + .match_max_abi_version = MANA_IB_UVERBS_ABI_VERSION, + .match_table = hca_table, + .alloc_device = mana_device_alloc, + .uninit_device = mana_uninit_device, + .alloc_context = mana_alloc_context, +}; + +PROVIDER_DRIVER(mana, mana_dev_ops); diff --git a/providers/mana/mana.h b/providers/mana/mana.h new file mode 100644 index 000000000..93251a4dc --- /dev/null +++ b/providers/mana/mana.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. 
+ */ + +#ifndef _MANA_H_ +#define _MANA_H_ + +#include "manadv.h" + +#define MAX_SEND_BUFFERS_PER_QUEUE 256 +#define COMP_ENTRY_SIZE 64 +#define MANA_IB_TOEPLITZ_HASH_KEY_SIZE_IN_BYTES 40 + +#define DMA_OOB_SIZE 8 + +#define INLINE_OOB_SMALL_SIZE 8 +#define INLINE_OOB_LARGE_SIZE 24 + +#define GDMA_WQE_ALIGNMENT_UNIT_SIZE 32 +#define MAX_TX_WQE_SIZE 512 +#define MAX_RX_WQE_SIZE 256 + +/* The size of a SGE in WQE */ +#define SGE_SIZE 16 + +#define DOORBELL_PAGE_SIZE 4096 +#define MANA_PAGE_SIZE 4096 + +static inline int align_next_power2(int size) +{ + int val = 1; + + while (val < size) + val <<= 1; + + return val; +} + +static inline int align_hw_size(int size) +{ + size = align(size, MANA_PAGE_SIZE); + return align_next_power2(size); +} + +static inline int get_wqe_size(int sge) +{ + int wqe_size = sge * SGE_SIZE + DMA_OOB_SIZE + INLINE_OOB_SMALL_SIZE; + + return align(wqe_size, GDMA_WQE_ALIGNMENT_UNIT_SIZE); +} + +struct mana_context { + struct verbs_context ibv_ctx; + struct manadv_ctx_allocators extern_alloc; + void *db_page; +}; + +struct mana_rwq_ind_table { + struct ibv_rwq_ind_table ib_ind_table; + + uint32_t ind_tbl_size; + struct ibv_wq **ind_tbl; +}; + +struct mana_qp { + struct verbs_qp ibqp; + + void *send_buf; + uint32_t send_buf_size; + + int send_wqe_count; + + uint32_t sqid; + uint32_t tx_vp_offset; +}; + +struct mana_wq { + struct ibv_wq ibwq; + + void *buf; + uint32_t buf_size; + + uint32_t wqe; + uint32_t sge; + + uint32_t wqid; +}; + +struct mana_cq { + struct ibv_cq ibcq; + uint32_t cqe; + void *buf; + + uint32_t cqid; +}; + +struct mana_device { + struct verbs_device verbs_dev; +}; + +struct mana_pd { + struct ibv_pd ibv_pd; + struct mana_pd *mprotection_domain; +}; + +struct mana_parent_domain { + struct mana_pd mpd; + void *pd_context; +}; + +struct mana_context *to_mctx(struct ibv_context *ibctx); + +int mana_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, 
size_t attr_size); + +int mana_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); + +struct ibv_pd *mana_alloc_pd(struct ibv_context *context); +struct ibv_pd * +mana_alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr); + +int mana_dealloc_pd(struct ibv_pd *pd); + +struct ibv_mr *mana_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access); + +int mana_dereg_mr(struct verbs_mr *vmr); + +struct ibv_cq *mana_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); + +int mana_destroy_cq(struct ibv_cq *cq); + +struct ibv_wq *mana_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *attr); + +int mana_destroy_wq(struct ibv_wq *wq); +int mana_modify_wq(struct ibv_wq *ibwq, struct ibv_wq_attr *attr); + +struct ibv_rwq_ind_table * +mana_create_rwq_ind_table(struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr); + +int mana_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table); + +struct ibv_qp *mana_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); + +struct ibv_qp *mana_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr); + +int mana_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); + +int mana_destroy_qp(struct ibv_qp *ibqp); + +#endif diff --git a/providers/mana/manadv.c b/providers/mana/manadv.c new file mode 100644 index 000000000..3fcd52335 --- /dev/null +++ b/providers/mana/manadv.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. 
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were lost when
+ * this patch was extracted; the names are a best-effort reconstruction and
+ * must be confirmed against the tree.
+ */
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <util/compiler.h>
+#include <util/util.h>
+
+#include <infiniband/driver.h>
+
+#include <infiniband/kern-abi.h>
+#include <rdma/mana-abi.h>
+#include <kernel-abi/mana-abi.h>
+
+#include "mana.h"
+
+/*
+ * Set a device-specific attribute on a MANA context.
+ *
+ * MANADV_CTX_ATTR_BUF_ALLOCATORS copies the struct manadv_ctx_allocators that
+ * attr points to into the context.
+ *
+ * Returns 0 on success, EINVAL when attr is NULL, or EOPNOTSUPP for an
+ * unknown attribute type.
+ */
+int manadv_set_context_attr(struct ibv_context *ibv_ctx,
+			    enum manadv_set_ctx_attr_type type, void *attr)
+{
+	struct mana_context *ctx = to_mctx(ibv_ctx);
+	int ret;
+
+	switch (type) {
+	case MANADV_CTX_ATTR_BUF_ALLOCATORS:
+		/* attr is copied by value below, so it must be a valid pointer */
+		if (!attr) {
+			ret = EINVAL;
+			break;
+		}
+		ctx->extern_alloc = *((struct manadv_ctx_allocators *)attr);
+		ret = 0;
+		break;
+	default:
+		verbs_err(verbs_get_ctx(ibv_ctx),
+			  "Unsupported context type %d\n", type);
+		ret = EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Export provider details of verbs objects.
+ *
+ * obj_type is an OR of enum manadv_obj_type bits. For each selected bit the
+ * matching 'in' object of obj is read and its 'out' structure is filled.
+ *
+ * Returns 0 on success. Returns EINVAL when obj_type has an unknown bit, or
+ * when obj or a selected in/out pointer is NULL; in that case nothing is
+ * written.
+ */
+int manadv_init_obj(struct manadv_obj *obj, uint64_t obj_type)
+{
+	if (obj_type & ~(MANADV_OBJ_QP | MANADV_OBJ_CQ | MANADV_OBJ_RWQ))
+		return EINVAL;
+
+	/* Nothing selected: obj is not touched at all */
+	if (!obj_type)
+		return 0;
+
+	if (!obj)
+		return EINVAL;
+
+	/* Validate every selected pair first so a failure has no side effects */
+	if ((obj_type & MANADV_OBJ_QP) && (!obj->qp.in || !obj->qp.out))
+		return EINVAL;
+	if ((obj_type & MANADV_OBJ_CQ) && (!obj->cq.in || !obj->cq.out))
+		return EINVAL;
+	if ((obj_type & MANADV_OBJ_RWQ) && (!obj->rwq.in || !obj->rwq.out))
+		return EINVAL;
+
+	if (obj_type & MANADV_OBJ_QP) {
+		struct ibv_qp *ibqp = obj->qp.in;
+		struct mana_qp *qp =
+			container_of(ibqp, struct mana_qp, ibqp.qp);
+		struct mana_context *ctx = to_mctx(ibqp->context);
+
+		obj->qp.out->sq_buf = qp->send_buf;
+		obj->qp.out->sq_count = qp->send_wqe_count;
+		obj->qp.out->sq_size = qp->send_buf_size;
+		obj->qp.out->sq_id = qp->sqid;
+		obj->qp.out->tx_vp_offset = qp->tx_vp_offset;
+		obj->qp.out->db_page = ctx->db_page;
+	}
+
+	if (obj_type & MANADV_OBJ_CQ) {
+		struct ibv_cq *ibcq = obj->cq.in;
+		struct mana_cq *cq = container_of(ibcq, struct mana_cq, ibcq);
+
+		obj->cq.out->buf = cq->buf;
+		obj->cq.out->count = cq->cqe;
+		obj->cq.out->cq_id = cq->cqid;
+	}
+
+	if (obj_type & MANADV_OBJ_RWQ) {
+		struct ibv_wq *ibwq = obj->rwq.in;
+		struct mana_wq *wq = container_of(ibwq, struct mana_wq, ibwq);
+		struct mana_context *ctx = to_mctx(ibwq->context);
+
+		obj->rwq.out->buf = wq->buf;
+		obj->rwq.out->count = wq->wqe;
+		obj->rwq.out->size = wq->buf_size;
+		obj->rwq.out->wq_id = wq->wqid;
+		obj->rwq.out->db_page = ctx->db_page;
+	}
+
+	return 0;
+}
diff --git a/providers/mana/manadv.h b/providers/mana/manadv.h
new file mode 100644
index 000000000..27c8fe939
--- /dev/null
+++ b/providers/mana/manadv.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_DV_H_
+#define _MANA_DV_H_
+
+/*
+ * NOTE(review): the header names on the four #include lines were lost when
+ * this patch was extracted. The names below are inferred from the types this
+ * header uses (size_t, uintN_t, struct ibv_*) - confirm against the tree.
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <infiniband/verbs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Selects which context attribute manadv_set_context_attr() updates. */
+enum manadv_set_ctx_attr_type {
+	/* Attribute type: struct manadv_ctx_allocators */
+	MANADV_CTX_ATTR_BUF_ALLOCATORS = 0,
+};
+
+/*
+ * Application-supplied buffer allocator. The provider calls alloc() and
+ * free() with 'data' passed back as priv_data.
+ */
+struct manadv_ctx_allocators {
+	void *(*alloc)(size_t size, void *priv_data);
+	void (*free)(void *ptr, void *priv_data);
+	void *data;
+};
+
+/*
+ * Set a context attribute. 'attr' points to the value whose type is given
+ * next to the corresponding manadv_set_ctx_attr_type enumerator.
+ * Returns 0 on success or a positive errno value.
+ */
+int manadv_set_context_attr(struct ibv_context *ibv_ctx,
+			    enum manadv_set_ctx_attr_type type, void *attr);
+
+/* Send-queue details reported by manadv_init_obj() for MANADV_OBJ_QP. */
+struct manadv_qp {
+	void *sq_buf;
+	uint32_t sq_count;
+	uint32_t sq_size;
+	uint32_t sq_id;
+	uint32_t tx_vp_offset;
+	void *db_page;
+};
+
+/* Completion-queue details reported for MANADV_OBJ_CQ. */
+struct manadv_cq {
+	void *buf;
+	uint32_t count;
+	uint32_t cq_id;
+};
+
+/* Receive work-queue details reported for MANADV_OBJ_RWQ. */
+struct manadv_rwq {
+	void *buf;
+	uint32_t count;
+	uint32_t size;
+	uint32_t wq_id;
+	void *db_page;
+};
+
+/*
+ * In/out pairs for manadv_init_obj(): 'in' is the verbs object to query,
+ * 'out' receives its details. Only the pairs selected by obj_type are used.
+ */
+struct manadv_obj {
+	struct {
+		struct ibv_qp *in;
+		struct manadv_qp *out;
+	} qp;
+
+	struct {
+		struct ibv_cq *in;
+		struct manadv_cq *out;
+	} cq;
+
+	struct {
+		struct ibv_wq *in;
+		struct manadv_rwq *out;
+	} rwq;
+};
+
+/* Bit flags for the obj_type argument of manadv_init_obj(). */
+enum manadv_obj_type {
+	MANADV_OBJ_QP = 1 << 0,
+	MANADV_OBJ_CQ = 1 << 1,
+	MANADV_OBJ_RWQ = 1 << 2,
+};
+
+/*
+ * Fill the 'out' structures selected by obj_type (an OR of
+ * enum manadv_obj_type values). Returns 0 on success, or EINVAL on invalid
+ * arguments such as an unknown bit in obj_type.
+ */
+int manadv_init_obj(struct manadv_obj *obj, uint64_t obj_type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MANA_DV_H_ */
diff --git a/providers/mana/qp.c b/providers/mana/qp.c
new file mode 100644
index 000000000..39c7ed57a
--- /dev/null
+++ b/providers/mana/qp.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were lost when
+ * this patch was extracted; the names are a best-effort reconstruction and
+ * must be confirmed against the tree.
+ */
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <util/compiler.h>
+#include <util/util.h>
+
+#include <infiniband/driver.h>
+
+#include <infiniband/kern-abi.h>
+#include <rdma/mana-abi.h>
+#include <kernel-abi/mana-abi.h>
+
+#include "mana.h"
+
+DECLARE_DRV_CMD(mana_create_qp, IB_USER_VERBS_CMD_CREATE_QP, mana_ib_create_qp,
+		mana_ib_create_qp_resp);
+
+DECLARE_DRV_CMD(mana_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP,
+		mana_ib_create_qp_rss, mana_ib_create_qp_rss_resp);
+
+/*
+ * Create a RAW packet QP on a parent domain.
+ *
+ * The send queue buffer comes from the allocator stored in the context's
+ * extern_alloc. Its address and size, together with the port number kept in
+ * the parent domain's pd_context, are passed to the kernel.
+ *
+ * Returns the new QP, or NULL with errno set.
+ */
+static struct ibv_qp *mana_create_qp_raw(struct ibv_pd *ibpd,
+					 struct ibv_qp_init_attr *attr)
+{
+	int err;
+	struct mana_cq *cq;
+	struct mana_qp *qp;
+	struct mana_pd *pd = container_of(ibpd, struct mana_pd, ibv_pd);
+	struct mana_parent_domain *mpd;
+	uint32_t port;
+
+	struct mana_create_qp qp_cmd = {};
+	struct mana_create_qp_resp qp_resp = {};
+	struct mana_ib_create_qp *qp_cmd_drv;
+	struct mana_ib_create_qp_resp *qp_resp_drv;
+
+	struct mana_context *ctx = to_mctx(ibpd->context);
+
+	/* This is a RAW QP, pd is a parent domain with port number */
+	if (!pd->mprotection_domain) {
+		verbs_err(verbs_get_ctx(ibpd->context),
+			  "Create RAW QP should use parent domain\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	mpd = container_of(pd, struct mana_parent_domain, mpd);
+	port = (uint32_t)(uintptr_t)mpd->pd_context;
+
+	cq = container_of(attr->send_cq, struct mana_cq, ibcq);
+
+	if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+		verbs_err(verbs_get_ctx(ibpd->context),
+			  "max_send_wr %u exceeds MAX_SEND_BUFFERS_PER_QUEUE\n",
+			  attr->cap.max_send_wr);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/*
+	 * The SGE count is bounded before it reaches get_wqe_size(), which
+	 * takes an int: a huge unsigned count would otherwise turn negative
+	 * or wrap and slip past the size check. Any count above
+	 * MAX_TX_WQE_SIZE / SGE_SIZE exceeds the limit anyway.
+	 */
+	if (attr->cap.max_send_sge > MAX_TX_WQE_SIZE / SGE_SIZE ||
+	    get_wqe_size(attr->cap.max_send_sge) > MAX_TX_WQE_SIZE) {
+		verbs_err(verbs_get_ctx(ibpd->context),
+			  "max_send_sge %u exceeding queue size limits\n",
+			  attr->cap.max_send_sge);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (!ctx->extern_alloc.alloc || !ctx->extern_alloc.free) {
+		verbs_err(verbs_get_ctx(ibpd->context),
+			  "RAW QP requires extern alloc for buffers\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	qp->send_buf_size =
+		attr->cap.max_send_wr * get_wqe_size(attr->cap.max_send_sge);
+	qp->send_buf_size = align_hw_size(qp->send_buf_size);
+
+	qp->send_buf = ctx->extern_alloc.alloc(qp->send_buf_size,
+					       ctx->extern_alloc.data);
+	if (!qp->send_buf) {
+		err = ENOMEM;
+		goto free_qp;
+	}
+
+	qp_cmd_drv = &qp_cmd.drv_payload;
+	qp_resp_drv = &qp_resp.drv_payload;
+
+	qp_cmd_drv->sq_buf_addr = (uintptr_t)qp->send_buf;
+	qp_cmd_drv->sq_buf_size = qp->send_buf_size;
+	qp_cmd_drv->port = port;
+
+	err = ibv_cmd_create_qp(ibpd, &qp->ibqp.qp, attr, &qp_cmd.ibv_cmd,
+				sizeof(qp_cmd), &qp_resp.ibv_resp,
+				sizeof(qp_resp));
+	if (err) {
+		verbs_err(verbs_get_ctx(ibpd->context), "Create QP failed\n");
+		goto free_send_buf;
+	}
+
+	qp->sqid = qp_resp_drv->sqid;
+	qp->tx_vp_offset = qp_resp_drv->tx_vp_offset;
+	qp->send_wqe_count = attr->cap.max_send_wr;
+
+	cq->cqid = qp_resp_drv->cqid;
+
+	return &qp->ibqp.qp;
+
+free_send_buf:
+	ctx->extern_alloc.free(qp->send_buf, ctx->extern_alloc.data);
+free_qp:
+	free(qp);
+	/* Set errno last so the cleanup calls above cannot overwrite it */
+	errno = err;
+	return NULL;
+}
+
+/* Create a QP; only IBV_QPT_RAW_PACKET is supported. */
+struct ibv_qp *mana_create_qp(struct ibv_pd *ibpd,
+			      struct ibv_qp_init_attr *attr)
+{
+	switch (attr->qp_type) {
+	case IBV_QPT_RAW_PACKET:
+		return mana_create_qp_raw(ibpd, attr);
+	default:
+		verbs_err(verbs_get_ctx(ibpd->context),
+			  "QP type %u is not supported\n", attr->qp_type);
+		errno = EINVAL;
+	}
+
+	return NULL;
+}
+
+/* Modifying a QP is not supported; always returns EOPNOTSUPP. */
+int mana_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Destroy a QP and hand its send buffer back to the external allocator.
+ *
+ * Returns 0 on success or a positive errno value. The missing-allocator case
+ * used to return -EINVAL, unlike every other error path in this provider.
+ */
+int mana_destroy_qp(struct ibv_qp *ibqp)
+{
+	int ret;
+	struct mana_qp *qp = container_of(ibqp, struct mana_qp, ibqp.qp);
+	struct mana_context *ctx = to_mctx(ibqp->context);
+
+	if (!ctx->extern_alloc.free) {
+		/*
+		 * This version of driver doesn't support allocating buffers
+		 * in rdma-core.
+		 */
+		verbs_err(verbs_get_ctx(ibqp->context),
+			  "Invalid context in Destroy QP\n");
+		return EINVAL;
+	}
+
+	ret = ibv_cmd_destroy_qp(ibqp);
+	if (ret) {
+		verbs_err(verbs_get_ctx(ibqp->context), "Destroy QP failed\n");
+		return ret;
+	}
+
+	ctx->extern_alloc.free(qp->send_buf, ctx->extern_alloc.data);
+	free(qp);
+
+	return 0;
+}
+
+/*
+ * Create a RAW packet QP with RX hash (RSS) configuration on a parent domain.
+ *
+ * On success the WQ and CQ ids returned by the kernel are stored in the WQs
+ * of the indirection table and in the CQs attached to them.
+ *
+ * Returns the new QP, or NULL with errno set.
+ */
+static struct ibv_qp *mana_create_qp_ex_raw(struct ibv_context *context,
+					    struct ibv_qp_init_attr_ex *attr)
+{
+	struct mana_create_qp_ex cmd = {};
+	struct mana_ib_create_qp_rss *cmd_drv;
+	struct mana_create_qp_ex_resp resp = {};
+	struct mana_ib_create_qp_rss_resp *cmd_resp;
+	struct mana_rwq_ind_table *ind_table = NULL;
+	struct mana_qp *qp;
+	struct mana_pd *pd = container_of(attr->pd, struct mana_pd, ibv_pd);
+	struct mana_parent_domain *mpd;
+	uint32_t port;
+	int ret;
+
+	cmd_drv = &cmd.drv_payload;
+	cmd_resp = &resp.drv_payload;
+
+	/* For a RAW QP, pd is a parent domain with port number */
+	if (!pd->mprotection_domain) {
+		verbs_err(verbs_get_ctx(context),
+			  "RAW QP needs to be on a parent domain\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (attr->rx_hash_conf.rx_hash_key_len !=
+	    MANA_IB_TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
+		verbs_err(verbs_get_ctx(context),
+			  "Invalid RX hash key length\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* The key is copied below, so the pointer must be valid */
+	if (!attr->rx_hash_conf.rx_hash_key) {
+		verbs_err(verbs_get_ctx(context), "RX hash key is missing\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (attr->rwq_ind_tbl) {
+		ind_table = container_of(attr->rwq_ind_tbl,
+					 struct mana_rwq_ind_table,
+					 ib_ind_table);
+		/*
+		 * The response carries a fixed-size entries[] array; a larger
+		 * table would make the loop below read past its end.
+		 */
+		if (ind_table->ind_tbl_size >
+		    sizeof(cmd_resp->entries) / sizeof(cmd_resp->entries[0])) {
+			verbs_err(verbs_get_ctx(context),
+				  "RWQ indirection table is too large\n");
+			errno = EINVAL;
+			return NULL;
+		}
+	}
+
+	mpd = container_of(pd, struct mana_parent_domain, mpd);
+	port = (uint32_t)(uintptr_t)mpd->pd_context;
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	cmd_drv->rx_hash_fields_mask = attr->rx_hash_conf.rx_hash_fields_mask;
+	cmd_drv->rx_hash_function = attr->rx_hash_conf.rx_hash_function;
+	cmd_drv->rx_hash_key_len = attr->rx_hash_conf.rx_hash_key_len;
+	if (cmd_drv->rx_hash_key_len)
+		memcpy(cmd_drv->rx_hash_key, attr->rx_hash_conf.rx_hash_key,
+		       cmd_drv->rx_hash_key_len);
+
+	cmd_drv->port = port;
+
+	ret = ibv_cmd_create_qp_ex2(context, &qp->ibqp, attr, &cmd.ibv_cmd,
+				    sizeof(cmd), &resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		verbs_err(verbs_get_ctx(context), "Create QP EX failed\n");
+		free(qp);
+		/* Set errno last so free() cannot overwrite it */
+		errno = ret;
+		return NULL;
+	}
+
+	if (ind_table) {
+		for (uint32_t i = 0; i < ind_table->ind_tbl_size; i++) {
+			struct mana_wq *wq = container_of(ind_table->ind_tbl[i],
+							  struct mana_wq, ibwq);
+			struct mana_cq *cq =
+				container_of(wq->ibwq.cq, struct mana_cq, ibcq);
+			wq->wqid = cmd_resp->entries[i].wqid;
+			cq->cqid = cmd_resp->entries[i].cqid;
+		}
+	}
+
+	return &qp->ibqp.qp;
+}
+
+/* Extended QP creation; only IBV_QPT_RAW_PACKET is supported. */
+struct ibv_qp *mana_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr)
+{
+	switch (attr->qp_type) {
+	case IBV_QPT_RAW_PACKET:
+		return mana_create_qp_ex_raw(context, attr);
+	default:
+		verbs_err(verbs_get_ctx(context),
+			  "QP type %u is not supported\n", attr->qp_type);
+		errno = EINVAL;
+	}
+
+	return NULL;
+}
diff --git a/providers/mana/wq.c b/providers/mana/wq.c
new file mode 100644
index 000000000..b57576865
--- /dev/null
+++ b/providers/mana/wq.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were lost when
+ * this patch was extracted; the names are a best-effort reconstruction and
+ * must be confirmed against the tree.
+ */
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <util/compiler.h>
+#include <util/util.h>
+
+#include <infiniband/driver.h>
+
+#include <infiniband/kern-abi.h>
+#include <rdma/mana-abi.h>
+#include <kernel-abi/mana-abi.h>
+
+#include "mana.h"
+
+DECLARE_DRV_CMD(mana_create_wq, IB_USER_VERBS_EX_CMD_CREATE_WQ,
+		mana_ib_create_wq, empty);
+
+DECLARE_DRV_CMD(mana_create_rwq_ind_table,
+		IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, empty, empty);
+
+/* Modifying a WQ is not supported; always returns EOPNOTSUPP. */
+int mana_modify_wq(struct ibv_wq *ibwq, struct ibv_wq_attr *attr)
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Create a work queue whose buffer comes from the allocator stored in the
+ * context's extern_alloc. The buffer address and size are passed to the
+ * kernel.
+ *
+ * Returns the new WQ, or NULL with errno set.
+ */
+struct ibv_wq *mana_create_wq(struct ibv_context *context,
+			      struct ibv_wq_init_attr *attr)
+{
+	int err;
+	struct mana_context *ctx = to_mctx(context);
+	struct mana_wq *wq;
+	struct mana_create_wq wq_cmd = {};
+	struct mana_create_wq_resp wq_resp = {};
+	struct mana_ib_create_wq *wq_cmd_drv;
+
+	if (attr->max_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+		verbs_err(verbs_get_ctx(context),
+			  "max_wr %u exceeds MAX_SEND_BUFFERS_PER_QUEUE\n",
+			  attr->max_wr);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/*
+	 * The SGE count is bounded before it reaches get_wqe_size(), which
+	 * takes an int: a huge unsigned count would otherwise turn negative
+	 * or wrap and slip past the size check. Any count above
+	 * MAX_RX_WQE_SIZE / SGE_SIZE exceeds the limit anyway.
+	 */
+	if (attr->max_sge > MAX_RX_WQE_SIZE / SGE_SIZE ||
+	    get_wqe_size(attr->max_sge) > MAX_RX_WQE_SIZE) {
+		verbs_err(verbs_get_ctx(context),
+			  "max_sge %u exceeding WQE size limit\n",
+			  attr->max_sge);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (!ctx->extern_alloc.alloc || !ctx->extern_alloc.free) {
+		verbs_err(verbs_get_ctx(context),
+			  "WQ buffer needs to be externally allocated\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	wq = calloc(1, sizeof(*wq));
+	if (!wq) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	wq->sge = attr->max_sge;
+	wq->buf_size = attr->max_wr * get_wqe_size(attr->max_sge);
+	wq->buf_size = align_hw_size(wq->buf_size);
+	wq->buf = ctx->extern_alloc.alloc(wq->buf_size, ctx->extern_alloc.data);
+
+	if (!wq->buf) {
+		err = ENOMEM;
+		goto free_wq;
+	}
+
+	wq->wqe = attr->max_wr;
+
+	wq_cmd_drv = &wq_cmd.drv_payload;
+	wq_cmd_drv->wq_buf_addr = (uintptr_t)wq->buf;
+	wq_cmd_drv->wq_buf_size = wq->buf_size;
+
+	err = ibv_cmd_create_wq(context, attr, &wq->ibwq, &wq_cmd.ibv_cmd,
+				sizeof(wq_cmd), &wq_resp.ibv_resp,
+				sizeof(wq_resp));
+
+	if (err) {
+		verbs_err(verbs_get_ctx(context), "Failed to Create WQ\n");
+		goto free_buf;
+	}
+
+	return &wq->ibwq;
+
+free_buf:
+	ctx->extern_alloc.free(wq->buf, ctx->extern_alloc.data);
+free_wq:
+	free(wq);
+	/* Set errno last so the cleanup calls above cannot overwrite it */
+	errno = err;
+	return NULL;
+}
+
+/*
+ * Destroy a WQ and hand its buffer back to the external allocator.
+ * Returns 0 on success or a positive errno value.
+ */
+int mana_destroy_wq(struct ibv_wq *ibwq)
+{
+	struct mana_wq *wq = container_of(ibwq, struct mana_wq, ibwq);
+	struct mana_context *ctx = to_mctx(ibwq->context);
+	int ret;
+
+	if (!ctx->extern_alloc.free) {
+		verbs_err(verbs_get_ctx(ibwq->context),
+			  "WQ needs external alloc context\n");
+		return EINVAL;
+	}
+
+	ret = ibv_cmd_destroy_wq(ibwq);
+	if (ret) {
+		verbs_err(verbs_get_ctx(ibwq->context),
+			  "Failed to destroy WQ\n");
+		return ret;
+	}
+
+	ctx->extern_alloc.free(wq->buf, ctx->extern_alloc.data);
+	free(wq);
+
+	return 0;
+}
+
+/*
+ * Create a receive WQ indirection table and keep a private copy of the WQ
+ * pointers.
+ *
+ * The private copy is allocated before the kernel object is created, so an
+ * allocation failure can no longer leave a kernel indirection table behind
+ * with no handle to destroy it.
+ *
+ * Returns the new table, or NULL with errno set.
+ */
+struct ibv_rwq_ind_table *
+mana_create_rwq_ind_table(struct ibv_context *context,
+			  struct ibv_rwq_ind_table_init_attr *init_attr)
+{
+	int err;
+	struct mana_rwq_ind_table *ind_table;
+	struct mana_create_rwq_ind_table_resp resp = {};
+	uint32_t i;
+
+	/* ind_tbl_size is 32 bits wide; a larger shift count is undefined */
+	if (init_attr->log_ind_tbl_size >= 32) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	ind_table = calloc(1, sizeof(*ind_table));
+	if (!ind_table) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	ind_table->ind_tbl_size = 1U << init_attr->log_ind_tbl_size;
+	ind_table->ind_tbl =
+		calloc(ind_table->ind_tbl_size, sizeof(struct ibv_wq *));
+	if (!ind_table->ind_tbl) {
+		err = ENOMEM;
+		goto free_ind_table;
+	}
+	for (i = 0; i < ind_table->ind_tbl_size; i++)
+		ind_table->ind_tbl[i] = init_attr->ind_tbl[i];
+
+	err = ibv_cmd_create_rwq_ind_table(context, init_attr,
+					   &ind_table->ib_ind_table,
+					   &resp.ibv_resp, sizeof(resp));
+	if (err) {
+		verbs_err(verbs_get_ctx(context),
+			  "Failed to create RWQ IND table\n");
+		goto free_ind_tbl;
+	}
+
+	return &ind_table->ib_ind_table;
+
+free_ind_tbl:
+	free(ind_table->ind_tbl);
+free_ind_table:
+	free(ind_table);
+	/* Set errno last so free() cannot overwrite it */
+	errno = err;
+	return NULL;
+}
+
+/*
+ * Destroy an indirection table and free the private WQ pointer copy.
+ * Returns 0 on success or the error from the destroy command; on error
+ * nothing is freed.
+ */
+int mana_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
+{
+	struct mana_rwq_ind_table *ind_table = container_of(
+		rwq_ind_table, struct mana_rwq_ind_table, ib_ind_table);
+	int ret;
+
+	ret = ibv_cmd_destroy_rwq_ind_table(&ind_table->ib_ind_table);
+	if (ret) {
+		verbs_err(verbs_get_ctx(rwq_ind_table->context),
+			  "Failed to destroy RWQ IND table\n");
+		return ret;
+	}
+
+	free(ind_table->ind_tbl);
+	free(ind_table);
+
+	return 0;
+}
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index 3eba45afe..bf9ad82fc 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -158,6 +158,8 @@ Provides: libipathverbs = %{version}-%{release}
 Obsoletes: libipathverbs < %{version}-%{release}
 Provides: libirdma = %{version}-%{release}
 Obsoletes: libirdma < %{version}-%{release}
+Provides: libmana = %{version}-%{release}
+Obsoletes: libmana < %{version}-%{release}
 Provides: libmlx4 = %{version}-%{release}
 Obsoletes: libmlx4 < %{version}-%{release}
 Provides: libmlx5 = %{version}-%{release}
@@ -185,6 +187,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
 - libhns: HiSilicon Hip06 SoC
 - libipathverbs: QLogic InfiniPath HCA
 - libirdma: Intel Ethernet Connection RDMA
+- libmana: Microsoft Azure Network Adapter
 - libmlx4: Mellanox ConnectX-3 InfiniBand HCA
 - libmlx5: Mellanox Connect-IB/X-4+ InfiniBand HCA
 - libmthca: Mellanox InfiniBand HCA
@@ -446,9 +449,11 @@ fi
 %{_mandir}/man3/umad*
 %{_mandir}/man3/*_to_ibv_rate.*
 %{_mandir}/man7/rdma_cm.*
+%{_mandir}/man3/manadv*
 %{_mandir}/man3/mlx5dv*
 %{_mandir}/man3/mlx4dv*
 %{_mandir}/man7/efadv*
+%{_mandir}/man7/manadv*
 %{_mandir}/man7/mlx5dv*
 %{_mandir}/man7/mlx4dv*
 %{_mandir}/man3/ibnd_*
@@ -568,6 +573,7 @@ fi
 %{_libdir}/libefa.so.*
 %{_libdir}/libibverbs*.so.*
 %{_libdir}/libibverbs/*.so
+%{_libdir}/libmana.so.*
 %{_libdir}/libmlx5.so.*
 %{_libdir}/libmlx4.so.*
 %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver
diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
index e1f28aab0..661a4cfbf 100644
--- a/suse/rdma-core.spec
+++ b/suse/rdma-core.spec
@@ -38,6 +38,7 @@ Group: Productivity/Networking/Other
 %define verbs_so_major 1
 %define rdmacm_so_major 1
 %define umad_so_major 3
+%define mana_so_major 1
 %define mlx4_so_major 1
 %define mlx5_so_major 1
 %define
ibnetdisc_major 5 @@ -47,6 +48,7 @@ Group: Productivity/Networking/Other %define verbs_lname libibverbs%{verbs_so_major} %define rdmacm_lname librdmacm%{rdmacm_so_major} %define umad_lname libibumad%{umad_so_major} +%define mana_lname libmana-%{mana_so_major} %define mlx4_lname libmlx4-%{mlx4_so_major} %define mlx5_lname libmlx5-%{mlx5_so_major} @@ -157,6 +159,7 @@ Requires: %{umad_lname} = %{version}-%{release} Requires: %{verbs_lname} = %{version}-%{release} %if 0%{?dma_coherent} Requires: %{efa_lname} = %{version}-%{release} +Requires: %{mana_lname} = %{version}-%{release} Requires: %{mlx4_lname} = %{version}-%{release} Requires: %{mlx5_lname} = %{version}-%{release} %endif @@ -198,6 +201,7 @@ Obsoletes: libcxgb4-rdmav2 < %{version}-%{release} Obsoletes: libefa-rdmav2 < %{version}-%{release} Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release} Obsoletes: libipathverbs-rdmav2 < %{version}-%{release} +Obsoletes: libmana-rdmav2 < %{version}-%{release} Obsoletes: libmlx4-rdmav2 < %{version}-%{release} Obsoletes: libmlx5-rdmav2 < %{version}-%{release} Obsoletes: libmthca-rdmav2 < %{version}-%{release} @@ -205,6 +209,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release} Obsoletes: librxe-rdmav2 < %{version}-%{release} %if 0%{?dma_coherent} Requires: %{efa_lname} = %{version}-%{release} +Requires: %{mana_lname} = %{version}-%{release} Requires: %{mlx4_lname} = %{version}-%{release} Requires: %{mlx5_lname} = %{version}-%{release} %endif @@ -226,6 +231,7 @@ Device-specific plug-in ibverbs userspace drivers are included: - libhns: HiSilicon Hip06 SoC - libipathverbs: QLogic InfiniPath HCA - libirdma: Intel Ethernet Connection RDMA +- libmana: Microsoft Azure Network Adapter - libmlx4: Mellanox ConnectX-3 InfiniBand HCA - libmlx5: Mellanox Connect-IB/X-4+ InfiniBand HCA - libmthca: Mellanox InfiniBand HCA @@ -250,6 +256,13 @@ Group: System/Libraries %description -n %efa_lname This package contains the efa runtime library. 
+%package -n %mana_lname +Summary: MANA runtime library +Group: System/Libraries + +%description -n %mana_lname +This package contains the mana runtime library. + %package -n %mlx4_lname Summary: MLX4 runtime library Group: System/Libraries @@ -493,6 +506,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh %post -n %efa_lname -p /sbin/ldconfig %postun -n %efa_lname -p /sbin/ldconfig +%post -n %mana_lname -p /sbin/ldconfig +%postun -n %mana_lname -p /sbin/ldconfig + %post -n %mlx4_lname -p /sbin/ldconfig %postun -n %mlx4_lname -p /sbin/ldconfig @@ -652,9 +668,11 @@ done %{_mandir}/man7/rdma_cm.* %if 0%{?dma_coherent} %{_mandir}/man3/efadv* +%{_mandir}/man3/manadv* %{_mandir}/man3/mlx5dv* %{_mandir}/man3/mlx4dv* %{_mandir}/man7/efadv* +%{_mandir}/man7/manadv* %{_mandir}/man7/mlx5dv* %{_mandir}/man7/mlx4dv* %endif @@ -687,6 +705,10 @@ done %defattr(-,root,root) %{_libdir}/libefa*.so.* +%files -n %mana_lname +%defattr(-,root,root) +%{_libdir}/libmana*.so.* + %files -n %mlx4_lname %defattr(-,root,root) %{_libdir}/libmlx4*.so.*