diff --git a/CMakeLists.txt b/CMakeLists.txt index b3b3ff1c8..e5d0c7746 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -328,6 +328,7 @@ add_subdirectory(libibcm) add_subdirectory(providers/cxgb3) add_subdirectory(providers/cxgb4) add_subdirectory(providers/hfi1verbs) +add_subdirectory(providers/hns) add_subdirectory(providers/i40iw) add_subdirectory(providers/ipathverbs) add_subdirectory(providers/mlx4) diff --git a/MAINTAINERS b/MAINTAINERS index d83de10d2..bc6eb5092 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -57,6 +57,12 @@ S: Supported L: intel-opa@lists.01.org (moderated for non-subscribers) F: providers/hfi1verbs/ +HNS USERSPACE PROVIDER (for hns-roce.ko) +M: Lijun Ou +M: Wei Hu(Xavier) +S: Supported +F: providers/hns/ + I40IW USERSPACE PROVIDER (for i40iw.ko) M: Tatyana Nikolova S: Supported diff --git a/README.md b/README.md index 3a1304269..e3bc33f06 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ is included: - iw_cxgb3.ko - iw_cxgb4.ko - hfi1.ko + - hns-roce.ko - i40iw.ko - ib_qib.ko - mlx4_ib.ko diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt new file mode 100644 index 000000000..19a793e39 --- /dev/null +++ b/providers/hns/CMakeLists.txt @@ -0,0 +1,6 @@ +rdma_provider(hns + hns_roce_u.c + hns_roce_u_buf.c + hns_roce_u_hw_v1.c + hns_roce_u_verbs.c +) diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c new file mode 100644 index 000000000..281f9f408 --- /dev/null +++ b/providers/hns/hns_roce_u.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "hns_roce_u.h"
+#include "hns_roce_u_abi.h"
+
+#define HID_LEN			15
+#define DEV_MATCH_LEN		128
+
+/*
+ * ACPI match table, compared against the "device/modalias" sysfs attribute.
+ * The list ends with an all-zero entry (data == NULL).
+ */
+static const struct {
+	char	 hid[HID_LEN];
+	void	*data;
+	int	 version;
+} acpi_table[] = {
+	{"acpi:HISI00D1:", &hns_roce_u_hw_v1, HNS_ROCE_HW_VER1},
+	{},
+};
+
+/*
+ * Device-tree match table, compared against "device/of_node/compatible".
+ * The list ends with an all-zero entry (data == NULL).
+ */
+static const struct {
+	char	 compatible[DEV_MATCH_LEN];
+	void	*data;
+	int	 version;
+} dt_table[] = {
+	{"hisilicon,hns-roce-v1", &hns_roce_u_hw_v1, HNS_ROCE_HW_VER1},
+	{},
+};
+
+/*
+ * Allocate and initialise the per-process context for @ibdev.
+ *
+ * Issues the get-context command on @cmd_fd, sizes the QP lookup table from
+ * the reported qp_tab_size, maps the UAR page (offset 0) and, for
+ * HNS_ROCE_HW_VER1, the CQ tail-pointer buffer (offset HNS_ROCE_TPTR_OFFSET),
+ * fills in the verbs ops and caches a few device limits.
+ *
+ * Returns the embedded ibv_context, or NULL on failure with every mapping
+ * made so far released.
+ *
+ * NOTE(review): qp_tab_size is used unchecked; a value below
+ * HNS_ROCE_QP_TABLE_SIZE would make qp_table_shift negative - confirm the
+ * kernel never reports that.
+ */
+static struct ibv_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+						  int cmd_fd)
+{
+	int i;
+	struct ibv_get_context cmd;
+	struct ibv_device_attr dev_attrs;
+	struct hns_roce_context *context;
+	struct hns_roce_alloc_ucontext_resp resp;
+	struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
+
+	context = calloc(1, sizeof(*context));
+	if (!context)
+		return NULL;
+
+	context->ibv_ctx.cmd_fd = cmd_fd;
+	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp)))
+		goto err_free;
+
+	/* Two-level QP table: top level has HNS_ROCE_QP_TABLE_SIZE slots */
+	context->num_qps = resp.qp_tab_size;
+	context->qp_table_shift = ffs(context->num_qps) - 1 -
+				  HNS_ROCE_QP_TABLE_BITS;
+	context->qp_table_mask = (1 << context->qp_table_shift) - 1;
+
+	pthread_mutex_init(&context->qp_table_mutex, NULL);
+	for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
+		context->qp_table[i].refcnt = 0;
+
+	context->uar = mmap(NULL, hr_dev->page_size,
+			    PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, 0);
+	if (context->uar == MAP_FAILED) {
+		fprintf(stderr, PFX "Warning: failed to mmap() uar page.\n");
+		goto err_free;
+	}
+
+	if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
+		/*
+		 * when vma->vm_pgoff is 1, the cq_tptr_base includes 64K CQ,
+		 * a pointer of CQ need 2B size
+		 */
+		context->cq_tptr_base = mmap(NULL, HNS_ROCE_CQ_DB_BUF_SIZE,
+					     PROT_READ | PROT_WRITE, MAP_SHARED,
+					     cmd_fd, HNS_ROCE_TPTR_OFFSET);
+		if (context->cq_tptr_base == MAP_FAILED) {
+			fprintf(stderr,
+				PFX "Warning: Failed to mmap cq_tptr page.\n");
+			goto db_free;
+		}
+	}
+
+	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
+
+	context->ibv_ctx.ops.query_device = hns_roce_u_query_device;
+	context->ibv_ctx.ops.query_port = hns_roce_u_query_port;
+	context->ibv_ctx.ops.alloc_pd = hns_roce_u_alloc_pd;
+	context->ibv_ctx.ops.dealloc_pd = hns_roce_u_free_pd;
+	context->ibv_ctx.ops.reg_mr = hns_roce_u_reg_mr;
+	context->ibv_ctx.ops.dereg_mr = hns_roce_u_dereg_mr;
+
+	/* Data-path entry points come from the hardware-specific table */
+	context->ibv_ctx.ops.create_cq = hns_roce_u_create_cq;
+	context->ibv_ctx.ops.poll_cq = hr_dev->u_hw->poll_cq;
+	context->ibv_ctx.ops.req_notify_cq = hr_dev->u_hw->arm_cq;
+	context->ibv_ctx.ops.cq_event = hns_roce_u_cq_event;
+	context->ibv_ctx.ops.destroy_cq = hns_roce_u_destroy_cq;
+
+	context->ibv_ctx.ops.create_qp = hns_roce_u_create_qp;
+	context->ibv_ctx.ops.query_qp = hns_roce_u_query_qp;
+	context->ibv_ctx.ops.modify_qp = hr_dev->u_hw->modify_qp;
+	context->ibv_ctx.ops.destroy_qp = hr_dev->u_hw->destroy_qp;
+	context->ibv_ctx.ops.post_send = hr_dev->u_hw->post_send;
+	context->ibv_ctx.ops.post_recv = hr_dev->u_hw->post_recv;
+
+	if (hns_roce_u_query_device(&context->ibv_ctx, &dev_attrs))
+		goto tptr_free;
+
+	context->max_qp_wr = dev_attrs.max_qp_wr;
+	context->max_sge = dev_attrs.max_sge;
+	context->max_cqe = dev_attrs.max_cqe;
+
+	return &context->ibv_ctx;
+
+tptr_free:
+	if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
+		if (munmap(context->cq_tptr_base, HNS_ROCE_CQ_DB_BUF_SIZE))
+			fprintf(stderr, PFX "Warning: Munmap tptr failed.\n");
+		context->cq_tptr_base = NULL;
+	}
+
+db_free:
+	munmap(context->uar, hr_dev->page_size);
+	context->uar = NULL;
+
+err_free:
+	free(context);
+	return NULL;
+}
+
+/*
+ * Undo hns_roce_alloc_context(): unmap the UAR page and, for
+ * HNS_ROCE_HW_VER1, the CQ tail-pointer buffer, then free the context.
+ */
+static void hns_roce_free_context(struct ibv_context *ibctx)
+{
+	struct hns_roce_context *context = to_hr_ctx(ibctx);
+
+	munmap(context->uar, to_hr_dev(ibctx->device)->page_size);
+	if (to_hr_dev(ibctx->device)->hw_version == HNS_ROCE_HW_VER1)
+		munmap(context->cq_tptr_base, HNS_ROCE_CQ_DB_BUF_SIZE);
+
+	context->uar = NULL;
+
+	free(context);
+	context = NULL;
+}
+
+static struct ibv_device_ops hns_roce_dev_ops = {
+	.alloc_context = hns_roce_alloc_context,
+	.free_context = hns_roce_free_context
+};
+
+/*
+ * Driver probe callback registered with ibv_register_driver().
+ *
+ * Reads the device's modalias (ACPI) and then its of_node/compatible string
+ * (device tree) below @uverbs_sys_path and looks for an exact match in the
+ * tables above.  Only real entries are compared; the NULL-data terminator
+ * ends each scan.
+ *
+ * Returns a newly allocated, zero-initialised device on a match, or NULL if
+ * the device is not handled by this provider or allocation fails.
+ */
+static struct ibv_device *hns_roce_driver_init(const char *uverbs_sys_path,
+					       int abi_version)
+{
+	struct hns_roce_device *dev;
+	char value[128];
+	int i;
+	void *u_hw = NULL;
+	int hw_version = 0;
+
+	if (ibv_read_sysfs_file(uverbs_sys_path, "device/modalias",
+				value, sizeof(value)) > 0)
+		for (i = 0; acpi_table[i].data; ++i)
+			if (!strcmp(value, acpi_table[i].hid)) {
+				u_hw = acpi_table[i].data;
+				hw_version = acpi_table[i].version;
+				goto found;
+			}
+
+	if (ibv_read_sysfs_file(uverbs_sys_path, "device/of_node/compatible",
+				value, sizeof(value)) > 0)
+		for (i = 0; dt_table[i].data; ++i)
+			if (!strcmp(value, dt_table[i].compatible)) {
+				u_hw = dt_table[i].data;
+				hw_version = dt_table[i].version;
+				goto found;
+			}
+
+	return NULL;
+
+found:
+	/* calloc so that no field of the device is left indeterminate */
+	dev = calloc(1, sizeof(*dev));
+	if (!dev) {
+		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
+			uverbs_sys_path);
+		return NULL;
+	}
+
+	dev->ibv_dev.ops = hns_roce_dev_ops;
+	dev->u_hw = (struct hns_roce_u_hw *)u_hw;
+	dev->hw_version = hw_version;
+	dev->page_size = sysconf(_SC_PAGESIZE);
+	return &dev->ibv_dev;
+}
+
+/* Runs at library load time and registers the probe callback as "hns" */
+static __attribute__((constructor)) void hns_roce_register_driver(void)
+{
+	ibv_register_driver("hns", hns_roce_driver_init);
+}
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
new file mode 100644
index 000000000..4a6ed8ea8
--- /dev/null
+++ b/providers/hns/hns_roce_u.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef _HNS_ROCE_U_H
+#define _HNS_ROCE_U_H
+
+#include
+
+#include
+#include
+#include
+#include
+
+#define HNS_ROCE_CQE_ENTRY_SIZE		0x20	/* bytes per CQE in the CQ buffer */
+
+#define HNS_ROCE_MAX_CQ_NUM		0x10000
+#define HNS_ROCE_MIN_CQE_NUM		0x40
+#define HNS_ROCE_MIN_WQE_NUM		0x20
+#define HNS_ROCE_CQ_DB_BUF_SIZE		((HNS_ROCE_MAX_CQ_NUM >> 11) << 12)	/* = 0x20000 bytes, i.e. 2 bytes per CQ */
+#define HNS_ROCE_TPTR_OFFSET		0x1000	/* mmap offset of the CQ tail-pointer buffer */
+#define HNS_ROCE_HW_VER1		('h' << 24 | 'i' << 16 | '0' << 8 | '6')	/* the characters "hi06" packed into an int */
+
+#define PFX				"hns: "	/* prefix for diagnostics printed by the provider */
+
+#ifndef likely
+#define likely(x)	__builtin_expect(!!(x), 1)
+#endif
+/* Extract the bits of origin selected by mask, shifted down by shift */
+#define roce_get_field(origin, mask, shift) \
+	(((origin) & (mask)) >> (shift))
+/* Extract the single bit at position shift */
+#define roce_get_bit(origin, shift) \
+	roce_get_field((origin), (1ul << (shift)), (shift))
+/* Replace the bits of origin selected by mask with val (origin is modified in place) */
+#define roce_set_field(origin, mask, shift, val) \
+	do { \
+		(origin) &= (~(mask)); \
+		(origin) |= (((unsigned int)(val) << (shift)) & (mask)); \
+	} while (0)
+/* Set or clear the single bit at position shift according to val */
+#define roce_set_bit(origin, shift, val) \
+	roce_set_field((origin), (1ul << (shift)), (shift), (val))
+/* Dimensions of the first level of the per-context QP lookup table */
+enum {
+	HNS_ROCE_QP_TABLE_BITS		= 8,
+	HNS_ROCE_QP_TABLE_SIZE		= 1 << HNS_ROCE_QP_TABLE_BITS,
+};
+
+/* operation type list */
+enum {
+	/* rq&srq operation */
+	HNS_ROCE_OPCODE_SEND_DATA_RECEIVE	= 0x06,
+	HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE	= 0x07,
+};
+/* Provider device: wraps ibv_device (recovered via to_hr_dev()) */
+struct hns_roce_device {
+	struct ibv_device		ibv_dev;
+	int				page_size;	/* set from sysconf(_SC_PAGESIZE) at probe time */
+	struct hns_roce_u_hw		*u_hw;		/* hardware-specific data-path ops */
+	int				hw_version;	/* e.g. HNS_ROCE_HW_VER1 */
+};
+/* A buffer obtained with hns_roce_alloc_buf() */
+struct hns_roce_buf {
+	void				*buf;
+	unsigned int			length;		/* mapped length in bytes */
+};
+/* Per-process context: wraps ibv_context (recovered via to_hr_ctx()) */
+struct hns_roce_context {
+	struct ibv_context		ibv_ctx;
+	void				*uar;		/* page mapped from cmd_fd at offset 0; doorbells are written here */
+	pthread_spinlock_t		uar_lock;
+
+	void				*cq_tptr_base;	/* mapping at HNS_ROCE_TPTR_OFFSET, only made for HNS_ROCE_HW_VER1 */
+
+	struct {
+		struct hns_roce_qp	**table;
+		int			refcnt;		/* zero means this slot has no table */
+	} qp_table[HNS_ROCE_QP_TABLE_SIZE];
+
+	pthread_mutex_t			qp_table_mutex;
+
+	int				num_qps;	/* qp_tab_size from the get-context response */
+	int				qp_table_shift;
+	int				qp_table_mask;
+	unsigned int			max_qp_wr;	/* cached from query_device */
+	unsigned int			max_sge;	/* cached from query_device */
+	int				max_cqe;	/* cached from query_device */
+};
+/* Protection domain: wraps ibv_pd (recovered via to_hr_pd()) */
+struct hns_roce_pd {
+	struct ibv_pd			ibv_pd;
+	unsigned int			pdn;
+};
+/* Completion queue: wraps ibv_cq (recovered via to_hr_cq()) */
+struct hns_roce_cq {
+	struct ibv_cq			ibv_cq;
+	struct hns_roce_buf		buf;		/* CQE ring, HNS_ROCE_CQE_ENTRY_SIZE bytes per entry */
+	pthread_spinlock_t		lock;
+	unsigned int			cqn;
+	unsigned int			cq_depth;
+	unsigned int			cons_index;	/* free-running consumer index */
+	unsigned int			*set_ci_db;
+	unsigned int			*arm_db;
+	int				arm_sn;
+};
+/* Shared receive queue: wraps ibv_srq (recovered via to_hr_srq()) */
+struct hns_roce_srq {
+	struct ibv_srq			ibv_srq;
+	struct hns_roce_buf		buf;
+	pthread_spinlock_t		lock;
+	unsigned long			*wrid;
+	unsigned int			srqn;
+	int				max;
+	unsigned int			max_gs;
+	int				wqe_shift;
+	int				head;
+	int				tail;
+	unsigned int			*db;
+	unsigned short			counter;
+};
+/* One work queue (send or receive side of a QP) */
+struct hns_roce_wq {
+	unsigned long			*wrid;		/* wr_id per slot, indexed modulo wqe_cnt */
+	pthread_spinlock_t		lock;
+	unsigned int			wqe_cnt;	/* used as a mask (wqe_cnt - 1), so presumably a power of two */
+	int				max_post;
+	unsigned int			head;		/* free-running producer counter */
+	unsigned int			tail;		/* free-running consumer counter */
+	unsigned int			max_gs;		/* upper bound checked against wr->num_sge */
+	int				wqe_shift;	/* log2 of the WQE stride in bytes */
+	int				offset;		/* byte offset of this queue inside the QP buffer */
+};
+/* Queue pair: wraps ibv_qp (recovered via to_hr_qp()) */
+struct hns_roce_qp {
+	struct ibv_qp			ibv_qp;
+	struct hns_roce_buf		buf;		/* holds both the SQ and the RQ rings */
+	int				max_inline_data;
+	int				buf_size;
+	unsigned int			sq_signal_bits;
+	struct hns_roce_wq		sq;
+	struct hns_roce_wq		rq;
+	int				port_num;
+	int				sl;
+};
+/* Hardware-specific operations plugged into ibv_context.ops at context creation */
+struct hns_roce_u_hw {
+	int (*poll_cq)(struct ibv_cq *ibvcq, int ne, struct ibv_wc *wc);
+	int (*arm_cq)(struct ibv_cq *ibvcq, int solicited);
+	int (*post_send)(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
+			 struct ibv_send_wr **bad_wr);
+	int (*post_recv)(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
+			 struct ibv_recv_wr **bad_wr);
+	int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			 int attr_mask);
+	int (*destroy_qp)(struct ibv_qp *ibqp);
+};
+/* Round val up to a multiple of align; the mask form is only correct when align is a power of two */
+static inline unsigned long align(unsigned long val, unsigned long align)
+{
+	return (val + align - 1) & ~(align - 1);
+}
+/* The to_hr_*() helpers below map a verbs object to the provider struct that embeds it */
+static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev)
+{
+	return container_of(ibv_dev, struct hns_roce_device, ibv_dev);
+}
+
+static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
+{
+	return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx);
+}
+
+static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
+{
+	return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
+}
+
+static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
+{
+	return container_of(ibv_cq, struct hns_roce_cq, ibv_cq);
+}
+
+static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
+{
+	return container_of(ibv_srq, struct hns_roce_srq, ibv_srq);
+}
+
+static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
+{
+	return container_of(ibv_qp, struct hns_roce_qp, ibv_qp);
+}
+/* Verbs entry points that are not hardware-specific */
+int hns_roce_u_query_device(struct ibv_context *context,
+			    struct ibv_device_attr *attr);
+int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
+			  struct ibv_port_attr *attr);
+
+struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
+int hns_roce_u_free_pd(struct ibv_pd *pd);
+
+struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				 int access);
+int hns_roce_u_dereg_mr(struct ibv_mr *mr);
+
+struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
+				    struct ibv_comp_channel *channel,
+				    int comp_vector);
+
+int hns_roce_u_destroy_cq(struct ibv_cq *cq);
+void hns_roce_u_cq_event(struct ibv_cq *cq);
+
+struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
+				    struct ibv_qp_init_attr *attr);
+
+int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
+			int attr_mask, struct ibv_qp_init_attr *init_attr);
+/* Buffer helpers; alloc returns 0 on success and a non-zero value on failure */
+int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+		       int page_size);
+void hns_roce_free_buf(struct hns_roce_buf *buf);
+
+void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
+/* Operation table for HNS_ROCE_HW_VER1 hardware */
+extern struct hns_roce_u_hw hns_roce_u_hw_v1;
+
+#endif /* _HNS_ROCE_U_H */
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
new file mode 100644
index 000000000..e78f967cd
--- /dev/null
+++ b/providers/hns/hns_roce_u_abi.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef _HNS_ROCE_U_ABI_H
+#define _HNS_ROCE_U_ABI_H
+
+#include
+/* Get-context response: generic verbs header followed by driver-specific data */
+struct hns_roce_alloc_ucontext_resp {
+	struct ibv_get_context_resp	ibv_resp;
+	__u32				qp_tab_size;	/* copied into hns_roce_context.num_qps */
+};
+/* Alloc-PD response: generic verbs header followed by the PD number */
+struct hns_roce_alloc_pd_resp {
+	struct ibv_alloc_pd_resp	ibv_resp;
+	__u32				pdn;
+	__u32				reserved;
+};
+/* Create-CQ command: generic verbs header followed by two user addresses */
+struct hns_roce_create_cq {
+	struct ibv_create_cq		ibv_cmd;
+	__u64				buf_addr;
+	__u64				db_addr;
+};
+/* Create-CQ response: generic verbs header followed by the CQ number */
+struct hns_roce_create_cq_resp {
+	struct ibv_create_cq_resp	ibv_resp;
+	__u32				cqn;
+	__u32				reserved;
+};
+/* Create-QP command: generic verbs header followed by buffer address and SQ geometry */
+struct hns_roce_create_qp {
+	struct ibv_create_qp		ibv_cmd;
+	__u64				buf_addr;
+	__u8				log_sq_bb_count;	/* presumably log2 of the SQ entry count - TODO confirm against the kernel ABI */
+	__u8				log_sq_stride;		/* presumably log2 of the SQ entry stride - TODO confirm against the kernel ABI */
+	__u8				reserved[5];
+};
+
+#endif /* _HNS_ROCE_U_ABI_H */
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
new file mode 100644
index 000000000..f92ea6513
--- /dev/null
+++ b/providers/hns/hns_roce_u_buf.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+
+#include "hns_roce_u.h"
+
+/*
+ * Allocate an anonymous, private, read/write mapping for @buf.
+ *
+ * @size is rounded up to a multiple of @page_size and the result is stored
+ * in buf->length.  The whole mapping is then registered with
+ * ibv_dontfork_range(); the same length is later handed to
+ * ibv_dofork_range() by hns_roce_free_buf(), so the two calls always cover
+ * the same range.
+ *
+ * Returns 0 on success, errno if mmap() fails, or the error reported by
+ * ibv_dontfork_range(), in which case the mapping has already been released.
+ */
+int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+		       int page_size)
+{
+	int ret;
+
+	buf->length = align(size, page_size);
+	buf->buf = mmap(NULL, buf->length, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf->buf == MAP_FAILED)
+		return errno;
+
+	/* Register the mapped length, not the caller's unrounded size */
+	ret = ibv_dontfork_range(buf->buf, buf->length);
+	if (ret)
+		munmap(buf->buf, buf->length);
+
+	return ret;
+}
+
+/*
+ * Release a buffer obtained from hns_roce_alloc_buf(): drop the fork
+ * protection for the mapped range, then unmap it.
+ */
+void hns_roce_free_buf(struct hns_roce_buf *buf)
+{
+	ibv_dofork_range(buf->buf, buf->length);
+
+	munmap(buf->buf, buf->length);
+}
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
new file mode 100644
index 000000000..76d13ce9b
--- /dev/null
+++ b/providers/hns/hns_roce_u_db.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+
+#include "hns_roce_u.h"
+
+#ifndef _HNS_ROCE_U_DB_H
+#define _HNS_ROCE_U_DB_H
+
+/*
+ * Combine two 32-bit words into one 64-bit value so that, once stored in
+ * host byte order, val[0] occupies the lower-addressed four bytes.  The
+ * argument is parenthesised so that any pointer expression can be passed.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) (val)[1] << 32 | (val)[0])
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) (val)[0] << 32 | (val)[1])
+#else
+#error __BYTE_ORDER not defined
+#endif
+
+/*
+ * Write the two words of @val as a single 64-bit volatile store at byte
+ * @offset inside the context's UAR mapping.  The address is computed through
+ * a char pointer because arithmetic on void * is a GNU extension.
+ */
+static inline void hns_roce_write64(uint32_t val[2],
+				    struct hns_roce_context *ctx, int offset)
+{
+	*(volatile uint64_t *)((char *)ctx->uar + offset) =
+		HNS_ROCE_PAIR_TO_64(val);
+}
+
+#endif /* _HNS_ROCE_U_DB_H */
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
new file mode 100644
index 000000000..e5cfe4898
--- /dev/null
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include "hns_roce_u_db.h"
+#include "hns_roce_u_hw_v1.h"
+#include "hns_roce_u.h"
+
+/* Fill a remote-address WQE segment; the length field is always zeroed */
+static inline void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg,
+				 uint64_t remote_addr, uint32_t rkey)
+{
+	rseg->raddr = remote_addr;
+	rseg->rkey = rkey;
+	rseg->len = 0;
+}
+
+/* Copy one scatter/gather element into a data WQE segment */
+static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ibv_sge *sg)
+{
+
+	dseg->lkey = sg->lkey;
+	dseg->addr = sg->addr;
+	dseg->len = sg->length;
+}
+
+/* Build an RQ doorbell for @qpn carrying @rq_head and write it to the UAR */
+static void hns_roce_update_rq_head(struct hns_roce_context *ctx,
+				    unsigned int qpn, unsigned int rq_head)
+{
+	struct hns_roce_rq_db rq_db;
+
+	rq_db.u32_4 = 0;
+	rq_db.u32_8 = 0;
+
+	roce_set_field(rq_db.u32_4, RQ_DB_U32_4_RQ_HEAD_M,
+		       RQ_DB_U32_4_RQ_HEAD_S, rq_head);
+	roce_set_field(rq_db.u32_8, RQ_DB_U32_8_QPN_M, RQ_DB_U32_8_QPN_S, qpn);
+	roce_set_field(rq_db.u32_8, RQ_DB_U32_8_CMD_M, RQ_DB_U32_8_CMD_S, 1);
+	roce_set_bit(rq_db.u32_8, RQ_DB_U32_8_HW_SYNC_S, 1);
+
+	hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
+}
+
+/*
+ * Build an SQ doorbell for @qpn carrying @sq_head, @port and @sl and write
+ * it to the UAR.
+ */
+static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
+				    unsigned int qpn, unsigned int port,
+				    unsigned int sl, unsigned int sq_head)
+{
+	struct hns_roce_sq_db sq_db;
+
+	sq_db.u32_4 = 0;
+	sq_db.u32_8 = 0;
+
+	roce_set_field(sq_db.u32_4, SQ_DB_U32_4_SQ_HEAD_M,
+		       SQ_DB_U32_4_SQ_HEAD_S, sq_head);
+	roce_set_field(sq_db.u32_4, SQ_DB_U32_4_PORT_M, SQ_DB_U32_4_PORT_S,
+		       port);
+	roce_set_field(sq_db.u32_4, SQ_DB_U32_4_SL_M, SQ_DB_U32_4_SL_S, sl);
+	roce_set_field(sq_db.u32_8, SQ_DB_U32_8_QPN_M, SQ_DB_U32_8_QPN_S, qpn);
+	roce_set_bit(sq_db.u32_8, SQ_DB_U32_8_HW_SYNC, 1);
+
+	hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG);
+}
+
+/*
+ * Write a CQ doorbell carrying the current consumer index of @cq, reduced
+ * modulo twice the CQ depth.
+ */
+static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
+					  struct hns_roce_cq *cq)
+{
+	struct hns_roce_cq_db cq_db;
+
+	cq_db.u32_4 = 0;
+	cq_db.u32_8 = 0;
+
+	roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_HW_SYNC_S, 1);
+	roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_M, CQ_DB_U32_8_CMD_S, 3);
+	roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_MDF_M,
+		       CQ_DB_U32_8_CMD_MDF_S, 0);
+	roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CQN_M, CQ_DB_U32_8_CQN_S,
+		       cq->cqn);
+	roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M,
+		       CQ_DB_U32_4_CONS_IDX_S,
+		       cq->cons_index & ((cq->cq_depth << 1) - 1));
+
+	hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
+}
+
+/*
+ * Translate the status syndrome of a failed CQE into wc->status.  Unknown
+ * syndromes map to IBV_WC_GENERAL_ERR.  Nothing is printed here: the
+ * original diagnostic sat between the switch header and the first case
+ * label and therefore could never execute, so it has been dropped.
+ */
+static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
+				      struct ibv_wc *wc)
+{
+	switch (roce_get_field(cqe->cqe_byte_4,
+			       CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
+			       CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
+		HNS_ROCE_CQE_STATUS_MASK) {
+	case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+		wc->status = IBV_WC_LOC_LEN_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+		wc->status = IBV_WC_LOC_QP_OP_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR:
+		wc->status = IBV_WC_LOC_PROT_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR:
+		wc->status = IBV_WC_WR_FLUSH_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR:
+		wc->status = IBV_WC_MW_BIND_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR:
+		wc->status = IBV_WC_BAD_RESP_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+		wc->status = IBV_WC_LOC_ACCESS_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+		wc->status = IBV_WC_REM_INV_REQ_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+		wc->status = IBV_WC_REM_ACCESS_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR:
+		wc->status = IBV_WC_REM_OP_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+		wc->status = IBV_WC_RETRY_EXC_ERR;
+		break;
+	case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+		wc->status = IBV_WC_RNR_RETRY_EXC_ERR;
+		break;
+	default:
+		wc->status = IBV_WC_GENERAL_ERR;
+		break;
+	}
+}
+
+/* Address of CQE number @entry inside the CQ buffer */
+static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry)
+{
+	return cq->buf.buf + entry * HNS_ROCE_CQE_ENTRY_SIZE;
+}
+
+/*
+ * Return the CQE for consumer index @n if its owner bit differs from the
+ * wrap bit of @n (n & (cqe + 1)), otherwise NULL.  ibv_cq.cqe is used as an
+ * index mask here.
+ */
+static void *get_sw_cqe(struct hns_roce_cq *cq, int n)
+{
+	struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
+
+	return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^
+		!!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
+}
+
+/* CQE at the current consumer index, or NULL if none is ready */
+static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq)
+{
+	return get_sw_cqe(cq, cq->cons_index);
+}
+
+/*
+ * Address of receive WQE @n inside the QP buffer, or NULL (with a message
+ * on stdout) when @n is not a valid ring index.  Valid indices are
+ * 0 .. wqe_cnt - 1; the original check also let wqe_cnt itself through.
+ */
+static void *get_recv_wqe(struct hns_roce_qp *qp, int n)
+{
+	if ((n < 0) || (n >= qp->rq.wqe_cnt)) {
+		printf("rq wqe index:%d,rq wqe cnt:%d\r\n", n, qp->rq.wqe_cnt);
+		return NULL;
+	}
+
+	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+}
+
+/*
+ * Address of send WQE @n inside the QP buffer, or NULL (with a message on
+ * stdout) when @n is not a valid ring index (0 .. wqe_cnt - 1).
+ */
+static void *get_send_wqe(struct hns_roce_qp *qp, int n)
+{
+	if ((n < 0) || (n >= qp->sq.wqe_cnt)) {
+		printf("sq wqe index:%d,sq wqe cnt:%d\r\n", n, qp->sq.wqe_cnt);
+		return NULL;
+	}
+
+	return (void *)(qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift));
+}
+
+/*
+ * Return non-zero if posting @nreq more requests would reach wq->max_post.
+ * The fast path is lock-free; when it indicates overflow, head/tail are
+ * re-read under the CQ lock before the final decision is made.
+ */
+static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
+				struct hns_roce_cq *cq)
+{
+	unsigned int cur;
+
+	cur = wq->head - wq->tail;
+	if (cur + nreq < wq->max_post)
+		return 0;
+
+	/* While the num of wqe exceeds cap of the device, cq will be locked */
+	pthread_spin_lock(&cq->lock);
+	cur = wq->head - wq->tail;
+	pthread_spin_unlock(&cq->lock);
+
+	printf("wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
+	       wq->head, wq->tail, wq->max_post, nreq);
+
+	return cur + nreq >= wq->max_post;
+}
+
+/*
+ * Look @qpn up in the context's two-level QP table.  Returns NULL (with a
+ * message on stdout) when the first-level slot holds no table.
+ */
+static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
+					    uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	if (ctx->qp_table[tind].refcnt) {
+		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
+	} else {
+		printf("hns_roce_find_qp fail!\n");
+		return NULL;
+	}
+}
+
+/*
+ * Remove @qpn from the QP table.  When the last reference to a second-level
+ * table is dropped the table is freed and its pointer cleared so that no
+ * stale pointer is left behind.
+ */
+static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	if (!--ctx->qp_table[tind].refcnt) {
+		free(ctx->qp_table[tind].table);
+		ctx->qp_table[tind].table = NULL;
+	} else {
+		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
+	}
+}
+
+/*
+ * Consume one CQE from @cq and translate it into @wc.
+ *
+ * @cur_qp caches the QP of the previous CQE so that consecutive completions
+ * of the same QP avoid a table lookup.
+ *
+ * Returns CQ_EMPTY when no CQE is ready, CQ_POLL_ERR when the CQE refers to
+ * an unknown QP or to a send WQE index outside the ring, and CQ_OK
+ * otherwise (including completions that carry an error status).
+ */
+static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
+				struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
+{
+	uint32_t qpn;
+	int is_send;
+	uint16_t wqe_ctr;
+	uint32_t local_qpn;
+	struct hns_roce_wq *wq = NULL;
+	struct hns_roce_cqe *cqe = NULL;
+	struct hns_roce_wqe_ctrl_seg *sq_wqe = NULL;
+
+	/* According to CI, find the relative cqe */
+	cqe = next_cqe_sw(cq);
+	if (!cqe)
+		return CQ_EMPTY;
+
+	/* Get the next cqe, CI will be added gradually */
+	++cq->cons_index;
+
+	/* Read the rest of the CQE only after the ownership check above */
+	rmb();
+
+	qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+			     CQE_BYTE_16_LOCAL_QPN_S);
+
+	is_send = (roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S) ==
+		   HNS_ROCE_CQE_IS_SQ);
+
+	local_qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+				   CQE_BYTE_16_LOCAL_QPN_S);
+
+	/* if qp is zero, it will not get the correct qpn */
+	if (!*cur_qp ||
+	    (local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->ibv_qp.qp_num) {
+
+		*cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
+					   qpn & 0xffffff);
+		if (!*cur_qp) {
+			fprintf(stderr, PFX "can't find qp!\n");
+			return CQ_POLL_ERR;
+		}
+	}
+	wc->qp_num = qpn & 0xffffff;
+
+	if (is_send) {
+		wq = &(*cur_qp)->sq;
+		/*
+		 * if sq_signal_bits is 1, the tail pointer first update to
+		 * the wqe corresponding the current cqe
+		 */
+		if ((*cur_qp)->sq_signal_bits) {
+			wqe_ctr = (uint16_t)(roce_get_field(cqe->cqe_byte_4,
+						CQE_BYTE_4_WQE_INDEX_M,
+						CQE_BYTE_4_WQE_INDEX_S));
+			/*
+			 * wq->tail only ever moves forward; unsigned
+			 * wrap-around of the counter is harmless because it
+			 * is always used modulo wqe_cnt
+			 */
+			wq->tail += (wqe_ctr - (uint16_t) wq->tail) &
+				    (wq->wqe_cnt - 1);
+		}
+		/* write the wr_id of wq into the wc */
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		++wq->tail;
+	} else {
+		wq = &(*cur_qp)->rq;
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		++wq->tail;
+	}
+
+	/*
+	 * HW maintains wc status, set the err type and directly return, after
+	 * generated the incorrect CQE
+	 */
+	if (roce_get_field(cqe->cqe_byte_4,
+	    CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
+	    CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) {
+		hns_roce_handle_error_cqe(cqe, wc);
+		return CQ_OK;
+	}
+	wc->status = IBV_WC_SUCCESS;
+
+	/*
+	 * According to the opcode type of cqe, mark the opcode and other
+	 * information of wc
+	 */
+	if (is_send) {
+		/* Get opcode and flag before update the tail point for send */
+		sq_wqe = (struct hns_roce_wqe_ctrl_seg *)
+			 get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
+						CQE_BYTE_4_WQE_INDEX_M,
+						CQE_BYTE_4_WQE_INDEX_S));
+		/* The index comes from the CQE; do not trust it blindly */
+		if (!sq_wqe) {
+			fprintf(stderr, PFX "invalid sq wqe index in cqe!\n");
+			return CQ_POLL_ERR;
+		}
+		switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) {
+		case HNS_ROCE_WQE_OPCODE_SEND:
+			wc->opcode = IBV_WC_SEND;
+			break;
+		case HNS_ROCE_WQE_OPCODE_RDMA_READ:
+			wc->opcode = IBV_WC_RDMA_READ;
+			wc->byte_len = cqe->byte_cnt;
+			break;
+		case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
+			wc->opcode = IBV_WC_RDMA_WRITE;
+			break;
+		case HNS_ROCE_WQE_OPCODE_BIND_MW2:
+			wc->opcode = IBV_WC_BIND_MW;
+			break;
+		default:
+			wc->status = IBV_WC_GENERAL_ERR;
+			break;
+		}
+		wc->wc_flags = (sq_wqe->flag & HNS_ROCE_WQE_IMM ?
+				IBV_WC_WITH_IMM : 0);
+	} else {
+		/* Get opcode and flag in rq&srq */
+		wc->byte_len = (cqe->byte_cnt);
+
+		switch (roce_get_field(cqe->cqe_byte_4,
+				       CQE_BYTE_4_OPERATION_TYPE_M,
+				       CQE_BYTE_4_OPERATION_TYPE_S) &
+			HNS_ROCE_CQE_OPCODE_MASK) {
+		case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
+			wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
+			wc->wc_flags = IBV_WC_WITH_IMM;
+			wc->imm_data = cqe->immediate_data;
+			break;
+		case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
+			if (roce_get_bit(cqe->cqe_byte_4,
+					 CQE_BYTE_4_IMMEDIATE_DATA_FLAG_S)) {
+				wc->opcode = IBV_WC_RECV;
+				wc->wc_flags = IBV_WC_WITH_IMM;
+				wc->imm_data = cqe->immediate_data;
+			} else {
+				wc->opcode = IBV_WC_RECV;
+				wc->wc_flags = 0;
+			}
+			break;
+		default:
+			wc->status = IBV_WC_GENERAL_ERR;
+			break;
+		}
+	}
+
+	return CQ_OK;
+}
+
+/*
+ * poll_cq verb for v1 hardware: reap up to @ne completions from @ibvcq into
+ * @wc under the CQ lock.  If at least one CQE was consumed, the new consumer
+ * index is published through *cq->set_ci_db (HNS_ROCE_HW_VER1 only) and a
+ * CQ doorbell.
+ *
+ * Returns the number of completions written to @wc, or CQ_POLL_ERR if
+ * hns_roce_v1_poll_one() reported an error.
+ */
+static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne,
+				 struct ibv_wc *wc)
+{
+	int npolled;
+	int err = CQ_OK;
+	struct hns_roce_qp *qp = NULL;
+	struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+	struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
+	struct hns_roce_device *dev = to_hr_dev(ibvcq->context->device);
+
+	pthread_spin_lock(&cq->lock);
+
+	for (npolled = 0; npolled < ne; ++npolled) {
+		err = hns_roce_v1_poll_one(cq, &qp, wc + npolled);
+		if (err != CQ_OK)
+			break;
+	}
+
+	if (npolled) {
+		if (dev->hw_version == HNS_ROCE_HW_VER1) {
+			*cq->set_ci_db = (unsigned short)(cq->cons_index &
+					  ((cq->cq_depth << 1) - 1));
+			/* Publish the index before ringing the doorbell */
+			mb();
+		}
+
+		hns_roce_update_cq_cons_index(ctx, cq);
+	}
+
+	pthread_spin_unlock(&cq->lock);
+
+	return err == CQ_POLL_ERR ? err : npolled;
+}
+
+/**
+ * hns_roce_u_v1_arm_cq - request completion notification on a CQ
+ * @ibvcq: The completion queue to request notification for.
+ * @solicited: If non-zero, an event will be generated only for
+ *	       the next solicited CQ entry.
If zero, any CQ entry, + * solicited or not, will generate an event + */ +static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) +{ + uint32_t ci; + uint32_t solicited_flag; + struct hns_roce_cq_db cq_db; + struct hns_roce_cq *cq = to_hr_cq(ibvcq); + + ci = cq->cons_index & ((cq->cq_depth << 1) - 1); + solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL : + HNS_ROCE_CQ_DB_REQ_NEXT; + + cq_db.u32_4 = 0; + cq_db.u32_8 = 0; + + roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_HW_SYNC_S, 1); + roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_M, CQ_DB_U32_8_CMD_S, 3); + roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_MDF_M, + CQ_DB_U32_8_CMD_MDF_S, 1); + roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_NOTIFY_TYPE_S, solicited_flag); + roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CQN_M, CQ_DB_U32_8_CQN_S, + cq->cqn); + roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M, + CQ_DB_U32_4_CONS_IDX_S, ci); + + hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context), + ROCEE_DB_OTHERS_L_0_REG); + return 0; +} + +static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr) +{ + unsigned int ind; + void *wqe; + int nreq; + int ps_opcode, i; + int ret = 0; + struct hns_roce_wqe_ctrl_seg *ctrl = NULL; + struct hns_roce_wqe_data_seg *dseg = NULL; + struct hns_roce_qp *qp = to_hr_qp(ibvqp); + struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + + pthread_spin_lock(&qp->sq.lock); + + /* check that state is OK to post send */ + ind = qp->sq.head; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, + to_hr_cq(qp->ibv_qp.send_cq))) { + ret = -1; + *bad_wr = wr; + goto out; + } + if (wr->num_sge > qp->sq.max_gs) { + ret = -1; + *bad_wr = wr; + printf("wr->num_sge(<=%d) = %d, check failed!\r\n", + qp->sq.max_gs, wr->num_sge); + goto out; + } + + ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); + + qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = 
wr->wr_id; + for (i = 0; i < wr->num_sge; i++) + ctrl->msg_length += wr->sg_list[i].length; + + + ctrl->flag |= ((wr->send_flags & IBV_SEND_SIGNALED) ? + HNS_ROCE_WQE_CQ_NOTIFY : 0) | + (wr->send_flags & IBV_SEND_SOLICITED ? + HNS_ROCE_WQE_SE : 0) | + ((wr->opcode == IBV_WR_SEND_WITH_IMM || + wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) ? + HNS_ROCE_WQE_IMM : 0) | + (wr->send_flags & IBV_SEND_FENCE ? + HNS_ROCE_WQE_FENCE : 0); + + if (wr->opcode == IBV_WR_SEND_WITH_IMM || + wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) + ctrl->imm_data = wr->imm_data; + + wqe += sizeof(struct hns_roce_wqe_ctrl_seg); + + /* set remote addr segment */ + switch (ibvqp->qp_type) { + case IBV_QPT_RC: + switch (wr->opcode) { + case IBV_WR_RDMA_READ: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + break; + case IBV_WR_RDMA_WRITE: + case IBV_WR_RDMA_WRITE_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + break; + case IBV_WR_SEND: + case IBV_WR_SEND_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; + break; + case IBV_WR_ATOMIC_CMP_AND_SWP: + case IBV_WR_ATOMIC_FETCH_AND_ADD: + default: + ps_opcode = HNS_ROCE_WQE_OPCODE_MASK; + break; + } + ctrl->flag |= (ps_opcode); + wqe += sizeof(struct hns_roce_wqe_raddr_seg); + break; + case IBV_QPT_UC: + case IBV_QPT_UD: + default: + break; + } + + dseg = wqe; + + /* Inline */ + if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { + if (ctrl->msg_length > qp->max_inline_data) { + ret = -1; + *bad_wr = wr; + printf("inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n", + wr->send_flags, ctrl->msg_length); + return ret; + } + + for (i = 0; i < wr->num_sge; i++) { + memcpy(wqe, + ((void *) (uintptr_t) wr->sg_list[i].addr), + wr->sg_list[i].length); + wqe = wqe + wr->sg_list[i].length; + } + + ctrl->flag |= HNS_ROCE_WQE_INLINE; + } else { + /* set sge */ + for (i = 0; i < wr->num_sge; i++) + 
set_data_seg(dseg+i, wr->sg_list + i); + + ctrl->flag |= wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT; + } + + ind++; + } + +out: + /* Set DB return */ + if (likely(nreq)) { + qp->sq.head += nreq; + wmb(); + + hns_roce_update_sq_head(ctx, qp->ibv_qp.qp_num, + qp->port_num - 1, qp->sl, + qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); + } + + pthread_spin_unlock(&qp->sq.lock); + + return ret; +} + +static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + struct hns_roce_srq *srq) +{ + int nfreed = 0; + uint32_t prod_index; + uint8_t owner_bit = 0; + struct hns_roce_cqe *cqe, *dest; + struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); + + for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); + ++prod_index) + if (prod_index == cq->cons_index + cq->ibv_cq.cqe) + break; + + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); + if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) { + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, + (prod_index + nfreed) & cq->ibv_cq.cqe); + owner_bit = roce_get_bit(dest->cqe_byte_4, + CQE_BYTE_4_OWNER_S); + memcpy(dest, cqe, sizeof(*cqe)); + roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S, + owner_bit); + } + } + + if (nfreed) { + cq->cons_index += nfreed; + wmb(); + hns_roce_update_cq_cons_index(ctx, cq); + } +} + +static void hns_roce_v1_cq_clean(struct hns_roce_cq *cq, unsigned int qpn, + struct hns_roce_srq *srq) +{ + pthread_spin_lock(&cq->lock); + __hns_roce_v1_cq_clean(cq, qpn, srq); + pthread_spin_unlock(&cq->lock); +} + +static int hns_roce_u_v1_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask) +{ + int ret; + struct ibv_modify_qp cmd; + struct hns_roce_qp *hr_qp = to_hr_qp(qp); + + ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); + + if (!ret && (attr_mask & IBV_QP_STATE) && + attr->qp_state == IBV_QPS_RESET) { + 
hns_roce_v1_cq_clean(to_hr_cq(qp->recv_cq), qp->qp_num, + qp->srq ? to_hr_srq(qp->srq) : NULL); + if (qp->send_cq != qp->recv_cq) + hns_roce_v1_cq_clean(to_hr_cq(qp->send_cq), qp->qp_num, + NULL); + + hns_roce_init_qp_indices(to_hr_qp(qp)); + } + + if (!ret && (attr_mask & IBV_QP_PORT)) { + hr_qp->port_num = attr->port_num; + printf("hr_qp->port_num= 0x%x\n", hr_qp->port_num); + } + + hr_qp->sl = attr->ah_attr.sl; + + return ret; +} + +static void hns_roce_lock_cqs(struct ibv_qp *qp) +{ + struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); + struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); + + if (send_cq == recv_cq) { + pthread_spin_lock(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + pthread_spin_lock(&send_cq->lock); + pthread_spin_lock(&recv_cq->lock); + } else { + pthread_spin_lock(&recv_cq->lock); + pthread_spin_lock(&send_cq->lock); + } +} + +static void hns_roce_unlock_cqs(struct ibv_qp *qp) +{ + struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); + struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); + + if (send_cq == recv_cq) { + pthread_spin_unlock(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + pthread_spin_unlock(&recv_cq->lock); + pthread_spin_unlock(&send_cq->lock); + } else { + pthread_spin_unlock(&send_cq->lock); + pthread_spin_unlock(&recv_cq->lock); + } +} + +static int hns_roce_u_v1_destroy_qp(struct ibv_qp *ibqp) +{ + int ret; + struct hns_roce_qp *qp = to_hr_qp(ibqp); + + pthread_mutex_lock(&to_hr_ctx(ibqp->context)->qp_table_mutex); + ret = ibv_cmd_destroy_qp(ibqp); + if (ret) { + pthread_mutex_unlock(&to_hr_ctx(ibqp->context)->qp_table_mutex); + return ret; + } + + hns_roce_lock_cqs(ibqp); + + __hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num, + ibqp->srq ? 
to_hr_srq(ibqp->srq) : NULL); + + if (ibqp->send_cq != ibqp->recv_cq) + __hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, + NULL); + + hns_roce_clear_qp(to_hr_ctx(ibqp->context), ibqp->qp_num); + + hns_roce_unlock_cqs(ibqp); + pthread_mutex_unlock(&to_hr_ctx(ibqp->context)->qp_table_mutex); + + free(qp->sq.wrid); + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); + + hns_roce_free_buf(&qp->buf); + free(qp); + + return ret; +} + +static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + int ret = 0; + int nreq; + int ind; + struct ibv_sge *sg; + struct hns_roce_rc_rq_wqe *rq_wqe; + struct hns_roce_qp *qp = to_hr_qp(ibvqp); + struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + + pthread_spin_lock(&qp->rq.lock); + + /* check that state is OK to post receive */ + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->rq, nreq, + to_hr_cq(qp->ibv_qp.recv_cq))) { + ret = -1; + *bad_wr = wr; + goto out; + } + + if (wr->num_sge > qp->rq.max_gs) { + ret = -1; + *bad_wr = wr; + goto out; + } + + rq_wqe = get_recv_wqe(qp, ind); + if (wr->num_sge > HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) { + ret = -1; + *bad_wr = wr; + goto out; + } + + if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) { + roce_set_field(rq_wqe->u32_2, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, + HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM); + sg = wr->sg_list; + + rq_wqe->va0 = (sg->addr); + rq_wqe->l_key0 = (sg->lkey); + rq_wqe->length0 = (sg->length); + + sg = wr->sg_list + 1; + + rq_wqe->va1 = (sg->addr); + rq_wqe->l_key1 = (sg->lkey); + rq_wqe->length1 = (sg->length); + } else if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 1) { + roce_set_field(rq_wqe->u32_2, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, + HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 1); + sg = wr->sg_list; + + rq_wqe->va0 = (sg->addr); + rq_wqe->l_key0 = (sg->lkey); + 
rq_wqe->length0 = (sg->length); + + } else if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2) { + roce_set_field(rq_wqe->u32_2, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, + RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, + HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (nreq) { + qp->rq.head += nreq; + + wmb(); + + hns_roce_update_rq_head(ctx, qp->ibv_qp.qp_num, + qp->rq.head & ((qp->rq.wqe_cnt << 1) - 1)); + } + + pthread_spin_unlock(&qp->rq.lock); + + return ret; +} + +struct hns_roce_u_hw hns_roce_u_hw_v1 = { + .poll_cq = hns_roce_u_v1_poll_cq, + .arm_cq = hns_roce_u_v1_arm_cq, + .post_send = hns_roce_u_v1_post_send, + .post_recv = hns_roce_u_v1_post_recv, + .modify_qp = hns_roce_u_v1_modify_qp, + .destroy_qp = hns_roce_u_v1_destroy_qp, +}; diff --git a/providers/hns/hns_roce_u_hw_v1.h b/providers/hns/hns_roce_u_hw_v1.h new file mode 100644 index 000000000..128c66f9c --- /dev/null +++ b/providers/hns/hns_roce_u_hw_v1.h @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
/*
 * Bit assignments for the 'flag' word of a send WQE control segment.
 *
 * All values are masks except HNS_ROCE_WQE_SGE_NUM_BIT, which is a shift
 * count.  The HNS_ROCE_WQE_OPCODE_* values occupy the four bits covered by
 * HNS_ROCE_WQE_OPCODE_MASK.
 *
 * Bit 31 is built from an unsigned constant: shifting a signed 1 into the
 * sign bit (1 << 31) is undefined behaviour in C.
 */
enum {
	HNS_ROCE_WQE_INLINE		= (int)(1U << 31),
	HNS_ROCE_WQE_SE			= 1 << 30,
	HNS_ROCE_WQE_SGE_NUM_BIT	= 24,
	HNS_ROCE_WQE_IMM		= 1 << 23,
	HNS_ROCE_WQE_FENCE		= 1 << 21,
	HNS_ROCE_WQE_CQ_NOTIFY		= 1 << 20,
	HNS_ROCE_WQE_OPCODE_SEND	= 0 << 16,
	HNS_ROCE_WQE_OPCODE_RDMA_READ	= 1 << 16,
	HNS_ROCE_WQE_OPCODE_RDMA_WRITE	= 2 << 16,
	HNS_ROCE_WQE_OPCODE_BIND_MW2	= 6 << 16,
	HNS_ROCE_WQE_OPCODE_MASK	= 15 << 16,
};
+ HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR, + HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR, + HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR, + HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR, +}; + +struct hns_roce_cq_db { + unsigned int u32_4; + unsigned int u32_8; +}; +#define CQ_DB_U32_4_CONS_IDX_S 0 +#define CQ_DB_U32_4_CONS_IDX_M (((1UL << 16) - 1) << CQ_DB_U32_4_CONS_IDX_S) + +#define CQ_DB_U32_8_CQN_S 0 +#define CQ_DB_U32_8_CQN_M (((1UL << 16) - 1) << CQ_DB_U32_8_CQN_S) + +#define CQ_DB_U32_8_NOTIFY_TYPE_S 16 + +#define CQ_DB_U32_8_CMD_MDF_S 24 +#define CQ_DB_U32_8_CMD_MDF_M (((1UL << 4) - 1) << CQ_DB_U32_8_CMD_MDF_S) + +#define CQ_DB_U32_8_CMD_S 28 +#define CQ_DB_U32_8_CMD_M (((1UL << 3) - 1) << CQ_DB_U32_8_CMD_S) + +#define CQ_DB_U32_8_HW_SYNC_S 31 + +struct hns_roce_rq_db { + unsigned int u32_4; + unsigned int u32_8; +}; + +#define RQ_DB_U32_4_RQ_HEAD_S 0 +#define RQ_DB_U32_4_RQ_HEAD_M (((1UL << 15) - 1) << RQ_DB_U32_4_RQ_HEAD_S) + +#define RQ_DB_U32_8_QPN_S 0 +#define RQ_DB_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DB_U32_8_QPN_S) + +#define RQ_DB_U32_8_CMD_S 28 +#define RQ_DB_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DB_U32_8_CMD_S) + +#define RQ_DB_U32_8_HW_SYNC_S 31 + +struct hns_roce_sq_db { + unsigned int u32_4; + unsigned int u32_8; +}; + +#define SQ_DB_U32_4_SQ_HEAD_S 0 +#define SQ_DB_U32_4_SQ_HEAD_M (((1UL << 15) - 1) << SQ_DB_U32_4_SQ_HEAD_S) + +#define SQ_DB_U32_4_SL_S 16 +#define SQ_DB_U32_4_SL_M (((1UL << 2) - 1) << SQ_DB_U32_4_SL_S) + +#define SQ_DB_U32_4_PORT_S 18 +#define SQ_DB_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DB_U32_4_PORT_S) + +#define SQ_DB_U32_4_DIRECT_WQE_S 31 + +#define SQ_DB_U32_8_QPN_S 0 +#define SQ_DB_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DB_U32_8_QPN_S) + +#define SQ_DB_U32_8_HW_SYNC 31 + +struct hns_roce_cqe { + unsigned int cqe_byte_4; + union { + unsigned int r_key; + unsigned int immediate_data; + }; + unsigned int byte_cnt; + unsigned int cqe_byte_16; + unsigned int cqe_byte_20; + unsigned int s_mac_l; + unsigned int cqe_byte_28; + unsigned int 
reserved; +}; +#define CQE_BYTE_4_OPERATION_TYPE_S 0 +#define CQE_BYTE_4_OPERATION_TYPE_M \ + (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S) + +#define CQE_BYTE_4_OWNER_S 7 + +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8 +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \ + (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) + +#define CQE_BYTE_4_SQ_RQ_FLAG_S 14 + +#define CQE_BYTE_4_IMMEDIATE_DATA_FLAG_S 15 + +#define CQE_BYTE_4_WQE_INDEX_S 16 +#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S) + +#define CQE_BYTE_16_LOCAL_QPN_S 0 +#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S) + +#define ROCEE_DB_SQ_L_0_REG 0x230 + +#define ROCEE_DB_OTHERS_L_0_REG 0x238 + +struct hns_roce_rc_send_wqe { + unsigned int sgl_ba_31_0; + unsigned int u32_1; + union { + unsigned int r_key; + unsigned int immediate_data; + }; + unsigned int msg_length; + unsigned int rvd_3; + unsigned int rvd_4; + unsigned int rvd_5; + unsigned int rvd_6; + uint64_t va0; + unsigned int l_key0; + unsigned int length0; + + uint64_t va1; + unsigned int l_key1; + unsigned int length1; +}; + +struct hns_roce_rc_rq_wqe { + unsigned int u32_0; + unsigned int sgl_ba_31_0; + unsigned int u32_2; + unsigned int rvd_5; + unsigned int rvd_6; + unsigned int rvd_7; + unsigned int rvd_8; + unsigned int rvd_9; + + uint64_t va0; + unsigned int l_key0; + unsigned int length0; + + uint64_t va1; + unsigned int l_key1; + unsigned int length1; +}; +#define RC_RQ_WQE_NUMBER_OF_DATA_SEG_S 16 +#define RC_RQ_WQE_NUMBER_OF_DATA_SEG_M \ + (((1UL << 6) - 1) << RC_RQ_WQE_NUMBER_OF_DATA_SEG_S) + +#endif /* _HNS_ROCE_U_HW_V1_H */ diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c new file mode 100644 index 000000000..0b8f444f4 --- /dev/null +++ b/providers/hns/hns_roce_u_verbs.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hns_roce_u.h" +#include "hns_roce_u_abi.h" +#include "hns_roce_u_hw_v1.h" + +void hns_roce_init_qp_indices(struct hns_roce_qp *qp) +{ + qp->sq.head = 0; + qp->sq.tail = 0; + qp->rq.head = 0; + qp->rq.tail = 0; +} + +int hns_roce_u_query_device(struct ibv_context *context, + struct ibv_device_attr *attr) +{ + int ret; + struct ibv_query_device cmd; + uint64_t raw_fw_ver; + unsigned int major, minor, sub_minor; + + ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, + sizeof(cmd)); + if (ret) + return ret; + + major = (raw_fw_ver >> 32) & 0xffff; + minor = (raw_fw_ver >> 16) & 0xffff; + sub_minor = raw_fw_ver & 0xffff; + + snprintf(attr->fw_ver, sizeof(attr->fw_ver), "%d.%d.%03d", major, minor, + sub_minor); + + return 0; +} + +int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); +} + +struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct hns_roce_pd *pd; + struct hns_roce_alloc_pd_resp resp; + + pd = (struct hns_roce_pd *)malloc(sizeof(*pd)); + if (!pd) + return NULL; + + if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) { + free(pd); + return NULL; + } + + pd->pdn = resp.pdn; + + return &pd->ibv_pd; +} + +int hns_roce_u_free_pd(struct ibv_pd *pd) +{ + int ret; + + ret = ibv_cmd_dealloc_pd(pd); + if (ret) + return ret; + + free(to_hr_pd(pd)); + + return ret; +} + +struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + int access) +{ + int ret; + struct ibv_mr *mr; + struct ibv_reg_mr cmd; + struct ibv_reg_mr_resp resp; + + if (!addr) { + fprintf(stderr, "2nd parm addr is NULL!\n"); + return NULL; + } + + if (!length) { + fprintf(stderr, "3st parm length is 0!\n"); + return NULL; + } + + 
mr = malloc(sizeof(*mr)); + if (!mr) + return NULL; + + ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr, + &cmd, sizeof(cmd), &resp, sizeof(resp)); + if (ret) { + free(mr); + return NULL; + } + + return mr; +} + +int hns_roce_u_dereg_mr(struct ibv_mr *mr) +{ + int ret; + + ret = ibv_cmd_dereg_mr(mr); + if (ret) + return ret; + + free(mr); + + return ret; +} + +static int align_cq_size(int req) +{ + int nent; + + for (nent = HNS_ROCE_MIN_CQE_NUM; nent < req; nent <<= 1) + ; + + return nent; +} + +static int align_qp_size(int req) +{ + int nent; + + for (nent = HNS_ROCE_MIN_WQE_NUM; nent < req; nent <<= 1) + ; + + return nent; +} + +static void hns_roce_set_sq_sizes(struct hns_roce_qp *qp, + struct ibv_qp_cap *cap, enum ibv_qp_type type) +{ + struct hns_roce_context *ctx = to_hr_ctx(qp->ibv_qp.context); + + qp->sq.max_gs = 2; + cap->max_send_sge = min(ctx->max_sge, qp->sq.max_gs); + qp->sq.max_post = min(ctx->max_qp_wr, qp->sq.wqe_cnt); + cap->max_send_wr = qp->sq.max_post; + qp->max_inline_data = 32; + cap->max_inline_data = qp->max_inline_data; +} + +static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context) +{ + if (*cqe < HNS_ROCE_MIN_CQE_NUM) { + fprintf(stderr, "cqe = %d, less than minimum CQE number.\n", + *cqe); + *cqe = HNS_ROCE_MIN_CQE_NUM; + } + + if (*cqe > context->max_cqe) + return -1; + + return 0; +} + +static int hns_roce_alloc_cq_buf(struct hns_roce_device *dev, + struct hns_roce_buf *buf, int nent) +{ + if (hns_roce_alloc_buf(buf, + align(nent * HNS_ROCE_CQE_ENTRY_SIZE, dev->page_size), + dev->page_size)) + return -1; + memset(buf->buf, 0, nent * HNS_ROCE_CQE_ENTRY_SIZE); + + return 0; +} + +static void hns_roce_calc_sq_wqe_size(struct ibv_qp_cap *cap, + enum ibv_qp_type type, + struct hns_roce_qp *qp) +{ + int size = sizeof(struct hns_roce_rc_send_wqe); + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; +} + +struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int 
cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct hns_roce_create_cq cmd; + struct hns_roce_create_cq_resp resp; + struct hns_roce_cq *cq; + int ret; + + if (hns_roce_verify_cq(&cqe, to_hr_ctx(context))) + return NULL; + + cq = malloc(sizeof(*cq)); + if (!cq) + return NULL; + + cq->cons_index = 0; + + if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE)) + goto err; + + cqe = align_cq_size(cqe); + + if (hns_roce_alloc_cq_buf(to_hr_dev(context->device), &cq->buf, cqe)) + goto err; + + cmd.buf_addr = (uintptr_t) cq->buf.buf; + + ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector, + &cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + if (ret) + goto err_db; + + cq->cqn = resp.cqn; + cq->cq_depth = cqe; + + if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER1) + cq->set_ci_db = to_hr_ctx(context)->cq_tptr_base + cq->cqn * 2; + else + cq->set_ci_db = to_hr_ctx(context)->uar + + ROCEE_DB_OTHERS_L_0_REG; + + cq->arm_db = cq->set_ci_db; + cq->arm_sn = 1; + *(cq->set_ci_db) = 0; + *(cq->arm_db) = 0; + + return &cq->ibv_cq; + +err_db: + hns_roce_free_buf(&cq->buf); + +err: + free(cq); + + return NULL; +} + +void hns_roce_u_cq_event(struct ibv_cq *cq) +{ + to_hr_cq(cq)->arm_sn++; +} + +int hns_roce_u_destroy_cq(struct ibv_cq *cq) +{ + int ret; + + ret = ibv_cmd_destroy_cq(cq); + if (ret) + return ret; + + hns_roce_free_buf(&to_hr_cq(cq)->buf); + free(to_hr_cq(cq)); + + return ret; +} + +static int hns_roce_verify_qp(struct ibv_qp_init_attr *attr, + struct hns_roce_context *context) +{ + if (attr->cap.max_send_wr < HNS_ROCE_MIN_WQE_NUM) { + fprintf(stderr, + "max_send_wr = %d, less than minimum WQE number.\n", + attr->cap.max_send_wr); + attr->cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM; + } + + if (attr->cap.max_recv_wr < HNS_ROCE_MIN_WQE_NUM) { + fprintf(stderr, + "max_recv_wr = %d, less than minimum WQE number.\n", + attr->cap.max_recv_wr); + attr->cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM; + } + + if 
(attr->cap.max_recv_sge < 1) + attr->cap.max_recv_sge = 1; + if (attr->cap.max_send_wr > context->max_qp_wr || + attr->cap.max_recv_wr > context->max_qp_wr || + attr->cap.max_send_sge > context->max_sge || + attr->cap.max_recv_sge > context->max_sge) + return -1; + + if ((attr->qp_type != IBV_QPT_RC) && (attr->qp_type != IBV_QPT_UD)) + return -1; + + if ((attr->qp_type == IBV_QPT_RC) && + (attr->cap.max_inline_data > HNS_ROCE_RC_WQE_INLINE_DATA_MAX_LEN)) + return -1; + + if (attr->qp_type == IBV_QPT_UC) + return -1; + + return 0; +} + +static int hns_roce_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, + enum ibv_qp_type type, struct hns_roce_qp *qp) +{ + qp->sq.wrid = + (unsigned long *)malloc(qp->sq.wqe_cnt * sizeof(uint64_t)); + if (!qp->sq.wrid) + return -1; + + if (qp->rq.wqe_cnt) { + qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); + if (!qp->rq.wrid) { + free(qp->sq.wrid); + return -1; + } + } + + for (qp->rq.wqe_shift = 4; + 1 << qp->rq.wqe_shift < sizeof(struct hns_roce_rc_send_wqe); + qp->rq.wqe_shift++) + ; + + qp->buf_size = align((qp->sq.wqe_cnt << qp->sq.wqe_shift), 0x1000) + + (qp->rq.wqe_cnt << qp->rq.wqe_shift); + + if (qp->rq.wqe_shift > qp->sq.wqe_shift) { + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + } else { + qp->rq.offset = align((qp->sq.wqe_cnt << qp->sq.wqe_shift), + 0x1000); + qp->sq.offset = 0; + } + + if (hns_roce_alloc_buf(&qp->buf, align(qp->buf_size, 0x1000), + to_hr_dev(pd->context->device)->page_size)) { + free(qp->sq.wrid); + free(qp->rq.wrid); + return -1; + } + + memset(qp->buf.buf, 0, qp->buf_size); + + return 0; +} + +static int hns_roce_store_qp(struct hns_roce_context *ctx, uint32_t qpn, + struct hns_roce_qp *qp) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + + if (!ctx->qp_table[tind].refcnt) { + ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1, + sizeof(struct hns_roce_qp *)); + if (!ctx->qp_table[tind].table) + return -1; + } + + 
++ctx->qp_table[tind].refcnt; + ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp; + + return 0; +} + +struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr) +{ + int ret; + struct hns_roce_qp *qp = NULL; + struct hns_roce_create_qp cmd; + struct ibv_create_qp_resp resp; + struct hns_roce_context *context = to_hr_ctx(pd->context); + + if (hns_roce_verify_qp(attr, context)) { + fprintf(stderr, "hns_roce_verify_sizes failed!\n"); + return NULL; + } + + qp = malloc(sizeof(*qp)); + if (!qp) { + fprintf(stderr, "malloc failed!\n"); + return NULL; + } + + hns_roce_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp); + qp->sq.wqe_cnt = align_qp_size(attr->cap.max_send_wr); + qp->rq.wqe_cnt = align_qp_size(attr->cap.max_recv_wr); + + if (hns_roce_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp)) { + fprintf(stderr, "hns_roce_alloc_qp_buf failed!\n"); + goto err; + } + + hns_roce_init_qp_indices(qp); + + if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) || + pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) { + fprintf(stderr, "pthread_spin_init failed!\n"); + goto err_free; + } + + cmd.buf_addr = (uintptr_t) qp->buf.buf; + cmd.log_sq_stride = qp->sq.wqe_shift; + for (cmd.log_sq_bb_count = 0; qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count; + ++cmd.log_sq_bb_count) + ; + + memset(cmd.reserved, 0, sizeof(cmd.reserved)); + + pthread_mutex_lock(&to_hr_ctx(pd->context)->qp_table_mutex); + + ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp, sizeof(resp)); + if (ret) { + fprintf(stderr, "ibv_cmd_create_qp failed!\n"); + goto err_rq_db; + } + + ret = hns_roce_store_qp(to_hr_ctx(pd->context), qp->ibv_qp.qp_num, qp); + if (ret) { + fprintf(stderr, "hns_roce_store_qp failed!\n"); + goto err_destroy; + } + pthread_mutex_unlock(&to_hr_ctx(pd->context)->qp_table_mutex); + + qp->rq.wqe_cnt = attr->cap.max_recv_wr; + qp->rq.max_gs = attr->cap.max_recv_sge; + + /* adjust rq maxima to not exceed reported device 
maxima */ + attr->cap.max_recv_wr = min(context->max_qp_wr, attr->cap.max_recv_wr); + attr->cap.max_recv_sge = min(context->max_sge, attr->cap.max_recv_sge); + + qp->rq.max_post = attr->cap.max_recv_wr; + hns_roce_set_sq_sizes(qp, &attr->cap, attr->qp_type); + + qp->sq_signal_bits = attr->sq_sig_all ? 0 : 1; + + return &qp->ibv_qp; + +err_destroy: + ibv_cmd_destroy_qp(&qp->ibv_qp); + +err_rq_db: + pthread_mutex_unlock(&to_hr_ctx(pd->context)->qp_table_mutex); + +err_free: + free(qp->sq.wrid); + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); + hns_roce_free_buf(&qp->buf); + +err: + free(qp); + + return NULL; +} + +int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, + int attr_mask, struct ibv_qp_init_attr *init_attr) +{ + int ret; + struct ibv_query_qp cmd; + struct hns_roce_qp *qp = to_hr_qp(ibqp); + + ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, + sizeof(cmd)); + if (ret) + return ret; + + init_attr->cap.max_send_wr = qp->sq.max_post; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; + + attr->cap = init_attr->cap; + + return ret; +}