Skip to content

Commit 34317a7

Browse files
UCT/GDAKI: Export EP to GPU (#10833)
1 parent f86e593 commit 34317a7

File tree

6 files changed

+190
-24
lines changed

6 files changed

+190
-24
lines changed

src/uct/ib/mlx5/dv/ib_mlx5_dv.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,12 +197,14 @@ ucs_status_t uct_ib_mlx5_devx_create_qp_common(uct_ib_iface_t *iface,
197197
UCT_IB_MLX5DV_SET(qpc, qpc, user_index, attr->uidx);
198198
UCT_IB_MLX5DV_SET(qpc, qpc, ts_format, UCT_IB_MLX5_QPC_TS_FORMAT_DEFAULT);
199199

200-
if (qp->devx.wq_buf == NULL) {
200+
if ((qp->devx.wq_buf == NULL) && (attr->umem_offset == 0)) {
201201
UCT_IB_MLX5DV_SET(qpc, qpc, no_sq, true);
202202
UCT_IB_MLX5DV_SET(qpc, qpc, offload_type, true);
203203
UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, md->zero_mem.mem->umem_id);
204204
} else {
205205
UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, qp->devx.mem.mem->umem_id);
206+
UCT_IB_MLX5DV_SET64(create_qp_in, in, wq_umem_offset,
207+
attr->umem_offset);
206208
}
207209

208210
if (md->super.ece_enable) {
@@ -248,13 +250,11 @@ ucs_status_t uct_ib_mlx5_devx_create_qp_common(uct_ib_iface_t *iface,
248250
attr->super.cap.max_recv_wr = 0;
249251

250252
if (tx != NULL) {
251-
ucs_assert(qp->devx.wq_buf != NULL);
252253
tx->reg = &uar->super;
253254
tx->qstart = qp->devx.wq_buf;
254255
tx->qend = UCS_PTR_BYTE_OFFSET(qp->devx.wq_buf, attr->len);
255256
tx->dbrec = &qp->devx.dbrec->db[MLX5_SND_DBR];
256257
tx->bb_max = attr->max_tx - 2 * UCT_IB_MLX5_MAX_BB;
257-
ucs_assert(*tx->dbrec == 0);
258258
uct_ib_mlx5_txwq_reset(tx);
259259
} else {
260260
ucs_assert(qp->devx.wq_buf == NULL);
@@ -602,7 +602,7 @@ uct_ib_mlx5_devx_create_cq_common(uct_ib_iface_t *iface, uct_ib_dir_t dir,
602602

603603
/* Set CQ umem related bits */
604604
UCT_IB_MLX5DV_SET(create_cq_in, in, cq_umem_id, cq->devx.mem.mem->umem_id);
605-
UCT_IB_MLX5DV_SET64(create_cq_in, in, cq_umem_offset, 0);
605+
UCT_IB_MLX5DV_SET64(create_cq_in, in, cq_umem_offset, attr->umem_offset);
606606

607607
UCT_IB_MLX5DV_SET(cqc, cqctx, log_cq_size, log_cq_size);
608608
UCT_IB_MLX5DV_SET(cqc, cqctx, cqe_sz, (attr->cqe_size == 128) ? 1 : 0);

src/uct/ib/mlx5/dv/ib_mlx5_ifc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1563,7 +1563,7 @@ struct uct_ib_mlx5_create_qp_in_bits {
15631563

15641564
struct uct_ib_mlx5_qpc_bits qpc;
15651565

1566-
uint8_t reserved_at_800[0x40];
1566+
uint8_t wq_umem_offset[0x40];
15671567

15681568
uint8_t wq_umem_id[0x20];
15691569

src/uct/ib/mlx5/gdaki/gdaki.c

Lines changed: 152 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
#include <uct/cuda/base/cuda_iface.h>
1717

1818
#include <doca_log.h>
19+
#include <cuda.h>
20+
21+
22+
/* Placeholder integer value handed to doca_gpu_verbs_bridge_export_qp() for
 * arguments this endpoint has no real value for (presumably ignored by DOCA,
 * judging by the NOTUSE name - TODO confirm against the DOCA API). */
#define UCT_GDAKI_DOCA_NOTUSE 1
23+
/* Pointer-typed counterpart of UCT_GDAKI_DOCA_NOTUSE.
 * NOTE(review): the expansion is a bare cast expression; wrapping it as
 * ((void*)1) would make it safe in any expression context. */
#define UCT_GDAKI_DOCA_NOTUSEPTR (void*)1
1924

2025
typedef struct {
2126
uct_rc_iface_common_config_t super;
@@ -42,48 +47,181 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
4247
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.super.md,
4348
uct_ib_mlx5_md_t);
4449
uct_ib_iface_init_attr_t init_attr = {};
50+
uct_ib_mlx5_cq_attr_t cq_attr = {};
4551
uct_ib_mlx5_qp_attr_t qp_attr = {};
52+
uct_rc_gdaki_dev_ep_t dev_ep = {};
4653
ucs_status_t status;
54+
doca_error_t derr;
55+
size_t dev_ep_size;
56+
uct_ib_mlx5_dbrec_t dbrec;
57+
CUcontext primary_ctx;
4758

4859
UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super.super.super);
4960

50-
init_attr.cq_len[UCT_IB_DIR_TX] = iface->super.super.config.tx_qp_len *
51-
UCT_IB_MLX5_MAX_BB;
52-
init_attr.flags = UCT_IB_CQ_IGNORE_OVERRUN;
53-
status = uct_ib_mlx5_devx_create_cq(&iface->super.super.super,
54-
UCT_IB_DIR_TX, &init_attr, &self->cq, 0,
55-
0);
61+
status = UCT_CUDADRV_FUNC_LOG_ERR(
62+
cuDevicePrimaryCtxRetain(&primary_ctx, iface->cuda_dev));
5663
if (status != UCS_OK) {
5764
return status;
5865
}
5966

67+
status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(primary_ctx));
68+
if (status != UCS_OK) {
69+
goto err_ctx;
70+
}
71+
72+
init_attr.cq_len[UCT_IB_DIR_TX] = iface->super.super.config.tx_qp_len *
73+
UCT_IB_MLX5_MAX_BB;
74+
uct_ib_mlx5_cq_calc_sizes(&iface->super.super.super, UCT_IB_DIR_TX,
75+
&init_attr, 0, &cq_attr);
6076
uct_rc_iface_fill_attr(&iface->super.super, &qp_attr.super,
6177
iface->super.super.config.tx_qp_len, NULL);
78+
uct_ib_mlx5_wq_calc_sizes(&qp_attr);
79+
80+
cq_attr.flags |= UCT_IB_MLX5_CQ_IGNORE_OVERRUN;
81+
cq_attr.umem_offset = ucs_align_up_pow2(
82+
sizeof(uct_rc_gdaki_dev_ep_t) +
83+
qp_attr.max_tx * sizeof(uct_rc_gdaki_op_t),
84+
ucs_get_page_size());
85+
6286
qp_attr.mmio_mode = UCT_IB_MLX5_MMIO_MODE_DB;
6387
qp_attr.super.srq_num = 0;
64-
status = uct_ib_mlx5_devx_create_qp(&iface->super.super.super, &self->cq,
65-
&self->cq, &self->qp.super, &self->qp,
66-
&qp_attr);
88+
qp_attr.umem_offset = ucs_align_up_pow2(cq_attr.umem_offset +
89+
cq_attr.umem_len,
90+
ucs_get_page_size());
91+
92+
dev_ep_size = qp_attr.umem_offset + qp_attr.len;
93+
/*
94+
* dev_ep layout:
95+
* +---------------------+-------+---------+---------+
96+
* | counters, dbr | ops | cq buff | wq buff |
97+
* +---------------------+-------+---------+---------+
98+
*/
99+
derr = doca_gpu_mem_alloc(iface->gpu_dev, dev_ep_size, ucs_get_page_size(),
100+
DOCA_GPU_MEM_TYPE_GPU, (void**)&self->ep_gpu,
101+
NULL);
102+
if (derr != DOCA_SUCCESS) {
103+
ucs_error("doca_gpu_mem_alloc failed: %s", doca_error_get_descr(derr));
104+
status = UCS_ERR_IO_ERROR;
105+
goto err_ctx;
106+
}
107+
108+
/* TODO add dmabuf_fd support */
109+
self->umem = mlx5dv_devx_umem_reg(md->super.dev.ibv_context, self->ep_gpu,
110+
dev_ep_size, IBV_ACCESS_LOCAL_WRITE);
111+
if (self->umem == NULL) {
112+
uct_ib_check_memlock_limit_msg(md->super.dev.ibv_context,
113+
UCS_LOG_LEVEL_ERROR,
114+
"mlx5dv_devx_umem_reg(size=%zu)",
115+
dev_ep_size);
116+
status = UCS_ERR_NO_MEMORY;
117+
goto err_mem;
118+
}
119+
120+
self->cq.devx.mem.mem = self->umem;
121+
self->qp.super.devx.mem.mem = self->umem;
122+
123+
dbrec.mem_id = self->umem->umem_id;
124+
dbrec.offset = ucs_offsetof(uct_rc_gdaki_dev_ep_t, cq_dbrec);
125+
self->cq.devx.dbrec = &dbrec;
126+
status = uct_ib_mlx5_devx_create_cq_common(&iface->super.super.super,
127+
UCT_IB_DIR_TX, &cq_attr,
128+
&self->cq, 0, 0);
129+
if (status != UCS_OK) {
130+
goto err_umem;
131+
}
132+
133+
dbrec.offset = ucs_offsetof(uct_rc_gdaki_dev_ep_t, qp_dbrec);
134+
self->qp.super.devx.dbrec = &dbrec;
135+
status = uct_ib_mlx5_devx_create_qp_common(&iface->super.super.super,
136+
&self->cq, &self->cq,
137+
&self->qp.super, &self->qp,
138+
&qp_attr);
67139
if (status != UCS_OK) {
140+
goto err_cq;
141+
}
142+
143+
derr = doca_gpu_verbs_bridge_export_qp(
144+
iface->gpu_dev, self->qp.super.qp_num,
145+
UCS_PTR_BYTE_OFFSET(self->ep_gpu, qp_attr.umem_offset),
146+
qp_attr.max_tx, self->ep_gpu->qp_dbrec, self->qp.reg->addr.ptr,
147+
UCT_IB_MLX5_BF_REG_SIZE * 2, self->cq.cq_num,
148+
UCS_PTR_BYTE_OFFSET(self->ep_gpu, cq_attr.umem_offset),
149+
cq_attr.cq_size, self->ep_gpu->cq_dbrec, UCT_GDAKI_DOCA_NOTUSE,
150+
UCT_GDAKI_DOCA_NOTUSEPTR, UCT_GDAKI_DOCA_NOTUSE,
151+
UCT_GDAKI_DOCA_NOTUSEPTR, UCT_GDAKI_DOCA_NOTUSE,
152+
UCT_GDAKI_DOCA_NOTUSE, UCT_GDAKI_DOCA_NOTUSEPTR,
153+
UCT_GDAKI_DOCA_NOTUSE, UCT_GDAKI_DOCA_NOTUSEPTR, 0, &self->qp_cpu);
154+
if (derr != DOCA_SUCCESS) {
155+
ucs_error("doca_gpu_verbs_bridge_export_qp failed: %s",
156+
doca_error_get_descr(derr));
157+
status = UCS_ERR_INVALID_PARAM;
68158
goto err_qp;
69159
}
70160

161+
derr = doca_gpu_verbs_get_qp_dev(self->qp_cpu, &self->qp_gpu);
162+
if (derr != DOCA_SUCCESS) {
163+
status = UCS_ERR_INVALID_PARAM;
164+
goto err_dev_ep;
165+
}
166+
167+
dev_ep.qp = self->qp_gpu;
168+
169+
status = UCT_CUDADRV_FUNC_LOG_ERR(
170+
cuMemsetD8((CUdeviceptr)self->ep_gpu, 0, dev_ep_size));
171+
if (status != UCS_OK) {
172+
goto err_dev_ep;
173+
}
174+
175+
status = UCT_CUDADRV_FUNC_LOG_ERR(
176+
cuMemsetD8((CUdeviceptr)UCS_PTR_BYTE_OFFSET(self->ep_gpu,
177+
cq_attr.umem_offset),
178+
0xff, cq_attr.umem_len));
179+
if (status != UCS_OK) {
180+
goto err_dev_ep;
181+
}
182+
183+
status = UCT_CUDADRV_FUNC_LOG_ERR(
184+
cuMemcpyHtoD((CUdeviceptr)self->ep_gpu, &dev_ep, sizeof(dev_ep)));
185+
if (status != UCS_OK) {
186+
goto err_dev_ep;
187+
}
188+
189+
(void)UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPopCurrent(NULL));
71190
return UCS_OK;
72191

192+
err_dev_ep:
193+
doca_gpu_verbs_unexport_qp(iface->gpu_dev, self->qp_cpu);
73194
err_qp:
74-
uct_ib_mlx5_devx_destroy_cq(md, &self->cq);
195+
uct_ib_mlx5_devx_destroy_qp_common(&self->qp.super);
196+
err_cq:
197+
uct_ib_mlx5_devx_destroy_cq_common(&self->cq);
198+
err_umem:
199+
mlx5dv_devx_umem_dereg(self->umem);
200+
err_mem:
201+
doca_gpu_mem_free(iface->gpu_dev, self->ep_gpu);
202+
err_ctx:
203+
(void)UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPopCurrent(NULL));
204+
(void)UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRelease(iface->cuda_dev));
75205
return status;
76206
}
77207

78208
/*
 * Endpoint destructor.
 *
 * Releases the endpoint resources in the reverse order of their creation,
 * mirroring the error-unwind labels of the endpoint constructor:
 * unexport the QP from DOCA, destroy the DEVX QP and CQ, deregister the
 * umem, free the GPU allocation and finally drop the reference on the CUDA
 * primary context that the constructor retained.
 *
 * A failure to unexport the QP is logged and the remaining teardown steps
 * still run, so the other resources are not leaked.
 */
static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_ep_t)
79209
{
80210
uct_rc_gdaki_iface_t *iface = ucs_derived_of(self->super.super.iface,
81211
uct_rc_gdaki_iface_t);
82-
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.super.md,
83-
uct_ib_mlx5_md_t);
212+
doca_error_t derr;
213+
214+
derr = doca_gpu_verbs_unexport_qp(iface->gpu_dev, self->qp_cpu);
215+
if (derr != DOCA_SUCCESS) {
216+
/* Message names the function that was actually called */
ucs_error("doca_gpu_verbs_unexport_qp failed: %s",
217+
doca_error_get_descr(derr));
218+
}
84219

85-
uct_ib_mlx5_devx_destroy_qp(md, &self->qp.super);
86-
uct_ib_mlx5_devx_destroy_cq(md, &self->cq);
220+
/* QP and CQ buffers live inside the shared umem, so destroy them first */
uct_ib_mlx5_devx_destroy_qp_common(&self->qp.super);
221+
uct_ib_mlx5_devx_destroy_cq_common(&self->cq);
222+
mlx5dv_devx_umem_dereg(self->umem);
223+
doca_gpu_mem_free(iface->gpu_dev, self->ep_gpu);
224+
/* Balances cuDevicePrimaryCtxRetain() done when the endpoint was created */
(void)UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRelease(iface->cuda_dev));
87225
}
88226

89227
UCS_CLASS_DEFINE(uct_rc_gdaki_ep_t, uct_base_ep_t);

src/uct/ib/mlx5/gdaki/gdaki.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
* See file LICENSE for terms.
44
*/
55

6-
#ifndef UCT_GDAKI_IFACE_H
7-
#define UCT_GDAKI_IFACE_H
6+
#ifndef UCT_GDAKI_H
7+
#define UCT_GDAKI_H
88

99
#include <uct/ib/mlx5/rc/rc_mlx5_common.h>
1010
#include <uct/base/uct_iface.h>
1111

1212
#include <cuda.h>
1313
#include <doca_gpunetio.h>
1414

15+
#include "gdaki_dev.h"
1516

1617
#define UCT_DEV_CUDA_NAME "cuda"
1718
#define UCT_DEV_CUDA_NAME_LEN 4
@@ -25,9 +26,13 @@ typedef struct uct_rc_gdaki_iface {
2526

2627

2728
/* Host-side state of a GDAKI endpoint whose QP/CQ are exported to the GPU */
typedef struct uct_rc_gdaki_ep {
28-
uct_base_ep_t super;
29-
uct_ib_mlx5_cq_t cq;
30-
uct_ib_mlx5_txwq_t qp;
29+
uct_base_ep_t super;
30+
uct_ib_mlx5_cq_t cq; /* CQ created for UCT_IB_DIR_TX on top of umem */
31+
uct_ib_mlx5_txwq_t qp; /* QP / TX work queue created on top of umem */
32+
struct mlx5dv_devx_umem *umem; /* DEVX umem registered over the whole ep_gpu allocation */
33+
struct doca_gpu_verbs_qp *qp_cpu; /* Handle produced by doca_gpu_verbs_bridge_export_qp() */
34+
struct doca_gpu_dev_verbs_qp *qp_gpu; /* Obtained from qp_cpu by doca_gpu_verbs_get_qp_dev() */
35+
uct_rc_gdaki_dev_ep_t *ep_gpu; /* doca_gpu_mem_alloc() buffer: dev_ep header, ops, CQ buffer, WQ buffer */
3136
} uct_rc_gdaki_ep_t;
3237

3338
#endif /* UCT_GDAKI_H */

src/uct/ib/mlx5/gdaki/gdaki_dev.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
* See file LICENSE for terms.
4+
*/
5+
6+
#ifndef UCT_GDAKI_DEV_H
7+
#define UCT_GDAKI_DEV_H
8+
9+
/*
 * Per-operation entry of the endpoint "ops" array. It has no fields yet; the
 * host still accounts for max_tx entries of this type when it computes the
 * offset of the CQ buffer inside the endpoint GPU allocation.
 * NOTE(review): an empty struct is a GNU extension in C (size 0) but has
 * size 1 in C++ - confirm both sides agree if this header is also compiled
 * as C++/CUDA.
 */
typedef struct {
10+
/* TODO add uct completion */
11+
} uct_rc_gdaki_op_t;
12+
13+
14+
/*
 * Header placed at the start of the endpoint GPU allocation. The host builds
 * a copy with only "qp" set and copies it to the beginning of that
 * allocation; the ops array, CQ buffer and WQ buffer follow it.
 */
typedef struct {
15+
struct doca_gpu_dev_verbs_qp *qp; /* Filled by the host from doca_gpu_verbs_get_qp_dev() */
16+
uint32_t cq_dbrec[2]; /* Its offset in the umem is given to DEVX as the CQ doorbell record */
17+
uint32_t qp_dbrec[2]; /* Its offset in the umem is given to DEVX as the QP doorbell record */
18+
uct_rc_gdaki_op_t ops[0]; /* Trailing array; the host reserves max_tx entries after the header */
19+
} uct_rc_gdaki_dev_ep_t;
20+
21+
#endif /* UCT_GDAKI_DEV_H */

src/uct/ib/mlx5/ib_mlx5.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,7 @@ typedef struct uct_ib_mlx5_cq_attr {
585585
unsigned cq_size;
586586
unsigned cqe_size;
587587
size_t umem_len;
588+
size_t umem_offset;
588589
unsigned flags;
589590
} uct_ib_mlx5_cq_attr_t;
590591

@@ -643,6 +644,7 @@ typedef struct uct_ib_mlx5_qp_attr {
643644
uint8_t log_num_dci_stream_channels;
644645
unsigned max_tx;
645646
unsigned len;
647+
size_t umem_offset;
646648
} uct_ib_mlx5_qp_attr_t;
647649

648650

0 commit comments

Comments
 (0)