Skip to content

Commit 655feb5

Browse files
UCT/GDAKI: Remove DOCA runtime dependency
1 parent e319427 commit 655feb5

File tree

6 files changed

+160
-173
lines changed

6 files changed

+160
-173
lines changed

src/uct/ib/mlx5/gdaki/Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ libuct_ib_mlx5_gda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS) \
1111
libuct_ib_mlx5_gda_la_CFLAGS = $(BASE_CFLAGS) $(LT_CFLAGS) $(CUDA_CFLAGS) \
1212
$(GPUNETIO_CFLAGS)
1313
libuct_ib_mlx5_gda_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(CUDA_LDFLAGS) \
14-
$(GPUNETIO_LDFLAGS) -version-info $(SOVERSION)
14+
-version-info $(SOVERSION)
1515
libuct_ib_mlx5_gda_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \
1616
$(top_builddir)/src/uct/libuct.la \
1717
$(top_builddir)/src/uct/ib/libuct_ib.la \
1818
$(top_builddir)/src/uct/ib/mlx5/libuct_ib_mlx5.la \
1919
$(top_builddir)/src/uct/cuda/libuct_cuda.la \
20-
$(CUDA_LIBS) $(GPUNETIO_LIBS)
20+
$(CUDA_LIBS)
2121
libuct_ib_mlx5_gda_ladir = $(includedir)/uct/ib/mlx5/gdaki
2222

2323
libuct_ib_mlx5_gda_la_SOURCES = \

src/uct/ib/mlx5/gdaki/configure.m4

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,42 +14,23 @@ AS_IF([test "x$cuda_happy" = "xyes"],
1414
[
1515
# Default value
1616
GPUNETIO_CFLAGS=""
17-
GPUNETIO_LDFLAGS=""
18-
GPUNETIO_LIBS="-ldoca_gpunetio"
1917
AS_IF([test "x$with_doca_gpunetio" != "xno"],
2018
[
2119
AS_IF([test "x$with_doca_gpunetio" = "xguess"],
2220
[
2321
AS_IF([$PKG_CONFIG --exists doca-gpunetio],
24-
[GPUNETIO_CFLAGS=$(pkg-config --cflags doca-gpunetio)
25-
GPUNETIO_LDFLAGS=$(pkg-config --libs-only-L doca-gpunetio)
26-
GPUNETIO_LIBS=$(pkg-config --libs-only-l doca-gpunetio)])
22+
[GPUNETIO_CFLAGS=$(pkg-config --cflags doca-gpunetio)])
2723
],
2824
[
2925
GPUNETIO_CFLAGS="-I${with_doca_gpunetio}/include"
30-
for doca_libdir in lib/x86_64-linux-gnu lib64; do
31-
if test -d "${with_doca_gpunetio}/${doca_libdir}"; then
32-
GPUNETIO_LDFLAGS="$GPUNETIO_LDFLAGS -L${with_doca_gpunetio}/${doca_libdir} "
33-
# Add rpath-link to search for doca_gpunetio dependencies
34-
GPUNETIO_LDFLAGS="$GPUNETIO_LDFLAGS -Wl,-rpath-link,${with_doca_gpunetio}/${doca_libdir}"
35-
fi
36-
done
37-
# Add CUDA lib dirs to rpath-link for gpunetio
38-
for cuda_libdir in $CUDA_LIB_DIRS; do
39-
GPUNETIO_LDFLAGS="$GPUNETIO_LDFLAGS -Wl,-rpath-link,${cuda_libdir}"
40-
done
4126
]) # "x$with_doca_gpunetio" != "xguess"
4227
]) # "x$with_doca_gpunetio" != "xno"
4328
4429
save_CPPFLAGS="$CPPFLAGS"
45-
save_LDFLAGS="$LDFLAGS"
4630
CPPFLAGS="$CPPFLAGS $CUDA_CFLAGS $GPUNETIO_CFLAGS"
47-
LDFLAGS="$LDFLAGS $CUDA_LDFLAGS $GPUNETIO_LDFLAGS"
4831
4932
gpunetio_happy=yes
5033
AC_CHECK_HEADERS([doca_gpunetio.h], [], [gpunetio_happy=no])
51-
AC_CHECK_LIB([doca_gpunetio], [doca_gpu_verbs_bridge_export_qp],
52-
[true], [gpunetio_happy=no], [$GPUNETIO_LIBS])
5334
5435
CPPFLAGS="$save_CPPFLAGS"
5536
LDFLAGS="$save_LDFLAGS"
@@ -60,8 +41,6 @@ AS_IF([test "x$gpunetio_happy" = "xyes"],
6041
[
6142
uct_ib_mlx5_modules="${uct_ib_mlx5_modules}:gda"
6243
AC_SUBST(GPUNETIO_CFLAGS)
63-
AC_SUBST(GPUNETIO_LDFLAGS)
64-
AC_SUBST(GPUNETIO_LIBS)
6544
],
6645
[
6746
# gpunetio was requested but not found

src/uct/ib/mlx5/gdaki/gdaki.c

Lines changed: 58 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,9 @@
1515
#include <uct/ib/mlx5/rc/rc_mlx5.h>
1616
#include <uct/cuda/base/cuda_iface.h>
1717

18-
#include <doca_log.h>
1918
#include <cuda.h>
2019

2120

22-
#define UCT_GDAKI_DOCA_NOTUSE 1
23-
#define UCT_GDAKI_DOCA_NOTUSEPTR (void*)1
24-
2521
typedef struct {
2622
uct_rc_iface_common_config_t super;
2723
uct_rc_mlx5_iface_common_config_t mlx5;
@@ -40,6 +36,32 @@ ucs_config_field_t uct_rc_gdaki_iface_config_table[] = {
4036
};
4137

4238

39+
ucs_status_t
40+
uct_rc_gdaki_alloc(size_t size, size_t align, void **p_buf, CUdeviceptr *p_orig)
41+
{
42+
unsigned int flag = 1;
43+
ucs_status_t status;
44+
45+
status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc(p_orig, size + align - 1));
46+
if (status != UCS_OK) {
47+
return status;
48+
}
49+
50+
*p_buf = (void*)ucs_align_up_pow2_ptr(*p_orig, align);
51+
status = UCT_CUDADRV_FUNC_LOG_ERR(
52+
cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
53+
(CUdeviceptr)*p_buf));
54+
if (status != UCS_OK) {
55+
goto err;
56+
}
57+
58+
return UCS_OK;
59+
60+
err:
61+
cuMemFree(*p_orig);
62+
return status;
63+
}
64+
4365
static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
4466
{
4567
uct_rc_gdaki_iface_t *iface = ucs_derived_of(params->iface,
@@ -51,7 +73,6 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
5173
uct_ib_mlx5_qp_attr_t qp_attr = {};
5274
uct_rc_gdaki_dev_ep_t dev_ep = {};
5375
ucs_status_t status;
54-
doca_error_t derr;
5576
size_t dev_ep_size;
5677
uct_ib_mlx5_dbrec_t dbrec;
5778

@@ -89,12 +110,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
89110
* | counters, dbr | ops | cq buff | wq buff |
90111
* +---------------------+-------+---------+---------+
91112
*/
92-
derr = doca_gpu_mem_alloc(iface->gpu_dev, dev_ep_size, ucs_get_page_size(),
93-
DOCA_GPU_MEM_TYPE_GPU, (void**)&self->ep_gpu,
94-
NULL);
95-
if (derr != DOCA_SUCCESS) {
96-
ucs_error("doca_gpu_mem_alloc failed: %s", doca_error_get_descr(derr));
97-
status = UCS_ERR_IO_ERROR;
113+
status = uct_rc_gdaki_alloc(dev_ep_size, ucs_get_page_size(),
114+
(void**)&self->ep_gpu, &self->ep_raw);
115+
if (status != UCS_OK) {
98116
goto err_ctx;
99117
}
100118

@@ -133,34 +151,30 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
133151
goto err_cq;
134152
}
135153

136-
derr = doca_gpu_verbs_bridge_export_qp(
137-
iface->gpu_dev, self->qp.super.qp_num,
138-
UCS_PTR_BYTE_OFFSET(self->ep_gpu, qp_attr.umem_offset),
139-
qp_attr.max_tx, self->ep_gpu->qp_dbrec, self->qp.reg->addr.ptr,
140-
UCT_IB_MLX5_BF_REG_SIZE * 2, self->cq.cq_num,
141-
UCS_PTR_BYTE_OFFSET(self->ep_gpu, cq_attr.umem_offset),
142-
cq_attr.cq_size, self->ep_gpu->cq_dbrec, UCT_GDAKI_DOCA_NOTUSE,
143-
UCT_GDAKI_DOCA_NOTUSEPTR, UCT_GDAKI_DOCA_NOTUSE,
144-
UCT_GDAKI_DOCA_NOTUSEPTR, UCT_GDAKI_DOCA_NOTUSE,
145-
UCT_GDAKI_DOCA_NOTUSE, UCT_GDAKI_DOCA_NOTUSEPTR,
146-
UCT_GDAKI_DOCA_NOTUSE, UCT_GDAKI_DOCA_NOTUSEPTR, 0, &self->qp_cpu);
147-
if (derr != DOCA_SUCCESS) {
148-
ucs_error("doca_gpu_verbs_bridge_export_qp failed: %s",
149-
doca_error_get_descr(derr));
150-
status = UCS_ERR_INVALID_PARAM;
151-
goto err_qp;
152-
}
153-
154-
derr = doca_gpu_verbs_get_qp_dev(self->qp_cpu, &self->qp_gpu);
155-
if (derr != DOCA_SUCCESS) {
156-
status = UCS_ERR_INVALID_PARAM;
154+
(void)cuMemHostRegister(self->qp.reg->addr.ptr, UCT_IB_MLX5_BF_REG_SIZE * 2,
155+
CU_MEMHOSTREGISTER_PORTABLE |
156+
CU_MEMHOSTREGISTER_DEVICEMAP |
157+
CU_MEMHOSTREGISTER_IOMEMORY);
158+
159+
status = UCT_CUDADRV_FUNC_LOG_ERR(
160+
cuMemHostGetDevicePointer((CUdeviceptr*)&self->sq_db,
161+
self->qp.reg->addr.ptr, 0));
162+
if (status != UCS_OK) {
157163
goto err_dev_ep;
158164
}
159165

160-
dev_ep.qp = self->qp_gpu;
161166
dev_ep.atomic_va = iface->atomic_buff;
162167
dev_ep.atomic_lkey = htonl(iface->atomic_mr->lkey);
163168

169+
dev_ep.sq_num = self->qp.super.qp_num;
170+
dev_ep.sq_wqe_daddr = UCS_PTR_BYTE_OFFSET(self->ep_gpu,
171+
qp_attr.umem_offset);
172+
dev_ep.sq_wqe_num = qp_attr.max_tx;
173+
dev_ep.sq_dbrec = &self->ep_gpu->qp_dbrec[MLX5_SND_DBR];
174+
dev_ep.cqe_daddr = UCS_PTR_BYTE_OFFSET(self->ep_gpu, cq_attr.umem_offset);
175+
dev_ep.cqe_num = cq_attr.cq_size;
176+
dev_ep.sq_db = self->sq_db;
177+
164178
status = UCT_CUDADRV_FUNC_LOG_ERR(
165179
cuMemsetD8((CUdeviceptr)self->ep_gpu, 0, dev_ep_size));
166180
if (status != UCS_OK) {
@@ -185,36 +199,26 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
185199
return UCS_OK;
186200

187201
err_dev_ep:
188-
doca_gpu_verbs_unexport_qp(iface->gpu_dev, self->qp_cpu);
189-
err_qp:
202+
(void)cuMemHostUnregister(dev_ep.sq_db);
190203
uct_ib_mlx5_devx_destroy_qp_common(&self->qp.super);
191204
err_cq:
192205
uct_ib_mlx5_devx_destroy_cq_common(&self->cq);
193206
err_umem:
194207
mlx5dv_devx_umem_dereg(self->umem);
195208
err_mem:
196-
doca_gpu_mem_free(iface->gpu_dev, self->ep_gpu);
209+
cuMemFree(self->ep_raw);
197210
err_ctx:
198211
(void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
199212
return status;
200213
}
201214

202215
static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_ep_t)
203216
{
204-
uct_rc_gdaki_iface_t *iface = ucs_derived_of(self->super.super.iface,
205-
uct_rc_gdaki_iface_t);
206-
doca_error_t derr;
207-
208-
derr = doca_gpu_verbs_unexport_qp(iface->gpu_dev, self->qp_cpu);
209-
if (derr != DOCA_SUCCESS) {
210-
ucs_warn("doca_gpu_rdma_verbs_unexport_qp failed: %s",
211-
doca_error_get_descr(derr));
212-
}
213-
217+
(void)cuMemHostUnregister(self->sq_db);
214218
uct_ib_mlx5_devx_destroy_qp_common(&self->qp.super);
215219
uct_ib_mlx5_devx_destroy_cq_common(&self->cq);
216220
mlx5dv_devx_umem_dereg(self->umem);
217-
doca_gpu_mem_free(iface->gpu_dev, self->ep_gpu);
221+
cuMemFree(self->ep_raw);
218222
}
219223

220224
UCS_CLASS_DEFINE(uct_rc_gdaki_ep_t, uct_base_ep_t);
@@ -456,7 +460,6 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
456460
char *gpu_name, *ib_name;
457461
char pci_addr[UCS_SYS_BDF_NAME_MAX];
458462
ucs_status_t status;
459-
doca_error_t derr;
460463
int cuda_id;
461464

462465
status = uct_rc_mlx5_dp_ordering_ooo_init(md, &self->super,
@@ -493,7 +496,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
493496

494497
status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&self->cuda_dev, cuda_id));
495498
if (status != UCS_OK) {
496-
goto err_doca;
499+
return status;
497500
}
498501

499502
status = UCT_CUDADRV_FUNC_LOG_ERR(
@@ -507,24 +510,12 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
507510
goto err_ctx_release;
508511
}
509512

510-
derr = doca_gpu_create(pci_addr, &self->gpu_dev);
511-
if (derr != DOCA_SUCCESS) {
512-
status = UCS_ERR_IO_ERROR;
513-
ucs_error("doca_gpu_create failed: %s %s", doca_error_get_descr(derr),
514-
pci_addr);
513+
status = uct_rc_gdaki_alloc(sizeof(uint64_t), sizeof(uint64_t),
514+
(void**)&self->atomic_buff, &self->atomic_raw);
515+
if (status != UCS_OK) {
515516
goto err_ctx;
516517
}
517518

518-
derr = doca_gpu_mem_alloc(self->gpu_dev, sizeof(uint64_t), sizeof(uint64_t),
519-
DOCA_GPU_MEM_TYPE_GPU, (void**)&self->atomic_buff,
520-
NULL);
521-
if (derr != DOCA_SUCCESS) {
522-
ucs_error("doca_gpu_mem_alloc failed: %s", doca_error_get_descr(derr));
523-
status = UCS_ERR_IO_ERROR;
524-
goto err_doca;
525-
}
526-
527-
528519
self->atomic_mr = ibv_reg_mr(md->super.pd, self->atomic_buff,
529520
sizeof(uint64_t),
530521
IBV_ACCESS_LOCAL_WRITE |
@@ -540,9 +531,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
540531
return UCS_OK;
541532

542533
err_atomic:
543-
doca_gpu_mem_free(self->gpu_dev, self->atomic_buff);
544-
err_doca:
545-
doca_gpu_destroy(self->gpu_dev);
534+
cuMemFree(self->atomic_raw);
546535
err_ctx:
547536
(void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
548537
err_ctx_release:
@@ -553,8 +542,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
553542
static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_iface_t)
554543
{
555544
ibv_dereg_mr(self->atomic_mr);
556-
doca_gpu_mem_free(self->gpu_dev, self->atomic_buff);
557-
doca_gpu_destroy(self->gpu_dev);
545+
cuMemFree(self->atomic_raw);
558546
(void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(self->cuda_dev));
559547
}
560548

@@ -693,22 +681,4 @@ UCT_TL_DEFINE_ENTRY(&uct_ib_component, rc_gda, uct_gdaki_query_tl_devices,
693681
uct_rc_gdaki_iface_config_table,
694682
uct_rc_gdaki_iface_config_t);
695683

696-
static void uct_ib_doca_init(void)
697-
{
698-
struct doca_log_backend *sdk_log;
699-
doca_error_t derr;
700-
701-
derr = doca_log_level_set_global_sdk_limit(DOCA_LOG_LEVEL_ERROR);
702-
if (derr != DOCA_SUCCESS) {
703-
ucs_error("doca_log_level_set_global_sdk_limit failed: %d\n", derr);
704-
return;
705-
}
706-
707-
derr = doca_log_backend_create_with_file_sdk(stderr, &sdk_log);
708-
if (derr != DOCA_SUCCESS) {
709-
ucs_error("doca_log_backend_create_with_file_sdk failed: %d\n", derr);
710-
return;
711-
}
712-
}
713-
714-
UCT_TL_INIT(&uct_ib_component, rc_gda, ctor, uct_ib_doca_init(), )
684+
UCT_TL_INIT(&uct_ib_component, rc_gda, ctor, , )

0 commit comments

Comments
 (0)