1515#include <uct/ib/mlx5/rc/rc_mlx5.h>
1616#include <uct/cuda/base/cuda_iface.h>
1717
18- #include <doca_log.h>
1918#include <cuda.h>
2019
2120
22- #define UCT_GDAKI_DOCA_NOTUSE 1
23- #define UCT_GDAKI_DOCA_NOTUSEPTR (void*)1
24-
2521typedef struct {
2622 uct_rc_iface_common_config_t super ;
2723 uct_rc_mlx5_iface_common_config_t mlx5 ;
@@ -40,6 +36,32 @@ ucs_config_field_t uct_rc_gdaki_iface_config_table[] = {
4036};
4137
4238
39+ ucs_status_t
40+ uct_rc_gdaki_alloc (size_t size , size_t align , void * * p_buf , CUdeviceptr * p_orig )
41+ {
42+ unsigned int flag = 1 ;
43+ ucs_status_t status ;
44+
45+ status = UCT_CUDADRV_FUNC_LOG_ERR (cuMemAlloc (p_orig , size + align - 1 ));
46+ if (status != UCS_OK ) {
47+ return status ;
48+ }
49+
50+ * p_buf = (void * )ucs_align_up_pow2_ptr (* p_orig , align );
51+ status = UCT_CUDADRV_FUNC_LOG_ERR (
52+ cuPointerSetAttribute (& flag , CU_POINTER_ATTRIBUTE_SYNC_MEMOPS ,
53+ (CUdeviceptr )* p_buf ));
54+ if (status != UCS_OK ) {
55+ goto err ;
56+ }
57+
58+ return UCS_OK ;
59+
60+ err :
61+ cuMemFree (* p_orig );
62+ return status ;
63+ }
64+
4365static UCS_CLASS_INIT_FUNC (uct_rc_gdaki_ep_t , const uct_ep_params_t * params )
4466{
4567 uct_rc_gdaki_iface_t * iface = ucs_derived_of (params -> iface ,
@@ -51,7 +73,6 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
5173 uct_ib_mlx5_qp_attr_t qp_attr = {};
5274 uct_rc_gdaki_dev_ep_t dev_ep = {};
5375 ucs_status_t status ;
54- doca_error_t derr ;
5576 size_t dev_ep_size ;
5677 uct_ib_mlx5_dbrec_t dbrec ;
5778
@@ -89,12 +110,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
89110 * | counters, dbr | ops | cq buff | wq buff |
90111 * +---------------------+-------+---------+---------+
91112 */
92- derr = doca_gpu_mem_alloc (iface -> gpu_dev , dev_ep_size , ucs_get_page_size (),
93- DOCA_GPU_MEM_TYPE_GPU , (void * * )& self -> ep_gpu ,
94- NULL );
95- if (derr != DOCA_SUCCESS ) {
96- ucs_error ("doca_gpu_mem_alloc failed: %s" , doca_error_get_descr (derr ));
97- status = UCS_ERR_IO_ERROR ;
113+ status = uct_rc_gdaki_alloc (dev_ep_size , ucs_get_page_size (),
114+ (void * * )& self -> ep_gpu , & self -> ep_raw );
115+ if (status != UCS_OK ) {
98116 goto err_ctx ;
99117 }
100118
@@ -133,34 +151,30 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
133151 goto err_cq ;
134152 }
135153
136- derr = doca_gpu_verbs_bridge_export_qp (
137- iface -> gpu_dev , self -> qp .super .qp_num ,
138- UCS_PTR_BYTE_OFFSET (self -> ep_gpu , qp_attr .umem_offset ),
139- qp_attr .max_tx , self -> ep_gpu -> qp_dbrec , self -> qp .reg -> addr .ptr ,
140- UCT_IB_MLX5_BF_REG_SIZE * 2 , self -> cq .cq_num ,
141- UCS_PTR_BYTE_OFFSET (self -> ep_gpu , cq_attr .umem_offset ),
142- cq_attr .cq_size , self -> ep_gpu -> cq_dbrec , UCT_GDAKI_DOCA_NOTUSE ,
143- UCT_GDAKI_DOCA_NOTUSEPTR , UCT_GDAKI_DOCA_NOTUSE ,
144- UCT_GDAKI_DOCA_NOTUSEPTR , UCT_GDAKI_DOCA_NOTUSE ,
145- UCT_GDAKI_DOCA_NOTUSE , UCT_GDAKI_DOCA_NOTUSEPTR ,
146- UCT_GDAKI_DOCA_NOTUSE , UCT_GDAKI_DOCA_NOTUSEPTR , 0 , & self -> qp_cpu );
147- if (derr != DOCA_SUCCESS ) {
148- ucs_error ("doca_gpu_verbs_bridge_export_qp failed: %s" ,
149- doca_error_get_descr (derr ));
150- status = UCS_ERR_INVALID_PARAM ;
151- goto err_qp ;
152- }
153-
154- derr = doca_gpu_verbs_get_qp_dev (self -> qp_cpu , & self -> qp_gpu );
155- if (derr != DOCA_SUCCESS ) {
156- status = UCS_ERR_INVALID_PARAM ;
154+ (void )cuMemHostRegister (self -> qp .reg -> addr .ptr , UCT_IB_MLX5_BF_REG_SIZE * 2 ,
155+ CU_MEMHOSTREGISTER_PORTABLE |
156+ CU_MEMHOSTREGISTER_DEVICEMAP |
157+ CU_MEMHOSTREGISTER_IOMEMORY );
158+
159+ status = UCT_CUDADRV_FUNC_LOG_ERR (
160+ cuMemHostGetDevicePointer ((CUdeviceptr * )& self -> sq_db ,
161+ self -> qp .reg -> addr .ptr , 0 ));
162+ if (status != UCS_OK ) {
157163 goto err_dev_ep ;
158164 }
159165
160- dev_ep .qp = self -> qp_gpu ;
161166 dev_ep .atomic_va = iface -> atomic_buff ;
162167 dev_ep .atomic_lkey = htonl (iface -> atomic_mr -> lkey );
163168
169+ dev_ep .sq_num = self -> qp .super .qp_num ;
170+ dev_ep .sq_wqe_daddr = UCS_PTR_BYTE_OFFSET (self -> ep_gpu ,
171+ qp_attr .umem_offset );
172+ dev_ep .sq_wqe_num = qp_attr .max_tx ;
173+ dev_ep .sq_dbrec = & self -> ep_gpu -> qp_dbrec [MLX5_SND_DBR ];
174+ dev_ep .cqe_daddr = UCS_PTR_BYTE_OFFSET (self -> ep_gpu , cq_attr .umem_offset );
175+ dev_ep .cqe_num = cq_attr .cq_size ;
176+ dev_ep .sq_db = self -> sq_db ;
177+
164178 status = UCT_CUDADRV_FUNC_LOG_ERR (
165179 cuMemsetD8 ((CUdeviceptr )self -> ep_gpu , 0 , dev_ep_size ));
166180 if (status != UCS_OK ) {
@@ -185,36 +199,26 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_ep_t, const uct_ep_params_t *params)
185199 return UCS_OK ;
186200
187201err_dev_ep :
188- doca_gpu_verbs_unexport_qp (iface -> gpu_dev , self -> qp_cpu );
189- err_qp :
202+ (void )cuMemHostUnregister (dev_ep .sq_db );
190203 uct_ib_mlx5_devx_destroy_qp_common (& self -> qp .super );
191204err_cq :
192205 uct_ib_mlx5_devx_destroy_cq_common (& self -> cq );
193206err_umem :
194207 mlx5dv_devx_umem_dereg (self -> umem );
195208err_mem :
196- doca_gpu_mem_free ( iface -> gpu_dev , self -> ep_gpu );
209+ cuMemFree ( self -> ep_raw );
197210err_ctx :
198211 (void )UCT_CUDADRV_FUNC_LOG_WARN (cuCtxPopCurrent (NULL ));
199212 return status ;
200213}
201214
202215static UCS_CLASS_CLEANUP_FUNC (uct_rc_gdaki_ep_t )
203216{
204- uct_rc_gdaki_iface_t * iface = ucs_derived_of (self -> super .super .iface ,
205- uct_rc_gdaki_iface_t );
206- doca_error_t derr ;
207-
208- derr = doca_gpu_verbs_unexport_qp (iface -> gpu_dev , self -> qp_cpu );
209- if (derr != DOCA_SUCCESS ) {
210- ucs_warn ("doca_gpu_rdma_verbs_unexport_qp failed: %s" ,
211- doca_error_get_descr (derr ));
212- }
213-
217+ (void )cuMemHostUnregister (self -> sq_db );
214218 uct_ib_mlx5_devx_destroy_qp_common (& self -> qp .super );
215219 uct_ib_mlx5_devx_destroy_cq_common (& self -> cq );
216220 mlx5dv_devx_umem_dereg (self -> umem );
217- doca_gpu_mem_free ( iface -> gpu_dev , self -> ep_gpu );
221+ cuMemFree ( self -> ep_raw );
218222}
219223
220224UCS_CLASS_DEFINE (uct_rc_gdaki_ep_t , uct_base_ep_t );
@@ -456,7 +460,6 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
456460 char * gpu_name , * ib_name ;
457461 char pci_addr [UCS_SYS_BDF_NAME_MAX ];
458462 ucs_status_t status ;
459- doca_error_t derr ;
460463 int cuda_id ;
461464
462465 status = uct_rc_mlx5_dp_ordering_ooo_init (md , & self -> super ,
@@ -493,7 +496,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
493496
494497 status = UCT_CUDADRV_FUNC_LOG_ERR (cuDeviceGet (& self -> cuda_dev , cuda_id ));
495498 if (status != UCS_OK ) {
496- goto err_doca ;
499+ return status ;
497500 }
498501
499502 status = UCT_CUDADRV_FUNC_LOG_ERR (
@@ -507,24 +510,12 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
507510 goto err_ctx_release ;
508511 }
509512
510- derr = doca_gpu_create (pci_addr , & self -> gpu_dev );
511- if (derr != DOCA_SUCCESS ) {
512- status = UCS_ERR_IO_ERROR ;
513- ucs_error ("doca_gpu_create failed: %s %s" , doca_error_get_descr (derr ),
514- pci_addr );
513+ status = uct_rc_gdaki_alloc (sizeof (uint64_t ), sizeof (uint64_t ),
514+ (void * * )& self -> atomic_buff , & self -> atomic_raw );
515+ if (status != UCS_OK ) {
515516 goto err_ctx ;
516517 }
517518
518- derr = doca_gpu_mem_alloc (self -> gpu_dev , sizeof (uint64_t ), sizeof (uint64_t ),
519- DOCA_GPU_MEM_TYPE_GPU , (void * * )& self -> atomic_buff ,
520- NULL );
521- if (derr != DOCA_SUCCESS ) {
522- ucs_error ("doca_gpu_mem_alloc failed: %s" , doca_error_get_descr (derr ));
523- status = UCS_ERR_IO_ERROR ;
524- goto err_doca ;
525- }
526-
527-
528519 self -> atomic_mr = ibv_reg_mr (md -> super .pd , self -> atomic_buff ,
529520 sizeof (uint64_t ),
530521 IBV_ACCESS_LOCAL_WRITE |
@@ -540,9 +531,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
540531 return UCS_OK ;
541532
542533err_atomic :
543- doca_gpu_mem_free (self -> gpu_dev , self -> atomic_buff );
544- err_doca :
545- doca_gpu_destroy (self -> gpu_dev );
534+ cuMemFree (self -> atomic_raw );
546535err_ctx :
547536 (void )UCT_CUDADRV_FUNC_LOG_WARN (cuCtxPopCurrent (NULL ));
548537err_ctx_release :
@@ -553,8 +542,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
553542static UCS_CLASS_CLEANUP_FUNC (uct_rc_gdaki_iface_t )
554543{
555544 ibv_dereg_mr (self -> atomic_mr );
556- doca_gpu_mem_free (self -> gpu_dev , self -> atomic_buff );
557- doca_gpu_destroy (self -> gpu_dev );
545+ cuMemFree (self -> atomic_raw );
558546 (void )UCT_CUDADRV_FUNC_LOG_WARN (cuDevicePrimaryCtxRelease (self -> cuda_dev ));
559547}
560548
@@ -693,22 +681,4 @@ UCT_TL_DEFINE_ENTRY(&uct_ib_component, rc_gda, uct_gdaki_query_tl_devices,
693681 uct_rc_gdaki_iface_config_table ,
694682 uct_rc_gdaki_iface_config_t );
695683
696- static void uct_ib_doca_init (void )
697- {
698- struct doca_log_backend * sdk_log ;
699- doca_error_t derr ;
700-
701- derr = doca_log_level_set_global_sdk_limit (DOCA_LOG_LEVEL_ERROR );
702- if (derr != DOCA_SUCCESS ) {
703- ucs_error ("doca_log_level_set_global_sdk_limit failed: %d\n" , derr );
704- return ;
705- }
706-
707- derr = doca_log_backend_create_with_file_sdk (stderr , & sdk_log );
708- if (derr != DOCA_SUCCESS ) {
709- ucs_error ("doca_log_backend_create_with_file_sdk failed: %d\n" , derr );
710- return ;
711- }
712- }
713-
714- UCT_TL_INIT (& uct_ib_component , rc_gda , ctor , uct_ib_doca_init (), )
684+ UCT_TL_INIT (& uct_ib_component , rc_gda , ctor , , )
0 commit comments