@@ -194,7 +194,8 @@ static void uct_cuda_ipc_cache_purge(uct_cuda_ipc_cache_t *cache)
194194
195195static ucs_status_t
196196uct_cuda_ipc_open_memhandle_legacy (CUipcMemHandle memh , CUdevice cu_dev ,
197- CUdeviceptr * mapped_addr )
197+ CUdeviceptr * mapped_addr ,
198+ ucs_log_level_t log_level )
198199{
199200 CUresult cuerr ;
200201 ucs_status_t status ;
@@ -207,8 +208,8 @@ uct_cuda_ipc_open_memhandle_legacy(CUipcMemHandle memh, CUdevice cu_dev,
207208 cuerr = cuIpcOpenMemHandle (mapped_addr , memh ,
208209 CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS );
209210 if (cuerr != CUDA_SUCCESS ) {
210- ucs_debug ( "cuIpcOpenMemHandle() failed: %s" ,
211- uct_cuda_base_cu_get_error_string (cuerr ));
211+ ucs_log ( log_level , "cuIpcOpenMemHandle() failed: %s" ,
212+ uct_cuda_base_cu_get_error_string (cuerr ));
212213 status = (cuerr == CUDA_ERROR_ALREADY_MAPPED ) ?
213214 UCS_ERR_ALREADY_EXISTS : UCS_ERR_INVALID_PARAM ;
214215 }
@@ -227,35 +228,38 @@ uct_cuda_ipc_init_access_desc(CUmemAccessDesc *access_desc, CUdevice cu_dev)
227228}
228229
229230static ucs_status_t
230- uct_cuda_ipc_open_memhandle_vmm (uct_cuda_ipc_rkey_t * key , CUdevice cu_dev ,
231- CUdeviceptr * mapped_addr )
231+ uct_cuda_ipc_open_memhandle_vmm (const uct_cuda_ipc_rkey_t * key , CUdevice cu_dev ,
232+ CUdeviceptr * mapped_addr ,
233+ ucs_log_level_t log_level )
232234{
233235 CUmemAccessDesc access_desc = {};
234236 ucs_status_t status ;
235237 CUdeviceptr dptr ;
236238 CUmemGenericAllocationHandle handle ;
237239
238- status = UCT_CUDADRV_FUNC_LOG_ERR (cuMemImportFromShareableHandle (& handle ,
240+ status = UCT_CUDADRV_FUNC (cuMemImportFromShareableHandle (& handle ,
239241 (void * )& key -> ph .handle .fabric_handle ,
240- CU_MEM_HANDLE_TYPE_FABRIC ));
242+ CU_MEM_HANDLE_TYPE_FABRIC ), log_level );
241243 if (status != UCS_OK ) {
242244 goto out ;
243245 }
244246
245- status =
246- UCT_CUDADRV_FUNC_LOG_ERR ( cuMemAddressReserve ( & dptr , key -> b_len , 0 , 0 , 0 ) );
247+ status = UCT_CUDADRV_FUNC ( cuMemAddressReserve ( & dptr , key -> b_len , 0 , 0 , 0 ),
248+ log_level );
247249 if (status != UCS_OK ) {
248250 goto release_handle ;
249251 }
250252
251- status = UCT_CUDADRV_FUNC_LOG_ERR (cuMemMap (dptr , key -> b_len , 0 , handle , 0 ));
253+ status = UCT_CUDADRV_FUNC (cuMemMap (dptr , key -> b_len , 0 , handle , 0 ),
254+ log_level );
252255 if (status != UCS_OK ) {
253256 goto release_va_range ;
254257 }
255258
256259 uct_cuda_ipc_init_access_desc (& access_desc , cu_dev );
257260
258- status = UCT_CUDADRV_FUNC_LOG_ERR (cuMemSetAccess (dptr , key -> b_len , & access_desc , 1 ));
261+ status = UCT_CUDADRV_FUNC (cuMemSetAccess (dptr , key -> b_len , & access_desc , 1 ),
262+ log_level );
259263 if (status != UCS_OK ) {
260264 goto unmap_range ;
261265 }
@@ -315,8 +319,9 @@ static ucs_status_t cuda_ipc_rem_mpool_cache_create(uct_cuda_ipc_rkey_t *key,
315319}
316320
317321static ucs_status_t
318- uct_cuda_ipc_open_memhandle_mempool (uct_cuda_ipc_rkey_t * key ,
319- CUdevice cu_dev , CUdeviceptr * mapped_addr )
322+ uct_cuda_ipc_open_memhandle_mempool (uct_cuda_ipc_rkey_t * key , CUdevice cu_dev ,
323+ CUdeviceptr * mapped_addr ,
324+ ucs_log_level_t log_level )
320325{
321326 khash_t (cuda_ipc_rem_mpool_cache ) * hash = & uct_cuda_ipc_rem_mpool_cache .hash ;
322327 const CUmemFabricHandle * hkey = & key -> ph .handle .fabric_handle ;
@@ -360,18 +365,18 @@ uct_cuda_ipc_open_memhandle_mempool(uct_cuda_ipc_rkey_t *key,
360365 }
361366
362367out_import_pointer :
363- status = UCT_CUDADRV_FUNC_LOG_ERR (cuMemPoolImportPointer (mapped_addr ,
364- key -> ph . pool , (CUmemPoolPtrExportData * )& key -> ph .ptr ));
368+ status = UCT_CUDADRV_FUNC (cuMemPoolImportPointer (mapped_addr , key -> ph . pool ,
369+ (CUmemPoolPtrExportData * )& key -> ph .ptr ), log_level );
365370
366371err :
367372 pthread_rwlock_unlock (& uct_cuda_ipc_rem_mpool_cache .lock );
368373 return status ;
369374}
370375#endif
371376
372- static ucs_status_t uct_cuda_ipc_open_memhandle ( uct_cuda_ipc_rkey_t * key ,
373- CUdevice cu_dev ,
374- CUdeviceptr * mapped_addr )
377+ static ucs_status_t
378+ uct_cuda_ipc_open_memhandle ( uct_cuda_ipc_rkey_t * key , CUdevice cu_dev ,
379+ CUdeviceptr * mapped_addr , ucs_log_level_t log_level )
375380{
376381 ucs_log_level_t level ;
377382
@@ -380,12 +385,14 @@ static ucs_status_t uct_cuda_ipc_open_memhandle(uct_cuda_ipc_rkey_t *key,
380385 switch (key -> ph .handle_type ) {
381386 case UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY :
382387 return uct_cuda_ipc_open_memhandle_legacy (key -> ph .handle .legacy , cu_dev ,
383- mapped_addr );
388+ mapped_addr , log_level );
384389#if HAVE_CUDA_FABRIC
385390 case UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM :
386- return uct_cuda_ipc_open_memhandle_vmm (key , cu_dev , mapped_addr );
391+ return uct_cuda_ipc_open_memhandle_vmm (key , cu_dev , mapped_addr ,
392+ log_level );
387393 case UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL :
388- return uct_cuda_ipc_open_memhandle_mempool (key , cu_dev , mapped_addr );
394+ return uct_cuda_ipc_open_memhandle_mempool (key , cu_dev , mapped_addr ,
395+ log_level );
389396#endif
390397 case UCT_CUDA_IPC_KEY_HANDLE_TYPE_NO_IPC :
391398 level = UCS_LOG_LEVEL_DEBUG ;
@@ -520,8 +527,9 @@ ucs_status_t uct_cuda_ipc_unmap_memhandle(pid_t pid, uintptr_t d_bptr,
520527}
521528
522529UCS_PROFILE_FUNC (ucs_status_t , uct_cuda_ipc_map_memhandle ,
523- (key , cu_dev , mapped_addr ),
524- uct_cuda_ipc_rkey_t * key , CUdevice cu_dev , void * * mapped_addr )
530+ (key , cu_dev , mapped_addr , log_level ),
531+ uct_cuda_ipc_rkey_t * key , CUdevice cu_dev , void * * mapped_addr ,
532+ ucs_log_level_t log_level )
525533{
526534 uct_cuda_ipc_cache_t * cache ;
527535 ucs_status_t status ;
@@ -570,22 +578,23 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle,
570578 }
571579 }
572580
573- status = uct_cuda_ipc_open_memhandle (key , cu_dev , (CUdeviceptr * )mapped_addr );
581+ status = uct_cuda_ipc_open_memhandle (key , cu_dev , (CUdeviceptr * )mapped_addr ,
582+ log_level );
574583 if (ucs_unlikely (status != UCS_OK )) {
575584 if (ucs_likely (status == UCS_ERR_ALREADY_EXISTS )) {
576585 /* unmap all overlapping regions and retry*/
577586 uct_cuda_ipc_cache_invalidate_regions (cache , (void * )key -> d_bptr ,
578587 UCS_PTR_BYTE_OFFSET (key -> d_bptr ,
579588 key -> b_len ));
580589 status = uct_cuda_ipc_open_memhandle (key , cu_dev ,
581- (CUdeviceptr * )mapped_addr );
590+ (CUdeviceptr * )mapped_addr ,
591+ log_level );
582592 if (ucs_unlikely (status != UCS_OK )) {
583593 if (ucs_likely (status == UCS_ERR_ALREADY_EXISTS )) {
584594 /* unmap all cache entries and retry */
585595 uct_cuda_ipc_cache_purge (cache );
586- status =
587- uct_cuda_ipc_open_memhandle (key , cu_dev ,
588- (CUdeviceptr * )mapped_addr );
596+ status = uct_cuda_ipc_open_memhandle (
597+ key , cu_dev , (CUdeviceptr * )mapped_addr , log_level );
589598 if (status != UCS_OK ) {
590599 ucs_fatal ("%s: failed to open ipc mem handle. addr:%p "
591600 "len:%lu (%s)" , cache -> name ,
0 commit comments