Skip to content

Commit 4c707c0

Browse files
committed
UCT/IB: Check EFA-specific GPUDirect support
1 parent: 3842a65; commit: 4c707c0

File tree

3 files changed: 14 additions (+14) and 5 deletions (-5).

3 files changed

+14
-5
lines changed

src/uct/ib/base/ib_md.c

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1203,8 +1203,8 @@ void uct_ib_md_parse_relaxed_order(uct_ib_md_t *md,
12031203
uct_ib_device_name(&md->dev), md->relaxed_order ? "en" : "dis");
12041204
}
12051205

1206-
static void uct_ib_check_gpudirect_driver(uct_ib_md_t *md, const char *file,
1207-
ucs_memory_type_t mem_type)
1206+
void uct_ib_check_gpudirect_driver(uct_ib_md_t *md, const char *file,
1207+
ucs_memory_type_t mem_type)
12081208
{
12091209
if (md->reg_mem_types & UCS_BIT(mem_type)) {
12101210
return;
@@ -1316,8 +1316,8 @@ ucs_status_t uct_ib_md_open_common(uct_ib_md_t *md,
13161316
md->check_subnet_filter = 1;
13171317
}
13181318

1319-
md->reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_HOST) |
1320-
md->reg_nonblock_mem_types;
1319+
md->reg_mem_types |= UCS_BIT(UCS_MEMORY_TYPE_HOST) |
1320+
md->reg_nonblock_mem_types;
13211321

13221322
/* Check for GPU-direct support */
13231323
if (md_config->enable_gpudirect_rdma != UCS_NO) {
@@ -1333,7 +1333,6 @@ ucs_status_t uct_ib_md_open_common(uct_ib_md_t *md,
13331333
md, "/sys/module/nv_peer_mem/version",
13341334
UCS_MEMORY_TYPE_CUDA);
13351335

1336-
13371336
/* check if ROCM KFD driver is loaded */
13381337
uct_ib_check_gpudirect_driver(md, "/dev/kfd", UCS_MEMORY_TYPE_ROCM);
13391338

src/uct/ib/base/ib_md.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -431,4 +431,8 @@ ucs_status_t uct_ib_memh_alloc(uct_ib_md_t *md, size_t length,
431431
unsigned mem_flags, size_t memh_base_size,
432432
size_t mr_size, uct_ib_mem_t **memh_p);
433433

434+
void uct_ib_check_gpudirect_driver(uct_ib_md_t *md,
435+
const char *file,
436+
ucs_memory_type_t mem_type);
437+
434438
#endif

src/uct/ib/efa/base/ib_efa_md.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,12 @@ static ucs_status_t uct_ib_efa_md_open(struct ibv_device *ibv_device,
7575
*/
7676
dev->req_notify_cq_support = 0;
7777

78+
if (md_config->enable_gpudirect_rdma != UCS_NO) {
79+
uct_ib_check_gpudirect_driver(
80+
&md->super, "/sys/module/efa_nv_peermem/version",
81+
UCS_MEMORY_TYPE_CUDA);
82+
}
83+
7884
status = uct_ib_md_open_common(&md->super, ibv_device, md_config);
7985
if (status != UCS_OK) {
8086
goto err_md_free;

0 commit comments

Comments
 (0)