Skip to content

Commit

Permalink
src/hmem,prov/shm: introduce gdrcopy awareness to hmem copy
Browse files Browse the repository at this point in the history
From v1.19, the OFI_HMEM_DATA_GDRCOPY_HANDLE flag signals the presence
of a gdrcopy handle in ofi_mr.hmem_data. The SHM provider can take advantage
of gdrcopy to achieve lower memcpy latencies to/from CUDA devices.

This patch introduces the logic to select gdrcopy on hmem copy paths,
with the exception of CUDA IPC, which is not supported by gdrcopy.

Signed-off-by: Wenduo Wang <wenduwan@amazon.com>
  • Loading branch information
wenduwan authored and shefty committed May 5, 2023
1 parent f767757 commit 2c5f988
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
7 changes: 7 additions & 0 deletions prov/shm/src/smr_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,14 @@ static ssize_t smr_generic_sendmsg(struct smr_ep *ep, const struct iovec *iov,
use_ipc = ofi_hmem_is_ipc_enabled(iface) &&
smr_desc->flags & FI_HMEM_DEVICE_ONLY &&
!(op_flags & FI_INJECT);

if (iface == FI_HMEM_CUDA &&
(smr_desc->flags & OFI_HMEM_DATA_GDRCOPY_HANDLE)) {
assert(smr_desc->hmem_data);
gdrcopy_available = true;
}
}

proto = smr_select_proto(iface, use_ipc, smr_cma_enabled(ep, peer_smr),
gdrcopy_available, op, total_len, op_flags);

Expand Down
6 changes: 6 additions & 0 deletions prov/shm/src/smr_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ static ssize_t smr_generic_rma(struct smr_ep *ep, const struct iovec *iov,
use_ipc = ofi_hmem_is_ipc_enabled(iface) &&
smr_desc->flags & FI_HMEM_DEVICE_ONLY &&
!(op_flags & FI_INJECT);

if (iface == FI_HMEM_CUDA &&
(smr_desc->flags & OFI_HMEM_DATA_GDRCOPY_HANDLE)) {
assert(smr_desc->hmem_data);
gdrcopy_available = true;
}
}
proto = smr_select_proto(iface, use_ipc, smr_cma_enabled(ep, peer_smr),
gdrcopy_available, op, total_len, op_flags);
Expand Down
22 changes: 20 additions & 2 deletions src/hmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ static ssize_t ofi_copy_mr_iov(struct ofi_mr **mr, const struct iovec *iov,
size_t size, int dir)
{
uint64_t done = 0, len;
uint64_t hmem_iface, hmem_device;
uint64_t hmem_iface, hmem_device, hmem_flags;
void *hmem_data;
char *hmem_buf;
size_t i;
int ret;
Expand All @@ -285,12 +286,29 @@ static ssize_t ofi_copy_mr_iov(struct ofi_mr **mr, const struct iovec *iov,

if (mr && mr[i]) {
hmem_iface = mr[i]->iface;
hmem_flags = mr[i]->flags;
hmem_device = mr[i]->device;
hmem_data = mr[i]->hmem_data;
} else {
hmem_iface = FI_HMEM_SYSTEM;
hmem_flags = 0;
hmem_device = 0;
hmem_data = NULL;
}
if (dir == OFI_COPY_BUF_TO_IOV)

if (hmem_iface == FI_HMEM_CUDA && (hmem_flags & OFI_HMEM_DATA_GDRCOPY_HANDLE)) {
/**
* TODO: Fine tune the max data size to switch from gdrcopy to cudaMemcpy
* Note: buf must be on the host since gdrcopy does not support D2D copy
*/
if (dir == OFI_COPY_BUF_TO_IOV)
cuda_gdrcopy_to_dev((uint64_t) hmem_data, hmem_buf,
(char *) buf + done, len);
else
cuda_gdrcopy_from_dev((uint64_t) hmem_data, (char *) buf + done,
hmem_buf, len);
ret = FI_SUCCESS;
} else if (dir == OFI_COPY_BUF_TO_IOV)
ret = ofi_copy_to_hmem(hmem_iface, hmem_device, hmem_buf,
(char *)buf + done, len);
else
Expand Down

0 comments on commit 2c5f988

Please sign in to comment.