Skip to content

Commit

Permalink
Merge pull request #9408 from arun-chandran-edarath/amd_buffer_transfer
Browse files Browse the repository at this point in the history
ARCH/X86: Introduce non temporal buffer transfer
  • Loading branch information
yosefe committed May 7, 2024
2 parents 33089b3 + a86933e commit 63f85a9
Show file tree
Hide file tree
Showing 35 changed files with 607 additions and 66 deletions.
15 changes: 15 additions & 0 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,18 @@ run_release_mode_tests() {
test_ucm_hooks
}

#
# Run nt_buffer_transfer tests
#
run_nt_buffer_transfer_tests() {
	# These tests are gated on the CPU vendor reported by lscpu: on anything
	# other than an 'AuthenticAMD' host this task does nothing and succeeds.
	if ! lscpu | grep -q 'AuthenticAMD'; then
		return 0
	fi

	# Build in release mode with gtest and optimizations enabled, then run
	# only the nt_buffer_transfer cases of the test_arch suite.
	build release --enable-gtest --enable-optimizations
	echo "==== Running nt_buffer_transfer tests ===="
	./test/gtest/gtest --gtest_filter="test_arch.nt_buffer_transfer_*"
}

set_ucx_common_test_env() {
export UCX_HANDLE_ERRORS=bt
export UCX_ERROR_SIGNALS=SIGILL,SIGSEGV,SIGBUS,SIGFPE,SIGPIPE,SIGABRT
Expand Down Expand Up @@ -1102,6 +1114,9 @@ run_tests() {

# release mode tests
do_distributed_task 0 4 run_release_mode_tests

# nt_buffer_transfer tests
do_distributed_task 0 4 run_nt_buffer_transfer_tests
}

run_test_proto_disable() {
Expand Down
3 changes: 2 additions & 1 deletion src/tools/info/sys_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) Shanghai Zhaoxin Semiconductor Co., Ltd. 2020. ALL RIGHTS RESERVED.
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -46,7 +47,7 @@ static double measure_memcpy_bandwidth(size_t size)
iter = 0;
start_time = ucs_get_time();
do {
ucs_memcpy_relaxed(dst, src, size);
ucs_memcpy_relaxed(dst, src, size, UCS_ARCH_MEMCPY_NT_NONE, size);
end_time = ucs_get_time();
++iter;
} while (end_time < start_time + ucs_time_from_sec(0.5));
Expand Down
12 changes: 8 additions & 4 deletions src/ucp/core/ucp_am.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED.
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2019. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -1350,7 +1351,7 @@ ucp_am_copy_data_fragment(ucp_recv_desc_t *first_rdesc, void *data,
{
UCS_PROFILE_NAMED_CALL("am_memcpy_recv", ucs_memcpy_relaxed,
UCS_PTR_BYTE_OFFSET(first_rdesc + 1, offset),
data, length);
data, length, UCS_ARCH_MEMCPY_NT_SOURCE, length);
first_rdesc->am_first.remaining -= length;
}

Expand Down Expand Up @@ -1507,19 +1508,22 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_am_long_first_handler,
/* Copy the first fragment and base headers before the data; they are needed
* for processing the middle fragments. */
UCS_PROFILE_NAMED_CALL("am_memcpy_recv", ucs_memcpy_relaxed,
first_rdesc + 1, first_ftr, sizeof(*first_ftr));
first_rdesc + 1, first_ftr, sizeof(*first_ftr),
UCS_ARCH_MEMCPY_NT_SOURCE, sizeof(*first_ftr));
UCS_PROFILE_NAMED_CALL("am_memcpy_recv", ucs_memcpy_relaxed,
UCS_PTR_BYTE_OFFSET(first_rdesc + 1,
sizeof(*first_ftr)),
hdr, sizeof(*hdr));
hdr, sizeof(*hdr), UCS_ARCH_MEMCPY_NT_SOURCE,
sizeof(*hdr));

/* Copy user header to the end of message */
user_hdr = UCS_PTR_BYTE_OFFSET(first_ftr, -user_hdr_length);
UCS_PROFILE_NAMED_CALL("am_memcpy_recv", ucs_memcpy_relaxed,
UCS_PTR_BYTE_OFFSET(first_rdesc + 1,
first_rdesc->payload_offset +
first_ftr->total_size),
user_hdr, user_hdr_length);
user_hdr, user_hdr_length,
UCS_ARCH_MEMCPY_NT_SOURCE, user_hdr_length);

/* Copy all already arrived middle fragments to the data buffer */
ucs_queue_for_each_safe(mid_rdesc, iter, &ep_ext->am.mid_rdesc_q,
Expand Down
13 changes: 9 additions & 4 deletions src/ucp/dt/datatype_iter.inl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2020. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -378,7 +379,8 @@ ucp_datatype_iter_next_pack(const ucp_datatype_iter_t *dt_iter,
src = UCS_PTR_BYTE_OFFSET(dt_iter->type.contig.buffer,
dt_iter->offset);
ucp_dt_contig_pack(worker, dest, src, length,
(ucs_memory_type_t)dt_iter->mem_info.type);
(ucs_memory_type_t)dt_iter->mem_info.type,
dt_iter->length);
break;
case UCP_DATATYPE_IOV:
ucp_datatype_iter_iov_check(dt_iter);
Expand All @@ -389,7 +391,8 @@ ucp_datatype_iter_next_pack(const ucp_datatype_iter_t *dt_iter,
dt_iter->type.iov.iov, length,
&next_iter->type.iov.iov_offset,
&next_iter->type.iov.iov_index,
(ucs_memory_type_t)dt_iter->mem_info.type);
(ucs_memory_type_t)dt_iter->mem_info.type,
dt_iter->length);
break;
case UCP_DATATYPE_GENERIC:
if (max_length != 0) {
Expand Down Expand Up @@ -443,7 +446,8 @@ ucp_datatype_iter_unpack(ucp_datatype_iter_t *dt_iter, ucp_worker_h worker,
ucs_assert(dt_iter->mem_info.type < UCS_MEMORY_TYPE_LAST);
dest = UCS_PTR_BYTE_OFFSET(dt_iter->type.contig.buffer, offset);
ucp_dt_contig_unpack(worker, dest, src, length,
(ucs_memory_type_t)dt_iter->mem_info.type);
(ucs_memory_type_t)dt_iter->mem_info.type,
dt_iter->length);
status = UCS_OK;
break;
case UCP_DATATYPE_IOV:
Expand All @@ -453,7 +457,8 @@ ucp_datatype_iter_unpack(ucp_datatype_iter_t *dt_iter, ucp_worker_h worker,
length,
&dt_iter->type.iov.iov_offset,
&dt_iter->type.iov.iov_index,
(ucs_memory_type_t)dt_iter->mem_info.type);
(ucs_memory_type_t)dt_iter->mem_info.type,
dt_iter->length);
ucs_assert(unpacked_length <= length);
dt_iter->offset += unpacked_length;
status = UCS_OK;
Expand Down
5 changes: 3 additions & 2 deletions src/ucp/dt/dt.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2017. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -112,14 +113,14 @@ size_t ucp_dt_pack(ucp_worker_h worker, ucp_datatype_t datatype,
case UCP_DATATYPE_CONTIG:
ucp_dt_contig_pack(worker, dest,
UCS_PTR_BYTE_OFFSET(src, state->offset),
length, mem_type);
length, mem_type, length);
result_len = length;
break;

case UCP_DATATYPE_IOV:
UCS_PROFILE_CALL_VOID(ucp_dt_iov_gather, worker, dest, src, length,
&state->dt.iov.iov_offset,
&state->dt.iov.iovcnt_offset, mem_type);
&state->dt.iov.iovcnt_offset, mem_type, length);
result_len = length;
break;

Expand Down
15 changes: 12 additions & 3 deletions src/ucp/dt/dt.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2016. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -68,11 +69,19 @@ void ucp_mem_type_unpack(ucp_worker_h worker, void *buffer,


static UCS_F_ALWAYS_INLINE void
ucp_memcpy_pack_unpack(void *buffer, const void *data, size_t length,
const char *name)
ucp_memcpy_pack(void *buffer, const void *data, size_t length,
size_t total_len, const char *name)
{
UCS_PROFILE_NAMED_CALL(name, ucs_memcpy_relaxed, buffer, data, length);
UCS_PROFILE_NAMED_CALL(name, ucs_memcpy_relaxed, buffer, data, length,
UCS_ARCH_MEMCPY_NT_DEST, total_len);
}

/**
 * Copy @a length bytes from @a data into @a buffer on the unpack path.
 *
 * Forwards to ucs_memcpy_relaxed() through UCS_PROFILE_NAMED_CALL, labelling
 * the call with @a name and passing the UCS_ARCH_MEMCPY_NT_SOURCE hint
 * together with @a total_len.
 * NOTE(review): the hint presumably marks the source as the non-temporal side
 * of the copy - confirm against the ucs_memcpy_relaxed() definition.
 *
 * @param [out] buffer     Destination of the copy.
 * @param [in]  data       Source of the copy.
 * @param [in]  length     Number of bytes to copy in this call.
 * @param [in]  total_len  Passed unchanged as the last argument of
 *                         ucs_memcpy_relaxed(); presumably the size of the
 *                         whole transfer this copy belongs to - TODO confirm.
 * @param [in]  name       Profiling label given to UCS_PROFILE_NAMED_CALL.
 */
static UCS_F_ALWAYS_INLINE void
ucp_memcpy_unpack(void *buffer, const void *data, size_t length,
size_t total_len, const char *name)
{
UCS_PROFILE_NAMED_CALL(name, ucs_memcpy_relaxed, buffer, data, length,
UCS_ARCH_MEMCPY_NT_SOURCE, total_len);
}

#endif /* UCP_DT_H_ */
9 changes: 5 additions & 4 deletions src/ucp/dt/dt_contig.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -37,10 +38,10 @@ static UCS_F_ALWAYS_INLINE size_t ucp_contig_dt_length(ucp_datatype_t datatype,

static UCS_F_ALWAYS_INLINE void
ucp_dt_contig_pack(ucp_worker_h worker, void *dest, const void *src,
size_t length, ucs_memory_type_t mem_type)
size_t length, ucs_memory_type_t mem_type, size_t total_len)
{
if (ucs_likely(UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type))) {
ucp_memcpy_pack_unpack(dest, src, length, "memcpy_pack");
ucp_memcpy_pack(dest, src, length, total_len, "memcpy_pack");
} else {
ucp_mem_type_pack(worker, dest, src, length, mem_type);
}
Expand All @@ -49,10 +50,10 @@ ucp_dt_contig_pack(ucp_worker_h worker, void *dest, const void *src,

static UCS_F_ALWAYS_INLINE void
ucp_dt_contig_unpack(ucp_worker_h worker, void *dest, const void *src,
size_t length, ucs_memory_type_t mem_type)
size_t length, ucs_memory_type_t mem_type, size_t total_len)
{
if (ucs_likely(UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type))) {
ucp_memcpy_pack_unpack(dest, src, length, "memcpy_unpack");
ucp_memcpy_unpack(dest, src, length, total_len, "memcpy_unpack");
} else {
ucp_mem_type_unpack(worker, dest, src, length, mem_type);
}
Expand Down
9 changes: 5 additions & 4 deletions src/ucp/dt/dt_iov.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -24,7 +25,7 @@

void ucp_dt_iov_gather(ucp_worker_h worker, void *dest, const ucp_dt_iov_t *iov,
size_t length, size_t *iov_offset, size_t *iovcnt_offset,
ucs_memory_type_t mem_type)
ucs_memory_type_t mem_type, size_t total_len)
{
size_t length_it = 0;
size_t item_len, item_reminder, item_len_to_copy;
Expand All @@ -39,7 +40,7 @@ void ucp_dt_iov_gather(ucp_worker_h worker, void *dest, const ucp_dt_iov_t *iov,
ucp_dt_contig_pack(worker, UCS_PTR_BYTE_OFFSET(dest, length_it),
UCS_PTR_BYTE_OFFSET(iov[*iovcnt_offset].buffer,
*iov_offset),
item_len_to_copy, mem_type);
item_len_to_copy, mem_type, total_len);
length_it += item_len_to_copy;

ucs_assert(length_it <= length);
Expand All @@ -55,7 +56,7 @@ void ucp_dt_iov_gather(ucp_worker_h worker, void *dest, const ucp_dt_iov_t *iov,
size_t ucp_dt_iov_scatter(ucp_worker_h worker, const ucp_dt_iov_t *iov,
size_t iovcnt, const void *src, size_t length,
size_t *iov_offset, size_t *iovcnt_offset,
ucs_memory_type_t mem_type)
ucs_memory_type_t mem_type, size_t total_len)
{
size_t length_it = 0;
size_t item_len, item_len_to_copy;
Expand All @@ -70,7 +71,7 @@ size_t ucp_dt_iov_scatter(ucp_worker_h worker, const ucp_dt_iov_t *iov,
UCS_PTR_BYTE_OFFSET(iov[*iovcnt_offset].buffer,
*iov_offset),
UCS_PTR_BYTE_OFFSET(src, length_it),
item_len_to_copy, mem_type);
item_len_to_copy, mem_type, total_len);
length_it += item_len_to_copy;

ucs_assert(length_it <= length);
Expand Down
4 changes: 2 additions & 2 deletions src/ucp/dt/dt_iov.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static inline size_t ucp_dt_iov_length(const ucp_dt_iov_t *iov, size_t iovcnt)
*/
void ucp_dt_iov_gather(ucp_worker_h worker, void *dest, const ucp_dt_iov_t *iov,
size_t length, size_t *iov_offset, size_t *iovcnt_offset,
ucs_memory_type_t mem_type);
ucs_memory_type_t mem_type, size_t total_len);


/**
Expand All @@ -81,7 +81,7 @@ void ucp_dt_iov_gather(ucp_worker_h worker, void *dest, const ucp_dt_iov_t *iov,
size_t ucp_dt_iov_scatter(ucp_worker_h worker, const ucp_dt_iov_t *iov,
size_t iovcnt, const void *src, size_t length,
size_t *iov_offset, size_t *iovcnt_offset,
ucs_memory_type_t mem_type);
ucs_memory_type_t mem_type, size_t total_len);


/**
Expand Down
8 changes: 5 additions & 3 deletions src/ucp/rma/amo_offload.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -22,7 +23,8 @@ ucp_amo_memtype_unpack_reply_buffer(ucp_request_t *req)
{
ucp_dt_contig_unpack(req->send.ep->worker, req->send.amo.reply_buffer,
&req->send.amo.result, req->send.state.dt_iter.length,
ucp_amo_request_reply_mem_type(req));
ucp_amo_request_reply_mem_type(req),
req->send.state.dt_iter.length);
}

static void ucp_proto_amo_completion(uct_completion_t *self)
Expand Down Expand Up @@ -71,7 +73,7 @@ ucp_proto_amo_progress(uct_pending_req_t *self, ucp_operation_id_t op_id,
UCS_MEMORY_TYPE_HOST;
ucp_dt_contig_pack(req->send.ep->worker, &req->send.amo.value,
req->send.state.dt_iter.type.contig.buffer,
op_size, mem_type);
op_size, mem_type, op_size);
req->flags |= UCP_REQUEST_FLAG_PROTO_AMO_PACKED;
}

Expand All @@ -84,7 +86,7 @@ ucp_proto_amo_progress(uct_pending_req_t *self, ucp_operation_id_t op_id,
if (op_id == UCP_OP_ID_AMO_CSWAP) {
ucp_dt_contig_pack(ep->worker, &req->send.amo.result,
req->send.amo.reply_buffer, op_size,
ucp_amo_request_reply_mem_type(req));
ucp_amo_request_reply_mem_type(req), op_size);
}

req->flags |= UCP_REQUEST_FLAG_PROTO_INITIALIZED;
Expand Down
8 changes: 5 additions & 3 deletions src/ucp/rma/amo_sw.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2018. ALL RIGHTS RESERVED.
* Copyright (C) Huawei Technologies Co., Ltd. 2021. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -38,10 +39,10 @@ static size_t ucp_amo_sw_pack(void *dest, ucp_request_t *req, int fetch,

if (worker->context->config.ext.proto_enable) {
ucp_dt_contig_pack(worker, atomich + 1, &req->send.amo.value, size,
UCS_MEMORY_TYPE_HOST);
UCS_MEMORY_TYPE_HOST, size);
if (req->send.amo.uct_op == UCT_ATOMIC_OP_CSWAP) {
ucp_dt_contig_pack(worker, cswaph, req->send.amo.reply_buffer, size,
ucp_amo_request_reply_mem_type(req));
ucp_amo_request_reply_mem_type(req), size);
length += size;
}
} else {
Expand Down Expand Up @@ -303,7 +304,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_rep_handler, (arg, data, length, am_fl

if (worker->context->config.ext.proto_enable) {
ucp_dt_contig_unpack(worker, req->send.amo.reply_buffer, hdr + 1,
frag_length, ucp_amo_request_reply_mem_type(req));
frag_length, ucp_amo_request_reply_mem_type(req),
frag_length);
} else {
memcpy(req->send.buffer, hdr + 1, frag_length);
}
Expand Down
3 changes: 2 additions & 1 deletion src/ucp/rma/get_offload.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2020. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -20,7 +21,7 @@ static void ucp_proto_get_offload_bcopy_unpack(void *arg, const void *data,
size_t length)
{
void *dest = arg;
ucs_memcpy_relaxed(dest, data, length);
ucs_memcpy_relaxed(dest, data, length, UCS_ARCH_MEMCPY_NT_SOURCE, length);
}

static UCS_F_ALWAYS_INLINE ucs_status_t
Expand Down
6 changes: 4 additions & 2 deletions src/ucp/rma/rma_sw.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2018. ALL RIGHTS RESERVED.
* Copyright (C) Huawei Technologies Co., Ltd. 2021. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -161,7 +162,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_put_handler, (arg, data, length, am_flags),
UCP_WORKER_GET_EP_BY_ID(&ep, worker, puth->ep_id, return UCS_OK,
"SW PUT request");
ucp_dt_contig_unpack(worker, (void*)puth->address, puth + 1,
length - sizeof(*puth), puth->mem_type);
length - sizeof(*puth), puth->mem_type,
length - sizeof(*puth));
ucp_rma_sw_send_cmpl(ep);
return UCS_OK;
}
Expand Down Expand Up @@ -195,7 +197,7 @@ static size_t ucp_rma_sw_pack_get_reply(void *dest, void *arg)
hdr->offset = req->send.state.dt_iter.offset;
ucp_dt_contig_pack(req->send.ep->worker, hdr + 1,
(char*)req->send.buffer + hdr->offset, length,
req->send.mem_type);
req->send.mem_type, req->send.length);

return sizeof(*hdr) + length;
}
Expand Down

0 comments on commit 63f85a9

Please sign in to comment.