Skip to content

Commit ed7b07b

Browse files
authored
UCT/API: Introduce device API (#10829)
1 parent 34317a7 commit ed7b07b

40 files changed

+676
-194
lines changed

buildlib/tools/coverity.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ run_coverity() {
102102
if [ "${ucx_build_type}" == "devel" ]; then
103103
cov-manage-emit --dir $cov_build --tu-pattern "file('.*/test/gtest/common/googletest/*')" delete || :
104104
fi
105-
cov-analyze --jobs $parallel_jobs $COV_OPT --security --concurrency --dir $cov_build
105+
cov-analyze --jobs $parallel_jobs $COV_OPT --disable PARSE_ERROR --security --concurrency --dir $cov_build
106106
nerrors=$(cov-format-errors --dir $cov_build | awk '/Processing [0-9]+ errors?/ { print $2 }')
107107

108108
if [ $nerrors -gt 0 ]; then

contrib/check_inst_headers.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ do
2828
fi
2929

3030
# device files should be ignored for now
31-
if test "$hfile" != "${hfile#ucp/api/device/}"
31+
if test "$hfile" != "${hfile#uc[pt]/api/device/}"
3232
then
3333
echo "SKIPPED $hfile (device compiler)"
3434
continue

src/tools/info/tl_info.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md,
138138
};
139139
uct_iface_config_t *iface_config;
140140
uct_iface_attr_t iface_attr;
141+
uct_iface_attr_v2_t iface_attr_v2;
141142
char max_eps_str[32];
142143
ucs_status_t status;
143144
uct_iface_h iface;
@@ -343,6 +344,15 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md,
343344
printf("# error handling:%s\n", buf);
344345
}
345346

347+
iface_attr_v2.field_mask = UCT_IFACE_ATTR_FIELD_DEVICE_MEM_ELEMENT_SIZE;
348+
status = uct_iface_query_v2(iface, &iface_attr_v2);
349+
if (status != UCS_OK) {
350+
printf("# < failed to query interface >\n");
351+
} else {
352+
printf("# device mem_element: %zu bytes\n",
353+
iface_attr_v2.device_mem_element_size);
354+
}
355+
346356
uct_iface_close(iface);
347357
printf("#\n");
348358
}

src/ucp/api/device/ucp_device_impl.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,25 @@
77
#ifndef UCP_DEVICE_IMPL_H
88
#define UCP_DEVICE_IMPL_H
99

10+
#include "ucp_host.h"
1011
#include "ucp_device_types.h"
1112

13+
#include <uct/api/device/uct_device_impl.h>
1214
#include <ucs/sys/compiler_def.h>
1315
#include <ucs/type/status.h>
1416
#include <stdint.h>
1517

18+
/**
19+
* @ingroup UCP_DEVICE
20+
* @brief GPU request descriptor of a given batch
21+
*
22+
* This request tracks a batch of memory operations in progress. It can be used
23+
* with @ref ucp_device_progress_req to detect request completion.
24+
*/
25+
typedef struct ucp_device_request {
26+
/* UCT completion embedded in the request: ucp_device_request_init resets it,
   and ucp_device_progress_req returns its status once its count is zero. */
uct_device_completion_t comp;
27+
} ucp_device_request_t;
28+
1629

1730
/**
1831
* @ingroup UCP_DEVICE
@@ -226,6 +239,19 @@ ucp_device_put_multi_partial(ucp_device_mem_list_handle_h mem_list,
226239
}
227240

228241

242+
/**
243+
* @ingroup UCP_DEVICE
244+
*
245+
* @brief Initialize a device request.
246+
*
247+
* @param [out] req Device request to initialize.
248+
*/
249+
UCS_F_DEVICE void ucp_device_request_init(ucp_device_request_t *req)
250+
{
251+
/* Reset the embedded UCT completion; uct_device_completion_init sets its
   count to 0 and its status to UCS_OK. */
uct_device_completion_init(&req->comp);
252+
}
253+
254+
229255
/**
230256
* @ingroup UCP_DEVICE
231257
* @brief Progress a device request containing a batch of operations.
@@ -246,6 +272,11 @@ template <ucp_device_level_t level = UCP_DEVICE_LEVEL_THREAD>
246272
UCS_F_DEVICE ucs_status_t
247273
ucp_device_progress_req(ucp_device_request_t *req)
248274
{
275+
if (ucs_likely(req->comp.count == 0)) {
276+
return req->comp.status;
277+
}
278+
279+
/* TODO call uct progress function */
249280
return UCS_ERR_NOT_IMPLEMENTED;
250281
}
251282

src/ucp/api/device/ucp_device_types.h

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@
77
#ifndef UCP_DEVICE_TYPES_H
88
#define UCP_DEVICE_TYPES_H
99

10-
#include <uct/api/uct.h>
10+
#include <uct/api/device/uct_device_types.h>
11+
#include <uct/api/uct_def.h>
1112

1213

13-
typedef struct ucp_mem_list_elem {
14-
} ucp_device_mem_list_elem_t;
15-
14+
#define UCP_DEVICE_MEM_LIST_MAX_EPS 2
1615

1716
/**
1817
* @ingroup UCP_DEVICE
@@ -26,50 +25,36 @@ typedef struct ucp_mem_list_elem {
2625
*/
2726
/* The struct carries the tag ucp_device_mem_list_handle so that it is the very
   type the ucp_device_mem_list_handle_h typedef in ucp_host.h points to; an
   anonymous struct would be a distinct, unrelated type. */
typedef struct ucp_device_mem_list_handle {
2828
/**
29-
* Allow runtime ABI compatibility checks, between host and device code.
28+
* Structure version. Allows runtime ABI compatibility checks between host
29+
* and device code.
3030
*/
31-
int version;
31+
uint16_t version;
3232

3333
/**
3434
* Protocol index computed by host handle management functions when
3535
* creating handle.
3636
*/
37-
int proto_idx;
37+
uint8_t proto_idx;
3838

3939
/**
40-
* Array of pointers to UCT exported endpoints, used for multi-lane
41-
* transfers.
40+
* Number of UCT device endpoints found in @a uct_device_eps array.
4241
*/
43-
uct_ep_h *uct_ep;
44-
45-
/**
46-
* Number of UCT exported endpoints found in @a uct_ep array.
47-
*/
48-
unsigned num_uct_eps;
42+
uint8_t num_uct_eps;
4943

5044
/**
5145
* Number of entries in the memory list stored after this structure.
5246
*/
53-
unsigned mem_list_length;
47+
uint32_t mem_list_length;
5448

5549
/**
56-
* Array of memory descriptors containing memory pairs to be used by device
57-
* functions for memory transfers.
50+
* Array of pointers to UCT device endpoints, used for multi-lane
51+
* transfers.
5852
*/
59-
ucp_device_mem_list_elem_t elems[];
60-
} ucp_device_mem_list_handle_t;
53+
uct_device_ep_h uct_device_eps[UCP_DEVICE_MEM_LIST_MAX_EPS];
6154

62-
typedef ucp_device_mem_list_handle_t *ucp_device_mem_list_handle_h;
63-
64-
65-
/**
66-
* @ingroup UCP_DEVICE
67-
* @brief GPU request descriptor of a given batch
68-
*
69-
* This request tracks a batch of memory operations in progress. It can be used
70-
* with @ref ucp_device_progress_req to detect request completion.
71-
*/
72-
typedef struct ucp_device_request {
73-
} ucp_device_request_t;
55+
/* (mem_list_length * num_uct_eps) uct_device_mem_element objects will
56+
follow this structure. The size of each element depends on the
57+
selected transport. */
58+
} ucp_device_mem_list_handle_t;
7459

7560
#endif /* UCP_DEVICE_TYPES_H */

src/ucp/api/device/ucp_host.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ typedef struct ucp_mem_list_params {
118118
*
119119
* Host side does not have access to the content of this descriptor.
120120
*/
121-
struct ucp_device_mem_list_handle;
122121
typedef struct ucp_device_mem_list_handle *ucp_device_mem_list_handle_h;
123122

124123

src/uct/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ nobase_dist_libuct_la_HEADERS = \
2020
api/tl.h \
2121
api/uct_def.h \
2222
api/uct.h \
23-
api/version.h
23+
api/version.h \
24+
api/device/uct_device_impl.h \
25+
api/device/uct_device_types.h
2426

2527
noinst_HEADERS = \
2628
base/uct_md.h \
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
2+
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#ifndef UCT_DEVICE_IMPL_H
8+
#define UCT_DEVICE_IMPL_H
9+
10+
#include "uct_device_types.h"
11+
12+
#include <uct/api/uct_def.h>
13+
#include <ucs/sys/compiler_def.h>
14+
15+
16+
/**
17+
* @ingroup UCT_DEVICE
18+
* @brief Posts one memory put operation.
19+
*
20+
* This device routine writes a single memory block from the local address @a address
21+
* to the remote address @a remote_address using the device endpoint @a device_ep.
22+
* The memory element @a mem_elem must be valid and contain the local and remote
23+
* memory regions to be transferred.
24+
*
25+
* The user can pass @a comp to track execution and completion status.
26+
* The @a flags parameter can be used to modify the behavior
27+
* of the routine.
28+
*
29+
* @param [in] device_ep Device endpoint to be used for the operation.
30+
* @param [in] mem_elem Memory element representing the memory to be transferred.
31+
* @param [in] address Local virtual address to send data from.
32+
* @param [in] remote_address Remote virtual address to write data to.
33+
* @param [in] length Length in bytes of the data to send.
34+
* @param [in] flags Flags to modify the function behavior.
35+
* @param [in] comp Completion object to track the progress of operation.
36+
*
37+
* @return Error code as defined by @ref ucs_status_t
38+
*/
39+
template<uct_device_level_t level = UCT_DEVICE_LEVEL_THREAD>
40+
UCS_F_DEVICE ucs_status_t uct_device_ep_put_single(
41+
uct_device_ep_h device_ep, const uct_device_mem_element_t *mem_elem,
42+
const void *address, uint64_t remote_address, size_t length,
43+
uint64_t flags, uct_device_completion_t *comp)
44+
{
45+
/* Dispatch on the transport id stored in the endpoint. Both per-transport
   calls below are still commented out, so each branch is empty and every
   call currently ends at the UCS_ERR_UNSUPPORTED return. The template
   parameter 'level' is not used by this body. */
if (device_ep->uct_tl_id == UCT_DEVICE_TL_RC_MLX5_GDA) {
46+
// return uct_rc_mlx5_gda_ep_put_single(device_ep, mem_elem, address,
47+
// remote_address, length, flags,
48+
// comp);
49+
} else if (device_ep->uct_tl_id == UCT_DEVICE_TL_CUDA_IPC) {
50+
// return uct_cuda_ipc_ep_put_single(device_ep, mem_elem, address,
51+
// remote_address, length, flags, comp);
52+
}
53+
/* Reached for every transport id, known or unknown, until the calls above
   are enabled. */
return UCS_ERR_UNSUPPORTED;
54+
}
55+
56+
57+
/**
58+
* @ingroup UCT_DEVICE
59+
* @brief Initialize a device completion object.
60+
*
61+
* @param [out] comp Device completion object to initialize.
62+
*/
63+
template<uct_device_level_t level = UCT_DEVICE_LEVEL_THREAD>
64+
UCS_F_DEVICE void uct_device_completion_init(uct_device_completion_t *comp)
65+
{
66+
/* No operations are pending on a freshly initialized completion. The
   template parameter 'level' is not used by this body. */
comp->count = 0;
67+
/* Start from a success status. */
comp->status = UCS_OK;
68+
}
69+
70+
#endif
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#ifndef UCT_DEVICE_TYPES_H
8+
#define UCT_DEVICE_TYPES_H
9+
10+
#include <ucs/type/status.h>
11+
#include <stdint.h>
12+
13+
/**
14+
* @defgroup UCT_DEVICE Device API
15+
* @ingroup UCT_API
16+
* This section describes the UCT Device API.
17+
* @{
18+
* @}
19+
*/
20+
21+
/* Cooperation level when calling device functions */
22+
typedef enum {
23+
/* NOTE(review): the names suggest thread / warp / block / grid execution
   scopes - confirm the exact semantics against the transport code. */
UCT_DEVICE_LEVEL_THREAD, /* Default 'level' template argument of uct_device_ep_put_single and uct_device_completion_init */
24+
UCT_DEVICE_LEVEL_WARP,
25+
UCT_DEVICE_LEVEL_BLOCK,
26+
UCT_DEVICE_LEVEL_GRID
27+
} uct_device_level_t;
28+
29+
30+
/* Device transport id (for internal use) */
31+
typedef enum {
32+
UCT_DEVICE_TL_RC_MLX5_GDA, /* Selects the (currently commented-out) rc_mlx5 GDA path in uct_device_ep_put_single */
33+
UCT_DEVICE_TL_CUDA_IPC, /* Selects the (currently commented-out) cuda_ipc path in uct_device_ep_put_single */
34+
UCT_DEVICE_TL_LAST /* NOTE(review): presumably an end marker rather than a transport - confirm */
35+
} uct_device_tl_id_t;
36+
37+
38+
/* Base class for all device endpoints */
39+
typedef struct uct_device_ep {
40+
uint8_t uct_tl_id; /* Transport id: a uct_device_tl_id_t value stored in 8 bits; uct_device_ep_put_single dispatches on it */
41+
} uct_device_ep_t;
42+
43+
44+
/* Completion object for device operations */
45+
typedef struct uct_device_completion {
46+
uint32_t count; /* Number of operations still pending; uct_device_completion_init sets it to 0 */
47+
ucs_status_t status; /* Status of the operation; uct_device_completion_init sets it to UCS_OK */
48+
} uct_device_completion_t;
49+
50+
51+
/* Base structure for all device memory elements */
52+
struct uct_device_mem_element {
53+
/* No members are defined at this level. */
};
54+
55+
#endif

src/uct/api/uct.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ typedef enum uct_atomic_op {
437437

438438
/* Interface capability */
439439
#define UCT_IFACE_FLAG_INTER_NODE UCS_BIT(54) /**< Interface is inter-node capable */
440+
#define UCT_IFACE_FLAG_DEVICE_EP UCS_BIT(55) /**< Interface supports device endpoint */
440441
/**
441442
* @}
442443
*/

0 commit comments

Comments
 (0)