Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Additionally, there is a `./build_and_run.sh` script that will call `./build.sh`

### C++

To build and install C++ library to `${CONDA_PREFIX}`, with both Python and RMM support, as well as building all tests and benchmarks (with CUDA support) run:
To build and install the C++ library to `${CONDA_PREFIX}` with Python support and CCCL CUDA buffer support, and to build all tests and benchmarks with CUDA/CCCL support, run:

```
mkdir cpp/build
Expand All @@ -53,8 +53,9 @@ cmake .. -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} \
-DBUILD_BENCHMARKS=ON \
-DCMAKE_BUILD_TYPE=Release \
-DUCXX_ENABLE_PYTHON=ON \
-DUCXX_ENABLE_RMM=ON \
-DUCXX_BENCHMARKS_ENABLE_CUDA=ON
-DUCXX_ENABLE_CCCL=ON \
-DUCXX_BENCHMARKS_ENABLE_CUDA=ON \
-DUCXX_BENCHMARKS_ENABLE_CCCL=ON
make -j install
```

Expand Down Expand Up @@ -122,7 +123,8 @@ It is recommended to use `UCX_TCP_CM_REUSEADDR=y` when binding to interfaces wit

#### CCCL Memory Support

When built with `UCXX_ENABLE_CCCL=ON`, additional CCCL-based memory types are available:
When built with `UCXX_ENABLE_CCCL=ON`, `UCXX_BENCHMARKS_ENABLE_CUDA=ON`, and
`UCXX_BENCHMARKS_ENABLE_CCCL=ON`, additional CCCL-based memory types are available:

```
# Server with CCCL device memory pool
Expand All @@ -138,14 +140,16 @@ $ UCX_TCP_CM_REUSEADDR=y ./benchmarks/ucxx_perftest -m cccl-shared -s 1048576 -n
$ ./benchmarks/ucxx_perftest -m cccl-shared -s 1048576 -n 10 127.0.0.1
```

**Additional CCCL Memory Types (with `-DUCXX_ENABLE_CCCL=ON`):**
**Additional CCCL Memory Types:**
- `cccl-device` - CCCL device memory pool
- `cccl-shared` - CCCL shared memory resource
- `cccl-cuda-async` - CCCL CUDA async memory resource
- `cccl-cuda-async-managed` - CCCL CUDA async managed memory resource

**Requirements for CCCL Support:**
- UCXX compiled with `UCXX_ENABLE_CCCL=ON`
- Benchmarks compiled with `UCXX_BENCHMARKS_ENABLE_CUDA=ON`
- Benchmarks compiled with `UCXX_BENCHMARKS_ENABLE_CCCL=ON`
- CCCL library available (fetched automatically via CMake)

### Python
Expand Down
7 changes: 4 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ BUILD_TESTS=OFF
BUILD_EXAMPLES=OFF
BUILD_DISABLE_DEPRECATION_WARNINGS=ON
BUILD_COMPILE_COMMANDS=OFF
UCXX_ENABLE_RMM=OFF
UCXX_TESTS_ENABLE_RMM=OFF
UCXX_BENCHMARKS_ENABLE_RMM=OFF
UCXX_ENABLE_CCCL=OFF
UCXX_BENCHMARKS_ENABLE_CUDA=OFF

Expand Down Expand Up @@ -151,7 +152,6 @@ if hasArg ucxx_tests && ! hasArg ucxx; then
fi

if buildAll || hasArg libucxx_python || hasArg libucxx_tests || hasArg libucxx_examples; then
UCXX_ENABLE_RMM=ON
UCXX_ENABLE_CCCL=ON
fi

Expand Down Expand Up @@ -188,7 +188,8 @@ if buildAll || hasArg libucxx; then
-DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=${BUILD_COMPILE_COMMANDS} \
-DUCXX_ENABLE_RMM=${UCXX_ENABLE_RMM} \
-DUCXX_TESTS_ENABLE_RMM=${UCXX_TESTS_ENABLE_RMM} \
-DUCXX_BENCHMARKS_ENABLE_RMM=${UCXX_BENCHMARKS_ENABLE_RMM} \
-DUCXX_ENABLE_CCCL=${UCXX_ENABLE_CCCL} \
-DUCXX_BENCHMARKS_ENABLE_CUDA=${UCXX_BENCHMARKS_ENABLE_CUDA} \
"${EXTRA_CMAKE_ARGS[@]}"
Expand Down
4 changes: 1 addition & 3 deletions ci/build_wheel_libucxx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@ rapids-pip-retry install \
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

export SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON -DUCXX_ENABLE_CCCL=ON"
export SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_CCCL=ON"

./ci/build_wheel.sh "${package_name}" "${package_dir}"

python -m auditwheel repair \
--exclude "libucp.so.0" \
--exclude "librapids_logger.so" \
--exclude "librmm.so" \
-w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \
${package_dir}/dist/*

Expand Down
2 changes: 0 additions & 2 deletions ci/build_wheel_ucxx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ export RAPIDS_PY_API
python -m auditwheel repair \
--exclude "libucp.so.0" \
--exclude "libucxx.so" \
--exclude "librapids_logger.so" \
--exclude "librmm.so" \
-w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \
${package_dir}/dist/*

Expand Down
4 changes: 0 additions & 4 deletions conda/recipes/libucxx/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,19 +100,16 @@ outputs:
- ${{ stdlib("c") }}
host:
- cuda-version =${{ cuda_version }}
- librmm ${{ rapids_version }}
- ucx
run:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
- ${{ pin_compatible("librmm", upper_bound="x.x") }}
- ucx >=1.18.0,<1.21.0
run_exports:
- ${{ pin_subpackage("libucxx", upper_bound="x.x") }}
ignore_run_exports:
by_name:
- cuda-cudart
- cuda-version
- librmm
- ucx
tests:
- script:
Expand Down Expand Up @@ -173,7 +170,6 @@ outputs:
by_name:
- cuda-cudart
- cuda-version
- librmm
- libucxx
- ucx
about:
Expand Down
3 changes: 0 additions & 3 deletions conda/recipes/ucxx/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ outputs:
- python =${{ py_abi_min }}
- python-abi3 ${{ py_abi_min }}.*
- rapids-build-backend >=0.4.0,<0.5.0
- rmm ${{ rapids_version }}
- scikit-build-core>=0.11.0
- ucx
- libucxx =${{ version }}
Expand All @@ -106,15 +105,13 @@ outputs:
- python
- ucx >=1.18.0,<1.21.0
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
- ${{ pin_compatible("rmm", upper_bound="x.x") }}
- libucxx =${{ version }}
run_constraints:
- cupy >=13.6.0
ignore_run_exports:
by_name:
- cuda-cudart
- cuda-version
- librmm
- libucxx
- python_abi
- ucx
Expand Down
28 changes: 11 additions & 17 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build benchmarks" OFF)
option(BUILD_EXAMPLES "Configure CMake to build examples" OFF)
option(BUILD_SHARED_LIBS "Build UCXX shared libraries" ON)
option(UCXX_ENABLE_RMM "Enable support for CUDA multi-buffer transfer with RMM" OFF)
option(UCXX_TESTS_ENABLE_RMM "Enable RMM-backed test code paths" OFF)
option(UCXX_BENCHMARKS_ENABLE_RMM "Enable RMM-backed benchmark code paths" OFF)
# TODO: Flip UCXX_ENABLE_CCCL default to OFF once devcontainer builds pass -DUCXX_ENABLE_CCCL=ON
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to update the devcontainers to pass `-DUCXX_ENABLE_CCCL=ON` first; then we can flip this flag's default to OFF. That way CCCL support stays optional.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out; I opened rapidsai/devcontainers#705 to address it.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pentschev Would it be too much to rename UCXX_ENABLE_RMM to UCXX_ENABLE_RMM_TESTING?
I had the same question as @bdice here

option(UCXX_ENABLE_CCCL "Enable support for CUDA buffer with CCCL" ON)
option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF)
Expand All @@ -51,10 +52,8 @@ message(VERBOSE "UCXX: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "UCXX: Configure CMake to build benchmarks: ${BUILD_BENCHMARKS}")
message(VERBOSE "UCXX: Configure CMake to build examples: ${BUILD_EXAMPLES}")
message(VERBOSE "UCXX: Build UCXX shared libraries: ${BUILD_SHARED_LIBS}")
message(
VERBOSE
"UCXX: Enable support for CUDA multi-buffer transfer with RMM (DEPRECATED): ${UCXX_ENABLE_RMM}"
)
message(VERBOSE "UCXX: Enable RMM-backed test code paths: ${UCXX_TESTS_ENABLE_RMM}")
message(VERBOSE "UCXX: Enable RMM-backed benchmark code paths: ${UCXX_BENCHMARKS_ENABLE_RMM}")
message(VERBOSE "UCXX: Enable support for CUDA buffer with CCCL: ${UCXX_ENABLE_CCCL}")
message(
VERBOSE
Expand Down Expand Up @@ -97,11 +96,13 @@ rapids_find_package(

# add third party dependencies using CPM
rapids_cpm_init()
# find rmm
if(UCXX_ENABLE_RMM)
message(DEPRECATION "UCXX_ENABLE_RMM is deprecated and will be removed in a future release. "
"Use UCXX_ENABLE_CCCL instead."
)
# find rmm for enabled test and benchmark code paths
if((BUILD_TESTS AND UCXX_TESTS_ENABLE_RMM)
OR (BUILD_BENCHMARKS
AND UCXX_BENCHMARKS_ENABLE_CUDA
AND UCXX_BENCHMARKS_ENABLE_RMM
)
)
include(cmake/thirdparty/get_rmm.cmake)
endif()
# find cccl
Expand Down Expand Up @@ -181,13 +182,6 @@ target_include_directories(

target_compile_definitions(ucxx PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${UCXX_CXX_DEFINITIONS}>")

# Enable RMM if necessary
if(UCXX_ENABLE_RMM)
target_link_libraries(ucxx PUBLIC rmm::rmm)

target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM)
endif()

# Enable CCCL if necessary
if(UCXX_ENABLE_CCCL)
find_package(CUDAToolkit REQUIRED)
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ function(ConfigureBench CMAKE_BENCH_NAME)
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE CUDA::cudart_static)
endif()

# RMM memory resources for CUDA benchmarks (requires UCXX_ENABLE_RMM and get_rmm.cmake)
if(UCXX_BENCHMARKS_ENABLE_CUDA AND UCXX_ENABLE_RMM)
# RMM memory resources for CUDA benchmarks.
if(UCXX_BENCHMARKS_ENABLE_CUDA AND UCXX_BENCHMARKS_ENABLE_RMM)
target_compile_definitions(${CMAKE_BENCH_NAME} PRIVATE UCXX_BENCHMARKS_ENABLE_RMM)
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE rmm::rmm)
endif()
Expand Down
6 changes: 5 additions & 1 deletion cpp/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# =================================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause
# cmake-format: on
# =================================================================================
Expand Down Expand Up @@ -31,6 +31,10 @@ function(ConfigureBench CMAKE_BENCH_NAME)
CXX_STANDARD_REQUIRED ON
)
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE ucxx $<TARGET_NAME_IF_EXISTS:conda_env>)
if(UCXX_ENABLE_CCCL)
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE CUDA::cudart_static)
endif()
add_custom_command(
OUTPUT UCXX_EXAMPLES
COMMAND ${CMAKE_BENCH_NAME}
Expand Down
42 changes: 11 additions & 31 deletions cpp/examples/basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#include <ucxx/utils/sockaddr.h>
#include <ucxx/utils/ucx.h>

#if UCXX_ENABLE_RMM
#include <rmm/device_buffer.hpp>
#if UCXX_ENABLE_CCCL
#include <cuda_runtime_api.h>
#endif

class ListenerContext {
Expand Down Expand Up @@ -91,14 +91,12 @@ static void printUsage()
std::cerr << " 'thread-polling', 'thread-blocking' and 'wait' (default: 'blocking')"
<< std::endl;
std::cerr << " -p <port> Port number to listen at" << std::endl;
std::cerr
<< " -s <send_buffer_type> Send buffer type, valid values are: 'host', 'rmm', 'cccl' "
"(default: 'host')"
<< std::endl;
std::cerr
<< " -r <recv_buffer_type> Recv buffer type, valid values are: 'host', 'rmm', 'cccl' "
"(default: 'host')"
<< std::endl;
std::cerr << " -s <send_buffer_type> Send buffer type, valid values are: 'host', 'cccl' "
"(default: 'host')"
<< std::endl;
std::cerr << " -r <recv_buffer_type> Recv buffer type, valid values are: 'host', 'cccl' "
"(default: 'host')"
<< std::endl;
std::cerr << " -h Print this help" << std::endl;
std::cerr << std::endl;
}
Expand All @@ -124,14 +122,6 @@ struct args {
auto parseBufferType = [](const std::string& bufferTypeString) {
if (bufferTypeString == "host") {
return ucxx::BufferType::Host;
} else if (bufferTypeString == "rmm") {
#if UCXX_ENABLE_RMM
return ucxx::BufferType::RMM;
#else
std::cerr << "RMM support not enabled, please compile with -DUCXX_ENABLE_RMM=1"
<< std::endl;
return ucxx::BufferType::Invalid;
#endif
} else if (bufferTypeString == "cccl") {
#if UCXX_ENABLE_CCCL
return ucxx::BufferType::CCCL;
Expand Down Expand Up @@ -228,15 +218,6 @@ std::shared_ptr<ucxx::Buffer> makeBuffer(ucxx::BufferType bufferType, T* values,
switch (bufferType) {
case ucxx::BufferType::Host:
return std::make_shared<ucxx::HostBuffer>(values, size * sizeof(T));
case ucxx::BufferType::RMM:
#if UCXX_ENABLE_RMM
{
auto buf =
std::make_unique<rmm::device_buffer>(values, size * sizeof(T), rmm::cuda_stream_default);
rmm::cuda_stream_default.synchronize();
return std::make_shared<ucxx::RMMBuffer>(std::move(buf));
}
#endif
case ucxx::BufferType::CCCL:
#if UCXX_ENABLE_CCCL
{
Expand All @@ -255,7 +236,7 @@ auto verify_buffers(ucxx::Buffer* expected, ucxx::Buffer* actual)
std::vector<uint8_t> host_expected, host_actual;
void *host_expected_ptr, *host_actual_ptr;

#if UCXX_ENABLE_CCCL || UCXX_ENABLE_RMM
#if UCXX_ENABLE_CCCL
auto copy_to_host = [](auto& buffer, auto& host_buffer) {
// copy device buffer to host
host_buffer.resize(buffer->getSize());
Expand All @@ -272,14 +253,13 @@ auto verify_buffers(ucxx::Buffer* expected, ucxx::Buffer* actual)
};
#endif

if (expected->getType() == ucxx::BufferType::RMM ||
expected->getType() == ucxx::BufferType::CCCL) {
if (expected->getType() == ucxx::BufferType::CCCL) {
host_expected_ptr = copy_to_host(expected, host_expected);
} else {
host_expected_ptr = expected->data();
}

if (actual->getType() == ucxx::BufferType::RMM || actual->getType() == ucxx::BufferType::CCCL) {
if (actual->getType() == ucxx::BufferType::CCCL) {
host_actual_ptr = copy_to_host(actual, host_actual);
} else {
host_actual_ptr = actual->data();
Expand Down
6 changes: 1 addition & 5 deletions cpp/include/ucxx/api.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: BSD-3-Clause
*/
#pragma once

#ifndef UCXX_ENABLE_RMM
#define UCXX_ENABLE_RMM 0
#endif

#include <ucxx/address.h>
#include <ucxx/buffer.h>
#include <ucxx/constructors.h>
Expand Down
Loading
Loading