From 4f01c559ff61f2512ae14bae0d86f4a11a1bef93 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 8 May 2024 15:48:34 +1000 Subject: [PATCH] Refactor polymorphic allocator to use device_async_resource_ref (#1555) Fixes #1540. Fixes #1453 by removing unused `polymorphic_allocator::resource()`. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Rong Ou (https://github.com/rongou) URL: https://github.com/rapidsai/rmm/pull/1555 --- .clang-format | 1 + .gitignore | 1 + .../rmm/mr/device/polymorphic_allocator.hpp | 90 ++++++++++++++----- .../mr/device/polymorphic_allocator_tests.cpp | 13 +-- 4 files changed, 75 insertions(+), 30 deletions(-) diff --git a/.clang-format b/.clang-format index 35ebdbfae..951a549d0 100644 --- a/.clang-format +++ b/.clang-format @@ -46,6 +46,7 @@ BraceWrapping: SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false +BreakAfterAttributes: Leave BreakAfterJavaFieldAnnotations: false BreakBeforeBinaryOperators: None BreakBeforeBraces: WebKit diff --git a/.gitignore b/.gitignore index f4993502b..2d0b150e1 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ dist/ rmm.egg-info/ python/build python/*/build +python/rmm/docs/_build python/rmm/**/_lib/**/*.cpp !python/rmm/_lib/_torch_allocator.cpp python/rmm/**/_lib/**/*.h diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index e2fb4b0cf..0b63b4691 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include @@ -26,7 +25,11 @@ #include namespace rmm::mr { - +/** + * @addtogroup device_memory_resources + * @{ + * @file + */ /** * @brief A stream ordered Allocator using a `rmm::mr::device_memory_resource` to satisfy * (de)allocations. @@ -55,21 +58,21 @@ class polymorphic_allocator { /** * @brief Construct a `polymorphic_allocator` using the provided memory resource. * - * This constructor provides an implicit conversion from `memory_resource*`. + * This constructor provides an implicit conversion from `device_async_resource_ref`. * - * @param mr The `device_memory_resource` to use as the underlying resource. + * @param mr The upstream memory resource to use for allocation. */ - polymorphic_allocator(device_memory_resource* mr) : mr_{mr} {} + polymorphic_allocator(device_async_resource_ref mr) : mr_{mr} {} /** - * @brief Construct a `polymorphic_allocator` using `other.resource()` as the underlying memory - * resource. + * @brief Construct a `polymorphic_allocator` using the underlying memory resource of `other`. * - * @param other The `polymorphic_resource` whose `resource()` will be used as the underlying + * @param other The `polymorphic_allocator` whose memory resource will be used as the underlying * resource of the new `polymorphic_allocator`. */ template - polymorphic_allocator(polymorphic_allocator const& other) noexcept : mr_{other.resource()} + polymorphic_allocator(polymorphic_allocator const& other) noexcept + : mr_{other.get_upstream_resource()} { } @@ -82,14 +85,15 @@ class polymorphic_allocator { */ value_type* allocate(std::size_t num, cuda_stream_view stream) { - return static_cast(resource()->allocate(num * sizeof(T), stream)); + return static_cast( + get_upstream_resource().allocate_async(num * sizeof(T), stream)); } /** * @brief Deallocates storage pointed to by `ptr`. * - * `ptr` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal - * to `*resource()` using `r.allocate(n * sizeof(T))`. + * `ptr` must have been allocated from a memory resource `r` that compares equal + * to `get_upstream_resource()` using `r.allocate(n * sizeof(T))`. * * @param ptr Pointer to memory to deallocate * @param num Number of objects originally allocated @@ -97,7 +101,7 @@ class polymorphic_allocator { */ void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) { - resource()->deallocate(ptr, num * sizeof(T), stream); + get_upstream_resource().deallocate_async(ptr, num * sizeof(T), stream); } /** @@ -108,24 +112,40 @@ class polymorphic_allocator { return mr_; } - /** - * @brief Returns pointer to the underlying `rmm::mr::device_memory_resource`. - * - * @return Pointer to the underlying resource. - */ - [[nodiscard]] device_memory_resource* resource() const noexcept { return mr_; } - private: - device_memory_resource* mr_{ + rmm::device_async_resource_ref mr_{ get_current_device_resource()}; ///< Underlying resource used for (de)allocation }; +/** + * @brief Compare two `polymorphic_allocator`s for equality. + * + * Two `polymorphic_allocator`s are equal if their underlying memory resources compare equal. + * + * @tparam T Type of the first allocator + * @tparam U Type of the second allocator + * @param lhs The first allocator to compare + * @param rhs The second allocator to compare + * @return true if the two allocators are equal, false otherwise + */ template bool operator==(polymorphic_allocator const& lhs, polymorphic_allocator const& rhs) { - return lhs.resource()->is_equal(*rhs.resource()); + return lhs.get_upstream_resource() == rhs.get_upstream_resource(); } +/** + * @brief Compare two `polymorphic_allocator`s for inequality. + * + * Two `polymorphic_allocator`s are not equal if their underlying memory resources compare not + * equal. + * + * @tparam T Type of the first allocator + * @tparam U Type of the second allocator + * @param lhs The first allocator to compare + * @param rhs The second allocator to compare + * @return true if the two allocators are not equal, false otherwise + */ template bool operator!=(polymorphic_allocator const& lhs, polymorphic_allocator const& rhs) { @@ -237,12 +257,34 @@ class stream_allocator_adaptor { cuda_stream_view stream_; ///< Stream on which (de)allocations are performed }; +/** + * @brief Compare two `stream_allocator_adaptor`s for equality. + * + * Two `stream_allocator_adaptor`s are equal if their underlying allocators compare equal. + * + * @tparam A Type of the first allocator + * @tparam O Type of the second allocator + * @param lhs The first allocator to compare + * @param rhs The second allocator to compare + * @return true if the two allocators are equal, false otherwise + */ template bool operator==(stream_allocator_adaptor const& lhs, stream_allocator_adaptor const& rhs) { return lhs.underlying_allocator() == rhs.underlying_allocator(); } +/** + * @brief Compare two `stream_allocator_adaptor`s for inequality. + * + * Two `stream_allocator_adaptor`s are not equal if their underlying allocators compare not equal. + * + * @tparam A Type of the first allocator + * @tparam O Type of the second allocator + * @param lhs The first allocator to compare + * @param rhs The second allocator to compare + * @return true if the two allocators are not equal, false otherwise + */ template bool operator!=(stream_allocator_adaptor const& lhs, stream_allocator_adaptor const& rhs) { @@ -264,5 +306,5 @@ auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view { return stream_allocator_adaptor{allocator, stream}; } - +/** @} */ // end of group } // namespace rmm::mr diff --git a/tests/mr/device/polymorphic_allocator_tests.cpp b/tests/mr/device/polymorphic_allocator_tests.cpp index 3b73d4a49..d433e010c 100644 --- a/tests/mr/device/polymorphic_allocator_tests.cpp +++ b/tests/mr/device/polymorphic_allocator_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,14 +33,15 @@ struct allocator_test : public ::testing::Test { TEST_F(allocator_test, default_resource) { rmm::mr::polymorphic_allocator allocator{}; - EXPECT_EQ(allocator.resource(), rmm::mr::get_current_device_resource()); + EXPECT_EQ(allocator.get_upstream_resource(), + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); } TEST_F(allocator_test, custom_resource) { rmm::mr::cuda_memory_resource mr; rmm::mr::polymorphic_allocator allocator{&mr}; - EXPECT_EQ(allocator.resource(), &mr); + EXPECT_EQ(allocator.get_upstream_resource(), rmm::device_async_resource_ref{mr}); } void test_conversion(rmm::mr::polymorphic_allocator /*unused*/) {} @@ -48,7 +49,7 @@ void test_conversion(rmm::mr::polymorphic_allocator /*unused*/) {} TEST_F(allocator_test, implicit_conversion) { rmm::mr::cuda_memory_resource mr; - test_conversion(&mr); + test_conversion(rmm::device_async_resource_ref{mr}); } TEST_F(allocator_test, self_equality) @@ -84,7 +85,7 @@ TEST_F(allocator_test, copy_ctor_same_type) rmm::mr::polymorphic_allocator alloc0; rmm::mr::polymorphic_allocator alloc1{alloc0}; EXPECT_EQ(alloc0, alloc1); - EXPECT_EQ(alloc0.resource(), alloc1.resource()); + EXPECT_EQ(alloc0.get_upstream_resource(), alloc1.get_upstream_resource()); } TEST_F(allocator_test, copy_ctor_different_type) @@ -92,7 +93,7 @@ TEST_F(allocator_test, copy_ctor_different_type) rmm::mr::polymorphic_allocator alloc0; rmm::mr::polymorphic_allocator alloc1{alloc0}; EXPECT_EQ(alloc0, alloc1); - EXPECT_EQ(alloc0.resource(), alloc1.resource()); + EXPECT_EQ(alloc0.get_upstream_resource(), alloc1.get_upstream_resource()); } TEST_F(allocator_test, rebind)