From 4f01c559ff61f2512ae14bae0d86f4a11a1bef93 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Wed, 8 May 2024 15:48:34 +1000
Subject: [PATCH] Refactor polymorphic allocator to use
 device_async_resource_ref (#1555)

Fixes #1540.
Fixes #1453 by removing unused `polymorphic_allocator::resource()`.

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Michael Schellenberger Costa (https://github.com/miscco)
  - Rong Ou (https://github.com/rongou)

URL: https://github.com/rapidsai/rmm/pull/1555
---
 .clang-format                                 |  1 +
 .gitignore                                    |  1 +
 .../rmm/mr/device/polymorphic_allocator.hpp   | 90 ++++++++++++++-----
 .../mr/device/polymorphic_allocator_tests.cpp | 13 +--
 4 files changed, 75 insertions(+), 30 deletions(-)
diff --git a/.clang-format b/.clang-format
index 35ebdbfae..951a549d0 100644
--- a/.clang-format
+++ b/.clang-format
@@ -46,6 +46,7 @@ BraceWrapping:
   SplitEmptyFunction: false
   SplitEmptyRecord: false
   SplitEmptyNamespace: false
+BreakAfterAttributes: Leave
 BreakAfterJavaFieldAnnotations: false
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: WebKit
diff --git a/.gitignore b/.gitignore
index f4993502b..2d0b150e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ dist/
 rmm.egg-info/
 python/build
 python/*/build
+python/rmm/docs/_build
 python/rmm/**/_lib/**/*.cpp
 !python/rmm/_lib/_torch_allocator.cpp
 python/rmm/**/_lib/**/*.h
diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp
index e2fb4b0cf..0b63b4691 100644
--- a/include/rmm/mr/device/polymorphic_allocator.hpp
+++ b/include/rmm/mr/device/polymorphic_allocator.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/device_memory_resource.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/resource_ref.hpp>
 
@@ -26,7 +25,11 @@
 #include <type_traits>
 
 namespace rmm::mr {
-
+/**
+ * @addtogroup device_memory_resources
+ * @{
+ * @file
+ */
 /**
  * @brief A stream ordered Allocator using a `rmm::mr::device_memory_resource` to satisfy
  * (de)allocations.
@@ -55,21 +58,21 @@ class polymorphic_allocator {
   /**
    * @brief Construct a `polymorphic_allocator` using the provided memory resource.
    *
-   * This constructor provides an implicit conversion from `memory_resource*`.
+   * This constructor provides an implicit conversion from `device_async_resource_ref`.
    *
-   * @param mr The `device_memory_resource` to use as the underlying resource.
+   * @param mr The upstream memory resource to use for allocation.
    */
-  polymorphic_allocator(device_memory_resource* mr) : mr_{mr} {}
+  polymorphic_allocator(device_async_resource_ref mr) : mr_{mr} {}
 
   /**
-   * @brief Construct a `polymorphic_allocator` using `other.resource()` as the underlying memory
-   * resource.
+   * @brief Construct a `polymorphic_allocator` using the underlying memory resource of `other`.
    *
-   * @param other The `polymorphic_resource` whose `resource()` will be used as the underlying
+   * @param other The `polymorphic_allocator` whose memory resource will be used as the underlying
    * resource of the new `polymorphic_allocator`.
    */
   template <typename U>
-  polymorphic_allocator(polymorphic_allocator<U> const& other) noexcept : mr_{other.resource()}
+  polymorphic_allocator(polymorphic_allocator<U> const& other) noexcept
+    : mr_{other.get_upstream_resource()}
   {
   }
 
@@ -82,14 +85,15 @@ class polymorphic_allocator {
    */
   value_type* allocate(std::size_t num, cuda_stream_view stream)
   {
-    return static_cast<value_type*>(resource()->allocate(num * sizeof(T), stream));
+    return static_cast<value_type*>(
+      get_upstream_resource().allocate_async(num * sizeof(T), stream));
   }
 
   /**
    * @brief Deallocates storage pointed to by `ptr`.
    *
-   * `ptr` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal
-   * to `*resource()` using `r.allocate(n * sizeof(T))`.
+   * `ptr` must have been allocated from a memory resource `r` that compares equal
+   * to `get_upstream_resource()` using `r.allocate(n * sizeof(T))`.
    *
    * @param ptr Pointer to memory to deallocate
    * @param num Number of objects originally allocated
@@ -97,7 +101,7 @@ class polymorphic_allocator {
    */
   void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream)
   {
-    resource()->deallocate(ptr, num * sizeof(T), stream);
+    get_upstream_resource().deallocate_async(ptr, num * sizeof(T), stream);
   }
 
   /**
@@ -108,24 +112,40 @@ class polymorphic_allocator {
     return mr_;
   }
 
-  /**
-   * @brief Returns pointer to the underlying `rmm::mr::device_memory_resource`.
-   *
-   * @return Pointer to the underlying resource.
-   */
-  [[nodiscard]] device_memory_resource* resource() const noexcept { return mr_; }
-
  private:
-  device_memory_resource* mr_{
+  rmm::device_async_resource_ref mr_{
     get_current_device_resource()};  ///< Underlying resource used for (de)allocation
 };
 
+/**
+ * @brief Compare two `polymorphic_allocator`s for equality.
+ *
+ * Two `polymorphic_allocator`s are equal if their underlying memory resources compare equal.
+ *
+ * @tparam T Type of the first allocator
+ * @tparam U Type of the second allocator
+ * @param lhs The first allocator to compare
+ * @param rhs The second allocator to compare
+ * @return true if the two allocators are equal, false otherwise
+ */
 template <typename T, typename U>
 bool operator==(polymorphic_allocator<T> const& lhs, polymorphic_allocator<U> const& rhs)
 {
-  return lhs.resource()->is_equal(*rhs.resource());
+  return lhs.get_upstream_resource() == rhs.get_upstream_resource();
 }
 
+/**
+ * @brief Compare two `polymorphic_allocator`s for inequality.
+ *
+ * Two `polymorphic_allocator`s are not equal if their underlying memory resources compare not
+ * equal.
+ *
+ * @tparam T Type of the first allocator
+ * @tparam U Type of the second allocator
+ * @param lhs The first allocator to compare
+ * @param rhs The second allocator to compare
+ * @return true if the two allocators are not equal, false otherwise
+ */
 template <typename T, typename U>
 bool operator!=(polymorphic_allocator<T> const& lhs, polymorphic_allocator<U> const& rhs)
 {
@@ -237,12 +257,34 @@ class stream_allocator_adaptor {
   cuda_stream_view stream_;  ///< Stream on which (de)allocations are performed
 };
 
+/**
+ * @brief Compare two `stream_allocator_adaptor`s for equality.
+ *
+ * Two `stream_allocator_adaptor`s are equal if their underlying allocators compare equal.
+ *
+ * @tparam A Type of the first allocator
+ * @tparam O Type of the second allocator
+ * @param lhs The first allocator to compare
+ * @param rhs The second allocator to compare
+ * @return true if the two allocators are equal, false otherwise
+ */
 template <typename A, typename O>
 bool operator==(stream_allocator_adaptor<A> const& lhs, stream_allocator_adaptor<O> const& rhs)
 {
   return lhs.underlying_allocator() == rhs.underlying_allocator();
 }
 
+/**
+ * @brief Compare two `stream_allocator_adaptor`s for inequality.
+ *
+ * Two `stream_allocator_adaptor`s are not equal if their underlying allocators compare not equal.
+ *
+ * @tparam A Type of the first allocator
+ * @tparam O Type of the second allocator
+ * @param lhs The first allocator to compare
+ * @param rhs The second allocator to compare
+ * @return true if the two allocators are not equal, false otherwise
+ */
 template <typename A, typename O>
 bool operator!=(stream_allocator_adaptor<A> const& lhs, stream_allocator_adaptor<O> const& rhs)
 {
@@ -264,5 +306,5 @@ auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view
 {
   return stream_allocator_adaptor<Allocator>{allocator, stream};
 }
-
+/** @} */  // end of group
 }  // namespace rmm::mr
diff --git a/tests/mr/device/polymorphic_allocator_tests.cpp b/tests/mr/device/polymorphic_allocator_tests.cpp
index 3b73d4a49..d433e010c 100644
--- a/tests/mr/device/polymorphic_allocator_tests.cpp
+++ b/tests/mr/device/polymorphic_allocator_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,14 +33,15 @@ struct allocator_test : public ::testing::Test {
 TEST_F(allocator_test, default_resource)
 {
   rmm::mr::polymorphic_allocator<int> allocator{};
-  EXPECT_EQ(allocator.resource(), rmm::mr::get_current_device_resource());
+  EXPECT_EQ(allocator.get_upstream_resource(),
+            rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()});
 }
 
 TEST_F(allocator_test, custom_resource)
 {
   rmm::mr::cuda_memory_resource mr;
   rmm::mr::polymorphic_allocator<int> allocator{&mr};
-  EXPECT_EQ(allocator.resource(), &mr);
+  EXPECT_EQ(allocator.get_upstream_resource(), rmm::device_async_resource_ref{mr});
 }
 
 void test_conversion(rmm::mr::polymorphic_allocator<int> /*unused*/) {}
@@ -48,7 +49,7 @@ void test_conversion(rmm::mr::polymorphic_allocator<int> /*unused*/) {}
 TEST_F(allocator_test, implicit_conversion)
 {
   rmm::mr::cuda_memory_resource mr;
-  test_conversion(&mr);
+  test_conversion(rmm::device_async_resource_ref{mr});
 }
 
 TEST_F(allocator_test, self_equality)
@@ -84,7 +85,7 @@ TEST_F(allocator_test, copy_ctor_same_type)
   rmm::mr::polymorphic_allocator<int> alloc0;
   rmm::mr::polymorphic_allocator<int> alloc1{alloc0};
   EXPECT_EQ(alloc0, alloc1);
-  EXPECT_EQ(alloc0.resource(), alloc1.resource());
+  EXPECT_EQ(alloc0.get_upstream_resource(), alloc1.get_upstream_resource());
 }
 
 TEST_F(allocator_test, copy_ctor_different_type)
@@ -92,7 +93,7 @@ TEST_F(allocator_test, copy_ctor_different_type)
   rmm::mr::polymorphic_allocator<int> alloc0;
   rmm::mr::polymorphic_allocator<double> alloc1{alloc0};
   EXPECT_EQ(alloc0, alloc1);
-  EXPECT_EQ(alloc0.resource(), alloc1.resource());
+  EXPECT_EQ(alloc0.get_upstream_resource(), alloc1.get_upstream_resource());
 }
 
 TEST_F(allocator_test, rebind)