Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require explicit pool size in pool_memory_resource and move some things out of detail namespace #1417

Merged
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c43a8c1
Add new util to get a fraction of available device mem, move availabl…
harrism Dec 19, 2023
d238daa
Deprecate old pool_mr ctors (optional initial size) and add new ctors…
harrism Dec 19, 2023
3d65d4c
Update all tests and resources to use new pool ctors and util
harrism Dec 19, 2023
66d85b4
Rename fraction_of_free_device_memory to percent_of_free_device_memory
harrism Dec 20, 2023
265de9b
clang-tidy Ignore 50 and 100 magic numbers
harrism Dec 20, 2023
0be364b
Remove straggler includes of removed file.
harrism Dec 20, 2023
266afa9
Merge branch 'branch-24.02' into fea-explicit-initial-pool-size
harrism Dec 20, 2023
5d66f40
Another missed include.
harrism Dec 20, 2023
fae5b73
Add detail::available_device_memory back as an alias of rmm::availabl…
harrism Jan 9, 2024
92c0653
merge branch 24.02
harrism Jan 9, 2024
2acf759
copyright
harrism Jan 9, 2024
782ff55
document (and deprecate) available_device_memory alias
harrism Jan 9, 2024
0b4c968
Respond to feedback from @wence-
harrism Jan 9, 2024
4f91478
Include doxygen deprecated output in docs
wence- Jan 9, 2024
f581809
Minor docstring fixes
wence- Jan 9, 2024
bafd70a
Don't use zero for default size in test.
harrism Jan 10, 2024
a77d215
Add non-detail alignment utilities
harrism Jan 10, 2024
07dffa3
Duplicate (for now) alignment utilities in rmm:: namespace since outs…
harrism Jan 10, 2024
8afff2d
Don't deprecate anything just yet (until cuDF/cuGraph updated)
harrism Jan 10, 2024
0140bd4
Merge branch 'fea-explicit-initial-pool-size' of github.com:harrism/r…
harrism Jan 10, 2024
91752c8
Make percent_of_free_device_memory do what it says on the tin.
harrism Jan 10, 2024
baf429c
Fix remaining uses of pool ctor in docs and code
harrism Jan 10, 2024
c90e81c
Fix overflow in percent_of_free_device_memory
harrism Jan 10, 2024
c2843be
Fix Cython to provide explicit initial size
harrism Jan 10, 2024
6e0aeaa
Respond to review suggestions in aligned.hpp
harrism Jan 10, 2024
c3c61e1
Fix quoted auto includes
harrism Jan 10, 2024
014ac5b
missed file for detail changes
harrism Jan 10, 2024
909b733
Add utilities doxygen group
harrism Jan 11, 2024
0fc3fba
Add utilities to sphinx docs
harrism Jan 11, 2024
6f9b0bd
Minimal changes to squash doc build warnings
wence- Jan 11, 2024
4ae13fc
docs: Fix custom handler for missing references
wence- Jan 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ CheckOptions:
value: 'alignment'
- key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
- key: readability-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
- key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues
value: "0;1;2;3;4;50;100"
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: 'true'
...
9 changes: 6 additions & 3 deletions benchmarks/device_uvector/device_uvector_bench.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@

#include "../synchronization/synchronization.hpp"

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -38,7 +39,8 @@
void BM_UvectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{
&cuda_mr, rmm::percent_of_free_device_memory(1. / 2)};
harrism marked this conversation as resolved.
Show resolved Hide resolved
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
Expand All @@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction)
void BM_ThrustVectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/random_allocations/random_allocations.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@

#include <benchmarks/utilities/cxxopts.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
Expand Down Expand Up @@ -170,7 +171,7 @@ inline auto make_pool()

inline auto make_arena()
{
auto free = rmm::detail::available_device_memory().first;
auto free = rmm::available_device_memory().first;
constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead.
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda(), free - reserve);
}
Expand Down
49 changes: 48 additions & 1 deletion include/rmm/cuda_device.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,7 @@
*/
#pragma once

#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>

#include <cuda_runtime_api.h>
Expand Down Expand Up @@ -102,6 +103,52 @@ inline int get_num_cuda_devices()
return num_dev;
}

/**
* @brief Returns the available and total device memory in bytes for the current device
*
* @return The available and total device memory in bytes for the current device as a std::pair.
*/
inline std::pair<std::size_t, std::size_t> available_device_memory()
harrism marked this conversation as resolved.
Show resolved Hide resolved
{
std::size_t free{};
std::size_t total{};
RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
return {free, total};
}

namespace detail {

/**
 * @brief Returns the available and total device memory in bytes for the current device
 *
 * @deprecated Use rmm::available_device_memory instead.
 *
 * @return The available and total device memory in bytes for the current device as a std::pair.
 */
// Backward-compatibility alias: a namespace-scope function pointer to the non-detail
// implementation, kept so existing callers of rmm::detail::available_device_memory() still
// compile while emitting a deprecation warning at the point of use.
[[deprecated("Use `rmm::available_device_memory` instead.")]] //
const auto available_device_memory = rmm::available_device_memory;

} // namespace detail

/**
 * @brief Returns approximately the specified percent of *free* (available) device memory on the
 * current CUDA device, aligned to the nearest CUDA allocation size.
 *
 * @param percent The percent of free memory to return. Defaults to 50%.
 *
 * @return The recommended initial device memory pool size in bytes.
 */
inline std::size_t percent_of_free_device_memory(int percent = 50)
{
  // Only the free amount is needed; querying .first avoids an unused-binding warning for total.
  auto const free = rmm::available_device_memory().first;

  auto const fraction = static_cast<double>(percent) / 100.0;

  // Scale the FREE memory (not total) so the function does what its name promises: previously
  // scaling total and capping at free could return far more than `percent` of free memory on a
  // busy device. Going through double is acceptable: device sizes fit a double's 53-bit mantissa.
  return rmm::detail::align_up(static_cast<std::size_t>(static_cast<double>(free) * fraction),
                               rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
}

/**
* @brief RAII class that sets the current CUDA device to the specified device on construction
* and restores the previous device on destruction.
Expand Down
31 changes: 0 additions & 31 deletions include/rmm/detail/cuda_util.hpp

This file was deleted.

5 changes: 2 additions & 3 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,6 @@

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>
Expand Down Expand Up @@ -120,7 +119,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
}

auto const [free, total] = rmm::detail::available_device_memory();
auto const [free, total] = rmm::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
uint64_t threshold = release_threshold.value_or(total);
Expand Down
3 changes: 1 addition & 2 deletions include/rmm/mr/device/cuda_async_view_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,6 @@

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
Expand Down
6 changes: 3 additions & 3 deletions include/rmm/mr/device/detail/arena.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,9 +16,9 @@

#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/logging_assert.hpp>
#include <rmm/logger.hpp>
Expand Down Expand Up @@ -692,7 +692,7 @@ class global_arena final {
*/
constexpr std::size_t default_size() const
{
auto const [free, total] = rmm::detail::available_device_memory();
auto const [free, total] = rmm::available_device_memory();
return free / 2;
}

Expand Down
95 changes: 64 additions & 31 deletions include/rmm/mr/device/pool_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,6 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/logging_assert.hpp>
#include <rmm/logger.hpp>
Expand Down Expand Up @@ -110,6 +109,29 @@ class pool_memory_resource final
friend class detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
detail::coalescing_free_list>;

/**
 * @brief [DEPRECATED] Construct a `pool_memory_resource` and allocate the initial device memory
 * pool using `upstream_mr`.
 *
 * @throws rmm::logic_error if `upstream_mr == nullptr`
 * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a
 * multiple of pool_memory_resource::allocation_alignment bytes.
 * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a
 * multiple of pool_memory_resource::allocation_alignment bytes.
 *
 * @param upstream_mr The memory_resource from which to allocate blocks for the pool.
 * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero.
 * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all
 * of the available memory from the upstream resource.
 */
[[deprecated("Must specify initial_pool_size")]] //
explicit pool_memory_resource(Upstream* upstream_mr,
                              thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
                              thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
  // Delegates to the explicit-size ctor: an omitted initial size now means an empty
  // (zero-byte) initial pool; the pool grows on demand from the upstream resource.
  : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size)
{
}

/**
* @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
harrism marked this conversation as resolved.
Show resolved Hide resolved
* `upstream_mr`.
Expand All @@ -121,21 +143,44 @@ class pool_memory_resource final
* multiple of pool_memory_resource::allocation_alignment bytes.
*
* @param upstream_mr The memory_resource from which to allocate blocks for the pool.
* @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the
* available memory on the current device.
* @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero.
* @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all
* of the available memory on the current device.
* of the available memory from the upstream resource.
*/
explicit pool_memory_resource(Upstream* upstream_mr,
template <typename Upstream2 = Upstream,
cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>, int> = 0>
[[deprecated("Must specify initial_pool_size")]] //
explicit pool_memory_resource(Upstream2& upstream_mr,
thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
: pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size)
{
}

/**
* @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
* `upstream_mr`.
*
* @throws rmm::logic_error if `upstream_mr == nullptr`
* @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of
* pool_memory_resource::allocation_alignment bytes.
* @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a
* multiple of pool_memory_resource::allocation_alignment bytes.
*
* @param upstream_mr The memory_resource from which to allocate blocks for the pool.
* @param initial_pool_size Minimum size, in bytes, of the initial pool.
* @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all
* of the available memory from the upstream resource.
*/
explicit pool_memory_resource(Upstream* upstream_mr,
std::size_t initial_pool_size,
thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
: upstream_mr_{[upstream_mr]() {
RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer.");
return upstream_mr;
}()}
{
RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0),
rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
"Error, Initial pool size required to be a multiple of 256 bytes");
RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0),
rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
Expand All @@ -149,21 +194,20 @@ class pool_memory_resource final
* `upstream_mr`.
*
* @throws rmm::logic_error if `upstream_mr == nullptr`
* @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a
* multiple of pool_memory_resource::allocation_alignment bytes.
* @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of
* pool_memory_resource::allocation_alignment bytes.
* @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a
* multiple of pool_memory_resource::allocation_alignment bytes.
*
* @param upstream_mr The memory_resource from which to allocate blocks for the pool.
* @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the
* available memory on the current device.
* @param initial_pool_size Minimum size, in bytes, of the initial pool.
* @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all
* of the available memory on the current device.
* of the available memory from the upstream resource.
*/
template <typename Upstream2 = Upstream,
cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>, int> = 0>
explicit pool_memory_resource(Upstream2& upstream_mr,
thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
std::size_t initial_pool_size,
thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
: pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size)
{
Expand Down Expand Up @@ -286,28 +330,17 @@ class pool_memory_resource final
* @param maximum_size The optional maximum size for the pool
harrism marked this conversation as resolved.
Show resolved Hide resolved
*/
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
harrism marked this conversation as resolved.
Show resolved Hide resolved
void initialize_pool(thrust::optional<std::size_t> initial_size,
thrust::optional<std::size_t> maximum_size)
void initialize_pool(std::size_t initial_size, thrust::optional<std::size_t> maximum_size)
{
auto const try_size = [&]() {
if (not initial_size.has_value()) {
auto const [free, total] = (get_upstream()->supports_get_mem_info())
? get_upstream()->get_mem_info(cuda_stream_legacy)
: rmm::detail::available_device_memory();
return rmm::detail::align_up(std::min(free, total / 2),
rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
}
return initial_size.value();
}();

current_pool_size_ = 0; // try_to_expand will set this if it succeeds
maximum_pool_size_ = maximum_size;

RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
"Initial pool size exceeds the maximum pool size!");
RMM_EXPECTS(
initial_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
"Initial pool size exceeds the maximum pool size!");

if (try_size > 0) {
auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy);
if (initial_size > 0) {
auto const block = try_to_expand(initial_size, initial_size, cuda_stream_legacy);
this->insert_block(block, cuda_stream_legacy);
}
}
Expand Down
Loading