cuda_async_memory_resource built on cudaMallocAsync (#676)
This PR adds a new device memory resource, `cuda_async_memory_resource`, which uses `cudaMallocAsync`.
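
As a quick illustration (not part of this diff), here is a minimal usage sketch, assuming a CUDA 11.2+ toolkit and a driver with memory-pool support; everything other than the new class is existing RMM API:

```cpp
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

int main()
{
  // Throws rmm::logic_error if the runtime/driver lacks cudaMallocAsync support.
  rmm::mr::cuda_async_memory_resource mr;

  // Route default device allocations through the async pool.
  rmm::mr::set_current_device_resource(&mr);

  // Allocations now use cudaMallocAsync/cudaFreeAsync under the hood.
  rmm::device_buffer buf{1 << 20, rmm::cuda_stream_default};  // 1 MiB
  return 0;
}
```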

Closes #671 

Merging this also depends on CI support for CUDA 11.2.

TODO:
 - [x] Extend tests and benchmarks to exercise the new resource
 - [x] Implement `get_mem_info` correctly.
 - [x] ~Consider a constructor which takes a CUDA memory pool handle to use (currently uses the default pool)~ Edit: leave this for a follow-up, because pools have multiple parameters and the requirements aren't yet clear.
 - [ ] Test on a system without cudaMallocAsync support to verify that compiling with CUDA 11.2 but running on an earlier version fails gracefully

Authors:
  - Mark Harris (@harrism)

Approvers:
  - Jake Hemstad (@jrhemstad)
  - Keith Kraus (@kkraus14)
  - Leo Fang (@leofang)
  - Rong Ou (@rongou)

URL: #676
harrism committed Jan 27, 2021
1 parent e899c68 commit afe237c
Showing 8 changed files with 218 additions and 8 deletions.
15 changes: 14 additions & 1 deletion benchmarks/random_allocations/random_allocations.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@

#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
@@ -157,6 +158,8 @@ void uniform_random_allocations(rmm::mr::device_memory_resource& mr,
/// MR factory functions
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }

inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
@@ -235,6 +238,9 @@ void declare_benchmark(std::string name)
{
if (name == "cuda")
BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)->Apply(benchmark_range);
if (name == "cuda_async")
BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async)
->Apply(benchmark_range);
else if (name == "binning")
BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range);
else if (name == "pool")
@@ -288,6 +294,9 @@ int main(int argc, char** argv)
std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
{"binning", &make_binning},
{"cuda", &make_cuda},
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
{"cuda_async", &make_cuda_async},
#endif
{"pool", &make_pool}});
auto resource = args["resource"].as<std::string>();

@@ -309,7 +318,11 @@
std::string mr_name = args["resource"].as<std::string>();
declare_benchmark(mr_name);
} else {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
std::array<std::string, 5> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
#else
std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
#endif
std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
}
::benchmark::RunSpecifiedBenchmarks();
2 changes: 1 addition & 1 deletion include/rmm/detail/error.hpp
@@ -130,7 +130,7 @@ class out_of_range : public std::out_of_range {
#define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME
#define RMM_FAIL_2(_what, _exception_type) \
throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what};
#define RMM_FAIL_1(_what) RMM_FAIL_2(_call, rmm::logic_error)
#define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error)

/**
* @brief Error checking macro for CUDA runtime API functions.
146 changes: 146 additions & 0 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -0,0 +1,146 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include "rmm/cuda_stream_view.hpp"

#include <cuda_runtime_api.h>

#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm {
namespace mr {

/**
* @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
* allocation/deallocation.
*/
class cuda_async_memory_resource final : public device_memory_resource {
public:
/**
* @brief Default constructor
*
* @throws rmm::logic_error if the CUDA runtime or driver does not support `cudaMallocAsync`
*/
cuda_async_memory_resource()
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
// Check whether the driver supports cudaMallocAsync memory pools
int device{0};
RMM_CUDA_TRY(cudaGetDevice(&device));
int v{0};
auto e = cudaDeviceGetAttribute(&v, cudaDevAttrMemoryPoolsSupported, device);
RMM_EXPECTS(e == cudaSuccess && v == 1,
"cudaMallocAsync not supported with this CUDA driver/runtime version");
#else
RMM_FAIL("cudaMallocAsync not supported");
#endif
}

~cuda_async_memory_resource() = default;
cuda_async_memory_resource(cuda_async_memory_resource const&) = default;
cuda_async_memory_resource(cuda_async_memory_resource&&) = default;
cuda_async_memory_resource& operator=(cuda_async_memory_resource const&) = default;
cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_async_memory_resource` supports streams.
*
* @returns true
*/
bool supports_streams() const noexcept override { return true; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return false
*/
bool supports_get_mem_info() const noexcept override { return false; }

private:
/**
* @brief Allocates memory of size at least `bytes` using `cudaMallocAsync`.
*
* The returned pointer has at least 256B alignment.
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @param stream Stream on which to perform the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
{
void* p{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (bytes > 0) { RMM_CUDA_TRY(cudaMallocAsync(&p, bytes, stream.value()), rmm::bad_alloc); }
#else
(void)bytes;
(void)stream;
#endif
return p;
}

/**
* @brief Deallocate memory pointed to by \p p.
*
* @throws Nothing.
*
* @param p Pointer to be deallocated
* @param stream Stream on which to perform the deallocation
*/
void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); }
#else
(void)p;
(void)stream;
#endif
}

/**
* @brief Compare this resource to another.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_async_memory_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and total memory for the memory resource.
*
* @throws Nothing.
*
* @return std::pair containing free and total memory; this resource does not support
* `get_mem_info`, so `{0, 0}` is returned.
*/
std::pair<size_t, size_t> do_get_mem_info(rmm::cuda_stream_view) const override
{
return std::make_pair(0, 0);
}
};

} // namespace mr
} // namespace rmm
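
Because the constructor probes for driver support at runtime (and the implementation is compiled out before CUDA 11.2), a caller can fall back to the plain `cuda_memory_resource` when the async allocator is unavailable. A hedged sketch of that pattern (not part of this PR):

```cpp
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <memory>

// Prefer the cudaMallocAsync-backed resource; fall back to cudaMalloc/cudaFree
// when built without CUDA 11.2 or running on an older driver.
inline std::shared_ptr<rmm::mr::device_memory_resource> make_default_device_resource()
{
  try {
    return std::make_shared<rmm::mr::cuda_async_memory_resource>();
  } catch (rmm::logic_error const&) {
    return std::make_shared<rmm::mr::cuda_memory_resource>();
  }
}
```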
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
@@ -98,6 +98,10 @@ ConfigureTest(DEVICE_MR_TEST "${DEVICE_MR_TEST_SRC}")
set(POOL_MR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/pool_mr_tests.cpp")
ConfigureTest(POOL_MR_TEST "${POOL_MR_TEST_SRC}")

# cuda_async mr tests
set(CUDA_ASYNC_MR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/cuda_async_mr_tests.cpp")
ConfigureTest(CUDA_ASYNC_MR_TEST "${CUDA_ASYNC_MR_TEST_SRC}")

# thrust allocator tests

set(THRUST_ALLOCATOR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/thrust_allocator_tests.cu")
40 changes: 40 additions & 0 deletions tests/mr/device/cuda_async_mr_tests.cpp
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

#include <gtest/gtest.h>

namespace rmm {
namespace test {
namespace {

using cuda_async_mr = rmm::mr::cuda_async_memory_resource;

TEST(CudaAsyncMRTest, ThrowIfNotSupported)
{
auto construct_mr = []() { cuda_async_mr mr; };
#ifndef RMM_CUDA_MALLOC_ASYNC_SUPPORT
EXPECT_THROW(construct_mr(), rmm::logic_error);
#else
EXPECT_NO_THROW(construct_mr());
#endif
}

} // namespace
} // namespace test
} // namespace rmm
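
A natural follow-up (not in this commit) is an allocation round-trip test, sketched here using the same alias and compile-time guard as the file above:

```cpp
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
TEST(CudaAsyncMRTest, AllocateDeallocate)
{
  cuda_async_mr mr;
  void* p{nullptr};
  EXPECT_NO_THROW(p = mr.allocate(128));
  EXPECT_NE(p, nullptr);
  EXPECT_NO_THROW(mr.deallocate(p, 128));
}
#endif
```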
5 changes: 4 additions & 1 deletion tests/mr/device/mr_test.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>
@@ -224,6 +225,8 @@ struct mr_test : public ::testing::TestWithParam<mr_factory> {
/// MR factory functions
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }

inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }

inline auto make_managed() { return std::make_shared<rmm::mr::managed_memory_resource>(); }

inline auto make_pool()
8 changes: 6 additions & 2 deletions tests/mr/device/mr_tests.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
* limitations under the License.
*/

#include <rmm/mr/device/per_device_resource.hpp>
#include "mr_test.hpp"

#include <rmm/mr/device/per_device_resource.hpp>

#include <gtest/gtest.h>

namespace rmm {
@@ -26,6 +27,9 @@ namespace {
INSTANTIATE_TEST_CASE_P(ResourceTests,
mr_test,
::testing::Values(mr_factory{"CUDA", &make_cuda},
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
mr_factory{"CUDA_Async", &make_cuda_async},
#endif
mr_factory{"Managed", &make_managed},
mr_factory{"Pool", &make_pool},
mr_factory{"Arena", &make_arena},
6 changes: 3 additions & 3 deletions tests/mr/device/pool_mr_tests.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
* limitations under the License.
*/

#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include "rmm/detail/aligned.hpp"
#include "rmm/mr/device/limiting_resource_adaptor.hpp"

#include <gtest/gtest.h>

