diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp
index e4a35ee16..054bbb920 100644
--- a/include/rmm/cuda_device.hpp
+++ b/include/rmm/cuda_device.hpp
@@ -34,7 +34,7 @@ struct cuda_device_id {
   using value_type = int;  ///< Integer type used for device identifier
 
   /**
-   * @brief Construct a `cuda_device_id` from the specified integer value
+   * @brief Construct a `cuda_device_id` from the specified integer value.
    *
    * @param dev_id The device's integer identifier
    */
@@ -43,6 +43,35 @@ struct cuda_device_id {
   /// @briefreturn{The wrapped integer value}
   [[nodiscard]] constexpr value_type value() const noexcept { return id_; }
 
+  // TODO re-add doxygen comment specifier /** for these hidden friend operators once this Breathe
+  // bug is fixed: https://github.com/breathe-doc/breathe/issues/916
+  //! @cond Doxygen_Suppress
+  /**
+   * @brief Compare two `cuda_device_id`s for equality.
+   *
+   * @param lhs The first `cuda_device_id` to compare.
+   * @param rhs The second `cuda_device_id` to compare.
+   * @return true if the two `cuda_device_id`s wrap the same integer value, false otherwise.
+   */
+  [[nodiscard]] constexpr friend bool operator==(cuda_device_id const& lhs,
+                                                 cuda_device_id const& rhs) noexcept
+  {
+    return lhs.value() == rhs.value();
+  }
+
+  /**
+   * @brief Compare two `cuda_device_id`s for inequality.
+   *
+   * @param lhs The first `cuda_device_id` to compare.
+   * @param rhs The second `cuda_device_id` to compare.
+   * @return true if the two `cuda_device_id`s wrap different integer values, false otherwise.
+   */
+  [[nodiscard]] constexpr friend bool operator!=(cuda_device_id const& lhs,
+                                                 cuda_device_id const& rhs) noexcept
+  {
+    return lhs.value() != rhs.value();
+  }
+  //! @endcond
 
  private:
   value_type id_;
 };
@@ -84,16 +113,17 @@ struct cuda_set_device_raii {
    * @param dev_id The device to set as the current CUDA device
    */
   explicit cuda_set_device_raii(cuda_device_id dev_id)
-    : old_device_{get_current_cuda_device()}, needs_reset_{old_device_.value() != dev_id.value()}
+    : old_device_{get_current_cuda_device()},
+      needs_reset_{dev_id.value() >= 0 && old_device_ != dev_id}
   {
-    if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value()));
+    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
   }
   /**
    * @brief Reactivates the previous CUDA device
    */
   ~cuda_set_device_raii() noexcept
   {
-    if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value()));
+    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
   }
 
   cuda_set_device_raii(cuda_set_device_raii const&) = delete;
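Note (not part of the diff): the new hidden-friend comparisons and the `dev_id.value() >= 0` guard combine so that a `cuda_set_device_raii` constructed from an invalid (negative) device id — e.g. from a moved-from `device_buffer`'s `_device` below — performs no `cudaSetDevice` calls at all. A minimal sketch of the intended semantics, assuming at least one CUDA device:

```cpp
#include <rmm/cuda_device.hpp>

void sketch()
{
  rmm::cuda_device_id const dev0{0};
  rmm::cuda_device_id const invalid{-1};

  static_assert(noexcept(dev0 == invalid));  // hidden friends are constexpr and noexcept

  {
    rmm::cuda_set_device_raii raii{dev0};  // activates device 0 if it is not already active
    // ... device 0 is current here ...
  }                                        // previous device restored on scope exit

  {
    rmm::cuda_set_device_raii raii{invalid};  // needs_reset_ is false: no cudaSetDevice at all
  }
}
```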
diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp
index c69b9206b..20fa4f36e 100644
--- a/include/rmm/device_buffer.hpp
+++ b/include/rmm/device_buffer.hpp
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
@@ -109,6 +110,7 @@ class device_buffer {
                 mr::device_memory_resource* mr = mr::get_current_device_resource())
     : _stream{stream}, _mr{mr}
   {
+    cuda_set_device_raii dev{_device};
     allocate_async(size);
   }
 
@@ -137,6 +139,7 @@ class device_buffer {
                 mr::device_memory_resource* mr = mr::get_current_device_resource())
     : _stream{stream}, _mr{mr}
   {
+    cuda_set_device_raii dev{_device};
     allocate_async(size);
     copy_async(source_data, size);
   }
@@ -185,12 +188,14 @@ class device_buffer {
       _size{other._size},
       _capacity{other._capacity},
       _stream{other.stream()},
-      _mr{other._mr}
+      _mr{other._mr},
+      _device{other._device}
   {
     other._data     = nullptr;
     other._size     = 0;
     other._capacity = 0;
     other.set_stream(cuda_stream_view{});
+    other._device = cuda_device_id{-1};
   }
 
   /**
@@ -210,18 +215,21 @@ class device_buffer {
   device_buffer& operator=(device_buffer&& other) noexcept
   {
     if (&other != this) {
+      cuda_set_device_raii dev{_device};
       deallocate_async();
 
       _data     = other._data;
       _size     = other._size;
       _capacity = other._capacity;
       set_stream(other.stream());
-      _mr = other._mr;
+      _mr     = other._mr;
+      _device = other._device;
 
       other._data     = nullptr;
       other._size     = 0;
       other._capacity = 0;
       other.set_stream(cuda_stream_view{});
+      other._device = cuda_device_id{-1};
     }
     return *this;
   }
@@ -235,6 +243,7 @@ class device_buffer {
    */
   ~device_buffer() noexcept
   {
+    cuda_set_device_raii dev{_device};
     deallocate_async();
     _mr     = nullptr;
     _stream = cuda_stream_view{};
@@ -262,6 +271,7 @@ class device_buffer {
   {
     set_stream(stream);
     if (new_capacity > capacity()) {
+      cuda_set_device_raii dev{_device};
      auto tmp            = device_buffer{new_capacity, stream, _mr};
       auto const old_size = size();
       RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
@@ -303,6 +313,7 @@ class device_buffer {
     if (new_size <= capacity()) {
       _size = new_size;
     } else {
+      cuda_set_device_raii dev{_device};
       auto tmp = device_buffer{new_size, stream, _mr};
       RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
       *this = std::move(tmp);
@@ -326,6 +337,7 @@ class device_buffer {
   {
     set_stream(stream);
     if (size() != capacity()) {
+      cuda_set_device_raii dev{_device};
       // Invoke copy ctor on self which only copies `[0, size())` and swap it
       // with self. The temporary `device_buffer` will hold the old contents
       // which will then be destroyed
@@ -407,6 +419,7 @@ class device_buffer {
   mr::device_memory_resource* _mr{
     mr::get_current_device_resource()};  ///< The memory resource used to
                                          ///< allocate/deallocate device memory
+  cuda_device_id _device{get_current_cuda_device()};
 
   /**
    * @brief Allocates the specified amount of memory and updates the size/capacity accordingly.
@@ -457,6 +470,7 @@ class device_buffer {
   {
     if (bytes > 0) {
       RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
+      RMM_EXPECTS(nullptr != _data, "Invalid copy to nullptr.");
 
       RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
     }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 36c3aa043..752496279 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -186,4 +186,7 @@ ConfigureTest(BINNING_MR_TEST mr/device/binning_mr_tests.cpp)
 # callback memory resource tests
 ConfigureTest(CALLBACK_MR_TEST mr/device/callback_mr_tests.cpp)
 
+# container multidevice tests
+ConfigureTest(CONTAINER_MULTIDEVICE_TEST container_multidevice_tests.cu)
+
 rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/librmm)
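Note (not part of the diff): `device_buffer` now records the device that was active at construction in `_device` and re-activates it, via `cuda_set_device_raii`, around every allocating or deallocating operation, so destruction and reallocation no longer depend on whichever device the caller happens to have active. A minimal sketch of the behavior this buys, assuming at least two devices:

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/device_buffer.hpp>

#include <cuda_runtime_api.h>

void sketch()
{
  RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
  rmm::cuda_stream stream;  // stream created on device 0
  {
    rmm::device_buffer buf{128, stream.view()};  // allocated on device 0; _device records it
    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));   // caller switches devices...
  }  // ...but ~device_buffer still frees with device 0 active, via cuda_set_device_raii
  RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
}
```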
diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu
new file mode 100644
index 000000000..9de9ddf40
--- /dev/null
+++ b/tests/container_multidevice_tests.cu
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device_check_resource_adaptor.hpp"
+#include "rmm/mr/device/per_device_resource.hpp"
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/device_scalar.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <gtest/gtest.h>
+
+#include <type_traits>
+
+template <typename ContainerType>
+struct ContainerMultiDeviceTest : public ::testing::Test {};
+
+using containers =
+  ::testing::Types<rmm::device_buffer, rmm::device_uvector<int>, rmm::device_scalar<int>>;
+
+TYPED_TEST_CASE(ContainerMultiDeviceTest, containers);
+
+TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    {
+      if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+        auto buf = TypeParam(rmm::cuda_stream_view{});
+        RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+      } else {
+        auto buf = TypeParam(128, rmm::cuda_stream_view{});
+        RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+      }
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    {
+      auto buf_1 = []() {
+        if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+          return TypeParam(rmm::cuda_stream_view{});
+        } else {
+          return TypeParam(128, rmm::cuda_stream_view{});
+        }
+      }();
+
+      {
+        if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+          // device_scalar's constructor takes only a stream, no size
+          auto buf_0 = TypeParam(rmm::cuda_stream_view{});
+          buf_1      = std::move(buf_0);
+        } else {
+          auto buf_0 = TypeParam(128, rmm::cuda_stream_view{});
+          buf_1      = std::move(buf_0);
+        }
+      }
+
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    if constexpr (not std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+      auto buf = TypeParam(128, rmm::cuda_stream_view{});
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force resize with different active device
+      buf.resize(1024, rmm::cuda_stream_view{});
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    if constexpr (not std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+      auto buf = TypeParam(128, rmm::cuda_stream_view{});
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force resize/shrink with different active device
+      buf.resize(64, rmm::cuda_stream_view{});
+      buf.shrink_to_fit(rmm::cuda_stream_view{});
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
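Note (not part of the diff): each test pins device 0, installs `device_check_resource_adaptor` (added below) as the current resource so every allocation and deallocation asserts it runs with device 0 active, then switches to device 1 before triggering the destructor, move, resize, or shrink under test. Additional device-tracking containers could be covered by extending the typed-test list; a hypothetical example:

```cpp
// Hypothetical extension: any further container (or element type) with device
// tracking can simply be appended to the typed-test list.
using containers = ::testing::Types<rmm::device_buffer,
                                    rmm::device_uvector<int>,
                                    rmm::device_scalar<int>,
                                    rmm::device_uvector<double>>;
```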
diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu
index d4c34385e..e0d8e5555 100644
--- a/tests/device_buffer_tests.cu
+++ b/tests/device_buffer_tests.cu
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <gtest/gtest.h>
-
 #include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
@@ -29,9 +27,12 @@
 #include <thrust/equal.h>
 #include <thrust/sequence.h>
 
+#include <gtest/gtest.h>
+
 namespace testing {
 namespace thrust = THRUST_NS_QUALIFIER;
-}
+}  // namespace testing
 using namespace testing;
 
 #include <cuda_runtime_api.h>
diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp
new file mode 100644
index 000000000..f9ad4cf70
--- /dev/null
+++ b/tests/device_check_resource_adaptor.hpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+#include <gtest/gtest.h>
+
+class device_check_resource_adaptor final : public rmm::mr::device_memory_resource {
+ public:
+  device_check_resource_adaptor(rmm::mr::device_memory_resource* upstream)
+    : device_id{rmm::get_current_cuda_device()}, upstream_(upstream)
+  {
+  }
+
+  [[nodiscard]] bool supports_streams() const noexcept override
+  {
+    return upstream_->supports_streams();
+  }
+
+  [[nodiscard]] bool supports_get_mem_info() const noexcept override
+  {
+    return upstream_->supports_get_mem_info();
+  }
+
+  [[nodiscard]] device_memory_resource* get_upstream() const noexcept { return upstream_; }
+
+ private:
+  [[nodiscard]] bool check_device_id() const { return device_id == rmm::get_current_cuda_device(); }
+
+  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
+  {
+    bool const is_correct_device = check_device_id();
+    EXPECT_TRUE(is_correct_device);
+    if (is_correct_device) { return upstream_->allocate(bytes, stream); }
+    return nullptr;
+  }
+
+  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
+  {
+    bool const is_correct_device = check_device_id();
+    EXPECT_TRUE(is_correct_device);
+    if (is_correct_device) { upstream_->deallocate(ptr, bytes, stream); }
+  }
+
+  [[nodiscard]] bool do_is_equal(
+    rmm::mr::device_memory_resource const& other) const noexcept override
+  {
+    if (this == &other) { return true; }
+    auto const* cast = dynamic_cast<device_check_resource_adaptor const*>(&other);
+    if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); }
+    return upstream_->is_equal(other);
+  }
+
+  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+    rmm::cuda_stream_view stream) const override
+  {
+    return upstream_->get_mem_info(stream);
+  }
+
+  rmm::cuda_device_id device_id;
+  rmm::mr::device_memory_resource* upstream_{};
+};
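Note (not part of the diff): the adaptor captures the device that is active when it is constructed and asserts, via `EXPECT_TRUE`, that every later (de)allocation runs with that same device active before forwarding to the upstream resource. A minimal usage sketch, assuming a gtest context (the checks report through gtest):

```cpp
#include "device_check_resource_adaptor.hpp"

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <gtest/gtest.h>

TEST(DeviceCheck, AllocationsStayOnOwningDevice)
{
  auto* upstream = rmm::mr::get_current_device_resource();
  device_check_resource_adaptor check{upstream};  // captures the currently active device id
  rmm::mr::set_current_device_resource(&check);

  {
    rmm::device_buffer buf{256, rmm::cuda_stream_view{}};  // allocates through the adaptor
  }  // deallocation is verified to run with the captured device active

  rmm::mr::set_current_device_resource(upstream);  // restore for later tests
}
```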