-
Notifications
You must be signed in to change notification settings - Fork 871
/
hostdevice_vector.hpp
241 lines (193 loc) · 8.54 KB
/
hostdevice_vector.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "config_utils.hpp"
#include "hostdevice_span.hpp"
#include <cudf/detail/utilities/rmm_host_vector.hpp>
#include <cudf/io/memory_resource.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/host/host_memory_resource.hpp>
namespace cudf::detail {
/**
* @brief A helper class that wraps fixed-length device memory for the GPU, and
* a mirror host pinned memory for the CPU.
*
 * This abstraction allocates a specified fixed chunk of device memory that can
 * be initialized upfront, or gradually initialized as required.
* The host-side memory can be used to manipulate data on the CPU before and
* after operating on the same data on the GPU.
*/
template <typename T>
class hostdevice_vector {
 public:
  using value_type = T;

  /// Default-constructs an empty vector on the default stream.
  hostdevice_vector() : hostdevice_vector(0, cudf::get_default_stream()) {}

  /// Constructs a vector of `size` elements; capacity equals size.
  explicit hostdevice_vector(size_t size, rmm::cuda_stream_view stream)
    : hostdevice_vector(size, size, stream)
  {
  }

  /**
   * @brief Constructs a vector with `initial_size` elements and room for `max_size`.
   *
   * Device memory for `max_size` elements is allocated upfront; the host buffer
   * reserves the same capacity so `push_back` never reallocates (which would
   * invalidate pointers handed out via `host_ptr`).
   *
   * @throws cudf::logic_error if `initial_size > max_size`
   */
  explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream)
    : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(max_size, stream)
  {
    CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size");
    h_data.reserve(max_size);
    h_data.resize(initial_size);
  }

  /**
   * @brief Appends an element to the host buffer.
   *
   * Only modifies host memory; call `host_to_device_*` to propagate to device.
   *
   * @throws cudf::logic_error if the vector is already at capacity
   */
  void push_back(T const& data)
  {
    CUDF_EXPECTS(size() < capacity(),
                 "Cannot insert data into hostdevice_vector because capacity has been exceeded.");
    h_data.push_back(data);
  }

  // Capacity is fixed at construction time and mirrored by the device allocation.
  [[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
  [[nodiscard]] size_t size() const noexcept { return h_data.size(); }
  [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }
  [[nodiscard]] bool empty() const noexcept { return size() == 0; }

  // Element access operates on the host-side buffer only.
  [[nodiscard]] T& operator[](size_t i) { return h_data[i]; }
  [[nodiscard]] T const& operator[](size_t i) const { return h_data[i]; }

  [[nodiscard]] T* host_ptr(size_t offset = 0) { return h_data.data() + offset; }
  [[nodiscard]] T const* host_ptr(size_t offset = 0) const { return h_data.data() + offset; }

  [[nodiscard]] T* begin() { return host_ptr(); }
  [[nodiscard]] T const* begin() const { return host_ptr(); }
  [[nodiscard]] T* end() { return host_ptr(size()); }
  [[nodiscard]] T const* end() const { return host_ptr(size()); }

  [[nodiscard]] T& front() { return h_data.front(); }
  // Delegate to h_data directly: calling `front()` here would select this same
  // const overload and recurse forever.
  [[nodiscard]] T const& front() const { return h_data.front(); }
  [[nodiscard]] T& back() { return h_data.back(); }
  // Same as `front() const`: must not call `back()` on itself.
  [[nodiscard]] T const& back() const { return h_data.back(); }

  [[nodiscard]] T* device_ptr(size_t offset = 0) { return d_data.data() + offset; }
  [[nodiscard]] T const* device_ptr(size_t offset = 0) const { return d_data.data() + offset; }

  [[nodiscard]] T* d_begin() { return device_ptr(); }
  [[nodiscard]] T const* d_begin() const { return device_ptr(); }
  [[nodiscard]] T* d_end() { return device_ptr(size()); }
  [[nodiscard]] T const* d_end() const { return device_ptr(size()); }

  /**
   * @brief Returns the specified element from device memory
   *
   * @note This function incurs a device to host memcpy and should be used sparingly.
   * @note This function synchronizes `stream`.
   *
   * @throws rmm::out_of_range exception if `element_index >= size()`
   *
   * @param element_index Index of the desired element
   * @param stream The stream on which to perform the copy
   * @return The value of the specified element
   */
  [[nodiscard]] T element(std::size_t element_index, rmm::cuda_stream_view stream) const
  {
    return d_data.element(element_index, stream);
  }

  operator cudf::host_span<T>() { return {host_ptr(), size()}; }
  operator cudf::host_span<T const>() const { return {host_ptr(), size()}; }
  operator cudf::device_span<T>() { return {device_ptr(), size()}; }
  operator cudf::device_span<T const>() const { return {device_ptr(), size()}; }

  /// Asynchronously copies `size_bytes()` from host to device on `stream`.
  void host_to_device_async(rmm::cuda_stream_view stream)
  {
    CUDF_CUDA_TRY(
      cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
  }

  /// Copies host to device and synchronizes `stream` before returning.
  void host_to_device_sync(rmm::cuda_stream_view stream)
  {
    host_to_device_async(stream);
    stream.synchronize();
  }

  /// Asynchronously copies `size_bytes()` from device to host on `stream`.
  void device_to_host_async(rmm::cuda_stream_view stream)
  {
    CUDF_CUDA_TRY(
      cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
  }

  /// Copies device to host and synchronizes `stream` before returning.
  void device_to_host_sync(rmm::cuda_stream_view stream)
  {
    device_to_host_async(stream);
    stream.synchronize();
  }

  /**
   * @brief Converts a hostdevice_vector into a hostdevice_span.
   *
   * @return A typed hostdevice_span of the hostdevice_vector's data
   */
  [[nodiscard]] operator hostdevice_span<T>()
  {
    return hostdevice_span<T>{h_data.data(), d_data.data(), size()};
  }

  /**
   * @brief Converts a part of a hostdevice_vector into a hostdevice_span.
   *
   * @param offset The offset of the first element in the subspan
   * @param count The number of elements in the subspan
   * @return A typed hostdevice_span of the hostdevice_vector's data
   *
   * @throws cudf::logic_error if `[offset, offset + count)` is out of bounds
   */
  [[nodiscard]] hostdevice_span<T> subspan(size_t offset, size_t count)
  {
    CUDF_EXPECTS(offset < d_data.size(), "Offset is out of bounds.");
    CUDF_EXPECTS(count <= d_data.size() - offset,
                 "The span with given offset and count is out of bounds.");
    return hostdevice_span<T>{h_data.data() + offset, d_data.data() + offset, count};
  }

 private:
  cudf::detail::rmm_host_vector<T> h_data;  // pinned host mirror
  rmm::device_uvector<T> d_data;            // fixed-capacity device buffer
};
/**
* @brief Wrapper around hostdevice_vector to enable two-dimensional indexing.
*
* Does not incur additional allocations.
*/
template <typename T>
class hostdevice_2dvector {
 public:
  /// Default-constructs an empty 0x0 vector on the default stream.
  hostdevice_2dvector() : hostdevice_2dvector(0, 0, cudf::get_default_stream()) {}

  /// Allocates a single flat buffer of `rows * columns` elements.
  hostdevice_2dvector(size_t rows, size_t columns, rmm::cuda_stream_view stream)
    : _data{rows * columns, stream}, _size{rows, columns}
  {
  }

  operator device_2dspan<T>() { return {_data.device_ptr(), _size}; }
  operator device_2dspan<T const>() const { return {_data.device_ptr(), _size}; }

  device_2dspan<T> device_view() { return static_cast<device_2dspan<T>>(*this); }
  // The const overload must return a span of const elements: a span<T const>
  // does not convert to span<T>, and a mutable view of a const object would be
  // a const-correctness hole anyway.
  [[nodiscard]] device_2dspan<T const> device_view() const
  {
    return static_cast<device_2dspan<T const>>(*this);
  }

  operator host_2dspan<T>() { return {_data.host_ptr(), _size}; }
  operator host_2dspan<T const>() const { return {_data.host_ptr(), _size}; }

  host_2dspan<T> host_view() { return static_cast<host_2dspan<T>>(*this); }
  // Same reasoning as device_view() const: return a read-only view.
  [[nodiscard]] host_2dspan<T const> host_view() const
  {
    return static_cast<host_2dspan<T const>>(*this);
  }

  /// Returns a host-side span over row `row` (length `_size.second`).
  host_span<T> operator[](size_t row)
  {
    return {_data.host_ptr() + host_2dspan<T>::flatten_index(row, 0, _size), _size.second};
  }

  host_span<T const> operator[](size_t row) const
  {
    return {_data.host_ptr() + host_2dspan<T>::flatten_index(row, 0, _size), _size.second};
  }

  /// Returns the (rows, columns) pair.
  auto size() const noexcept { return _size; }
  /// Returns the total number of elements (rows * columns).
  auto count() const noexcept { return _size.first * _size.second; }
  auto is_empty() const noexcept { return count() == 0; }

  T* base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); }
  T* base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); }
  T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); }
  T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); }

  size_t size_bytes() const noexcept { return _data.size_bytes(); }

  // Transfers delegate to the flat underlying hostdevice_vector.
  void host_to_device_async(rmm::cuda_stream_view stream) { _data.host_to_device_async(stream); }
  void host_to_device_sync(rmm::cuda_stream_view stream) { _data.host_to_device_sync(stream); }
  void device_to_host_async(rmm::cuda_stream_view stream) { _data.device_to_host_async(stream); }
  void device_to_host_sync(rmm::cuda_stream_view stream) { _data.device_to_host_sync(stream); }

 private:
  hostdevice_vector<T> _data;                 // flat row-major storage
  typename host_2dspan<T>::size_type _size;   // (rows, columns)
};
} // namespace cudf::detail