forked from rapidsai/cudf
/
gather.cuh
665 lines (614 loc) · 28.2 KB
/
gather.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cudf/detail/copy.hpp>
#include <cudf/detail/indexalator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/lists/detail/gather.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/strings/detail/gather.cuh>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>
#include <algorithm>
#include <thrust/functional.h>
#include <thrust/gather.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/logical.h>
namespace cudf {
namespace detail {
/**
* @brief Function object to check if an index is within the bounds [begin, end).
*/
template <typename map_type>
struct bounds_checker {
size_type begin;
size_type end;
__device__ bounds_checker(size_type begin_, size_type end_) : begin{begin_}, end{end_} {}
__device__ bool operator()(map_type const index) { return ((index >= begin) && (index < end)); }
};
/**
* @brief The operation to perform when a gather map index is out of bounds
*/
enum class gather_bitmask_op {
DONT_CHECK, // Don't check for out of bounds indices
PASSTHROUGH, // Preserve mask at rows with out of bounds indices
NULLIFY, // Nullify rows with out of bounds indices
};
template <gather_bitmask_op Op, typename MapIterator>
struct gather_bitmask_functor {
table_device_view input;
bitmask_type** masks;
MapIterator gather_map;
__device__ bool operator()(size_type mask_idx, size_type bit_idx)
{
auto row_idx = gather_map[bit_idx];
auto col = input.column(mask_idx);
if (Op != gather_bitmask_op::DONT_CHECK) {
bool out_of_range = is_signed_iterator<MapIterator>() ? (row_idx < 0 || row_idx >= col.size())
: row_idx >= col.size();
if (out_of_range) {
if (Op == gather_bitmask_op::PASSTHROUGH) {
return bit_is_set(masks[mask_idx], bit_idx);
} else if (Op == gather_bitmask_op::NULLIFY) {
return false;
}
}
}
return col.is_valid(row_idx);
}
};
/**
* @brief Function for calling gather using iterators.
*
* Used by column_gatherer_impl definitions below.
*
* @tparam InputIterator Type for gather source data
* @tparam OutputIterator Type for gather results
* @tparam MapIterator Iterator type for the gather map
*
* @param source_itr Source data up to `source_size`
* @param source_size Maximum index value for source data
* @param target_itr Output iterator for gather result
* @param gather_map_begin Start of the gather map
* @param gather_map_end End of the gather map
* @param nullify_out_of_bounds True if map values are checked against `source_size`
* @param stream CUDA stream used for kernel launches.
*/
template <typename InputItr, typename OutputItr, typename MapIterator>
void gather_helper(InputItr source_itr,
size_type source_size,
OutputItr target_itr,
MapIterator gather_map_begin,
MapIterator gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream)
{
using map_type = typename std::iterator_traits<MapIterator>::value_type;
if (nullify_out_of_bounds) {
thrust::gather_if(rmm::exec_policy(stream),
gather_map_begin,
gather_map_end,
gather_map_begin,
source_itr,
target_itr,
bounds_checker<map_type>{0, source_size});
} else {
thrust::gather(
rmm::exec_policy(stream), gather_map_begin, gather_map_end, source_itr, target_itr);
}
}
// Error case when no other overload or specialization is available
template <typename Element, typename Enable = void>
struct column_gatherer_impl {
std::unique_ptr<column> operator()(...) { CUDF_FAIL("Unsupported type in gather."); }
};
/**
* @brief Function object for gathering a type-erased
* column. To be used with the cudf::type_dispatcher.
*/
struct column_gatherer {
/**
* @brief Type-dispatched function to gather from one column to another based
* on a `gather_map`.
*
* @tparam Element Dispatched type for the column being gathered
* @tparam MapIterator Iterator type for the gather map
* @param source_column View into the column to gather from
* @param gather_map_begin Beginning of iterator range of integral values representing the gather
* map
* @param gather_map_end End of iterator range of integral values representing the gather map
* @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
*/
template <typename Element, typename MapIterator>
std::unique_ptr<column> operator()(column_view const& source_column,
MapIterator gather_map_begin,
MapIterator gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
column_gatherer_impl<Element> gatherer{};
return gatherer(
source_column, gather_map_begin, gather_map_end, nullify_out_of_bounds, stream, mr);
}
};
/**
* @brief Function object for gathering a type-erased column.
*
* To be used with column_gatherer to provide specialization to handle
* fixed-width, string and other types.
*
* @tparam Element Dispatched type for the column being gathered
* @tparam MapIterator Iterator type for the gather map
*/
template <typename Element>
struct column_gatherer_impl<Element, std::enable_if_t<is_rep_layout_compatible<Element>()>> {
/**
* @brief Type-dispatched function to gather from one column to another based
* on a `gather_map`.
*
* This handles fixed width type column_views only.
*
* @param source_column View into the column to gather from
* @param gather_map_begin Beginning of iterator range of integral values representing the gather
* map
* @param gather_map_end End of iterator range of integral values representing the gather map
* @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
*/
template <typename MapIterator>
std::unique_ptr<column> operator()(column_view const& source_column,
MapIterator gather_map_begin,
MapIterator gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const num_rows = cudf::distance(gather_map_begin, gather_map_end);
auto const policy = cudf::mask_allocation_policy::NEVER;
auto destination_column =
cudf::detail::allocate_like(source_column, num_rows, policy, stream, mr);
gather_helper(source_column.data<Element>(),
source_column.size(),
destination_column->mutable_view().template begin<Element>(),
gather_map_begin,
gather_map_end,
nullify_out_of_bounds,
stream);
return destination_column;
}
};
/**
* @brief Function object for gathering a type-erased
* column. To be used with column_gatherer to provide specialization for
* string_view.
*
* @tparam MapIterator Iterator type for the gather map
*/
template <>
struct column_gatherer_impl<string_view> {
/**
* @brief Type-dispatched function to gather from one column to another based
* on a `gather_map`. This handles string_view type column_views only.
*
* @param source_column View into the column to gather from
* @param gather_map_begin Beginning of iterator range of integral values representing the gather
* map
* @param gather_map_end End of iterator range of integral values representing the gather map
* @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
*/
template <typename MapItType>
std::unique_ptr<column> operator()(column_view const& source_column,
MapItType gather_map_begin,
MapItType gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (true == nullify_out_of_bounds) {
return cudf::strings::detail::gather<true>(
strings_column_view(source_column), gather_map_begin, gather_map_end, stream, mr);
} else {
return cudf::strings::detail::gather<false>(
strings_column_view(source_column), gather_map_begin, gather_map_end, stream, mr);
}
}
};
/**
* @brief Column gather specialization for list_view column type.
*
* @tparam MapItRoot Iterator type to access the incoming root column.
*
* This functor is invoked only on the root column of a hierarchy of list
* columns. Recursion is handled internally.
*/
template <>
struct column_gatherer_impl<list_view> {
/**
* @brief Gather a list column from a hierarchy of list columns.
*
* This function is similar to gather_list_nested() but the difference is
* significant. This particular level takes a templated gather map iterator of
* any type. As we start recursing, we need to be able to generate new gather
* maps for each level. To do this requires manifesting a buffer of intermediate
* data. If we were to do that at level N and then wrap it in an anonymous iterator
* to be passed to level N+1, these buffers of data would remain resident for the
* entirety of the recursion. But if level N+1 could create it's own iterator
* internally from a buffer passed to it by level N, it could then -delete- that
* buffer of data after using it, keeping the amount of extra memory needed
* to a minimum. see comment on "memory optimization" inside cudf::list::gather_list_nested
*
* The tree of calls can be visualized like this:
*
* @code{.pseudo}
* R : this operator
* N : lists::detail::gather_list_nested
* L : lists::detail::gather_list_leaf
*
* R
* / \
* L N
* \
* N
* \
* ...
* \
* L
* @endcode
*
* This is the start of the recursion - we will only ever get in here once.
* We will only ever travel down the left branch or the right branch, and we
* will always end up in a final call to gather_list_leaf.
*
* @param column View into the column to gather from
* @param gather_map_begin iterator representing the start of the range to gather from
* @param gather_map_end iterator representing the end of the range to gather from
* @param nullify_out_of_bounds Nullify values in the gather map that are out of bounds
* @param stream CUDA stream on which to execute kernels
* @param mr Memory resource to use for all allocations
*
* @returns column with elements gathered based on the gather map
*
*/
template <typename MapItRoot>
std::unique_ptr<column> operator()(column_view const& column,
MapItRoot gather_map_begin,
MapItRoot gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
lists_column_view list(column);
auto gather_map_size = std::distance(gather_map_begin, gather_map_end);
// if the gather map is empty, return an empty column
if (gather_map_size == 0) { return empty_like(column); }
// generate gather_data for the next level (N+1)
lists::detail::gather_data gd = nullify_out_of_bounds
? lists::detail::make_gather_data<true>(
column, gather_map_begin, gather_map_size, stream, mr)
: lists::detail::make_gather_data<false>(
column, gather_map_begin, gather_map_size, stream, mr);
// the nesting case.
if (list.child().type() == cudf::data_type{type_id::LIST}) {
// gather children
auto child = lists::detail::gather_list_nested(list.get_sliced_child(stream), gd, stream, mr);
// return the final column
return make_lists_column(gather_map_size,
std::move(gd.offsets),
std::move(child),
0,
rmm::device_buffer{0, stream, mr});
}
// it's a leaf. do a regular gather
auto child = lists::detail::gather_list_leaf(list.get_sliced_child(stream), gd, stream, mr);
// assemble final column
return make_lists_column(gather_map_size,
std::move(gd.offsets),
std::move(child),
0,
rmm::device_buffer{0, stream, mr});
}
};
/**
* @brief Column gather specialization for dictionary column type.
*/
template <>
struct column_gatherer_impl<dictionary32> {
/**
* @brief Type-dispatched function to gather from one column to another based
* on a `gather_map`.
*
* @param source_column View into the column to gather from
* @param gather_map_begin Beginning of iterator range of integral values representing the gather
* map
* @param gather_map_end End of iterator range of integral values representing the gather map
* @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New dictionary column with gathered rows.
*/
template <typename MapItType>
std::unique_ptr<column> operator()(column_view const& source_column,
MapItType gather_map_begin,
MapItType gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
dictionary_column_view dictionary(source_column);
auto output_count = std::distance(gather_map_begin, gather_map_end);
if (output_count == 0) return make_empty_column(data_type{type_id::DICTIONARY32});
// The gather could cause some keys to be abandoned -- no indices point to them.
// In this case, we could do further work to remove the abandoned keys and
// reshuffle the indices values.
// We decided we will copy the keys for gather since the keys column should
// be relatively smallish.
// Also, there are scenarios where the keys are common with other dictionaries
// and the original intention was to share the keys here.
auto keys_copy = std::make_unique<column>(dictionary.keys(), stream, mr);
// Perform gather on just the indices
column_view indices = dictionary.get_indices_annotated();
auto new_indices = cudf::detail::allocate_like(
indices, output_count, cudf::mask_allocation_policy::NEVER, stream, mr);
gather_helper(
cudf::detail::indexalator_factory::make_input_iterator(indices),
indices.size(),
cudf::detail::indexalator_factory::make_output_iterator(new_indices->mutable_view()),
gather_map_begin,
gather_map_end,
nullify_out_of_bounds,
stream);
// dissect the column's contents
auto indices_type = new_indices->type();
auto null_count = new_indices->null_count(); // get this before calling release()
auto contents = new_indices->release(); // new_indices will now be empty
// build the output indices column from the contents' data component
auto indices_column = std::make_unique<column>(indices_type,
static_cast<size_type>(output_count),
std::move(*(contents.data.release())),
rmm::device_buffer{0, stream, mr},
0); // set null count to 0
// finally, build the dictionary with the null_mask component and the keys and indices
return make_dictionary_column(std::move(keys_copy),
std::move(indices_column),
std::move(*(contents.null_mask.release())),
null_count);
}
};
template <>
struct column_gatherer_impl<struct_view> {
template <typename MapItRoot>
std::unique_ptr<column> operator()(column_view const& column,
MapItRoot gather_map_begin,
MapItRoot gather_map_end,
bool nullify_out_of_bounds,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
structs_column_view structs_column(column);
auto gather_map_size{std::distance(gather_map_begin, gather_map_end)};
if (gather_map_size == 0) { return empty_like(column); }
std::vector<std::unique_ptr<cudf::column>> output_struct_members;
std::transform(structs_column.child_begin(),
structs_column.child_end(),
std::back_inserter(output_struct_members),
[&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr](
cudf::column_view const& col) {
return cudf::type_dispatcher<dispatch_storage_type>(col.type(),
column_gatherer{},
col,
gather_map_begin,
gather_map_end,
nullify_out_of_bounds,
stream,
mr);
});
gather_bitmask(
// Table view of struct column.
cudf::table_view{
std::vector<cudf::column_view>{structs_column.child_begin(), structs_column.child_end()}},
gather_map_begin,
output_struct_members,
nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK,
stream,
mr);
return cudf::make_structs_column(
gather_map_size,
std::move(output_struct_members),
0,
rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::gather().
stream,
mr);
}
};
/**
* @brief Function object for applying a transformation on the gathermap
* that converts negative indices to positive indices
*
* A negative index `i` is transformed to `i + size`, where `size` is
* the number of elements in the column being gathered from.
* Allowable values for the index `i` are in the range `[-size, size)`.
* Thus, when gathering from a column of size `10`, the index `-1`
* is transformed to `9` (i.e., the last element), `-2` is transformed
* to `8` (the second-to-last element) and so on.
* Positive indices are unchanged by this transformation.
*/
template <typename map_type>
struct index_converter : public thrust::unary_function<map_type, map_type> {
index_converter(size_type n_rows) : n_rows(n_rows) {}
__device__ map_type operator()(map_type in) const { return ((in % n_rows) + n_rows) % n_rows; }
size_type n_rows;
};
template <gather_bitmask_op Op, typename GatherMap>
void gather_bitmask(table_device_view input,
GatherMap gather_map_begin,
bitmask_type** masks,
size_type mask_count,
size_type mask_size,
size_type* valid_counts,
rmm::cuda_stream_view stream)
{
if (mask_size == 0) { return; }
constexpr size_type block_size = 256;
using Selector = gather_bitmask_functor<Op, decltype(gather_map_begin)>;
auto selector = Selector{input, masks, gather_map_begin};
auto counting_it = thrust::make_counting_iterator(0);
auto kernel =
valid_if_n_kernel<decltype(counting_it), decltype(counting_it), Selector, block_size>;
cudf::detail::grid_1d grid{mask_size, block_size, 1};
kernel<<<grid.num_blocks, block_size, 0, stream.value()>>>(
counting_it, counting_it, selector, masks, mask_count, mask_size, valid_counts);
}
template <typename MapIterator>
void gather_bitmask(table_view const& source,
MapIterator gather_map,
std::vector<std::unique_ptr<column>>& target,
gather_bitmask_op op,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (target.empty()) { return; }
// Validate that all target columns have the same size
auto const target_rows = target.front()->size();
CUDF_EXPECTS(std::all_of(target.begin(),
target.end(),
[target_rows](auto const& col) { return target_rows == col->size(); }),
"Column size mismatch");
// Create null mask if source is nullable but target is not
for (size_t i = 0; i < target.size(); ++i) {
if ((source.column(i).nullable() or op == gather_bitmask_op::NULLIFY) and
not target[i]->nullable()) {
auto const state =
op == gather_bitmask_op::PASSTHROUGH ? mask_state::ALL_VALID : mask_state::UNINITIALIZED;
auto mask = detail::create_null_mask(target[i]->size(), state, stream, mr);
target[i]->set_null_mask(std::move(mask), 0);
}
}
// Make device array of target bitmask pointers
thrust::host_vector<bitmask_type*> target_masks(target.size());
std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) {
return col->mutable_view().null_mask();
});
rmm::device_vector<bitmask_type*> d_target_masks(target_masks);
auto const masks = d_target_masks.data().get();
auto const device_source = table_device_view::create(source, stream);
auto d_valid_counts = rmm::device_vector<size_type>(target.size());
// Dispatch operation enum to get implementation
auto const impl = [op]() {
switch (op) {
case gather_bitmask_op::DONT_CHECK:
return gather_bitmask<gather_bitmask_op::DONT_CHECK, MapIterator>;
case gather_bitmask_op::PASSTHROUGH:
return gather_bitmask<gather_bitmask_op::PASSTHROUGH, MapIterator>;
case gather_bitmask_op::NULLIFY:
return gather_bitmask<gather_bitmask_op::NULLIFY, MapIterator>;
default: CUDF_FAIL("Invalid gather_bitmask_op");
}
}();
impl(*device_source,
gather_map,
masks,
target.size(),
target_rows,
d_valid_counts.data().get(),
stream);
// Copy the valid counts into each column
auto const valid_counts = thrust::host_vector<size_type>(d_valid_counts);
for (size_t i = 0; i < target.size(); ++i) {
if (target[i]->nullable()) {
auto const null_count = target_rows - valid_counts[i];
target[i]->set_null_count(null_count);
}
}
}
/**
* @brief Gathers the specified rows of a set of columns according to a gather map.
*
* Gathers the rows of the source columns according to `gather_map` such that row "i"
* in the resulting table's columns will contain row "gather_map[i]" from the source columns.
* The number of rows in the result table will be equal to the number of elements in
* `gather_map`.
*
* A negative value `i` in the `gather_map` is interpreted as `i+n`, where
* `n` is the number of rows in the `source_table`.
*
* tparam MapIterator Iterator type for the gather map
* @param[in] source_table View into the table containing the input columns whose rows will be
* gathered
* @param[in] gather_map_begin Beginning of iterator range of integer indices that map the rows in
* the source columns to rows in the destination columns
* @param[in] gather_map_end End of iterator range of integer indices that map the rows in the
* source columns to rows in the destination columns
* @param[in] bounds_policy Policy to apply to account for possible out-of-bound indices
* `DONT_CHECK` skips all bound checking for gather map values. `NULLIFY` coerces rows that
* corresponds to out-of-bound indices in the gather map to be null elements. Callers should
* use `DONT_CHECK` when they are certain that the gather_map contains only valid indices for
* better performance. In case there are out-of-bound indices in the gather map, the behavior
* is undefined. Defaults to `DONT_CHECK`.
* @param[in] mr Device memory resource used to allocate the returned table's device memory
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @return cudf::table Result of the gather
*/
template <typename MapIterator>
std::unique_ptr<table> gather(
table_view const& source_table,
MapIterator gather_map_begin,
MapIterator gather_map_end,
out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
std::vector<std::unique_ptr<column>> destination_columns;
// TODO: Could be beneficial to use streams internally here
for (auto const& source_column : source_table) {
// The data gather for n columns will be put on the first n streams
destination_columns.push_back(
cudf::type_dispatcher<dispatch_storage_type>(source_column.type(),
column_gatherer{},
source_column,
gather_map_begin,
gather_map_end,
bounds_policy == out_of_bounds_policy::NULLIFY,
stream,
mr));
}
gather_bitmask_op const op = bounds_policy == out_of_bounds_policy::NULLIFY
? gather_bitmask_op::NULLIFY
: gather_bitmask_op::DONT_CHECK;
gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr);
return std::make_unique<table>(std::move(destination_columns));
}
} // namespace detail
} // namespace cudf