Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix out-of-bound access in cudf::detail::label_segments #11497

Merged
merged 3 commits into from
Aug 9, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions cpp/include/cudf/detail/labeling/label_segments.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,12 @@ void label_segments(InputIterator offsets_begin,
OutputIterator label_end,
rmm::cuda_stream_view stream)
{
auto const num_labels = thrust::distance(label_begin, label_end);

// If the output array is empty, that means we have all empty segments.
// In such cases, we must terminate immediately. Otherwise, the `for_each` loop below may try to
// access memory of the output array, resulting in "illegal memory access" error.
if (thrust::distance(label_begin, label_end) == 0) { return; }
if (num_labels == 0) { return; }

// When the output array is not empty, always fill it with `0` value first.
using OutputType = typename thrust::iterator_value<OutputIterator>::type;
Expand All @@ -83,19 +85,22 @@ void label_segments(InputIterator offsets_begin,
// very large segment.
if (thrust::distance(offsets_begin, offsets_end) <= 2) { return; }

thrust::for_each(rmm::exec_policy(stream),
offsets_begin + 1, // exclude the first offset value
offsets_end - 1, // exclude the last offset value
[offsets = offsets_begin, output = label_begin] __device__(auto const idx) {
// Zero-normalized offsets.
auto const dst_idx = idx - (*offsets);

// Scatter value `1` to the index at (idx - offsets[0]).
// In case we have repeated offsets (i.e., we have empty segments), this
// `atomicAdd` call will make sure the label values corresponding to these
// empty segments will be skipped in the output.
atomicAdd(&output[dst_idx], OutputType{1});
});
thrust::for_each(
rmm::exec_policy(stream),
offsets_begin + 1, // exclude the first offset value
offsets_end - 1, // exclude the last offset value
[num_labels, offsets = offsets_begin, output = label_begin] __device__(auto const idx) {
// Zero-normalized offsets.
auto const dst_idx = idx - (*offsets);
upsj marked this conversation as resolved.
Show resolved Hide resolved

// Scatter value `1` to the index at (idx - offsets[0]).
// Note that we need to check for out of bound, since the offset values may be invalid due to
// empty segments at the end.
// In case we have repeated offsets (i.e., we have empty segments), this `atomicAdd` call will
// make sure the label values corresponding to these empty segments will be skipped in the
// output.
if (dst_idx < num_labels) { atomicAdd(&output[dst_idx], OutputType{1}); }
});
thrust::inclusive_scan(rmm::exec_policy(stream), label_begin, label_end, label_begin);
}

Expand Down