Skip to content

Commit

Permalink
Use warp per string for long strings in cudf::strings::contains() (#1…
Browse files Browse the repository at this point in the history
…0739)

Improves the performance of `cudf::strings::contains()` for long strings. This executes a warp per string to match a target over sections of a single string in parallel. The benchmark showed this to be faster than the current implementation only for longer strings (greater than 64 bytes). It also proved somewhat faster and more consistent than a pure character-parallel approach.

This change may also help improve the performance of the regex `contains_re()` function in the future.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Bradley Dice (https://github.com/bdice)

URL: #10739
  • Loading branch information
davidwendt committed May 2, 2022
1 parent 027c34a commit 6128e0d
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 2 deletions.
88 changes: 86 additions & 2 deletions cpp/src/strings/search/find.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/find.hpp>
Expand All @@ -28,6 +30,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/binary_search.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

Expand Down Expand Up @@ -162,6 +165,81 @@ std::unique_ptr<column> rfind(strings_column_view const& strings,

namespace detail {
namespace {

/**
 * @brief Threshold to decide on using string or warp parallel functions.
 *
 * If the average byte length of a string in a column is strictly greater than
 * this value then the warp-parallel `contains_warp_fn` function is used.
 * Otherwise, the string-parallel function in `contains_fn` is used.
 *
 * The value is expressed in bytes (not characters).
 *
 * This is only used for the scalar version of `contains()` right now.
 */
constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64;

/**
 * @brief Check if `d_target` appears in a row in `d_strings`.
 *
 * This executes as a warp per string/row: the functor is expected to be invoked
 * with `cudf::detail::warp_size` consecutive `idx` values per row. The call with
 * `idx` handles row `idx / warp_size` and checks the candidate byte positions
 * `idx % warp_size`, `idx % warp_size + warp_size`, ... of that row.
 *
 * `d_results` must be initialized by the caller; this functor only ever sets
 * an entry to `true` (when a match is found) and never writes `false`.
 * Entries for null rows are not touched.
 */
struct contains_warp_fn {
  column_device_view const d_strings;  ///< Strings column to search
  string_view const d_target;          ///< Target to search for in each row
  bool* d_results;                     ///< One pre-initialized output entry per row

  /**
   * @brief Search one slice of one row for the target.
   *
   * @param idx Flattened index: row index times the warp size plus the lane offset
   */
  __device__ void operator()(std::size_t idx)
  {
    auto const str_idx = static_cast<size_type>(idx / cudf::detail::warp_size);
    if (d_strings.is_null(str_idx)) { return; }
    // get the string for this warp
    auto const d_str = d_strings.element<string_view>(str_idx);

    // Last byte position at which the target can still fit inside the string.
    // This is negative when the target is longer than the string, in which case
    // the loop below does not execute at all.
    auto const tgt_bytes = d_target.size_bytes();
    auto const last_pos  = d_str.size_bytes() - tgt_bytes;

    // each thread of the warp will check just part of the string
    auto found = false;
    for (auto i = static_cast<size_type>(idx % cudf::detail::warp_size);
         !found && (i <= last_pos);  // inclusive: a match may end exactly at the end of the string
         i += cudf::detail::warp_size) {
      // check the target matches this part of the d_str data
      if (d_target.compare(d_str.data() + i, tgt_bytes) == 0) { found = true; }
    }
    // several threads may find a match for the same row; they all write the same value
    if (found) { atomicOr(d_results + str_idx, true); }
  }
};

/**
 * @brief Warp-per-row implementation of `contains()` for a scalar target.
 *
 * Builds a BOOL8 column with the same null mask and null count as `input`.
 * Every output entry is first filled with `true` if the target is empty and
 * `false` otherwise; for a non-empty target, `contains_warp_fn` is then run
 * over `input.size() * warp_size` indices to flag the rows that match.
 *
 * @throw cudf::logic_error if `target` is not valid
 *
 * @param input Strings column to search
 * @param target Scalar string to search for in each row
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned column
 * @return New BOOL8 column
 */
std::unique_ptr<column> contains_warp_parallel(strings_column_view const& input,
                                               string_scalar const& target,
                                               rmm::cuda_stream_view stream,
                                               rmm::mr::device_memory_resource* mr)
{
  CUDF_EXPECTS(target.is_valid(stream), "Parameter target must be valid.");
  auto const d_target        = string_view(target.data(), target.size());
  bool const target_is_empty = d_target.empty();

  // output column shares the null mask of the input
  auto output = make_numeric_column(data_type{type_id::BOOL8},
                                    input.size(),
                                    cudf::detail::copy_bitmask(input.parent(), stream, mr),
                                    input.null_count(),
                                    stream,
                                    mr);

  // an empty target matches every row; otherwise start from "no match"
  auto output_view = output->mutable_view();
  thrust::fill(rmm::exec_policy(stream),
               output_view.begin<bool>(),
               output_view.end<bool>(),
               target_is_empty);

  if (target_is_empty) {
    // nothing to search for; the fill above is already the final answer
    output->set_null_count(input.null_count());
    return output;
  }

  // one warp's worth of indices per row
  auto const d_column      = column_device_view::create(input.parent(), stream);
  auto const total_threads = static_cast<std::size_t>(input.size()) * cudf::detail::warp_size;
  thrust::for_each_n(rmm::exec_policy(stream),
                     thrust::make_counting_iterator<std::size_t>(0),
                     total_threads,
                     contains_warp_fn{*d_column, d_target, output_view.data<bool>()});

  output->set_null_count(input.null_count());
  return output;
}

/**
* @brief Utility to return a bool column indicating the presence of
* a given target string in a strings column.
Expand Down Expand Up @@ -286,15 +364,21 @@ std::unique_ptr<column> contains_fn(strings_column_view const& strings,
} // namespace

// Scalar-target overload of contains(): returns a bool column indicating, per row,
// whether `target` occurs in that row. Dispatches between the warp-per-row
// implementation and the row-per-thread `contains_fn` based on average row size.
// NOTE(review): this listing appears to interleave the removed (`strings`) and
// added (`input`) lines of the diff for the first parameter and the final return;
// only the `input` lines belong to the post-change code -- confirm against the repository.
std::unique_ptr<column> contains(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
// use warp parallel when the average string width is greater than the threshold
// (average = chars_size() / size(); the is_empty() check guards the division by zero rows)
if (!input.is_empty() && ((input.chars_size() / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) {
return contains_warp_parallel(input, target, stream, mr);
}

// benchmark measurements showed this to be faster for smaller strings
// the predicate treats any non-negative find() result as "target present"
auto pfn = [] __device__(string_view d_string, string_view d_target) {
return d_string.find(d_target) >= 0;
};
return contains_fn(strings, target, pfn, stream, mr);
return contains_fn(input, target, pfn, stream, mr);
}

std::unique_ptr<column> contains(
Expand Down
20 changes: 20 additions & 0 deletions cpp/tests/strings/find_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,26 @@ TEST_F(StringsFindTest, Contains)
}
}

// Exercises contains() with a scalar target on a column of mostly long rows.
// NOTE(review): presumably the rows are long enough on average to select the
// warp-parallel code path -- confirm against AVG_CHAR_BYTES_THRESHOLD.
TEST_F(StringsFindTest, ContainsLongStrings)
{
  cudf::test::strings_column_wrapper input(
    {"Héllo, there world and goodbye",
     "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving",
     "the following code snippet demonstrates how to use search for values in an ordered range",
     "it returns the last position where value could be inserted without violating the ordering",
     "algorithms execution is parallelized as determined by an execution policy. t",
     "he this is a continuation of previous row to make sure string boundaries are honored",
     ""});
  auto const sv = cudf::strings_column_view(input);

  {
    // single-byte target: present in every row except the empty one
    auto const results = cudf::strings::contains(sv, cudf::string_scalar("e"));
    cudf::test::fixed_width_column_wrapper<bool> expected({1, 1, 1, 1, 1, 1, 0});
    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
  }

  {
    // multi-byte target: must not match across the boundary between rows 4 and 5
    auto const results = cudf::strings::contains(sv, cudf::string_scalar(" the "));
    cudf::test::fixed_width_column_wrapper<bool> expected2({0, 1, 0, 1, 0, 0, 0});
    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected2);
  }
}

TEST_F(StringsFindTest, StartsWith)
{
cudf::test::strings_column_wrapper strings({"Héllo", "thesé", "", "lease", "tést strings", ""},
Expand Down

0 comments on commit 6128e0d

Please sign in to comment.