Move some nvtext benchmarks to nvbench (#13368)
Moves some of the nvtext benchmarks to nvbench to help provide a more useful baseline when making improvements to performance. These run with varying parameters for column size and string length. The remaining benchmarks are more involved and may be updated in follow-on PRs.

Reference: #13048
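
For orientation, the migrated files all follow the same nvbench shape: read the axis values from nvbench::state, skip combinations that would overflow cudf's 32-bit size_type, report the character bytes as global-memory throughput, and time the call inside state.exec. The sketch below is a minimal, self-contained illustration of that shape, not code from this PR: the bench_example name, the cudaMemsetAsync stand-in workload, and the buffer handling are illustrative only, and it assumes the executable links against nvbench::main to supply main(), as cudf's ConfigureNVBench does.

// A minimal sketch of the nvbench pattern used by the migrated benchmarks.
// The benchmark name, axis values, and the cudaMemsetAsync stand-in workload
// are illustrative; the real benchmarks call into nvtext/cudf instead.
#include <nvbench/nvbench.cuh>

#include <cuda_runtime_api.h>

#include <cstdint>
#include <limits>

static void bench_example(nvbench::state& state)
{
  auto const num_rows  = state.get_int64("num_rows");
  auto const row_width = state.get_int64("row_width");

  // Mirror the guard added in each benchmark: skip combinations whose total
  // character count would not fit in cudf's 32-bit size_type.
  if (num_rows * row_width >= std::numeric_limits<std::int32_t>::max()) {
    state.skip("Skip benchmarks greater than size_type limit");
    return;
  }

  auto const bytes = static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width);
  void* buffer     = nullptr;
  cudaMalloc(&buffer, bytes);

  // Throughput counters let nvbench report bytes/second for each configuration.
  state.add_global_memory_reads<nvbench::int8_t>(bytes);
  state.add_global_memory_writes<nvbench::int8_t>(bytes);

  state.exec([&](nvbench::launch& launch) {
    // Stand-in for the nvtext API call being measured.
    cudaMemsetAsync(buffer, 0, bytes, launch.get_stream());
  });

  cudaFree(buffer);
}

// main() comes from linking nvbench::main, as ConfigureNVBench provides in cudf.
NVBENCH_BENCH(bench_example)
  .set_name("example")
  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});

The migrated benchmarks additionally call state.set_cuda_stream(...) so that timing happens on cudf's default stream, and pass nvbench::exec_tag::sync to indicate that the measured nvtext call synchronizes.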

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)
  - Robert Maynard (https://github.com/robertmaynard)

URL: #13368
davidwendt committed May 24, 2023
1 parent 132540e commit 19554a1
Showing 5 changed files with 103 additions and 208 deletions.
9 changes: 4 additions & 5 deletions cpp/benchmarks/CMakeLists.txt
@@ -272,12 +272,11 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.c

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
ConfigureBench(
TEXT_BENCH text/ngrams.cpp text/normalize.cpp text/normalize_spaces.cpp text/replace.cpp
text/subword.cpp text/tokenize.cpp
)
ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)

ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp)
ConfigureNVBench(
TEXT_NVBENCH text/minhash.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp
)

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
69 changes: 30 additions & 39 deletions cpp/benchmarks/text/normalize.cpp
@@ -16,59 +16,50 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvtext/normalize.hpp>

class TextNormalize : public cudf::benchmark {};
#include <nvbench/nvbench.cuh>

static void BM_normalize(benchmark::State& state, bool to_lower)
static void bench_normalize(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const max_str_length = static_cast<cudf::size_type>(state.range(1));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
auto const normalize_type = state.get_string("type");

if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}

data_profile const profile = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
cudf::strings_column_view input(column->view());

for (auto _ : state) {
cuda_event_timer raii(state, true, cudf::get_default_stream());
nvtext::normalize_characters(input, to_lower);
}
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

state.SetBytesProcessed(state.iterations() * input.chars_size());
}
auto chars_size = input.chars_size();
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
// avoid generating combinations that exceed the cudf column limit
size_t total_chars = static_cast<size_t>(row_count) * rowlen * 4;
if (total_chars < static_cast<size_t>(std::numeric_limits<cudf::size_type>::max())) {
b->Args({row_count, rowlen});
}
}
if (normalize_type == "spaces") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::normalize_spaces(input); });
} else {
bool const to_lower = (normalize_type == "to_lower");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::normalize_characters(input, to_lower);
});
}
}

#define NVTEXT_BENCHMARK_DEFINE(name, lower) \
BENCHMARK_DEFINE_F(TextNormalize, name) \
(::benchmark::State & st) { BM_normalize(st, lower); } \
BENCHMARK_REGISTER_F(TextNormalize, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

NVTEXT_BENCHMARK_DEFINE(characters, false)
NVTEXT_BENCHMARK_DEFINE(to_lower, true)
NVBENCH_BENCH(bench_normalize)
.set_name("normalize")
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
.add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
.add_string_axis("type", {"spaces", "characters", "to_lower"});
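
(For context: the size_type guard in bench_normalize above estimates the output character bytes as num_rows * row_width, so with these axes only the largest combinations are skipped, namely 16,777,216 rows at widths of 128 and above and 2,097,152 rows at width 1024, since 16,777,216 × 128 = 2,147,483,648 already exceeds the 2,147,483,647 size_type limit. The same guard and the same num_rows/row_width axes appear in the replace and tokenize benchmarks below.)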
66 changes: 0 additions & 66 deletions cpp/benchmarks/text/normalize_spaces.cpp

This file was deleted.

59 changes: 24 additions & 35 deletions cpp/benchmarks/text/replace.cpp
@@ -15,23 +15,26 @@
*/

#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/strings/strings_column_view.hpp>

#include <nvtext/replace.hpp>

#include <random>
#include <nvbench/nvbench.cuh>

class TextReplace : public cudf::benchmark {};
#include <random>

static void BM_replace(benchmark::State& state)
static void bench_replace(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const n_length = static_cast<cudf::size_type>(state.range(1));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));

if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}

std::vector<std::string> words{" ", "one ", "two ", "three ", "four ",
"five ", "six ", "sevén ", "eight ", "nine ",
@@ -41,46 +41,32 @@ static void BM_replace(benchmark::State& state)
std::default_random_engine generator;
std::uniform_int_distribution<int> tokens_dist(0, words.size() - 1);
std::string row; // build a row of random tokens
while (static_cast<int>(row.size()) < n_length)
while (static_cast<cudf::size_type>(row.size()) < row_width)
row += words[tokens_dist(generator)];

std::uniform_int_distribution<int> position_dist(0, 16);

auto elements = cudf::detail::make_counting_transform_iterator(
0, [&](auto idx) { return row.c_str() + position_dist(generator); });
cudf::test::strings_column_wrapper input(elements, elements + n_rows);
cudf::test::strings_column_wrapper input(elements, elements + num_rows);
cudf::strings_column_view view(input);

cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"});
cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"});

for (auto _ : state) {
cuda_event_timer raii(state, true);
nvtext::replace_tokens(
view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
}
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

state.SetBytesProcessed(state.iterations() * view.chars_size());
}
auto chars_size = view.chars_size();
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_multiplier = 8;
int const min_row_length = 1 << 5;
int const max_row_length = 1 << 13;
int const length_multiplier = 4;
generate_string_bench_args(
b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::replace_tokens(
view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
});
}

#define NVTEXT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(TextReplace, name) \
(::benchmark::State & st) { BM_replace(st); } \
BENCHMARK_REGISTER_F(TextReplace, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

NVTEXT_BENCHMARK_DEFINE(replace)
NVBENCH_BENCH(bench_replace)
.set_name("replace")
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
.add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
108 changes: 45 additions & 63 deletions cpp/benchmarks/text/tokenize.cpp
@@ -16,8 +16,6 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

@@ -28,73 +26,57 @@
#include <nvtext/ngrams_tokenize.hpp>
#include <nvtext/tokenize.hpp>

class TextTokenize : public cudf::benchmark {};
#include <nvbench/nvbench.cuh>

enum class tokenize_type { single, multi, count, count_multi, ngrams, characters };

static void BM_tokenize(benchmark::State& state, tokenize_type tt)
static void bench_tokenize(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const max_str_length = static_cast<cudf::size_type>(state.range(1));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
auto const tokenize_type = state.get_string("type");

if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}

data_profile const profile = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
cudf::strings_column_view input(column->view());
cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});

for (auto _ : state) {
cuda_event_timer raii(state, true, cudf::get_default_stream());
switch (tt) {
case tokenize_type::single:
// single whitespace delimiter
nvtext::tokenize(input);
break;
case tokenize_type::multi:
nvtext::tokenize(input, cudf::strings_column_view(delimiters));
break;
case tokenize_type::count:
// single whitespace delimiter
nvtext::count_tokens(input);
break;
case tokenize_type::count_multi:
nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
break;
case tokenize_type::ngrams:
// default is bigrams
nvtext::ngrams_tokenize(input);
break;
case tokenize_type::characters:
// every character becomes a string
nvtext::character_tokenize(input);
break;
}
}
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

state.SetBytesProcessed(state.iterations() * input.chars_size());
}
auto chars_size = input.chars_size();
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
if (tokenize_type == "whitespace") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::tokenize(input); });
} else if (tokenize_type == "multi") {
cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::tokenize(input, cudf::strings_column_view(delimiters));
});
} else if (tokenize_type == "count") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::count_tokens(input); });
} else if (tokenize_type == "count_multi") {
cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
});
} else if (tokenize_type == "ngrams") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); });
} else if (tokenize_type == "characters") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); });
}
}

#define NVTEXT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(TextTokenize, name) \
(::benchmark::State & st) { BM_tokenize(st, tokenize_type::name); } \
BENCHMARK_REGISTER_F(TextTokenize, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

NVTEXT_BENCHMARK_DEFINE(single)
NVTEXT_BENCHMARK_DEFINE(multi)
NVTEXT_BENCHMARK_DEFINE(count)
NVTEXT_BENCHMARK_DEFINE(count_multi)
NVTEXT_BENCHMARK_DEFINE(ngrams)
NVTEXT_BENCHMARK_DEFINE(characters)
NVBENCH_BENCH(bench_tokenize)
.set_name("tokenize")
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
.add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
.add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"});
