Skip to content

Commit

Permalink
Add num_aggregations axis and remove multistream benchmark
Browse files: browse the repository at this point in the history
  • Loading branch information
srinivasyadav18 committed Jul 5, 2024
1 parent ca049ff commit 32181eb
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 109 deletions.
9 changes: 2 additions & 7 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,8 @@ ConfigureBench(
)

ConfigureNVBench(
GROUPBY_NVBENCH
groupby/group_max.cpp
groupby/group_max_multistream.cpp
groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp
groupby/group_rank.cpp
groupby/group_struct_keys.cpp
GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp groupby/group_rank.cpp groupby/group_struct_keys.cpp
)

# ##################################################################################################
Expand Down
16 changes: 11 additions & 5 deletions cpp/benchmarks/groupby/group_max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,25 @@ void groupby_max_helper(nvbench::state& state,
cudf::type_to_id<Type>(), row_count{num_rows}, data_profile{builder});
}();

auto const num_aggregations = state.get_int64("num_aggregations");

auto keys_view = keys->view();
auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view, keys_view, keys_view}));

std::vector<cudf::groupby::aggregation_request> requests;
requests.emplace_back(cudf::groupby::aggregation_request());
requests[0].values = vals->view();
requests[0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());
for (int64_t i = 0; i < num_aggregations; i++) {
requests.emplace_back(cudf::groupby::aggregation_request());
requests[i].values = vals->view();
requests[i].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());
}

auto const mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });
auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
state.add_element_count(
static_cast<double>(num_rows * num_aggregations) / elapsed_time / 1'000'000., "Mrows/s");
state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}
Expand Down Expand Up @@ -91,7 +96,8 @@ NVBENCH_BENCH_TYPES(bench_groupby_max,
.set_name("groupby_max")
.add_int64_axis("cardinality", {0})
.add_int64_power_of_two_axis("num_rows", {12, 18, 24})
.add_float64_axis("null_probability", {0, 0.1, 0.9});
.add_float64_axis("null_probability", {0, 0.1, 0.9})
.add_int64_axis("num_aggregations", {1, 2, 4, 8, 16, 32});

NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t>))
.set_name("groupby_max_cardinality")
Expand Down
91 changes: 0 additions & 91 deletions cpp/benchmarks/groupby/group_max_multistream.cpp

This file was deleted.

17 changes: 11 additions & 6 deletions cpp/benchmarks/groupby/group_max_multithreaded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void bench_groupby_max_multithreaded(nvbench::state& state, nvbench::type_list<T
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const null_probability = state.get_float64("null_probability");
auto const num_threads = state.get_int64("num_threads");
auto const num_aggregations = state.get_int64("num_aggregations");

auto const keys = [&] {
data_profile const profile =
Expand Down Expand Up @@ -61,28 +62,31 @@ void bench_groupby_max_multithreaded(nvbench::state& state, nvbench::type_list<T

std::vector<std::vector<cudf::groupby::aggregation_request>> requests(num_threads);
for (int64_t i = 0; i < num_threads; i++) {
requests[i].emplace_back(cudf::groupby::aggregation_request());
requests[i][0].values = vals->view();
requests[i][0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());
for (int64_t j = 0; j < num_aggregations; j++) {
requests[i].emplace_back(cudf::groupby::aggregation_request());
requests[i][j].values = vals->view();
requests[i][j].aggregations.push_back(
cudf::make_max_aggregation<cudf::groupby_aggregation>());
}
}

auto const mem_stats_logger = cudf::memory_stats_logger();
state.exec(
nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
auto perform_agg = [&](int64_t index) { gb_obj.aggregate(requests[index], streams[index]); };
threads.paused = true;
for (int64_t i = 0; i < num_threads; ++i) {
threads.submit(perform_agg, i);
}
timer.start();
threads.paused = false;
threads.wait_for_tasks();
cudf::detail::join_streams(streams, cudf::get_default_stream());
timer.stop();
});

auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
state.add_element_count(
static_cast<double>(num_rows * num_threads * num_aggregations) / elapsed_time / 1'000'000.,
"Mrows/s");
state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}
Expand All @@ -93,4 +97,5 @@ NVBENCH_BENCH_TYPES(bench_groupby_max_multithreaded,
.add_int64_axis("cardinality", {0})
.add_int64_power_of_two_axis("num_rows", {12, 18})
.add_float64_axis("null_probability", {0, 0.1, 0.9})
.add_int64_axis("num_aggregations", {1, 2, 4, 8, 16, 32})
.add_int64_axis("num_threads", {1, 2, 4, 8});

0 comments on commit 32181eb

Please sign in to comment.