diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 78cb35865e9..80a52f8ff5c 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -102,6 +102,7 @@ ConfigureBench(SEARCH_BENCH search/search_benchmark.cu) ################################################################################################### # - sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH + sort/rank_benchmark.cpp sort/sort_benchmark.cpp sort/sort_strings_benchmark.cpp) diff --git a/cpp/benchmarks/sort/rank_benchmark.cpp b/cpp/benchmarks/sort/rank_benchmark.cpp new file mode 100644 index 00000000000..60be95b9112 --- /dev/null +++ b/cpp/benchmarks/sort/rank_benchmark.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +class Rank : public cudf::benchmark { +}; + +static void BM_rank(benchmark::State& state, bool nulls) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; + + // Create columns with values in the range [0,100) + column_wrapper input = [&, n_rows]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 100 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }(); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + + auto result = cudf::rank(input, + cudf::rank_method::FIRST, + cudf::order::ASCENDING, + nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + false); + } +} + +#define RANK_BENCHMARK_DEFINE(name, nulls) \ + BENCHMARK_DEFINE_F(Rank, name) \ + (::benchmark::State & st) { BM_rank(st, nulls); } \ + BENCHMARK_REGISTER_F(Rank, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +RANK_BENCHMARK_DEFINE(no_nulls, false) +RANK_BENCHMARK_DEFINE(nulls, true) diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index db0984068cf..42dc9f76b18 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -25,7 +25,6 @@ #include -#include #include #include #include diff --git a/cpp/src/groupby/sort/group_count_scan.cu b/cpp/src/groupby/sort/group_count_scan.cu index 4ad533aebdc..0caef47f0e3 100644 --- a/cpp/src/groupby/sort/group_count_scan.cu +++ b/cpp/src/groupby/sort/group_count_scan.cu @@ -23,7 +23,6 @@ #include #include -#include #include namespace cudf { diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 80a1ef9e204..8a1409a686b 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -31,7 +31,6 @@ #include #include -#include #include namespace cudf { diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index ba366b3a020..84ca171d455 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -25,7 +25,6 @@ #include #include -#include #include #include diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index d24bf2bcd7c..66548ac1e73 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -27,7 +27,6 @@ #include #include -#include #include #include @@ -55,13 +54,13 @@ struct unique_comparator { }; // Assign rank from 1 to n unique values. Equal values get same rank value. -rmm::device_vector sorted_dense_rank(column_view input_col, - column_view sorted_order_view, - rmm::cuda_stream_view stream) +rmm::device_uvector sorted_dense_rank(column_view input_col, + column_view sorted_order_view, + rmm::cuda_stream_view stream) { auto device_table = table_device_view::create(table_view{{input_col}}, stream); auto const input_size = input_col.size(); - rmm::device_vector dense_rank_sorted(input_size); + rmm::device_uvector dense_rank_sorted(input_size, stream); auto sorted_index_order = thrust::make_permutation_iterator( sorted_order_view.begin(), thrust::make_counting_iterator(0)); if (input_col.has_nulls()) { @@ -70,14 +69,14 @@ rmm::device_vector sorted_dense_rank(column_view input_col, auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv); thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data().get()); + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); } else { auto conv = unique_comparator( *device_table, sorted_index_order); auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv); thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data().get()); + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); } return dense_rank_sorted; } @@ -100,7 +99,7 @@ template -void tie_break_ranks_transform(rmm::device_vector const &dense_rank_sorted, +void tie_break_ranks_transform(cudf::device_span dense_rank_sorted, TieIterator tie_iter, column_view const &sorted_order_view, outputIterator rank_iter, @@ -109,9 +108,9 @@ void tie_break_ranks_transform(rmm::device_vector const &dense_rank_s rmm::cuda_stream_view stream) { auto const input_size = sorted_order_view.size(); - rmm::device_vector tie_sorted(input_size, 0); // algorithm: reduce_by_key(dense_rank, 1, n, reduction_tie_breaker) // reduction_tie_breaker = min, max, min_count + rmm::device_uvector tie_sorted(sorted_order_view.size(), stream); thrust::reduce_by_key(rmm::exec_policy(stream), dense_rank_sorted.begin(), dense_rank_sorted.end(), @@ -146,7 +145,7 @@ void rank_first(column_view sorted_order_view, } template -void rank_dense(rmm::device_vector const &dense_rank_sorted, +void rank_dense(cudf::device_span dense_rank_sorted, column_view sorted_order_view, mutable_column_view rank_mutable_view, rmm::cuda_stream_view stream) @@ -160,7 +159,7 @@ void rank_dense(rmm::device_vector const &dense_rank_sorted, } template -void rank_min(rmm::device_vector const &group_keys, +void rank_min(cudf::device_span group_keys, column_view sorted_order_view, mutable_column_view rank_mutable_view, rmm::cuda_stream_view stream) @@ -178,7 +177,7 @@ void rank_min(rmm::device_vector const &group_keys, } template -void rank_max(rmm::device_vector const &group_keys, +void rank_max(cudf::device_span group_keys, column_view sorted_order_view, mutable_column_view rank_mutable_view, rmm::cuda_stream_view stream) @@ -195,7 +194,7 @@ void rank_max(rmm::device_vector const &group_keys, stream); } -void rank_average(rmm::device_vector const &group_keys, +void rank_average(cudf::device_span group_keys, column_view sorted_order_view, mutable_column_view rank_mutable_view, rmm::cuda_stream_view stream) @@ -206,16 +205,18 @@ void rank_average(rmm::device_vector const &group_keys, // Calculate Min of ranks and Count of equal values // algorithm: reduce_by_key(dense_rank, 1, n, min_count) // transform(min+(count-1)/2), scatter - using MinCount = thrust::tuple; + using MinCount = thrust::pair; tie_break_ranks_transform( group_keys, - thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator(1), - thrust::make_constant_iterator(1))), + cudf::detail::make_counting_transform_iterator(1, + [] __device__(auto i) { + return MinCount{i, 1}; + }), sorted_order_view, rank_mutable_view.begin(), [] __device__(auto rank_count1, auto rank_count2) { - return MinCount{std::min(thrust::get<0>(rank_count1), thrust::get<0>(rank_count2)), - thrust::get<1>(rank_count1) + thrust::get<1>(rank_count2)}; + return MinCount{std::min(rank_count1.first, rank_count2.first), + rank_count1.second + rank_count2.second}; }, [] __device__(MinCount minrank_count) { // min+(count-1)/2 return static_cast(thrust::get<0>(minrank_count)) + @@ -261,12 +262,12 @@ std::unique_ptr rank(column_view const &input, // dense: All equal values have same rank and rank always increases by 1 between groups // acts as key for min, max, average to denote equal value groups - rmm::device_vector const dense_rank_sorted = + rmm::device_uvector const dense_rank_sorted = [&method, &input, &sorted_order_view, &stream] { if (method != rank_method::FIRST) return sorted_dense_rank(input, sorted_order_view, stream); else - return rmm::device_vector(); + return rmm::device_uvector(0, stream); }(); if (output_type.id() == type_id::FLOAT64) { @@ -314,7 +315,7 @@ std::unique_ptr rank(column_view const &input, auto rank_iter = rank_mutable_view.begin(); size_type const count = (null_handling == null_policy::EXCLUDE) ? input.size() - input.null_count() : input.size(); - auto drs = dense_rank_sorted.data().get(); + auto drs = dense_rank_sorted.data(); bool const is_dense = (method == rank_method::DENSE); thrust::transform(rmm::exec_policy(stream), rank_iter,