Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert cudf::rank to use device_uvector #8029

Merged
merged 7 commits into from
Apr 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ ConfigureBench(SEARCH_BENCH search/search_benchmark.cu)
###################################################################################################
# - sort benchmark --------------------------------------------------------------------------------
ConfigureBench(SORT_BENCH
sort/rank_benchmark.cpp
sort/sort_benchmark.cpp
sort/sort_strings_benchmark.cpp)

Expand Down
74 changes: 74 additions & 0 deletions cpp/benchmarks/sort/rank_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/sorting.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/table_utilities.hpp>

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

/// Benchmark fixture for the cudf::rank benchmarks; adds nothing beyond cudf::benchmark.
class Rank : public cudf::benchmark {};

/**
 * @brief Benchmark body: times cudf::rank (rank_method::FIRST, ascending) on one int column.
 *
 * The column is filled from a default-constructed std::default_random_engine, so the
 * generated data does not vary with a time- or device-based seed.
 *
 * @param state Google Benchmark state; `state.range(0)` supplies the number of rows.
 * @param nulls When true, the column is built with a validity iterator that marks every
 *              100th row (0, 100, 200, ...) as null and ranking uses null_policy::INCLUDE.
 *              When false, no validity iterator is supplied and null_policy::EXCLUDE is used.
 */
static void BM_rank(benchmark::State& state, bool nulls)
{
  using Type           = int;
  using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
  std::default_random_engine generator;
  std::uniform_int_distribution<int> distribution(0, 100);

  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};

  // Create a column with values in the closed range [0, 100]
  // (std::uniform_int_distribution includes both bounds).
  column_wrapper input = [&, n_rows]() {
    // The counting index is ignored; every dereference draws a fresh random value.
    auto elements = cudf::detail::make_counting_transform_iterator(
      0, [&](auto /*row*/) { return distribution(generator); });
    if (!nulls) return column_wrapper(elements, elements + n_rows);
    // Row i is valid unless i is a multiple of 100.
    auto valids =
      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100 != 0; });
    return column_wrapper(elements, elements + n_rows, valids);
  }();

  for (auto _ : state) {
    // NOTE(review): presumably a scoped timer that reports the elapsed time of this iteration
    // to `state` (the benchmark is registered with UseManualTime) and the `true` flag requests
    // a cache flush before timing -- confirm against benchmarks/synchronization.
    cuda_event_timer raii(state, true, rmm::cuda_stream_default);

    // `result` is intentionally unused: only the cost of the call is of interest.
    auto result = cudf::rank(input,
                             cudf::rank_method::FIRST,
                             cudf::order::ASCENDING,
                             nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
                             cudf::null_order::AFTER,
                             false);  // presumably the `percentage` flag -- see cudf::rank docs
  }
}

// Defines a benchmark called `name` on the Rank fixture whose body forwards to
// BM_rank(state, with_nulls), then registers it for row counts from 2^10 up to 2^26
// (range multiplier 8), using manually reported time displayed in milliseconds.
#define RANK_BENCHMARK_DEFINE(name, with_nulls)              \
  BENCHMARK_DEFINE_F(Rank, name)(::benchmark::State & state) \
  {                                                          \
    BM_rank(state, with_nulls);                              \
  }                                                          \
  BENCHMARK_REGISTER_F(Rank, name)                           \
    ->RangeMultiplier(8)                                     \
    ->Ranges({{1 << 10, 1 << 26}})                           \
    ->UseManualTime()                                        \
    ->Unit(benchmark::kMillisecond);

// One benchmark without nulls and one with nulls in the input column.
RANK_BENCHMARK_DEFINE(no_nulls, false)
RANK_BENCHMARK_DEFINE(nulls, true)
1 change: 0 additions & 1 deletion cpp/src/copying/sample.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

#include <rmm/cuda_stream_view.hpp>

#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/random/uniform_int_distribution.h>
#include <thrust/shuffle.h>
Expand Down
1 change: 0 additions & 1 deletion cpp/src/groupby/sort/group_count_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include <thrust/scan.h>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/join/semi_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/lists/count_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/transform.h>
Expand Down
43 changes: 22 additions & 21 deletions cpp/src/sort/rank.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/discard_iterator.h>
Expand Down Expand Up @@ -55,13 +54,13 @@ struct unique_comparator {
};

// Assign rank from 1 to n unique values. Equal values get same rank value.
rmm::device_vector<size_type> sorted_dense_rank(column_view input_col,
column_view sorted_order_view,
rmm::cuda_stream_view stream)
rmm::device_uvector<size_type> sorted_dense_rank(column_view input_col,
column_view sorted_order_view,
rmm::cuda_stream_view stream)
{
auto device_table = table_device_view::create(table_view{{input_col}}, stream);
auto const input_size = input_col.size();
rmm::device_vector<size_type> dense_rank_sorted(input_size);
rmm::device_uvector<size_type> dense_rank_sorted(input_size, stream);
auto sorted_index_order = thrust::make_permutation_iterator(
sorted_order_view.begin<size_type>(), thrust::make_counting_iterator<size_type>(0));
if (input_col.has_nulls()) {
Expand All @@ -70,14 +69,14 @@ rmm::device_vector<size_type> sorted_dense_rank(column_view input_col,
auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv);

thrust::inclusive_scan(
rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data().get());
rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data());
} else {
auto conv = unique_comparator<false, size_type, decltype(sorted_index_order)>(
*device_table, sorted_index_order);
auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv);

thrust::inclusive_scan(
rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data().get());
rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data());
}
return dense_rank_sorted;
}
Expand All @@ -100,7 +99,7 @@ template <typename TieType,
typename TieBreaker,
typename Transformer,
typename TieIterator>
void tie_break_ranks_transform(rmm::device_vector<size_type> const &dense_rank_sorted,
void tie_break_ranks_transform(cudf::device_span<size_type const> dense_rank_sorted,
TieIterator tie_iter,
column_view const &sorted_order_view,
outputIterator rank_iter,
Expand All @@ -109,9 +108,9 @@ void tie_break_ranks_transform(rmm::device_vector<size_type> const &dense_rank_s
rmm::cuda_stream_view stream)
{
auto const input_size = sorted_order_view.size();
rmm::device_vector<TieType> tie_sorted(input_size, 0);
// algorithm: reduce_by_key(dense_rank, 1, n, reduction_tie_breaker)
// reduction_tie_breaker = min, max, min_count
rmm::device_uvector<TieType> tie_sorted(sorted_order_view.size(), stream);
thrust::reduce_by_key(rmm::exec_policy(stream),
dense_rank_sorted.begin(),
dense_rank_sorted.end(),
Expand Down Expand Up @@ -146,7 +145,7 @@ void rank_first(column_view sorted_order_view,
}

template <typename outputType>
void rank_dense(rmm::device_vector<size_type> const &dense_rank_sorted,
void rank_dense(cudf::device_span<size_type const> dense_rank_sorted,
column_view sorted_order_view,
mutable_column_view rank_mutable_view,
rmm::cuda_stream_view stream)
Expand All @@ -160,7 +159,7 @@ void rank_dense(rmm::device_vector<size_type> const &dense_rank_sorted,
}

template <typename outputType>
void rank_min(rmm::device_vector<size_type> const &group_keys,
void rank_min(cudf::device_span<size_type const> group_keys,
column_view sorted_order_view,
mutable_column_view rank_mutable_view,
rmm::cuda_stream_view stream)
Expand All @@ -178,7 +177,7 @@ void rank_min(rmm::device_vector<size_type> const &group_keys,
}

template <typename outputType>
void rank_max(rmm::device_vector<size_type> const &group_keys,
void rank_max(cudf::device_span<size_type const> group_keys,
column_view sorted_order_view,
mutable_column_view rank_mutable_view,
rmm::cuda_stream_view stream)
Expand All @@ -195,7 +194,7 @@ void rank_max(rmm::device_vector<size_type> const &group_keys,
stream);
}

void rank_average(rmm::device_vector<size_type> const &group_keys,
void rank_average(cudf::device_span<size_type const> group_keys,
column_view sorted_order_view,
mutable_column_view rank_mutable_view,
rmm::cuda_stream_view stream)
Expand All @@ -206,16 +205,18 @@ void rank_average(rmm::device_vector<size_type> const &group_keys,
// Calculate Min of ranks and Count of equal values
// algorithm: reduce_by_key(dense_rank, 1, n, min_count)
// transform(min+(count-1)/2), scatter
using MinCount = thrust::tuple<size_type, size_type>;
using MinCount = thrust::pair<size_type, size_type>;
tie_break_ranks_transform<MinCount>(
group_keys,
thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator<size_type>(1),
thrust::make_constant_iterator<size_type>(1))),
cudf::detail::make_counting_transform_iterator(1,
[] __device__(auto i) {
return MinCount{i, 1};
}),
sorted_order_view,
rank_mutable_view.begin<double>(),
[] __device__(auto rank_count1, auto rank_count2) {
return MinCount{std::min(thrust::get<0>(rank_count1), thrust::get<0>(rank_count2)),
thrust::get<1>(rank_count1) + thrust::get<1>(rank_count2)};
return MinCount{std::min(rank_count1.first, rank_count2.first),
rank_count1.second + rank_count2.second};
},
[] __device__(MinCount minrank_count) { // min+(count-1)/2
return static_cast<double>(thrust::get<0>(minrank_count)) +
Expand Down Expand Up @@ -261,12 +262,12 @@ std::unique_ptr<column> rank(column_view const &input,

// dense: All equal values have same rank and rank always increases by 1 between groups
// acts as key for min, max, average to denote equal value groups
rmm::device_vector<size_type> const dense_rank_sorted =
rmm::device_uvector<size_type> const dense_rank_sorted =
[&method, &input, &sorted_order_view, &stream] {
if (method != rank_method::FIRST)
return sorted_dense_rank(input, sorted_order_view, stream);
else
return rmm::device_vector<size_type>();
return rmm::device_uvector<size_type>(0, stream);
}();

if (output_type.id() == type_id::FLOAT64) {
Expand Down Expand Up @@ -314,7 +315,7 @@ std::unique_ptr<column> rank(column_view const &input,
auto rank_iter = rank_mutable_view.begin<double>();
size_type const count =
(null_handling == null_policy::EXCLUDE) ? input.size() - input.null_count() : input.size();
auto drs = dense_rank_sorted.data().get();
auto drs = dense_rank_sorted.data();
bool const is_dense = (method == rank_method::DENSE);
thrust::transform(rmm::exec_policy(stream),
rank_iter,
Expand Down