Skip to content

Commit

Permalink
Update mixed_join to use experimental row hasher and comparator (#13028)
Browse files Browse the repository at this point in the history

Part of #11844 

`mixed_join` cannot support nested types because the conditional part of the join relies on AST. For this reason, this PR adds no new tests or benchmarks.

[Benchmarks](#13028 (comment))

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)

URL: #13028
  • Loading branch information
divyegala committed Apr 21, 2023
1 parent c7f2342 commit bccf3ab
Show file tree
Hide file tree
Showing 15 changed files with 165 additions and 72 deletions.
4 changes: 2 additions & 2 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ using mixed_multimap_type = cuco::static_multimap<hash_value_type,
using semi_map_type = cuco::
static_map<hash_value_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;

using row_hash = cudf::row_hasher<default_hash, cudf::nullate::DYNAMIC>;
using row_hash_legacy = cudf::row_hasher<default_hash, cudf::nullate::DYNAMIC>;

using row_equality = cudf::row_equality_comparator<cudf::nullate::DYNAMIC>;
using row_equality_legacy = cudf::row_equality_comparator<cudf::nullate::DYNAMIC>;

bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type);
} // namespace detail
Expand Down
38 changes: 28 additions & 10 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ mixed_join(
// If evaluating the expression may produce null outputs we create a nullable
// output column and follow the null-supporting expression evaluation code
// path.
auto const has_nulls =
cudf::has_nested_nulls(left_equality) || cudf::has_nested_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream);
auto const has_nulls = cudf::nullate::DYNAMIC{
cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)};

auto const parser = ast::detail::expression_parser{
binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr};
Expand All @@ -125,8 +125,6 @@ mixed_join(
auto& build = swap_tables ? left_equality : right_equality;
auto probe_view = table_device_view::create(probe, stream);
auto build_view = table_device_view::create(build, stream);
row_equality equality_probe{
cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls};

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
Expand Down Expand Up @@ -169,6 +167,14 @@ mixed_join(
std::optional<rmm::device_uvector<size_type>> matches_per_row{};
device_span<size_type const> matches_per_row_span{};

auto const preprocessed_probe =
experimental::row::equality::preprocessed_table::create(probe, stream);
auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto const hash_probe = row_hash.device_hasher(has_nulls);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const equality_probe = row_comparator.equal_to<false>(has_nulls, compare_nulls);

if (output_size_data.has_value()) {
join_size = output_size_data->first;
matches_per_row_span = output_size_data->second;
Expand All @@ -191,6 +197,7 @@ mixed_join(
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
Expand All @@ -205,6 +212,7 @@ mixed_join(
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
Expand Down Expand Up @@ -248,6 +256,7 @@ mixed_join(
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
Expand All @@ -263,6 +272,7 @@ mixed_join(
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
Expand Down Expand Up @@ -365,9 +375,9 @@ compute_mixed_join_output_size(table_view const& left_equality,
// If evaluating the expression may produce null outputs we create a nullable
// output column and follow the null-supporting expression evaluation code
// path.
auto const has_nulls =
cudf::has_nested_nulls(left_equality) || cudf::has_nested_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream);
auto const has_nulls = cudf::nullate::DYNAMIC{
cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)};

auto const parser = ast::detail::expression_parser{
binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr};
Expand All @@ -381,8 +391,6 @@ compute_mixed_join_output_size(table_view const& left_equality,
auto& build = swap_tables ? left_equality : right_equality;
auto probe_view = table_device_view::create(probe, stream);
auto build_view = table_device_view::create(build, stream);
row_equality equality_probe{
cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls};

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
Expand Down Expand Up @@ -419,6 +427,14 @@ compute_mixed_join_output_size(table_view const& left_equality,
// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);

auto const preprocessed_probe =
experimental::row::equality::preprocessed_table::create(probe, stream);
auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto const hash_probe = row_hash.device_hasher(has_nulls);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const equality_probe = row_comparator.equal_to<false>(has_nulls, compare_nulls);

// Determine number of output rows without actually building the output to simply
// find what the size of the output will be.
if (has_nulls) {
Expand All @@ -428,6 +444,7 @@ compute_mixed_join_output_size(table_view const& left_equality,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
Expand All @@ -442,6 +459,7 @@ compute_mixed_join_output_size(table_view const& left_equality,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
Expand Down
20 changes: 17 additions & 3 deletions cpp/src/join/mixed_join_common_utils.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@

#include <cudf/ast/detail/expression_evaluator.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/table/experimental/row_operators.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -28,6 +29,13 @@
namespace cudf {
namespace detail {

// Row hasher type that the mixed join kernels receive as their `hash_probe` argument.
using row_hash =
cudf::experimental::row::hash::device_row_hasher<default_hash, cudf::nullate::DYNAMIC>;

// Row equality comparator type that the mixed join kernels receive as their
// `equality_probe` argument.
// This alias is used by mixed joins, which support only non-nested types.
// NOTE(review): the `false` template argument presumably disables nested-column
// comparison -- confirm against the declaration of device_row_comparator.
using row_equality = cudf::experimental::row::equality::strong_index_comparator_adapter<
cudf::experimental::row::equality::device_row_comparator<false, cudf::nullate::DYNAMIC>>;

/**
* @brief Equality comparator for use with cuco map methods that require expression evaluation.
*
Expand Down Expand Up @@ -79,12 +87,15 @@ struct single_expression_equality : expression_equality<has_nulls> {
__device__ __forceinline__ bool operator()(hash_value_type const build_row_index,
hash_value_type const probe_row_index) const noexcept
{
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;

auto output_dest = cudf::ast::detail::value_expression_result<bool, has_nulls>();
// Two levels of checks:
// 1. The contents of the columns involved in the equality condition are equal.
// 2. The predicate evaluated on the relevant columns (already encoded in the evaluator)
// evaluates to true.
if (this->equality_probe(probe_row_index, build_row_index)) {
if (this->equality_probe(lhs_index_type{probe_row_index}, rhs_index_type{build_row_index})) {
auto const lrow_idx = this->swap_tables ? build_row_index : probe_row_index;
auto const rrow_idx = this->swap_tables ? probe_row_index : build_row_index;
this->evaluator.evaluate(output_dest,
Expand Down Expand Up @@ -127,14 +138,17 @@ struct pair_expression_equality : public expression_equality<has_nulls> {
__device__ __forceinline__ bool operator()(pair_type const& build_row,
pair_type const& probe_row) const noexcept
{
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;

auto output_dest = cudf::ast::detail::value_expression_result<bool, has_nulls>();
// Three levels of checks:
// 1. Row hashes of the columns involved in the equality condition are equal.
// 2. The contents of the columns involved in the equality condition are equal.
// 3. The predicate evaluated on the relevant columns (already encoded in the evaluator)
// evaluates to true.
if ((probe_row.first == build_row.first) &&
this->equality_probe(probe_row.second, build_row.second)) {
this->equality_probe(lhs_index_type{probe_row.second}, rhs_index_type{build_row.second})) {
auto const lrow_idx = this->swap_tables ? build_row.second : probe_row.second;
auto const rrow_idx = this->swap_tables ? probe_row.second : build_row.second;
this->evaluator.evaluate(
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/join/mixed_join_kernel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,7 @@ template __global__ void mixed_join<DEFAULT_JOIN_BLOCK_SIZE, false>(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::mixed_multimap_type::device_view hash_table_view,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/join/mixed_join_kernel.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -42,6 +42,7 @@ __launch_bounds__(block_size) __global__
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::mixed_multimap_type::device_view hash_table_view,
Expand Down Expand Up @@ -70,7 +71,6 @@ __launch_bounds__(block_size) __global__
auto evaluator = cudf::ast::detail::expression_evaluator<has_nulls>(
left_table, right_table, device_expression_data);

row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe};
auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/join/mixed_join_kernel_nulls.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,7 @@ template __global__ void mixed_join<DEFAULT_JOIN_BLOCK_SIZE, true>(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::mixed_multimap_type::device_view hash_table_view,
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/join/mixed_join_kernels.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,6 +41,7 @@ namespace detail {
* @param[in] right_table The right table
* @param[in] probe The table with which to probe the hash table for matches.
* @param[in] build The table with which the hash table was built.
* @param[in] hash_probe The hasher used for the probe table.
* @param[in] equality_probe The equality comparator used when probing the hash table.
* @param[in] join_type The type of join to be performed
* @param[in] hash_table_view The hash table built from `build`.
Expand All @@ -62,6 +63,7 @@ __global__ void compute_mixed_join_output_size(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::mixed_multimap_type::device_view hash_table_view,
Expand All @@ -87,6 +89,7 @@ __global__ void compute_mixed_join_output_size(
* @param[in] right_table The right table
* @param[in] probe The table with which to probe the hash table for matches.
* @param[in] build The table with which the hash table was built.
* @param[in] hash_probe The hasher used for the probe table.
* @param[in] equality_probe The equality comparator used when probing the hash table.
* @param[in] join_type The type of join to be performed
* @param[in] hash_table_view The hash table built from `build`.
Expand All @@ -105,6 +108,7 @@ __global__ void mixed_join(table_device_view left_table,
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::mixed_multimap_type::device_view hash_table_view,
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/join/mixed_join_kernels_semi.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,6 +37,7 @@ __launch_bounds__(block_size) __global__
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::semi_map_type::device_view hash_table_view,
Expand Down Expand Up @@ -64,8 +65,6 @@ __launch_bounds__(block_size) __global__
auto evaluator = cudf::ast::detail::expression_evaluator<has_nulls>(
left_table, right_table, device_expression_data);

row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe};

if (outer_row_index < outer_num_rows) {
// Figure out the number of elements for this key.
auto equality = single_expression_equality<has_nulls>{
Expand All @@ -83,6 +82,7 @@ template __global__ void mixed_join_semi<DEFAULT_JOIN_BLOCK_SIZE, true>(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::semi_map_type::device_view hash_table_view,
Expand All @@ -96,6 +96,7 @@ template __global__ void mixed_join_semi<DEFAULT_JOIN_BLOCK_SIZE, false>(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::semi_map_type::device_view hash_table_view,
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/join/mixed_join_kernels_semi.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,6 +41,7 @@ namespace detail {
* @param[in] right_table The right table
* @param[in] probe The table with which to probe the hash table for matches.
* @param[in] build The table with which the hash table was built.
* @param[in] hash_probe The hasher used for the probe table.
* @param[in] equality_probe The equality comparator used when probing the hash table.
* @param[in] join_type The type of join to be performed
* @param[in] hash_table_view The hash table built from `build`.
Expand All @@ -62,6 +63,7 @@ __global__ void compute_mixed_join_output_size_semi(
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::semi_map_type::device_view hash_table_view,
Expand All @@ -87,6 +89,7 @@ __global__ void compute_mixed_join_output_size_semi(
* @param[in] right_table The right table
* @param[in] probe The table with which to probe the hash table for matches.
* @param[in] build The table with which the hash table was built.
* @param[in] hash_probe The hasher used for the probe table.
* @param[in] equality_probe The equality comparator used when probing the hash table.
* @param[in] join_type The type of join to be performed
* @param[in] hash_table_view The hash table built from `build`.
Expand All @@ -104,6 +107,7 @@ __global__ void mixed_join_semi(table_device_view left_table,
table_device_view right_table,
table_device_view probe,
table_device_view build,
row_hash const hash_probe,
row_equality const equality_probe,
join_kind const join_type,
cudf::detail::semi_map_type::device_view hash_table_view,
Expand Down

0 comments on commit bccf3ab

Please sign in to comment.