Skip to content

Commit

Permalink
Check column types with a utility that handles nesting correctly.
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice committed Mar 11, 2024
1 parent c794ce4 commit 39d14c8
Show file tree
Hide file tree
Showing 30 changed files with 135 additions and 69 deletions.
5 changes: 3 additions & 2 deletions cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,7 @@
#include <cudf/strings/string_view.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -212,7 +213,7 @@ struct column_scatterer_impl<dictionary32> {
// check the keys match
dictionary_column_view const source(source_in);
dictionary_column_view const target(target_in);
CUDF_EXPECTS(source.keys().type() == target.keys().type(),
CUDF_EXPECTS(cudf::column_types_equal(source.keys(), target.keys()),
"scatter dictionary keys must be the same type");

// first combine keys so both dictionaries have the same set
Expand Down
23 changes: 22 additions & 1 deletion cpp/include/cudf/utilities/type_checks.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,8 @@

#include <cudf/column/column_view.hpp>

#include <algorithm>

namespace cudf {

/**
Expand Down Expand Up @@ -47,4 +49,23 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs);
*/
bool column_types_equivalent(column_view const& lhs, column_view const& rhs);

/**
* @brief Compare the types of a range of `column_views`
*
* This function returns true if cudf::column_types_equal is true for every
* pair of consecutive columns in the range.
*
* @tparam ForwardIt Forward iterator
* @param first The first column
* @param last The last column
* @return true if all column types match
*/
template <typename ForwardIt>
inline bool all_column_types_equal(ForwardIt first, ForwardIt last)
{
return std::all_of(std::next(first), last, [want = *first](auto const& c) {
return cudf::column_types_equal(want, c);
});
}

} // namespace cudf
7 changes: 2 additions & 5 deletions cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -460,11 +461,7 @@ void traverse_children::operator()<cudf::list_view>(host_span<column_view const>
*/
void bounds_and_type_check(host_span<column_view const> cols, rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(std::all_of(cols.begin(),
cols.end(),
[expected_type = cols.front().type()](auto const& c) {
return c.type() == expected_type;
}),
CUDF_EXPECTS(cudf::all_column_types_equal(cols.begin(), cols.end()),
"Type mismatch in columns to concatenate.");

// total size of all concatenated rows
Expand Down
13 changes: 10 additions & 3 deletions cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,6 +26,7 @@
#include <cudf/strings/string_view.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -358,7 +359,7 @@ std::unique_ptr<column> copy_if_else(column_view const& lhs,
CUDF_EXPECTS(boolean_mask.size() == lhs.size(),
"Boolean mask column must be the same size as lhs and rhs columns");
CUDF_EXPECTS(lhs.size() == rhs.size(), "Both columns must be of the size");
CUDF_EXPECTS(lhs.type() == rhs.type(), "Both inputs must be of the same type");
CUDF_EXPECTS(cudf::column_types_equal(lhs, rhs), "Both inputs must be of the same type");

return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr);
}
Expand All @@ -372,7 +373,9 @@ std::unique_ptr<column> copy_if_else(scalar const& lhs,
CUDF_EXPECTS(boolean_mask.size() == rhs.size(),
"Boolean mask column must be the same size as rhs column");

auto rhs_type =
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
auto const rhs_type =
cudf::is_dictionary(rhs.type()) ? cudf::dictionary_column_view(rhs).keys_type() : rhs.type();
CUDF_EXPECTS(lhs.type() == rhs_type, "Both inputs must be of the same type");

Expand All @@ -388,6 +391,8 @@ std::unique_ptr<column> copy_if_else(column_view const& lhs,
CUDF_EXPECTS(boolean_mask.size() == lhs.size(),
"Boolean mask column must be the same size as lhs column");

// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
auto lhs_type =
cudf::is_dictionary(lhs.type()) ? cudf::dictionary_column_view(lhs).keys_type() : lhs.type();
CUDF_EXPECTS(lhs_type == rhs.type(), "Both inputs must be of the same type");
Expand All @@ -401,6 +406,8 @@ std::unique_ptr<column> copy_if_else(scalar const& lhs,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(lhs.type() == rhs.type(), "Both inputs must be of the same type");
return copy_if_else(
lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr);
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/copying/copy_range.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down Expand Up @@ -145,7 +146,7 @@ std::unique_ptr<cudf::column> out_of_place_copy_range_dispatch::operator()<cudf:
// check the keys in the source and target
cudf::dictionary_column_view const dict_source(source);
cudf::dictionary_column_view const dict_target(target);
CUDF_EXPECTS(dict_source.keys().type() == dict_target.keys().type(),
CUDF_EXPECTS(cudf::column_types_equal(dict_source.keys(), dict_target.keys()),
"dictionary keys must be the same type");

// combine keys so both dictionaries have the same set
Expand Down Expand Up @@ -207,7 +208,7 @@ void copy_range_in_place(column_view const& source,
(source_begin <= source_end) && (target_begin >= 0) &&
(target_begin <= target.size() - (source_end - source_begin)),
"Range is out of bounds.");
CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.");
CUDF_EXPECTS(cudf::column_types_equal(target, source), "Data type mismatch.");
CUDF_EXPECTS(target.nullable() || not source.has_nulls(),
"target should be nullable if source has null values.");

Expand All @@ -233,7 +234,7 @@ std::unique_ptr<column> copy_range(column_view const& source,
(source_begin <= source_end) && (target_begin >= 0) &&
(target_begin <= target.size() - (source_end - source_begin)),
"Range is out of bounds.");
CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.");
CUDF_EXPECTS(cudf::column_types_equal(target, source), "Data type mismatch.");

return cudf::type_dispatcher<dispatch_storage_type>(
target.type(),
Expand Down
22 changes: 9 additions & 13 deletions cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ struct column_scalar_scatterer_impl {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match");

// make a copy of data and null mask from source
Expand Down Expand Up @@ -140,6 +142,8 @@ struct column_scalar_scatterer_impl<string_view, MapIterator> {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match");

auto const scalar_impl = static_cast<string_scalar const*>(&source.get());
Expand Down Expand Up @@ -299,12 +303,7 @@ std::unique_ptr<table> scatter(table_view const& source,
"Number of columns in source and target not equal");
CUDF_EXPECTS(scatter_map.size() <= source.num_rows(),
"Size of scatter map must be equal to or less than source rows");
CUDF_EXPECTS(std::equal(source.begin(),
source.end(),
target.begin(),
[](auto const& col1, auto const& col2) {
return col1.type().id() == col2.type().id();
}),
CUDF_EXPECTS(cudf::have_same_types(source, target),
"Column types do not match between source and target");
CUDF_EXPECTS(not scatter_map.has_nulls(), "Scatter map contains nulls");

Expand Down Expand Up @@ -430,13 +429,8 @@ std::unique_ptr<table> boolean_mask_scatter(table_view const& input,
"Boolean mask size and number of target rows mismatch");
CUDF_EXPECTS(boolean_mask.type().id() == type_id::BOOL8, "Mask must be of Boolean type");
// Count valid pair of input and columns as per type at each column index i
CUDF_EXPECTS(
std::all_of(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(target.num_columns()),
[&input, &target](auto index) {
return ((input.column(index).type().id()) == (target.column(index).type().id()));
}),
"Type mismatch in input column and target column");
CUDF_EXPECTS(cudf::have_same_types(input, target),
"Type mismatch in input column and target column");

if (target.num_rows() != 0) {
std::vector<std::unique_ptr<column>> out_columns(target.num_columns());
Expand Down Expand Up @@ -470,6 +464,8 @@ std::unique_ptr<table> boolean_mask_scatter(

// Count valid pair of input and columns as per type at each column/scalar index i
CUDF_EXPECTS(
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
std::all_of(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(target.num_columns()),
[&input, &target](auto index) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/copying/shift.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -156,6 +156,8 @@ std::unique_ptr<column> shift(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(input.type() == fill_value.type(),
"shift requires each fill value type to match the corresponding column type.");

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/add_keys.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/mr/device/per_device_resource.hpp>

Expand All @@ -53,7 +54,7 @@ std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column
{
CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls");
auto old_keys = dictionary_column.keys(); // [a,b,c,d,f]
CUDF_EXPECTS(new_keys.type() == old_keys.type(), "Keys must be the same type");
CUDF_EXPECTS(cudf::column_types_equal(new_keys, old_keys), "Keys must be the same type");
// first, concatenate the keys together
// [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e]
auto combined_keys = cudf::detail::concatenate(
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/dictionary/detail/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
// empty column may not have keys so we create an empty column_view place-holder
if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0};
auto keys = dict_view.keys();
// TODO: Compare using cudf::column_types_equal to ensure nested types are
// handled correctly.
CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match");
return keys;
});
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/remove_keys.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,6 +26,7 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -154,7 +155,7 @@ std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_col
{
CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls");
auto const keys_view = dictionary_column.keys();
CUDF_EXPECTS(keys_view.type() == keys_to_remove.type(), "keys types must match");
CUDF_EXPECTS(cudf::column_types_equal(keys_view, keys_to_remove), "keys types must match");

// locate keys to remove by searching the keys column
auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr);
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/dictionary/replace.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,7 @@
#include <cudf/dictionary/detail/search.hpp>
#include <cudf/dictionary/detail/update_keys.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down Expand Up @@ -83,7 +84,7 @@ std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
{
if (input.is_empty()) { return cudf::empty_like(input.parent()); }
if (!input.has_nulls()) { return std::make_unique<cudf::column>(input.parent(), stream, mr); }
CUDF_EXPECTS(input.keys().type() == replacement.keys().type(), "keys must match");
CUDF_EXPECTS(cudf::column_types_equal(input.keys(), replacement.keys()), "keys must match");
CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match");

// first combine the keys so both input dictionaries have the same set
Expand Down Expand Up @@ -118,6 +119,8 @@ std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
if (!input.has_nulls() || !replacement.is_valid(stream)) {
return std::make_unique<cudf::column>(input.parent(), stream, mr);
}
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type");

// first add the replacement to the keys so only the indices need to be processed
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/dictionary/search.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,6 +73,8 @@ struct find_index_fn {
{
if (!key.is_valid(stream))
return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr);
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(input.keys().type() == key.type(),
"search key type must match dictionary keys type");

Expand Down Expand Up @@ -115,6 +117,8 @@ struct find_insert_index_fn {
{
if (!key.is_valid(stream))
return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr);
// TODO: Need some utility like cudf::column_types_equivalent for scalars to
// ensure nested types are handled correctly.
CUDF_EXPECTS(input.keys().type() == key.type(),
"search key type must match dictionary keys type");

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/dictionary/set_keys.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -115,15 +116,14 @@ struct dispatch_compute_indices {

} // namespace

//
std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls");
auto keys = dictionary_column.keys();
CUDF_EXPECTS(keys.type() == new_keys.type(), "keys types must match");
CUDF_EXPECTS(cudf::column_types_equal(keys, new_keys), "keys types must match");

// copy the keys -- use cudf::distinct to make sure there are no duplicates,
// then sort the results.
Expand Down
Loading

0 comments on commit 39d14c8

Please sign in to comment.