Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate unflatten_nested_columns #11421

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns(
std::vector<null_order> const& null_precedence,
column_nullability nullability = column_nullability::MATCH_INCOMING);

/**
* @brief Unflatten columns flattened as by `flatten_nested_columns()`,
* based on the provided `blueprint`.
*
* cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
* before the child/member columns.
* E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
* 1. Null Vector for STRUCT_1
* 2. Null Vector for STRUCT_2
* 3. Member STRUCT_2::A
* 4. Member STRUCT_2::B
* 5. Member STRUCT_1::C
*
* `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows
* the convention above.
*
* Note: This function requires a null-mask vector for each STRUCT column, including for nested
* STRUCT members.
*
* Note: If `blueprint` contains no STRUCT columns, unflattening is a no-op and `flattened` is
* handed back unchanged.
*
* Note: `blueprint` tables that contain LIST columns are rejected; unflattening LIST columns is
* not supported.
*
* @param flattened "Flattened" `table` of input columns, following the conventions in
* `flatten_nested_columns()`. Ownership is taken from the caller.
* @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines
* the nesting of the reconstructed output table.
* @return std::unique_ptr<cudf::table> Unflattened table (with nested STRUCT columns) reconstructed
* based on `blueprint`.
*/
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
table_view const& blueprint);

/**
* @brief Push down nulls from a parent mask into a child column, using bitwise AND.
*
Expand Down
92 changes: 0 additions & 92 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,98 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input,
return table_flattener{input, column_order, null_precedence, nullability}();
}

namespace {
// Owning list of columns, as obtained by releasing a flattened table.
using vector_of_columns = std::vector<std::unique_ptr<cudf::column>>;
// Index type used as the cursor into a `vector_of_columns`.
using column_index_t = typename vector_of_columns::size_type;

// Forward declaration, to enable recursion via `unflattener`.
// `current_index` is taken by reference: it is the shared cursor into `flattened`
// and is advanced as columns are consumed.
std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint);

/**
* @brief Helper functor to reconstruct STRUCT columns from its flattened member columns.
*
*/
class unflattener {
public:
unflattener(vector_of_columns& flattened_, column_index_t& current_index_)
: flattened{flattened_}, current_index{current_index_}
{
}

auto operator()(column_view const& blueprint)
{
return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint)
: std::move(flattened[current_index++]);
}

private:
vector_of_columns& flattened;
column_index_t& current_index;

}; // class unflattener;

/**
* @brief Rebuild a single STRUCT column from `flattened`, starting at `current_index`.
*
* The column at `current_index` is treated as the struct's null vector: its size gives the
* row count and its null mask becomes the null mask of the rebuilt struct. The following
* columns are consumed as the struct's members, one per child of `blueprint`, recursing for
* members that are themselves structs.
*
* @param flattened Flattened columns; consumed entries are moved-from or released.
* @param current_index Cursor into `flattened`; advanced past every column consumed here.
* @param blueprint STRUCT column view whose children define the shape of the result.
* @return The reconstructed STRUCT column.
*/
std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint)
{
// "Consume" columns from `flattened`, starting at `current_index`,
// based on the provided `blueprint` struct col. Recurse for struct children.
CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT,
"Expected blueprint column to be a STRUCT column.");

// Guards the read of the null-vector column below against running off the end.
CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children.");

// Row count is taken from the struct's null-vector column, before it is released.
auto const num_rows = flattened[current_index]->size();

// cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
// before the child/member columns.
// E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
// 1. Null Vector for STRUCT_1
// 2. Null Vector for STRUCT_2
// 3. Member STRUCT_2::A
// 4. Member STRUCT_2::B
// 5. Member STRUCT_1::C
//
// Extract null-vector *before* child columns are constructed.
auto struct_null_column_contents = flattened[current_index++]->release();
// Each dereference of this iterator invokes `unflattener`, which consumes columns from
// `flattened` and advances `current_index` as a side effect.
// NOTE(review): this presumably relies on the vector range construction below visiting
// each child position exactly once, in order — confirm before refactoring.
auto unflattening_iter =
thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index});

return cudf::make_structs_column(
num_rows,
vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()},
UNKNOWN_NULL_COUNT, // Do count?
std::move(*struct_null_column_contents.null_mask));
}
} // namespace

/**
 * @brief Reconstruct a table with nested STRUCT columns from its flattened form.
 *
 * @param flattened Flattened table; ownership is taken and its columns are consumed.
 * @param blueprint Table view whose column nesting the result must reproduce.
 * @return The unflattened table, or `flattened` itself when `blueprint` has no STRUCT columns.
 */
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
                                                      table_view const& blueprint)
{
  // LIST columns cannot be unflattened: reject them before doing anything else.
  auto const lists_present =
    std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists);
  CUDF_EXPECTS(not lists_present, "Unflattening LIST columns is not supported.");

  // With no STRUCT column in the blueprint there is nothing to rebuild;
  // give the input straight back to the caller.
  if (std::none_of(blueprint.begin(), blueprint.end(), is_struct)) {
    return std::move(flattened);
  }

  // At least one STRUCT column is present.
  // Note: Requires null vectors for all struct input columns.
  auto released_columns = flattened->release();
  auto cursor           = column_index_t{0};

  // Dereferencing the iterator runs `unflattener`, which consumes entries of
  // `released_columns` and moves `cursor` forward.
  auto const first =
    thrust::make_transform_iterator(blueprint.begin(), unflattener{released_columns, cursor});
  auto const last = first + blueprint.num_columns();

  return std::make_unique<cudf::table>(vector_of_columns{first, last});
}

// Helper function to superimpose validity of parent struct
// over the specified member (child) column.
void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
Expand Down
159 changes: 123 additions & 36 deletions cpp/tests/structs/utilities_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,26 +30,13 @@

namespace cudf::test {

/**
 * @brief Flatten `input_table`, unflatten the result using `input_table` as the blueprint,
 * and check that the round-tripped table is equivalent to the input.
 */
void flatten_unflatten_compare(table_view const& input_table)
{
  using namespace cudf::structs::detail;

  // Flatten with a null vector forced for every struct column.
  auto flattened = flatten_nested_columns(input_table, {}, {}, column_nullability::FORCE);

  // Unflattening takes ownership of a table, so materialize one from the flattened result.
  auto flattened_copy = std::make_unique<cudf::table>(flattened);
  auto round_tripped  = unflatten_nested_columns(std::move(flattened_copy), input_table);

  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, round_tripped->view());
}

using namespace cudf;
using namespace iterators;
using namespace cudf::structs::detail;
using strings = strings_column_wrapper;
using dictionary = dictionary_column_wrapper<std::string>;
using structs = structs_column_wrapper;
using bools = fixed_width_column_wrapper<bool>;

template <typename T>
using nums = fixed_width_column_wrapper<T, int32_t>;
Expand All @@ -66,7 +53,7 @@ struct TypedStructUtilitiesTest : StructUtilitiesTest {

TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes);

TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported)
TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel)
{
using T = TypeParam;
using lists = lists_column_wrapper<T, int32_t>;
Expand All @@ -75,8 +62,10 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported)
auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}};
auto nums_col = nums{{0, 1, 2}, null_at(6)};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{lists_col, nums_col}}),
cudf::logic_error);
auto table = cudf::table_view{{lists_col, nums_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(table,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
Expand All @@ -88,10 +77,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}};
auto nums_member = nums{{0, 1, 2}, null_at(6)};
auto structs_col = structs{{nums_member, lists_member}};
auto nums_col = nums{{0, 1, 2}, null_at(6)};

auto nums_col = nums{{0, 1, 2}, null_at(6)};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}),
EXPECT_THROW(flatten_nested_columns(
cudf::table_view{{nums_col, structs_col}}, {}, {}, column_nullability::FORCE),
cudf::logic_error);
}

Expand All @@ -104,7 +93,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs)
auto strings_col = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, strings_col, nuther_nums_col}});
auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(table,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
Expand All @@ -116,8 +108,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto structs_col = structs{{nums_member, strings_member}};
auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}});
auto table = cudf::table_view{{nums_col, structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
auto expected_strings_col =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(1));
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
Expand All @@ -129,8 +132,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto structs_col = structs{{nums_member, strings_member}, null_at(2)};
auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}});
auto table = cudf::table_view{{nums_col, structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
auto expected_strings_col =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(1));
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
Expand All @@ -147,8 +161,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)

auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
Expand All @@ -166,8 +198,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)

auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
Expand All @@ -185,8 +235,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col =
structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
Expand All @@ -205,8 +273,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col =
structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)
Expand All @@ -222,7 +308,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)

auto structs_with_lists_col = structs{lists_member, ints_member};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{structs_with_lists_col}}),
EXPECT_THROW(flatten_nested_columns(
cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE),
cudf::logic_error);
}

Expand Down