diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 7d8ac5c9325..1a4b8f02dd3 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns( std::vector const& null_precedence, column_nullability nullability = column_nullability::MATCH_INCOMING); -/** - * @brief Unflatten columns flattened as by `flatten_nested_columns()`, - * based on the provided `blueprint`. - * - * cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector - * before the child/member columns. - * E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to: - * 1. Null Vector for STRUCT_1 - * 2. Null Vector for STRUCT_2 - * 3. Member STRUCT_2::A - * 4. Member STRUCT_2::B - * 5. Member STRUCT_1::C - * - * `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows - * the convention above. - * - * Note: This function requires a null-mask vector for each STRUCT column, including for nested - * STRUCT members. - * - * @param flattened "Flattened" `table` of input columns, following the conventions in - * `flatten_nested_columns()`. - * @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines - * the nesting of the reconstructed output table. - * @return std::unique_ptr Unflattened table (with nested STRUCT columns) reconstructed - * based on `blueprint`. - */ -std::unique_ptr unflatten_nested_columns(std::unique_ptr&& flattened, - table_view const& blueprint); - /** * @brief Push down nulls from a parent mask into a child column, using bitwise AND. * diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 1d5ebfaa7fc..bf4216b6983 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -209,98 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input, return table_flattener{input, column_order, null_precedence, nullability}(); } -namespace { -using vector_of_columns = std::vector>; -using column_index_t = typename vector_of_columns::size_type; - -// Forward declaration, to enable recursion via `unflattener`. -std::unique_ptr unflatten_struct(vector_of_columns& flattened, - column_index_t& current_index, - cudf::column_view const& blueprint); - -/** - * @brief Helper functor to reconstruct STRUCT columns from its flattened member columns. - * - */ -class unflattener { - public: - unflattener(vector_of_columns& flattened_, column_index_t& current_index_) - : flattened{flattened_}, current_index{current_index_} - { - } - - auto operator()(column_view const& blueprint) - { - return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint) - : std::move(flattened[current_index++]); - } - - private: - vector_of_columns& flattened; - column_index_t& current_index; - -}; // class unflattener; - -std::unique_ptr unflatten_struct(vector_of_columns& flattened, - column_index_t& current_index, - cudf::column_view const& blueprint) -{ - // "Consume" columns from `flattened`, starting at `current_index`, - // based on the provided `blueprint` struct col. Recurse for struct children. - CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT, - "Expected blueprint column to be a STRUCT column."); - - CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children."); - - auto const num_rows = flattened[current_index]->size(); - - // cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector - // before the child/member columns. - // E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to: - // 1. Null Vector for STRUCT_1 - // 2. Null Vector for STRUCT_2 - // 3. Member STRUCT_2::A - // 4. Member STRUCT_2::B - // 5. Member STRUCT_1::C - // - // Extract null-vector *before* child columns are constructed. - auto struct_null_column_contents = flattened[current_index++]->release(); - auto unflattening_iter = - thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index}); - - return cudf::make_structs_column( - num_rows, - vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()}, - UNKNOWN_NULL_COUNT, // Do count? - std::move(*struct_null_column_contents.null_mask)); -} -} // namespace - -std::unique_ptr unflatten_nested_columns(std::unique_ptr&& flattened, - table_view const& blueprint) -{ - // Bail, if LISTs are present. - auto const has_lists = std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists); - CUDF_EXPECTS(not has_lists, "Unflattening LIST columns is not supported."); - - // If there are no STRUCTs, unflattening is a NOOP. - auto const has_structs = std::any_of(blueprint.begin(), blueprint.end(), is_struct); - if (not has_structs) { - return std::move(flattened); // Unchanged. - } - - // There be struct columns. - // Note: Requires null vectors for all struct input columns. - auto flattened_columns = flattened->release(); - auto current_idx = column_index_t{0}; - - auto unflattening_iter = - thrust::make_transform_iterator(blueprint.begin(), unflattener{flattened_columns, current_idx}); - - return std::make_unique( - vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_columns()}); -} - // Helper function to superimpose validity of parent struct // over the specified member (child) column. void superimpose_parent_nulls(bitmask_type const* parent_null_mask, diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index b26ea87c5b8..d58568cd1b5 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,26 +30,13 @@ namespace cudf::test { -/** - * @brief Round-trip input table through flatten/unflatten, - * verify that the table remains equivalent. - */ -void flatten_unflatten_compare(table_view const& input_table) -{ - using namespace cudf::structs::detail; - - auto flattened = flatten_nested_columns(input_table, {}, {}, column_nullability::FORCE); - auto unflattened = - unflatten_nested_columns(std::make_unique(flattened), input_table); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, unflattened->view()); -} - using namespace cudf; using namespace iterators; +using namespace cudf::structs::detail; using strings = strings_column_wrapper; using dictionary = dictionary_column_wrapper; using structs = structs_column_wrapper; +using bools = fixed_width_column_wrapper; template using nums = fixed_width_column_wrapper; @@ -66,7 +53,7 @@ struct TypedStructUtilitiesTest : StructUtilitiesTest { TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes); -TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported) +TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel) { using T = TypeParam; using lists = lists_column_wrapper; @@ -75,8 +62,10 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported) auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}}; auto nums_col = nums{{0, 1, 2}, null_at(6)}; - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{lists_col, nums_col}}), - cudf::logic_error); + auto table = cudf::table_view{{lists_col, nums_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(table, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) @@ -88,10 +77,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}}; auto nums_member = nums{{0, 1, 2}, null_at(6)}; auto structs_col = structs{{nums_member, lists_member}}; + auto nums_col = nums{{0, 1, 2}, null_at(6)}; - auto nums_col = nums{{0, 1, 2}, null_at(6)}; - - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}), + EXPECT_THROW(flatten_nested_columns( + cudf::table_view{{nums_col, structs_col}}, {}, {}, column_nullability::FORCE), cudf::logic_error); } @@ -104,7 +93,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) auto strings_col = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, strings_col, nuther_nums_col}}); + auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(table, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) @@ -116,8 +108,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto structs_col = structs{{nums_member, strings_member}}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) @@ -129,8 +132,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) @@ -147,8 +161,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) @@ -166,8 +198,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) @@ -185,8 +235,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) @@ -205,8 +273,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) @@ -222,7 +308,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) auto structs_with_lists_col = structs{lists_member, ints_member}; - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{structs_with_lists_col}}), + EXPECT_THROW(flatten_nested_columns( + cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE), cudf::logic_error); }