Skip to content

Commit

Permalink
Deprecate unflatten_nested_columns (#11421)
Browse files Browse the repository at this point in the history
Closes #10952 

After #10770 was merged there are no more uses of `unflatten_nested_columns`. This pr removes `unflatten_nested_columns` and adjusts the tests accordingly.

Authors:
  - Srikar Vanavasam (https://github.com/SrikarVanavasam)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #11421
  • Loading branch information
SrikarVanavasam committed Aug 3, 2022
1 parent b281fdf commit f31e5bd
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 157 deletions.
29 changes: 0 additions & 29 deletions cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns(
std::vector<null_order> const& null_precedence,
column_nullability nullability = column_nullability::MATCH_INCOMING);

/**
* @brief Unflatten columns flattened as by `flatten_nested_columns()`,
* based on the provided `blueprint`.
*
* cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
* before the child/member columns.
* E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
* 1. Null Vector for STRUCT_1
* 2. Null Vector for STRUCT_2
* 3. Member STRUCT_2::A
* 4. Member STRUCT_2::B
* 5. Member STRUCT_1::C
*
* `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows
* the convention above.
*
* Note: This function requires a null-mask vector for each STRUCT column, including for nested
* STRUCT members.
*
* @param flattened "Flattened" `table` of input columns, following the conventions in
* `flatten_nested_columns()`.
* @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines
* the nesting of the reconstructed output table.
* @return std::unique_ptr<cudf::table> Unflattened table (with nested STRUCT columns) reconstructed
* based on `blueprint`.
*/
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
table_view const& blueprint);

/**
* @brief Push down nulls from a parent mask into a child column, using bitwise AND.
*
Expand Down
92 changes: 0 additions & 92 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,98 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input,
return table_flattener{input, column_order, null_precedence, nullability}();
}

namespace {
using vector_of_columns = std::vector<std::unique_ptr<cudf::column>>;
using column_index_t = typename vector_of_columns::size_type;

// Forward declaration, to enable recursion via `unflattener`.
std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint);

/**
* @brief Helper functor to reconstruct STRUCT columns from its flattened member columns.
*
*/
class unflattener {
public:
unflattener(vector_of_columns& flattened_, column_index_t& current_index_)
: flattened{flattened_}, current_index{current_index_}
{
}

auto operator()(column_view const& blueprint)
{
return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint)
: std::move(flattened[current_index++]);
}

private:
vector_of_columns& flattened;
column_index_t& current_index;

}; // class unflattener;

std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint)
{
// "Consume" columns from `flattened`, starting at `current_index`,
// based on the provided `blueprint` struct col. Recurse for struct children.
CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT,
"Expected blueprint column to be a STRUCT column.");

CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children.");

auto const num_rows = flattened[current_index]->size();

// cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
// before the child/member columns.
// E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
// 1. Null Vector for STRUCT_1
// 2. Null Vector for STRUCT_2
// 3. Member STRUCT_2::A
// 4. Member STRUCT_2::B
// 5. Member STRUCT_1::C
//
// Extract null-vector *before* child columns are constructed.
auto struct_null_column_contents = flattened[current_index++]->release();
auto unflattening_iter =
thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index});

return cudf::make_structs_column(
num_rows,
vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()},
UNKNOWN_NULL_COUNT, // Do count?
std::move(*struct_null_column_contents.null_mask));
}
} // namespace

std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
table_view const& blueprint)
{
// Bail, if LISTs are present.
auto const has_lists = std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists);
CUDF_EXPECTS(not has_lists, "Unflattening LIST columns is not supported.");

// If there are no STRUCTs, unflattening is a NOOP.
auto const has_structs = std::any_of(blueprint.begin(), blueprint.end(), is_struct);
if (not has_structs) {
return std::move(flattened); // Unchanged.
}

// There be struct columns.
// Note: Requires null vectors for all struct input columns.
auto flattened_columns = flattened->release();
auto current_idx = column_index_t{0};

auto unflattening_iter =
thrust::make_transform_iterator(blueprint.begin(), unflattener{flattened_columns, current_idx});

return std::make_unique<cudf::table>(
vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_columns()});
}

// Helper function to superimpose validity of parent struct
// over the specified member (child) column.
void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
Expand Down
159 changes: 123 additions & 36 deletions cpp/tests/structs/utilities_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,26 +30,13 @@

namespace cudf::test {

/**
* @brief Round-trip input table through flatten/unflatten,
* verify that the table remains equivalent.
*/
void flatten_unflatten_compare(table_view const& input_table)
{
using namespace cudf::structs::detail;

auto flattened = flatten_nested_columns(input_table, {}, {}, column_nullability::FORCE);
auto unflattened =
unflatten_nested_columns(std::make_unique<cudf::table>(flattened), input_table);

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, unflattened->view());
}

using namespace cudf;
using namespace iterators;
using namespace cudf::structs::detail;
using strings = strings_column_wrapper;
using dictionary = dictionary_column_wrapper<std::string>;
using structs = structs_column_wrapper;
using bools = fixed_width_column_wrapper<bool>;

template <typename T>
using nums = fixed_width_column_wrapper<T, int32_t>;
Expand All @@ -66,7 +53,7 @@ struct TypedStructUtilitiesTest : StructUtilitiesTest {

TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes);

TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported)
TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel)
{
using T = TypeParam;
using lists = lists_column_wrapper<T, int32_t>;
Expand All @@ -75,8 +62,10 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported)
auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}};
auto nums_col = nums{{0, 1, 2}, null_at(6)};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{lists_col, nums_col}}),
cudf::logic_error);
auto table = cudf::table_view{{lists_col, nums_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(table,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
Expand All @@ -88,10 +77,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}};
auto nums_member = nums{{0, 1, 2}, null_at(6)};
auto structs_col = structs{{nums_member, lists_member}};
auto nums_col = nums{{0, 1, 2}, null_at(6)};

auto nums_col = nums{{0, 1, 2}, null_at(6)};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}),
EXPECT_THROW(flatten_nested_columns(
cudf::table_view{{nums_col, structs_col}}, {}, {}, column_nullability::FORCE),
cudf::logic_error);
}

Expand All @@ -104,7 +93,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs)
auto strings_col = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, strings_col, nuther_nums_col}});
auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(table,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
Expand All @@ -116,8 +108,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto structs_col = structs{{nums_member, strings_member}};
auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}});
auto table = cudf::table_view{{nums_col, structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
auto expected_strings_col =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(1));
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
Expand All @@ -129,8 +132,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
auto structs_col = structs{{nums_member, strings_member}, null_at(2)};
auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};

flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}});
auto table = cudf::table_view{{nums_col, structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
auto expected_strings_col =
cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(1));
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
Expand All @@ -147,8 +161,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)

auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
Expand All @@ -166,8 +198,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)

auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
Expand All @@ -185,8 +235,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col =
structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
Expand All @@ -205,8 +273,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
auto struct_of_structs_col =
structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};

flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}});
auto table = cudf::table_view{{nums_col, struct_of_structs_col}};

auto expected_nums_col_1 = cudf::column(nums_col);
auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
auto expected_nums_col_2 =
cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}};
auto expected_nums_col_3 = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
auto expected_strings_col = cudf::column(
static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
auto expected = cudf::table_view{{expected_nums_col_1,
expected_structs_col_1,
expected_nums_col_2,
expected_structs_col_2,
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
}

TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)
Expand All @@ -222,7 +308,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)

auto structs_with_lists_col = structs{lists_member, ints_member};

EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{structs_with_lists_col}}),
EXPECT_THROW(flatten_nested_columns(
cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE),
cudf::logic_error);
}

Expand Down

0 comments on commit f31e5bd

Please sign in to comment.