From 082e1b7e117c8912cd1b4beb2b4f5919d696f3cf Mon Sep 17 00:00:00 2001 From: Andrea Zonca Date: Fri, 3 May 2024 12:20:45 -0700 Subject: [PATCH] fix: is_valid checks in header-only library (#3091) * feat: set an explicit index in Indexed and IndexedOption Useful in particular for categorical builders * test: new tests checking presence of a bug in is_valid * style: pre-commit fixes * test: renamed test file with pull request number * test: use const instead of define * test: use ! instead of not operator (issue with Windows) * test: switch bug_fixed to 1, tests will fail until bug fixed * fix: reimplement is_valid to check max of index_ against len of content * test: print out error message * fix: is_valid in IndexedOption now checks index_ is less than len of content * test: IndexOption should have signed index to accommodate -1 * test: print out error from is_valid * fix: reimplemented is_valid for Union, now checks index is below length of content for each tag * feat: implement max_index tracking in Indexed * fix: set max_index_ also in extend_index * feat: append_valid() now calls append_valid(i) * fix: set max_index_ in extend_valid * feat: implement is_valid with max_index_ * feat: check index of Indexed is >0, if not set max_index_ to max int * feat: custom error message for negative index --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../layout-builder/awkward/LayoutBuilder.h | 93 ++++++++++---- header-only/tests/CMakeLists.txt | 4 + .../test_3091-layout-builder-is-valid.cpp | 116 ++++++++++++++++++ 3 files changed, 191 insertions(+), 22 deletions(-) create mode 100644 header-only/tests/test_3091-layout-builder-is-valid.cpp diff --git a/header-only/layout-builder/awkward/LayoutBuilder.h b/header-only/layout-builder/awkward/LayoutBuilder.h index 03ab0ba0d6..89df74022a 100644 --- a/header-only/layout-builder/awkward/LayoutBuilder.h +++ b/header-only/layout-builder/awkward/LayoutBuilder.h @@ -1121,7 +1121,8 @@ namespace awkward { /// buffer, using `AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS` for initializing the buffer. Indexed() : index_( - awkward::GrowableBuffer(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)) { + awkward::GrowableBuffer(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)), + max_index_(0) { size_t id = 0; set_id(id); } @@ -1132,7 +1133,8 @@ namespace awkward { /// /// @param options Initial size configuration of a buffer. Indexed(const awkward::BuilderOptions& options) - : index_(awkward::GrowableBuffer(options)) { + : index_(awkward::GrowableBuffer(options)), + max_index_(0) { size_t id = 0; set_id(id); } @@ -1151,6 +1153,19 @@ namespace awkward { return content_; } + /// @brief Inserts an explicit value in the `index` buffer and + /// returns the reference to the builder content. + BUILDER& + append_index(size_t i) noexcept { + index_.append(i); + if (i > max_index_) { + max_index_ = i; + } else if (i < 0) { + max_index_ = UINTMAX_MAX; + } + return content_; + } + /// @brief Inserts `size` number indices in the `index` buffer /// and returns the reference to the builder content. /// @@ -1159,6 +1174,9 @@ namespace awkward { extend_index(size_t size) noexcept { size_t start = content_.length(); size_t stop = start + size; + if (stop - 1 > max_index_) { + max_index_ = stop - 1; + } for (size_t i = start; i < stop; i++) { index_.append(i); } @@ -1189,6 +1207,7 @@ namespace awkward { /// of the builder. void clear() noexcept { + max_index_ = 0; index_.clear(); content_.clear(); } @@ -1211,17 +1230,21 @@ namespace awkward { /// @brief Checks for validity and consistency. bool is_valid(std::string& error) const noexcept { - if (content_.length() != index_.length()) { + + if (max_index_ == UINTMAX_MAX) { std::stringstream out; - out << "Indexed node" << id_ << " has content length " - << content_.length() << " but index has length " << index_.length() - << "\n"; + out << "Indexed node" << id_ << " has a negative index\n"; + error.append(out.str()); + return false; + } else if (max_index_ >= content_.length()) { + std::stringstream out; + out << "Indexed node" << id_ << " has index " << max_index_ + << " but content has length " + << content_.length() << "\n"; error.append(out.str()); - return false; - } else { - return content_.is_valid(error); } + return content_.is_valid(error); } /// @brief Copies and concatenates all the accumulated data in each of the @@ -1290,6 +1313,9 @@ namespace awkward { /// @brief Unique form ID. size_t id_; + + /// @brief Keep track of maximum index value. + size_t max_index_; }; /// @class IndexedOption @@ -1310,7 +1336,8 @@ namespace awkward { IndexedOption() : index_( awkward::GrowableBuffer(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)), - last_valid_(-1) { + last_valid_(-1), + max_index_(0) { size_t id = 0; set_id(id); } @@ -1322,7 +1349,8 @@ namespace awkward { /// @param options Initial size configuration of a buffer. IndexedOption(const awkward::BuilderOptions& options) : index_(awkward::GrowableBuffer(options)), - last_valid_(-1) { + last_valid_(-1), + max_index_(0) { size_t id = 0; set_id(id); } @@ -1338,7 +1366,18 @@ namespace awkward { BUILDER& append_valid() noexcept { last_valid_ = content_.length(); - index_.append(last_valid_); + return append_valid(last_valid_); + } + + /// @brief Inserts an explicit value in the `index` buffer and + /// returns the reference to the builder content. + BUILDER& + append_valid(size_t i) noexcept { + last_valid_ = content_.length(); + index_.append(i); + if (i > max_index_) { + max_index_ = i; + } return content_; } @@ -1351,6 +1390,9 @@ namespace awkward { size_t start = content_.length(); size_t stop = start + size; last_valid_ = stop - 1; + if (last_valid_ > max_index_) { + max_index_ = last_valid_; + } for (size_t i = start; i < stop; i++) { index_.append(i); } @@ -1411,17 +1453,16 @@ namespace awkward { /// @brief Checks for validity and consistency. bool is_valid(std::string& error) const noexcept { - if (content_.length() != last_valid_ + 1) { + if (max_index_ >= content_.length()) { std::stringstream out; - out << "IndexedOption node" << id_ << " has content length " - << content_.length() << " but last valid index is " << last_valid_ - << "\n"; + out << "IndexedOption node" << id_ << " has index " << max_index_ + << " but content has length " + << content_.length() << "\n"; error.append(out.str()); return false; - } else { - return content_.is_valid(error); } + return content_.is_valid(error); } /// @brief Retrieves the names and sizes (in bytes) of the buffers used @@ -1502,6 +1543,9 @@ namespace awkward { /// @brief Last valid index. size_t last_valid_; + + /// @brief Keep track of maximum index value. + size_t max_index_; }; /// @class Unmasked @@ -2298,11 +2342,16 @@ namespace awkward { auto index_sequence((std::index_sequence_for())); std::vector lengths = content_lengths(index_sequence); - for (size_t tag = 0; tag < contents_count_; tag++) { - if (lengths[tag] != last_valid_index_[tag] + 1) { + std::unique_ptr index_ptr(new INDEX[index_.length()]); + index_.concatenate(index_ptr.get()); + std::unique_ptr tags_ptr(new TAGS[tags_.length()]); + tags_.concatenate(tags_ptr.get()); + for (size_t i = 0; i < index_.length(); i++) { + if (index_ptr.get()[i] < 0 || index_ptr.get()[i] >= lengths[tags_ptr.get()[i]]) { std::stringstream out; - out << "Union node" << id_ << " has content length " << lengths[tag] - << " but index length " << last_valid_index_[tag] << "\n"; + out << "Union node" << id_ << " has index " << index_ptr.get()[i] + << " at position " << i << " but content has length " + << lengths[tags_ptr.get()[i]] << "\n"; error.append(out.str()); return false; diff --git a/header-only/tests/CMakeLists.txt b/header-only/tests/CMakeLists.txt index 3d16722ce7..8fe000acee 100644 --- a/header-only/tests/CMakeLists.txt +++ b/header-only/tests/CMakeLists.txt @@ -14,9 +14,13 @@ endmacro(addtest_nolibs) addtest_nolibs(test_1494-layout-builder test_1494-layout-builder.cpp) addtest_nolibs(test_1542-growable-buffer test_1542-growable-buffer.cpp) addtest_nolibs(test_1560-builder-options test_1560-builder-options.cpp) +addtest_nolibs(test_3091-layout-builder-is-valid + test_3091-layout-builder-is-valid.cpp) target_link_libraries(test_1494-layout-builder PRIVATE awkward::layout-builder) target_link_libraries(test_1542-growable-buffer PRIVATE awkward::growable-buffer) target_link_libraries(test_1560-builder-options PRIVATE awkward::builder-options) +target_link_libraries(test_3091-layout-builder-is-valid + PRIVATE awkward::layout-builder) diff --git a/header-only/tests/test_3091-layout-builder-is-valid.cpp b/header-only/tests/test_3091-layout-builder-is-valid.cpp new file mode 100644 index 0000000000..c18e757539 --- /dev/null +++ b/header-only/tests/test_3091-layout-builder-is-valid.cpp @@ -0,0 +1,116 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +#include "awkward/LayoutBuilder.h" + +// if BUG_FIXED is false, the tests confirm the presence of the bugs in is_valid +const bool BUG_FIXED=1; + +template +using IndexedBuilder = awkward::LayoutBuilder::Indexed; + +template +using IndexedOptionBuilder = awkward::LayoutBuilder::IndexedOption; + +template +using StringBuilder = awkward::LayoutBuilder::String; + +void +test_Indexed_categorical() { + IndexedBuilder> builder; + assert(builder.length() == 0); + builder.set_parameters("\"__array__\": \"categorical\""); + + auto& subbuilder = builder.append_index(0); + builder.append_index(1); + + subbuilder.append("zero"); + subbuilder.append("one"); + + std::string error; + assert(builder.is_valid(error) == true); + + // index and content could have different lengths + builder.append_index(1); + assert(builder.is_valid(error) == BUG_FIXED); +} + +void +test_Indexed_categorical_invalid_index() { + IndexedBuilder> builder; + assert(builder.length() == 0); + builder.set_parameters("\"__array__\": \"categorical\""); + + auto& subbuilder = builder.append_index(0); + builder.append_index(1); + + subbuilder.append("zero"); + subbuilder.append("one"); + + std::string error; + assert(builder.is_valid(error) == true); + + // index should be less than the length of content + subbuilder.append("two"); + builder.append_index(9); + bool assertion = builder.is_valid(error) == !BUG_FIXED; + std::cout << error << std::endl; + assert(assertion); +} + +void +test_IndexedOption_categorical() { + IndexedOptionBuilder> builder; + assert(builder.length() == 0); + builder.set_parameters("\"__array__\": \"categorical\""); + + builder.append_invalid(); + auto& subbuilder = builder.append_valid(1); + subbuilder.append("zero"); + builder.append_valid(1); + subbuilder.append("one"); + + std::string error; + bool assertion = builder.is_valid(error); + std::cout << error << std::endl; + assert(assertion); + + // index and content could have different lengths + builder.append_valid(1); + builder.append_valid(1); + builder.append_valid(1); + assert(builder.is_valid(error) == BUG_FIXED); +} + +void +test_IndexedOption_categorical_invalid_index() { + IndexedOptionBuilder> builder; + assert(builder.length() == 0); + builder.set_parameters("\"__array__\": \"categorical\""); + + builder.append_invalid(); + auto& subbuilder = builder.append_valid(1); + subbuilder.append("zero"); + builder.append_valid(1); + subbuilder.append("one"); + + std::string error; + bool assertion = builder.is_valid(error); + std::cout << error << std::endl; + assert(assertion); + + // index should be less than the length of content + builder.append_valid(9); + subbuilder.append("two"); + assertion = builder.is_valid(error) == !BUG_FIXED; + std::cout << error << std::endl; + assert(assertion); +} + +int main(int /* argc */, char ** /* argv */) { + test_Indexed_categorical(); + test_Indexed_categorical_invalid_index(); + test_IndexedOption_categorical(); + test_IndexedOption_categorical_invalid_index(); + + return 0; +}