diff --git a/include/seqan3/search/fm_index/detail/fm_index_cursor.hpp b/include/seqan3/search/fm_index/detail/fm_index_cursor.hpp index 70c3300950a..5c16421b9e7 100644 --- a/include/seqan3/search/fm_index/detail/fm_index_cursor.hpp +++ b/include/seqan3/search/fm_index/detail/fm_index_cursor.hpp @@ -62,19 +62,6 @@ struct fm_index_cursor_node return !(*this == rhs); } }; - -// std::tuple get_suffix_array_range(fm_index_cursor const & it) -// { -// return {node.lb, node.rb}; -// } -// -// std::tuple get_suffix_array_range(bi_fm_index_cursor const & it) -// { -// return {node.lb, node.rb}; -// } - -//!\publicsection - //!\} } diff --git a/include/seqan3/search/fm_index/fm_index_cursor.hpp b/include/seqan3/search/fm_index/fm_index_cursor.hpp index 3e94a52caf8..3f46ff71b7e 100644 --- a/include/seqan3/search/fm_index/fm_index_cursor.hpp +++ b/include/seqan3/search/fm_index/fm_index_cursor.hpp @@ -92,7 +92,7 @@ class fm_index_cursor //!\brief Right suffix array interval of the parent node. Needed for cycle_back(). size_type parent_rb{}; - //!\brief Underlying index from the SDSL. - node_type node{}; + //!\brief Suffix tree node the cursor points to (suffix array interval, depth and last character). + node_type node_{}; //!\brief Alphabet size of the index without delimiters sdsl_sigma_type sigma{}; @@ -165,7 +165,7 @@ class fm_index_cursor //! \brief Construct from given index. fm_index_cursor(index_t const & _index) noexcept : index(&_index), - node({0, _index.index.size() - 1, 0, 0}), + node_({0, _index.index.size() - 1, 0, 0}), sigma(_index.index.sigma - index_t::text_layout_mode) {} //\} @@ -185,11 +185,11 @@ class fm_index_cursor bool operator==(fm_index_cursor const & rhs) const noexcept { assert(index != nullptr); - assert(node != rhs.node || (query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb))); + assert(node_ != rhs.node_ || query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb)); // position in the implicit suffix tree is defined by the SA interval and depth. 
// No need to compare parent intervals - return node == rhs.node; + return node_ == rhs.node_; } /*!\brief Compares two cursors. @@ -235,7 +235,7 @@ class fm_index_cursor assert(index != nullptr); sdsl_char_type c = 1; // NOTE: start with 0 or 1 depending on implicit_sentintel - size_type _lb = node.lb, _rb = node.rb; + size_type _lb = node_.lb, _rb = node_.rb; while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb)) { ++c; @@ -243,9 +243,9 @@ class fm_index_cursor if (c != sigma) { - parent_lb = node.lb; - parent_rb = node.rb; - node = {_lb, _rb, node.depth + 1, c}; + parent_lb = node_.lb; + parent_rb = node_.rb; + node_ = {_lb, _rb, node_.depth + 1, c}; return true; } return false; @@ -276,15 +276,15 @@ class fm_index_cursor assert(seqan3::to_rank(static_cast(c)) < ((index_type::text_layout_mode == text_layout::single) ? 255 : 254)); - size_type _lb = node.lb, _rb = node.rb; + size_type _lb = node_.lb, _rb = node_.rb; sdsl_char_type c_char = seqan3::to_rank(static_cast(c)) + 1; if (backward_search(index->index, c_char, _lb, _rb)) { - parent_lb = node.lb; - parent_rb = node.rb; - node = {_lb, _rb, node.depth + 1, c_char}; + parent_lb = node_.lb; + parent_rb = node_.rb; + node_ = {_lb, _rb, node_.depth + 1, c_char}; return true; } return false; @@ -325,7 +325,7 @@ class fm_index_cursor assert(index != nullptr); // range must not be empty! 
- size_type _lb = node.lb, _rb = node.rb; + size_type _lb = node_.lb, _rb = node_.rb; size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb; sdsl_char_type c{}; @@ -348,7 +348,7 @@ class fm_index_cursor parent_lb = new_parent_lb; parent_rb = new_parent_rb; - node = {_lb, _rb, len + node.depth, c}; + node_ = {_lb, _rb, len + node_.depth, c}; return true; } @@ -384,7 +384,7 @@ class fm_index_cursor // parent_lb > parent_rb --> invalid interval assert(parent_lb <= parent_rb); - sdsl_char_type c = node.last_char + 1; + sdsl_char_type c = node_.last_char + 1; size_type _lb = parent_lb, _rb = parent_rb; while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb)) @@ -394,7 +394,7 @@ class fm_index_cursor if (c != sigma) // Collection has additional sentinel as delimiter { - node = {_lb, _rb, node.depth, c}; + node_ = {_lb, _rb, node_.depth, c}; return true; } return false; @@ -420,7 +420,7 @@ class fm_index_cursor // parent_lb > parent_rb --> invalid interval assert(index != nullptr && query_length() > 0 && parent_lb <= parent_rb); - return index->index.comp2char[node.last_char] - 1; // text is not allowed to contain ranks of 0 + return index->index.comp2char[node_.last_char] - 1; // text is not allowed to contain ranks of 0 } /*!\brief Returns the length of the searched query. @@ -440,9 +440,9 @@ class fm_index_cursor size_type query_length() const noexcept { assert(index != nullptr); - assert(node.depth != 0 || (node.lb == 0 && node.rb == index->size() - 1)); // depth == 0 -> root node + assert(node_.depth != 0 || (node_.lb == 0 && node_.rb == index->size() - 1)); // depth == 0 -> root node - return node.depth; + return node_.depth; } /*!\brief Returns the searched query. 
@@ -474,7 +474,7 @@ class fm_index_cursor "The alphabet types of the given text and index differ."); assert(index != nullptr); - size_type const query_begin = offset() - index->index[node.lb]; + size_type const query_begin = offset() - index->index[node_.lb]; return text | views::slice(query_begin, query_begin + query_length()); } @@ -492,7 +492,7 @@ class fm_index_cursor assert(index != nullptr); // Position of query in concatenated text. - size_type const location = offset() - index->index[node.lb]; + size_type const location = offset() - index->index[node_.lb]; // The rank represents the number of start positions of the individual sequences/texts in the collection // before position `location + 1` and thereby also the number of delimiters. @@ -524,7 +524,7 @@ class fm_index_cursor { assert(index != nullptr); - return 1 + node.rb - node.lb; + return 1 + node_.rb - node_.lb; } /*!\brief Locates the occurrences of the searched query in the text. @@ -549,7 +549,7 @@ class fm_index_cursor occ.reserve(count()); for (size_type i = 0; i < count(); ++i) { - occ.emplace_back(0, offset() - index->index[node.lb + i]); + occ.emplace_back(0, offset() - index->index[node_.lb + i]); } return occ; @@ -567,7 +567,7 @@ class fm_index_cursor occ.reserve(count()); for (size_type i = 0; i < count(); ++i) { - size_type loc = offset() - index->index[node.lb + i]; + size_type loc = offset() - index->index[node_.lb + i]; size_type sequence_rank = index->text_begin_rs.rank(loc + 1); size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank); occ.emplace_back(sequence_rank - 1, sequence_position); @@ -594,7 +594,7 @@ class fm_index_cursor { assert(index != nullptr); - return std::views::iota(node.lb, node.lb + count()) + return std::views::iota(node_.lb, node_.lb + count()) | std::views::transform([*this, _offset = offset()] (auto sa_pos) { return locate_result_value_type{0u, _offset - index->index[sa_pos]}; @@ -609,7 +609,7 @@ class fm_index_cursor { assert(index != 
nullptr); - return std::views::iota(node.lb, node.lb + count()) + return std::views::iota(node_.lb, node_.lb + count()) | std::views::transform([*this, _offset = offset()] (auto sa_pos) { return _offset - index->index[sa_pos]; @@ -621,6 +621,16 @@ class fm_index_cursor return locate_result_value_type{sequence_rank - 1, sequence_position}; }); } + + //!\cond DEV + /*!\brief Returns the suffix tree node. + * \sa seqan3::detail::fm_index_cursor_node + */ + node_type const & node() const noexcept + { + return node_; + } + //!\endcond }; //!\} diff --git a/test/unit/search/fm_index_cursor/fm_index_cursor_collection_test_template.hpp b/test/unit/search/fm_index_cursor/fm_index_cursor_collection_test_template.hpp index 9bc43860f27..ee63a559989 100644 --- a/test/unit/search/fm_index_cursor/fm_index_cursor_collection_test_template.hpp +++ b/test/unit/search/fm_index_cursor/fm_index_cursor_collection_test_template.hpp @@ -348,7 +348,23 @@ TYPED_TEST_P(fm_index_cursor_collection_test, concept_check) EXPECT_TRUE(seqan3::fm_index_cursor_specialisation); } +TYPED_TEST_P(fm_index_cursor_collection_test, node_access) +{ + using fm_index_type = typename TypeParam::index_type; + if constexpr (!seqan3::bi_fm_index_specialisation) + { + fm_index_type fm{this->text_col1}; // {"ACGACG", "ACGACG"} + TypeParam it = TypeParam(fm); + + EXPECT_TRUE(it.node() == (seqan3::detail::fm_index_cursor_node{0, fm.size() - 1, 0, 0})); + + it.extend_right(seqan3::assign_char_to('A', typename TestFixture::alphabet_type{})); + + EXPECT_TRUE(it.node() == (seqan3::detail::fm_index_cursor_node{1, 4, 1, 0})); + } +} + REGISTER_TYPED_TEST_SUITE_P(fm_index_cursor_collection_test, ctr, begin, extend_right_range, extend_right_range_empty_text, extend_right_char, extend_right_range_and_cycle, extend_right_char_and_cycle, extend_right_and_cycle, query, last_rank, incomplete_alphabet, - lazy_locate, extend_const_char_pointer, concept_check); + lazy_locate, extend_const_char_pointer, concept_check, node_access); 
diff --git a/test/unit/search/fm_index_cursor/fm_index_cursor_test_template.hpp b/test/unit/search/fm_index_cursor/fm_index_cursor_test_template.hpp index da26f256edd..e7f05d8e2ca 100644 --- a/test/unit/search/fm_index_cursor/fm_index_cursor_test_template.hpp +++ b/test/unit/search/fm_index_cursor/fm_index_cursor_test_template.hpp @@ -300,6 +300,22 @@ TYPED_TEST_P(fm_index_cursor_test, concept_check) EXPECT_TRUE(seqan3::fm_index_cursor_specialisation); } +TYPED_TEST_P(fm_index_cursor_test, node_access) +{ + using fm_index_type = typename TypeParam::index_type; + if constexpr (!seqan3::bi_fm_index_specialisation) + { + fm_index_type fm{this->text1}; // "ACGACG" + TypeParam it = TypeParam(fm); + + EXPECT_TRUE(it.node() == (seqan3::detail::fm_index_cursor_node{0, fm.size() - 1, 0, 0})); + + it.extend_right(seqan3::assign_char_to('A', typename TestFixture::alphabet_type{})); + + EXPECT_TRUE(it.node() == (seqan3::detail::fm_index_cursor_node{1, 2, 1, 0})); + } +} + REGISTER_TYPED_TEST_SUITE_P(fm_index_cursor_test, ctr, begin, extend_right_range, extend_right_char, extend_right_range_and_cycle, extend_right_char_and_cycle, extend_right_and_cycle, query, - last_rank, incomplete_alphabet, lazy_locate, concept_check); + last_rank, incomplete_alphabet, lazy_locate, concept_check, node_access);