Skip to content

Commit

Permalink
[FEATURE] Node access for fm_index_cursor
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Aug 27, 2020
1 parent f12f197 commit 01bffc8
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 41 deletions.
13 changes: 0 additions & 13 deletions include/seqan3/search/fm_index/detail/fm_index_cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,6 @@ struct fm_index_cursor_node
return !(*this == rhs);
}
};

// std::tuple get_suffix_array_range(fm_index_cursor<index_t> const & it)
// {
// return {node.lb, node.rb};
// }
//
// std::tuple get_suffix_array_range(bi_fm_index_cursor<index_t> const & it)
// {
// return {node.lb, node.rb};
// }

//!\publicsection

//!\}

}
62 changes: 36 additions & 26 deletions include/seqan3/search/fm_index/fm_index_cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class fm_index_cursor
//!\brief Right suffix array interval of the parent node. Needed for cycle_back().
size_type parent_rb{};
//!\brief The current suffix tree node (suffix array interval, depth and last character).
node_type node{};
node_type node_{};
//!\brief Alphabet size of the index without delimiters
sdsl_sigma_type sigma{};

Expand Down Expand Up @@ -165,7 +165,7 @@ class fm_index_cursor
//! \brief Construct from given index.
fm_index_cursor(index_t const & _index) noexcept :
index(&_index),
node({0, _index.index.size() - 1, 0, 0}),
node_({0, _index.index.size() - 1, 0, 0}),
sigma(_index.index.sigma - index_t::text_layout_mode)
{}
//\}
Expand All @@ -185,11 +185,11 @@ class fm_index_cursor
bool operator==(fm_index_cursor const & rhs) const noexcept
{
assert(index != nullptr);
assert(node != rhs.node || (query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb)));
assert(node_ != rhs.node_ || query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb));

// position in the implicit suffix tree is defined by the SA interval and depth.
// No need to compare parent intervals
return node == rhs.node;
return node_ == rhs.node_;
}

/*!\brief Compares two cursors.
Expand Down Expand Up @@ -235,17 +235,17 @@ class fm_index_cursor
assert(index != nullptr);

sdsl_char_type c = 1; // NOTE: start with 0 or 1 depending on implicit_sentintel
size_type _lb = node.lb, _rb = node.rb;
size_type _lb = node_.lb, _rb = node_.rb;
while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
{
++c;
}

if (c != sigma)
{
parent_lb = node.lb;
parent_rb = node.rb;
node = {_lb, _rb, node.depth + 1, c};
parent_lb = node_.lb;
parent_rb = node_.rb;
node_ = {_lb, _rb, node_.depth + 1, c};
return true;
}
return false;
Expand Down Expand Up @@ -276,15 +276,15 @@ class fm_index_cursor
assert(seqan3::to_rank(static_cast<index_alphabet_type>(c)) <
((index_type::text_layout_mode == text_layout::single) ? 255 : 254));

size_type _lb = node.lb, _rb = node.rb;
size_type _lb = node_.lb, _rb = node_.rb;

sdsl_char_type c_char = seqan3::to_rank(static_cast<index_alphabet_type>(c)) + 1;

if (backward_search(index->index, c_char, _lb, _rb))
{
parent_lb = node.lb;
parent_rb = node.rb;
node = {_lb, _rb, node.depth + 1, c_char};
parent_lb = node_.lb;
parent_rb = node_.rb;
node_ = {_lb, _rb, node_.depth + 1, c_char};
return true;
}
return false;
Expand Down Expand Up @@ -325,7 +325,7 @@ class fm_index_cursor

assert(index != nullptr); // range must not be empty!

size_type _lb = node.lb, _rb = node.rb;
size_type _lb = node_.lb, _rb = node_.rb;
size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;

sdsl_char_type c{};
Expand All @@ -348,7 +348,7 @@ class fm_index_cursor

parent_lb = new_parent_lb;
parent_rb = new_parent_rb;
node = {_lb, _rb, len + node.depth, c};
node_ = {_lb, _rb, len + node_.depth, c};
return true;
}

Expand Down Expand Up @@ -384,7 +384,7 @@ class fm_index_cursor
// parent_lb > parent_rb --> invalid interval
assert(parent_lb <= parent_rb);

sdsl_char_type c = node.last_char + 1;
sdsl_char_type c = node_.last_char + 1;
size_type _lb = parent_lb, _rb = parent_rb;

while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
Expand All @@ -394,7 +394,7 @@ class fm_index_cursor

if (c != sigma) // Collection has additional sentinel as delimiter
{
node = {_lb, _rb, node.depth, c};
node_ = {_lb, _rb, node_.depth, c};
return true;
}
return false;
Expand All @@ -420,7 +420,7 @@ class fm_index_cursor
// parent_lb > parent_rb --> invalid interval
assert(index != nullptr && query_length() > 0 && parent_lb <= parent_rb);

return index->index.comp2char[node.last_char] - 1; // text is not allowed to contain ranks of 0
return index->index.comp2char[node_.last_char] - 1; // text is not allowed to contain ranks of 0
}

/*!\brief Returns the length of the searched query.
Expand All @@ -440,9 +440,9 @@ class fm_index_cursor
size_type query_length() const noexcept
{
assert(index != nullptr);
assert(node.depth != 0 || (node.lb == 0 && node.rb == index->size() - 1)); // depth == 0 -> root node
assert(node_.depth != 0 || (node_.lb == 0 && node_.rb == index->size() - 1)); // depth == 0 -> root node

return node.depth;
return node_.depth;
}

/*!\brief Returns the searched query.
Expand Down Expand Up @@ -474,7 +474,7 @@ class fm_index_cursor
"The alphabet types of the given text and index differ.");
assert(index != nullptr);

size_type const query_begin = offset() - index->index[node.lb];
size_type const query_begin = offset() - index->index[node_.lb];
return text | views::slice(query_begin, query_begin + query_length());
}

Expand All @@ -492,7 +492,7 @@ class fm_index_cursor
assert(index != nullptr);

// Position of query in concatenated text.
size_type const location = offset() - index->index[node.lb];
size_type const location = offset() - index->index[node_.lb];

// The rank represents the number of start positions of the individual sequences/texts in the collection
// before position `location + 1` and thereby also the number of delimiters.
Expand Down Expand Up @@ -524,7 +524,7 @@ class fm_index_cursor
{
assert(index != nullptr);

return 1 + node.rb - node.lb;
return 1 + node_.rb - node_.lb;
}

/*!\brief Locates the occurrences of the searched query in the text.
Expand All @@ -549,7 +549,7 @@ class fm_index_cursor
occ.reserve(count());
for (size_type i = 0; i < count(); ++i)
{
occ.emplace_back(0, offset() - index->index[node.lb + i]);
occ.emplace_back(0, offset() - index->index[node_.lb + i]);
}

return occ;
Expand All @@ -567,7 +567,7 @@ class fm_index_cursor
occ.reserve(count());
for (size_type i = 0; i < count(); ++i)
{
size_type loc = offset() - index->index[node.lb + i];
size_type loc = offset() - index->index[node_.lb + i];
size_type sequence_rank = index->text_begin_rs.rank(loc + 1);
size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank);
occ.emplace_back(sequence_rank - 1, sequence_position);
Expand All @@ -594,7 +594,7 @@ class fm_index_cursor
{
assert(index != nullptr);

return std::views::iota(node.lb, node.lb + count())
return std::views::iota(node_.lb, node_.lb + count())
| std::views::transform([*this, _offset = offset()] (auto sa_pos)
{
return locate_result_value_type{0u, _offset - index->index[sa_pos]};
Expand All @@ -609,7 +609,7 @@ class fm_index_cursor
{
assert(index != nullptr);

return std::views::iota(node.lb, node.lb + count())
return std::views::iota(node_.lb, node_.lb + count())
| std::views::transform([*this, _offset = offset()] (auto sa_pos)
{
return _offset - index->index[sa_pos];
Expand All @@ -621,6 +621,16 @@ class fm_index_cursor
return locate_result_value_type{sequence_rank - 1, sequence_position};
});
}

//!\cond DEV
/*!\brief Returns the suffix tree node the cursor currently represents.
 * \returns A const reference to the node stored inside this cursor.
 *
 * \details
 *
 * The node holds the suffix array interval (`lb`, `rb`), the `depth` and the `last_char` of the cursor.
 * The reference refers to a data member of this cursor: it is only valid while the cursor is alive, and it
 * observes later changes made to the cursor's node.
 *
 * Only part of the developer API (guarded by `\cond DEV`).
 *
 * \sa seqan3::detail::fm_index_cursor_node
 */
node_type const & node() const noexcept
{
return node_;
}
//!\endcond
};

//!\}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,23 @@ TYPED_TEST_P(fm_index_cursor_collection_test, concept_check)
EXPECT_TRUE(seqan3::fm_index_cursor_specialisation<TypeParam>);
}

// Verifies that node() reports the expected suffix tree node for a cursor over a text collection,
// both directly after construction and after extending the query by one character.
// Only unidirectional FM indices are checked; for bidirectional indices the test body is discarded.
TYPED_TEST_P(fm_index_cursor_collection_test, node_access)
{
    using fm_index_type = typename TypeParam::index_type;

    if constexpr (!seqan3::bi_fm_index_specialisation<fm_index_type>)
    {
        using expected_node_t = seqan3::detail::fm_index_cursor_node<fm_index_type>;
        using alphabet_t = typename TestFixture::alphabet_type;

        fm_index_type index{this->text_col1}; // {"ACGACG", "ACGACG"}
        TypeParam cursor(index);

        // A fresh cursor covers the whole suffix array at depth 0.
        expected_node_t const root_node{0, index.size() - 1, 0, 0};
        EXPECT_TRUE(cursor.node() == root_node);

        cursor.extend_right(seqan3::assign_char_to('A', alphabet_t{}));

        // After searching 'A': interval [1, 4] at depth 1.
        // NOTE(review): the last field (last_char) is given as 0 here although extend_right stores a non-zero
        // value; presumably the node's operator== does not compare last_char - confirm in fm_index_cursor_node.
        expected_node_t const node_after_a{1, 4, 1, 0};
        EXPECT_TRUE(cursor.node() == node_after_a);
    }
}

REGISTER_TYPED_TEST_SUITE_P(fm_index_cursor_collection_test, ctr, begin, extend_right_range,
extend_right_range_empty_text, extend_right_char, extend_right_range_and_cycle,
extend_right_char_and_cycle, extend_right_and_cycle, query, last_rank, incomplete_alphabet,
lazy_locate, extend_const_char_pointer, concept_check);
lazy_locate, extend_const_char_pointer, concept_check, node_access);
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,22 @@ TYPED_TEST_P(fm_index_cursor_test, concept_check)
EXPECT_TRUE(seqan3::fm_index_cursor_specialisation<TypeParam>);
}

// Verifies that node() reports the expected suffix tree node for a cursor over a single text,
// both directly after construction and after extending the query by one character.
// Only unidirectional FM indices are checked; for bidirectional indices the test body is discarded.
TYPED_TEST_P(fm_index_cursor_test, node_access)
{
    using fm_index_type = typename TypeParam::index_type;

    if constexpr (!seqan3::bi_fm_index_specialisation<fm_index_type>)
    {
        using expected_node_t = seqan3::detail::fm_index_cursor_node<fm_index_type>;
        using alphabet_t = typename TestFixture::alphabet_type;

        fm_index_type index{this->text1}; // "ACGACG"
        TypeParam cursor(index);

        // A fresh cursor covers the whole suffix array at depth 0.
        expected_node_t const root_node{0, index.size() - 1, 0, 0};
        EXPECT_TRUE(cursor.node() == root_node);

        cursor.extend_right(seqan3::assign_char_to('A', alphabet_t{}));

        // After searching 'A': interval [1, 2] at depth 1.
        // NOTE(review): the last field (last_char) is given as 0 here although extend_right stores a non-zero
        // value; presumably the node's operator== does not compare last_char - confirm in fm_index_cursor_node.
        expected_node_t const node_after_a{1, 2, 1, 0};
        EXPECT_TRUE(cursor.node() == node_after_a);
    }
}

REGISTER_TYPED_TEST_SUITE_P(fm_index_cursor_test, ctr, begin, extend_right_range, extend_right_char,
extend_right_range_and_cycle, extend_right_char_and_cycle, extend_right_and_cycle, query,
last_rank, incomplete_alphabet, lazy_locate, concept_check);
last_rank, incomplete_alphabet, lazy_locate, concept_check, node_access);

0 comments on commit 01bffc8

Please sign in to comment.