Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MISC] Add alphabet type template to fm_index #1222

Merged
merged 1 commit into from
Aug 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ If possible, provide tooling that performs the changes, e.g. a shell-script.
* **The `seqan3::concatenated_sequences::data()` function has been deprecated:**
Use `seqan3::concatenated_sequences::raw_data()` instead.

#### Search

* **Changed class signature of (bi_)fm_index:**
All code that relies on automatic template deduction will be unaffected. If you specified the template parameters
of a `seqan3::fm_index` or `seqan3::bi_fm_index` explicitly, you will need to add the alphabet type as the first parameter and pass a
`seqan3::text_layout` instead of a `bool` to indicate the text layout (single or collection).
For example, `fm_index<false> index{text}` where `text` is of type `dna4_vector` needs to be changed to
`fm_index<dna4, text_layout::single> index{text}`.

* **The `construct()` method of the (bi_)fm_index is now private:**
Use the constructor `seqan3::fm_index::fm_index(text_t && text)` or `seqan3::bi_fm_index::bi_fm_index(text_t && text)`
instead.

## Notable Bug-fixes

# 3.0.0 ("Escala")
Expand Down
2 changes: 1 addition & 1 deletion doc/tutorial/read_mapper/read_mapper_step2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ void map_reads(std::filesystem::path const & query_path,
uint8_t const errors)
//! [map_reads]
{
bi_fm_index<text_layout::collection> index; // we need to know if we work on a text collection before loading
bi_fm_index<dna5, text_layout::collection> index; // we need the alphabet and text layout before loading
{
std::ifstream is{index_path, std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
Expand Down
2 changes: 1 addition & 1 deletion doc/tutorial/read_mapper/read_mapper_step3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void map_reads(std::filesystem::path const & query_path,
reference_storage_t & storage,
uint8_t const errors)
{
bi_fm_index<text_layout::collection> index; // we need to know if we work on a text collection before loading
bi_fm_index<dna5, text_layout::collection> index; // we need the alphabet and text layout before loading
{
std::ifstream is{index_path, std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
Expand Down
2 changes: 1 addition & 1 deletion doc/tutorial/read_mapper/read_mapper_step4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void map_reads(std::filesystem::path const & query_path,
reference_storage_t & storage,
uint8_t const errors)
{
bi_fm_index<text_layout::collection> index; // we need to know if we work on a text collection before loading
bi_fm_index<dna5, text_layout::collection> index; // we need the alphabet and text layout before loading
{
std::ifstream is{index_path, std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
Expand Down
3 changes: 2 additions & 1 deletion doc/tutorial/search/search_small_snippets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ fm_index index{text};

{
//![load]
fm_index<text_layout::single> index; // we need to tell the index that we work on a single text before loading
// we need to tell the index that we work on a single text and a `char` alphabet before loading
fm_index<char, text_layout::single> index;
{
std::ifstream is{"index.file", std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
Expand Down
3 changes: 2 additions & 1 deletion doc/tutorial/search/search_solution1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ int main()
oarchive(index);
}

fm_index<text_layout::single> index2; // we need to tell the index that we work on a single text before loading
// we need to tell the index that we work on a single text and a `dna4` alphabet before loading
fm_index<dna4, text_layout::single> index2;
{
std::ifstream is{"index.file", std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
Expand Down
4 changes: 2 additions & 2 deletions include/seqan3/search/algorithm/detail/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ inline auto search_single(index_t const & index, query_t & query, configuration_
}
else
{
using hit_t = std::conditional_t<index_t::is_collection_,
using hit_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
std::pair<typename index_t::size_type, typename index_t::size_type>,
typename index_t::size_type>;
std::vector<hit_t> hits;
Expand Down Expand Up @@ -184,7 +184,7 @@ inline auto search_all(index_t const & index, queries_t & queries, configuration
// delegate params: text_position (or cursor). we will withhold all hits of one query anyway to filter
// duplicates. more efficient to call delegate once with one vector instead of calling
// delegate for each hit separately at once.
using text_pos_t = std::conditional_t<index_t::is_collection_,
using text_pos_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
std::pair<typename index_t::size_type, typename index_t::size_type>,
typename index_t::size_type>;
using hit_t = std::conditional_t<cfg_t::template exists<search_cfg::output<detail::search_output_index_cursor>>(),
Expand Down
1 change: 1 addition & 0 deletions include/seqan3/search/algorithm/detail/search_trivial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <type_traits>

#include <seqan3/alphabet/concept.hpp>
#include <seqan3/range/concept.hpp>
#include <seqan3/range/view/drop.hpp>
#include <seqan3/search/algorithm/detail/search_common.hpp>
Expand Down
2 changes: 0 additions & 2 deletions include/seqan3/search/algorithm/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,6 @@ inline auto search(queries_t && queries,
static_assert(detail::is_type_specialisation_of_v<remove_cvref_t<configuration_t>, configuration>,
"cfg must be a specialisation of seqan3::configuration.");

assert(alphabet_size<innermost_value_type_t<queries_t>> == index.sigma);

using cfg_t = remove_cvref_t<configuration_t>;

if constexpr (cfg_t::template exists<search_cfg::max_error>())
Expand Down
156 changes: 73 additions & 83 deletions include/seqan3/search/fm_index/bi_fm_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,16 @@
namespace seqan3
{

//!\cond
SEQAN3_DEPRECATED_310
void bi_fm_index_deprecation(bool);

template <typename t>
void bi_fm_index_deprecation(t);
//!\endcond

/*!\addtogroup submodule_fm_index
* \{
*/

/*!\brief The SeqAn Bidirectional FM Index
* \implements seqan3::BiFmIndex
* \tparam is_collection Indicates whether this index works on a text collection or a single text.
* See seqan3::text_layout.
* \tparam sdsl_index_type_ The type of the underlying SDSL index, must model seqan3::SdslIndex.
* \tparam alphabet_t The alphabet type; must model seqan3::Semialphabet.
* \tparam text_layout_mode_ Indicates whether this index works on a text collection or a single text.
* See seqan3::text_layout.
* \tparam sdsl_index_type_ The type of the underlying SDSL index, must model seqan3::SdslIndex.
* \details
*
* The seqan3::bi_fm_index is a fast and space-efficient bidirectional string index to search strings and
Expand All @@ -64,18 +57,13 @@ void bi_fm_index_deprecation(t);
*
* \attention When building an index for a **text collection** over any alphabet, the symbols with rank 254 and 255
* are reserved and may not be used in the text.
*
* \deprecated Use seqan3::text_layout to indicate single texts and text collections. The use of bool is deprecated.
*/
template <auto is_collection = text_layout::single, detail::SdslIndex sdsl_index_type_ = default_sdsl_index_type>
template <Semialphabet alphabet_t,
text_layout text_layout_mode_,
detail::SdslIndex sdsl_index_type_ = default_sdsl_index_type>
class bi_fm_index
{
protected:
//!\brief The alphabet size of the text.
size_t sigma{0};
//!\brief Indicates whether index is built over a collection.
static constexpr bool is_collection_{is_collection};

private:
/*!\name Index types
* \{
*/
Expand All @@ -94,10 +82,10 @@ class bi_fm_index
using sdsl_sigma_type = typename sdsl_index_type::alphabet_type::sigma_type;

//!\brief The type of the underlying FM index for the original text.
using fm_index_type = fm_index<text_layout{is_collection_}, sdsl_index_type>;
using fm_index_type = fm_index<alphabet_t, text_layout_mode_, sdsl_index_type>;

//!\brief The type of the underlying FM index for the reversed text.\if DEV \todo Change sampling behaviour. \endif
using rev_fm_index_type = fm_index<text_layout{is_collection_}, sdsl_index_type>;
using rev_fm_index_type = fm_index<alphabet_t, text_layout_mode_, sdsl_index_type>;
//!\}

//!\brief Underlying FM index for the original text.
Expand All @@ -106,57 +94,6 @@ class bi_fm_index
//!\brief Underlying FM index for the reversed text.
rev_fm_index_type rev_fm;

//!\cond
using unused_t [[maybe_unused]] = decltype(bi_fm_index_deprecation(is_collection));
//!\endcond

public:
/*!\name Text types
* \{
*/
//!\brief Type for representing positions in the indexed text.
using size_type = typename sdsl_index_type::size_type;
//!\}

/*!\name Cursor types
* \{
*/
//!\brief The type of the bidirectional cursor.
using cursor_type = bi_fm_index_cursor<bi_fm_index<is_collection, sdsl_index_type>>;
//!\brief The type of the unidirectional cursor on the original text.
using fwd_cursor_type = fm_index_cursor<fm_index_type>;
//!\brief The type of the unidirectional cursor on the reversed text.
using rev_cursor_type = fm_index_cursor<rev_fm_index_type>;
//!\}

template <typename fm_index_t>
friend class fm_index_cursor;

/*!\name Constructors, destructor and assignment
* \{
*/
bi_fm_index() = default; //!< Defaulted.
bi_fm_index(bi_fm_index const &) = default; //!< Defaulted.
bi_fm_index & operator=(bi_fm_index const &) = default; //!< Defaulted.
bi_fm_index(bi_fm_index &&) = default; //!< Defaulted.
bi_fm_index & operator=(bi_fm_index &&) = default; //!< Defaulted.
~bi_fm_index() = default; //!< Defaulted.

/*!\brief Constructor that immediately constructs the index given a range. The range cannot be empty.
* \tparam text_t The type of range to construct from; must model std::ranges::BidirectionalRange.
* \param[in] text The text to construct from.
*
* ### Complexity
*
* \if DEV \todo \endif At least linear.
*/
template <std::ranges::Range text_t>
bi_fm_index(text_t && text)
{
construct(std::forward<decltype(text)>(text));
}
//!\}

/*!\brief Constructs the index given a range.
* The range cannot be an rvalue (i.e. a temporary object) and has to be non-empty.
* \tparam text_t The type of range to construct from; must model std::ranges::BidirectionalRange.
Expand All @@ -177,13 +114,15 @@ class bi_fm_index
* No guarantee. \if DEV \todo Ensure strong exception guarantee. \endif
*/
template <std::ranges::Range text_t>
//!\cond
requires !is_collection_
//!\endcond
//!\cond
requires text_layout_mode_ == text_layout::single
//!\endcond
void construct(text_t && text)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make this private or move into constructor

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also document this in the changelog

{
static_assert(std::ranges::BidirectionalRange<text_t>, "The text must model BidirectionalRange.");
static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
static_assert(std::ConvertibleTo<innermost_value_type_t<text_t>, alphabet_t>,
"The alphabet of the text collection must be convertible to the alphabet of the index.");
static_assert(dimension_v<text_t> == 1, "The input cannot be a text collection.");

// text must not be empty
Expand All @@ -193,21 +132,21 @@ class bi_fm_index
auto rev_text = std::view::reverse(text);
fwd_fm.construct(text);
rev_fm.construct(rev_text);

sigma = fwd_fm.sigma;
}

//!\overload
template <std::ranges::Range text_t>
//!\cond
requires is_collection_
//!\endcond
//!\cond
requires text_layout_mode_ == text_layout::collection
//!\endcond
void construct(text_t && text)
{
static_assert(std::ranges::BidirectionalRange<text_t>, "The text must model BidirectionalRange.");
static_assert(std::ranges::BidirectionalRange<reference_t<text_t>>,
"The elements of the text collection must model BidirectionalRange.");
static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
static_assert(std::ConvertibleTo<innermost_value_type_t<text_t>, alphabet_t>,
"The alphabet of the text collection must be convertible to the alphabet of the index.");
static_assert(dimension_v<text_t> == 2, "The input must be a text collection.");

// text must not be empty
Expand All @@ -217,9 +156,60 @@ class bi_fm_index
auto rev_text = text | view::deep{std::view::reverse} | std::view::reverse;
fwd_fm.construct(text);
rev_fm.construct(rev_text);
}

public:
//!\brief Indicates whether the index is built over a single text or a text collection.
static constexpr text_layout text_layout_mode = text_layout_mode_;

/*!\name Text types
* \{
*/
//!\brief The type of the underlying character of the indexed text.
using char_type = typename fm_index_type::char_type;
//!\brief Type for representing positions in the indexed text.
using size_type = typename sdsl_index_type::size_type;
//!\}

/*!\name Cursor types
* \{
*/
//!\brief The type of the bidirectional cursor.
using cursor_type = bi_fm_index_cursor<bi_fm_index<alphabet_t, text_layout_mode, sdsl_index_type>>;
//!\brief The type of the unidirectional cursor on the original text.
using fwd_cursor_type = fm_index_cursor<fm_index_type>;
//!\brief The type of the unidirectional cursor on the reversed text.
using rev_cursor_type = fm_index_cursor<rev_fm_index_type>;

//!\}

template <typename fm_index_t>
friend class fm_index_cursor;

/*!\name Constructors, destructor and assignment
* \{
*/
bi_fm_index() = default; //!< Defaulted.
bi_fm_index(bi_fm_index const &) = default; //!< Defaulted.
bi_fm_index & operator=(bi_fm_index const &) = default; //!< Defaulted.
bi_fm_index(bi_fm_index &&) = default; //!< Defaulted.
bi_fm_index & operator=(bi_fm_index &&) = default; //!< Defaulted.
~bi_fm_index() = default; //!< Defaulted.

sigma = fwd_fm.sigma;
/*!\brief Constructor that immediately constructs the index given a range. The range cannot be empty.
* \tparam text_t The type of range to construct from; must model std::ranges::BidirectionalRange.
* \param[in] text The text to construct from.
*
* ### Complexity
*
* \if DEV \todo \endif At least linear.
*/
template <std::ranges::Range text_t>
bi_fm_index(text_t && text)
{
construct(std::forward<text_t>(text));
}
//!\}

/*!\brief Returns the length of the indexed text including sentinel characters.
* \returns Returns the length of the indexed text including sentinel characters.
Expand Down Expand Up @@ -361,7 +351,7 @@ class bi_fm_index
*/
//! \brief Deduces the alphabet type and the dimensions of the text.
template <std::ranges::Range text_t>
bi_fm_index(text_t &&) -> bi_fm_index<text_layout{dimension_v<text_t> != 1}>;
bi_fm_index(text_t &&) -> bi_fm_index<innermost_value_type_t<text_t>, text_layout{dimension_v<text_t> != 1}>;
//!\}

//!\}
Expand Down
Loading