From e844a22a3d00c24df8e5aafe30191f56b26c3f1d Mon Sep 17 00:00:00 2001 From: mitradarja Date: Fri, 19 Nov 2021 15:53:08 +0100 Subject: [PATCH 01/20] Update seqan3 --- lib/seqan3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/seqan3 b/lib/seqan3 index 52f201a..d29786b 160000 --- a/lib/seqan3 +++ b/lib/seqan3 @@ -1 +1 @@ -Subproject commit 52f201a5ef3d806b7348b67afd80374f160451c7 +Subproject commit d29786b61de73f14eed5c83c14ef7e02f038bdb1 From 099bb19ea4326ebc28e55fb040537834e83b12e2 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Tue, 23 Nov 2021 17:49:00 +0100 Subject: [PATCH 02/20] [FEATURE] Add modmers. --- test/api/CMakeLists.txt | 2 ++ test/api/comparison_test.cpp | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index f2b634c..9f6eced 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -2,3 +2,5 @@ cmake_minimum_required (VERSION 3.8) add_api_test (comparison_test.cpp) target_use_datasources (comparison_test FILES example1.fasta) + +add_api_test (modmer_test.cpp) diff --git a/test/api/comparison_test.cpp b/test/api/comparison_test.cpp index b7c8d03..98a2593 100644 --- a/test/api/comparison_test.cpp +++ b/test/api/comparison_test.cpp @@ -1,6 +1,5 @@ #include -#include #include #include "compare.h" From 1eb370c302bca07b262758cc6d84605a3d513643 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Wed, 24 Nov 2021 10:16:21 +0100 Subject: [PATCH 03/20] [FEATURE] Add modmers. --- include/modmer.hpp | 530 +++++++++++++++++++++++++++++++++++++++ test/api/modmer_test.cpp | 221 ++++++++++++++++ 2 files changed, 751 insertions(+) create mode 100644 include/modmer.hpp create mode 100644 test/api/modmer_test.cpp diff --git a/include/modmer.hpp b/include/modmer.hpp new file mode 100644 index 0000000..c62d97a --- /dev/null +++ b/include/modmer.hpp @@ -0,0 +1,530 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Mitra Darvish + * \brief Provides modmer. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace seqan3::detail +{ +// --------------------------------------------------------------------------------------------------------------------- +// modmer_view class +// --------------------------------------------------------------------------------------------------------------------- + +/*!\brief The type returned by modmer. + * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must + * model std::totally_ordered. The typical use case is that the reference type is the result of + * seqan3::kmer_hash. + * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference type + * must model std::totally_ordered. If only one range is provided this defaults to + * std::ranges::empty_view. + * \implements std::ranges::view + * \ingroup search_views + * + * + * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. + + */ +template > +class modmer_view : public std::ranges::view_interface> +{ +private: + static_assert(std::ranges::forward_range, "The modmer_view only works on forward_ranges."); + static_assert(std::ranges::forward_range, "The modmer_view only works on forward_ranges."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + + //!\brief The default argument of the second range. + using default_urng2_t = std::ranges::empty_view; + + //!\brief Boolean variable, which is true, when second range is not of empty type. + static constexpr bool second_range_is_given = !std::same_as; + + static_assert(!second_range_is_given || std::totally_ordered_with, + std::ranges::range_reference_t>, + "The reference types of the underlying ranges must model std::totally_ordered_with."); + + //!\brief Whether the given ranges are const_iterable + static constexpr bool const_iterable = seqan3::const_iterable_range && + seqan3::const_iterable_range; + + //!\brief The first underlying range. + urng1_t urange1{}; + //!\brief The second underlying range. + urng2_t urange2{}; + + //!\brief The number of values in one window. + size_t mod_used{}; + + template + class basic_iterator; + + //!\brief The sentinel type of the modmer_view. + using sentinel = std::default_sentinel_t; + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + modmer_view() + requires std::default_initializable && std::default_initializable + = default; //!< Defaulted. + modmer_view(modmer_view const & rhs) = default; //!< Defaulted. + modmer_view(modmer_view && rhs) = default; //!< Defaulted. + modmer_view & operator=(modmer_view const & rhs) = default; //!< Defaulted. + modmer_view & operator=(modmer_view && rhs) = default; //!< Defaulted. + ~modmer_view() = default; //!< Defaulted. + + /*!\brief Construct from a view and a given number of values in one window. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + modmer_view(urng1_t urange1, size_t const mod_used) : + modmer_view{std::move(urange1), default_urng2_t{}, mod_used} + {} + + /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from>>) + //!\endcond + modmer_view(other_urng1_t && urange1, size_t const mod_used) : + urange1{std::views::all(std::forward(urange1))}, + urange2{default_urng2_t{}}, + mod_used{mod_used} + {} + + /*!\brief Construct from two views and a given number of values in one window. + * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + modmer_view(urng1_t urange1, urng2_t urange2, size_t const mod_used) : + urange1{std::move(urange1)}, + urange2{std::move(urange2)}, + mod_used{mod_used} + { + if constexpr (second_range_is_given) + { + if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) + throw std::invalid_argument{"The two ranges do not have the same size."}; + } + } + + /*!\brief Construct from two non-views that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \tparam other_urng2_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng2_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from> && + std::ranges::viewable_range && + std::constructible_from>) + //!\endcond + modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used) : + urange1{std::views::all(std::forward(urange1))}, + urange2{std::views::all(std::forward(urange2))}, + mod_used{mod_used} + { + if constexpr (second_range_is_given) + { + if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) + throw std::invalid_argument{"The two ranges do not have the same size."}; + } + } + //!\} + + /*!\name Iterators + * \{ + */ + /*!\brief Returns an iterator to the first element of the range. + * \returns Iterator to the first element. + * + * \details + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * Strong exception guarantee. + */ + basic_iterator begin() + { + return {std::ranges::begin(urange1), + std::ranges::end(urange1), + std::ranges::begin(urange2), + mod_used}; + } + + //!\copydoc begin() + basic_iterator begin() const + //!\cond + requires const_iterable + //!\endcond + { + return {std::ranges::cbegin(urange1), + std::ranges::cend(urange1), + std::ranges::cbegin(urange2), + mod_used}; + } + + /*!\brief Returns an iterator to the element following the last element of the range. + * \returns Iterator to the end. + * + * \details + * + * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * No-throw guarantee. + */ + sentinel end() const + { + return {}; + } + //!\} +}; + +//!\brief Iterator for calculating modmers. +template +template +class modmer_view::basic_iterator +{ +private: + //!\brief The sentinel type of the first underlying range. + using urng1_sentinel_t = maybe_const_sentinel_t; + //!\brief The iterator type of the first underlying range. + using urng1_iterator_t = maybe_const_iterator_t; + //!\brief The iterator type of the second underlying range. + using urng2_iterator_t = maybe_const_iterator_t; + + template + friend class basic_iterator; + +public: + /*!\name Associated types + * \{ + */ + //!\brief Type for distances between iterators. + using difference_type = std::ranges::range_difference_t; + //!\brief Value type of this iterator. + using value_type = std::ranges::range_value_t; + //!\brief The pointer type. + using pointer = void; + //!\brief Reference to `value_type`. + using reference = value_type; + //!\brief Tag this class as a forward iterator. + using iterator_category = std::forward_iterator_tag; + //!\brief Tag this class as a forward iterator. + using iterator_concept = iterator_category; + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + basic_iterator() = default; //!< Defaulted. + basic_iterator(basic_iterator const &) = default; //!< Defaulted. + basic_iterator(basic_iterator &&) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. + ~basic_iterator() = default; //!< Defaulted. + + //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. + basic_iterator(basic_iterator const & it) + //!\cond + requires const_range + //!\endcond + : modmer_value{std::move(it.modmer_value)}, + urng1_iterator{std::move(it.urng1_iterator)}, + urng1_sentinel{std::move(it.urng1_sentinel)}, + urng2_iterator{std::move(it.urng2_iterator)} + {} + + /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number + of values per window. + * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. + * \param[in] mod_used The number of values in one window. + * + * \details + * + * Looks at the number of values per window in two ranges, returns the smallest between both as modmer and + * shifts then by one to repeat this action. If a modmer in consecutive windows is the same, it is returned only + * once. + */ + basic_iterator(urng1_iterator_t urng1_iterator, + urng1_sentinel_t urng1_sentinel, + urng2_iterator_t urng2_iterator, + size_t mod_used) : + urng1_iterator{std::move(urng1_iterator)}, + urng1_sentinel{std::move(urng1_sentinel)}, + urng2_iterator{std::move(urng2_iterator)}, + mod_used{std::move(mod_used)} + { + size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); + mod_used = std::min(mod_used, size); + + first_modmer(); + } + //!\} + + //!\anchor basic_iterator_comparison + //!\name Comparison operators + //!\{ + + //!\brief Compare to another basic_iterator. + friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) + { + return (lhs.urng1_iterator == rhs.urng1_iterator) && + (rhs.urng2_iterator == rhs.urng2_iterator); + } + + //!\brief Compare to another basic_iterator. + friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the modmer_view. + friend bool operator==(basic_iterator const & lhs, sentinel const &) + { + return lhs.urng1_iterator == lhs.urng1_sentinel; + } + + //!\brief Compare to the sentinel of the modmer_view. + friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) + { + return rhs == lhs; + } + + //!\brief Compare to the sentinel of the modmer_view. + friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the modmer_view. + friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) + { + return !(lhs == rhs); + } + //!\} + + //!\brief Pre-increment. + basic_iterator & operator++() noexcept + { + next_unique_modmer(); + return *this; + } + + //!\brief Post-increment. + basic_iterator operator++(int) noexcept + { + basic_iterator tmp{*this}; + next_unique_modmer(); + return tmp; + } + + //!\brief Return the modmer. + value_type operator*() const noexcept + { + return modmer_value; + } + +private: + //!\brief The modmer value. + value_type modmer_value{}; + + //!\brief Iterator to the rightmost value of one window. + urng1_iterator_t urng1_iterator{}; + //!brief Iterator to last element in range. + urng1_sentinel_t urng1_sentinel{}; + //!\brief Iterator to the rightmost value of one window of the second range. + urng2_iterator_t urng2_iterator{}; + + size_t mod_used{}; + + //!\brief Advances the window to the next position. + void advance() + { + ++urng1_iterator; + if constexpr (second_range_is_given) + ++urng2_iterator; + } + + void first_modmer() + { + if (!next_modmer()) + next_unique_modmer(); + } + + //!\brief Increments iterator by 1. + void next_unique_modmer() + { + if (mod_used == 0u) + return; + + while (1) + { + advance(); + if (next_modmer()) + break; + } + } + + /*!\brief Calculates the next modmer value. + * \returns True, if new modmer is found or end is reached. Otherwise returns false. + */ + bool next_modmer() + { + if (urng1_iterator == urng1_sentinel) + return true; + + if constexpr (!second_range_is_given) + { + if (*urng1_iterator % mod_used == 0) + { + modmer_value = *urng1_iterator; + return true; + } + } + else + { + value_type mod = std::min(*urng1_iterator, *urng2_iterator); + if (mod % mod_used == 0) + { + modmer_value = mod; + return true; + } + } + + return false; + } +}; + +//!\brief A deduction guide for the view class template. +template +modmer_view(rng1_t &&, size_t const mod_used) -> modmer_view>; + +//!\brief A deduction guide for the view class template. +template +modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used) -> modmer_view, + std::views::all_t>; + +// --------------------------------------------------------------------------------------------------------------------- +// modmer_fn (adaptor definition) +// --------------------------------------------------------------------------------------------------------------------- + +//![adaptor_def] +//!\brief modmer's range adaptor object type (non-closure). +//!\ingroup search_views +struct modmer_fn +{ + //!\brief Store the number of values in one window and return a range adaptor closure object. + constexpr auto operator()(size_t const mod_used) const + { + return adaptor_from_functor{*this, mod_used}; + } + + /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many + * values one window contains. + * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + * \returns A range of converted values. + */ + template + constexpr auto operator()(urng1_t && urange1, size_t const mod_used) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::modmer cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::modmer must model std::ranges::forward_range."); + + if (mod_used == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen mod_used is not valid. " + "Please choose a value greater than 1 or use two ranges."}; + + return modmer_view{urange1, mod_used}; + } +}; +//![adaptor_def] + +} // namespace seqan3::detail + +/*!\brief Computes modmers for a range of comparable values. A modmer is ... + * \tparam urng_t The type of the first range being processed. See below for requirements. [template + * parameter is omitted in pipe notation] + * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] + * \param[in] mod_used The number of values in one window. + * \returns A range of std::totally_ordered where each value is ... See below for the + * properties of the returned range. + * \ingroup search_views + * + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | + * + * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + */ +inline constexpr auto modmer = seqan3::detail::modmer_fn{}; diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp new file mode 100644 index 0000000..d81f9da --- /dev/null +++ b/test/api/modmer_test.cpp @@ -0,0 +1,221 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" + +#include "modmer.hpp" + +using seqan3::operator""_dna4; +using seqan3::operator""_shape; +using result_t = std::vector; + +inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapped{4}); +inline static constexpr auto rev_kmer_view = seqan3::views::complement | std::views::reverse + | kmer_view + | std::views::reverse; +inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_shape); +inline static constexpr auto rev_gapped_kmer_view = seqan3::views::complement | std::views::reverse + | seqan3::views::kmer_hash(0b1001_shape) + | std::views::reverse; +inline static constexpr auto modmer_no_rev_view = modmer(2); + +using iterator_type = std::ranges::iterator_t< decltype(std::declval() + | kmer_view + | modmer_no_rev_view)>; +using two_ranges_iterator_type = std::ranges::iterator_t< decltype(seqan3::detail::modmer_view{ + std::declval() + | kmer_view, + std::declval() + | rev_kmer_view, + 2})>; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = true; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})) vec = text | kmer_view; + result_t expected_range{26, 166, 152, 134, 252, 242}; + + decltype(modmer(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 2)) test_range = + modmer(vec, 2); +}; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = true; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + using kmer_hash_view_t = decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})); + + kmer_hash_view_t vec = kmer_view(text); + result_t expected_range{26, 152, 6, 192, 112}; + + using reverse_kmer_hash_view_t = decltype(rev_kmer_view(text)); + + using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 2}); + test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 2}; +}; + +using test_types = ::testing::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_types, ); + +template +class modmer_view_properties_test: public ::testing::Test { }; + +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitpacked_sequence, + seqan3::bitpacked_sequence const, + std::list, + std::list const, + std::forward_list, + std::forward_list const>; +TYPED_TEST_SUITE(modmer_view_properties_test, underlying_range_types, ); + +class modmer_test : public ::testing::Test +{ +protected: + std::vector text1{"AAAAAA"_dna4}; + result_t result1{0, 0, 0}; // Same result for ungapped and gapped + + std::vector too_short_text{"AC"_dna4}; + + // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa + std::vector text3{"ACGGCGACGTTTAG"_dna4}; + result_t result3_ungapped{26, 152, 6, 192, 112}; // ACGG, GCGA, aacg, taaa, ctaa + result_t result3_gapped{2, 8, 2, 12, 4}; // A--G, G--A, a--g, t--a, c--a - "-" for gap + result_t result3_ungapped_no_rev{26, 166, 152, 134, 252, 242}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG + result_t result3_gapped_no_rev{2, 10, 8, 10, 12, 14}; // A--G, G--G, G--A, G--G, T--A, T--G "-" for gap + result_t result3_ungapped_stop{26, 152}; // ACGG, GCGA + result_t result3_ungapped_no_rev_stop{26, 166, 152, 134}; // ACGG, GGCG, GCGA, GACG + result_t result3_gapped_stop{2, 8}; // A--G, G--A + result_t result3_gapped_no_rev_stop{2, 10, 8, 10}; + result_t result3_ungapped_start{6, 192, 112}; // For start at second A, aacg, taaa, ctaa + result_t result3_gapped_start{2, 12, 4}; // a--g, t--a, c--a - "-" for gap + result_t result3_ungapped_no_rev_start{242}; // For start at second A, TTAG + result_t result3_gapped_no_rev_start{14}; // For start at second A, T--G +}; + +template +void compare_types(adaptor_t v) +{ + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_FALSE(std::ranges::bidirectional_range); + EXPECT_FALSE(std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_FALSE(std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); +} + +TYPED_TEST(modmer_view_properties_test, concepts) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + + auto v = text | kmer_view | modmer_no_rev_view; + compare_types(v); + auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 2}; + + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2}; + compare_types(v3); + } +} + +TYPED_TEST(modmer_view_properties_test, different_inputs_kmer_hash) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + result_t ungapped{216, 6, 192, 112}; // TCGA, aacg, taaa, ctaa + result_t gapped{12, 2, 12, 4}; // T--A, a--g, t--a, c--a - "-" for gap + result_t ungapped_no_rev{182, 216, 134, 252, 242}; // GTCG, TCGA, GACG, TTTA, TTAG + result_t gapped_no_rev{10, 12, 10, 12, 14}; // G--G, T--G, T--A, G--G, T--A, T--G "-" for gap + EXPECT_RANGE_EQ(ungapped_no_rev, text | kmer_view | modmer_no_rev_view); + EXPECT_RANGE_EQ(gapped_no_rev, text | gapped_kmer_view | modmer_no_rev_view); + + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2})) ; + EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 2})); + } +} + +TEST_F(modmer_test, ungapped_kmer_hash) +{ + EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 2})); + EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_no_rev_view); + auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 2}; + EXPECT_TRUE(std::ranges::empty(empty_view)); + auto empty_view2 = too_short_text | kmer_view | modmer_no_rev_view; + EXPECT_TRUE(std::ranges::empty(empty_view2)); + EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 2})); + EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | modmer_no_rev_view); + +} + +TEST_F(modmer_test, gapped_kmer_hash) +{ + EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | gapped_kmer_view, + text1 | rev_gapped_kmer_view, + 2})); + EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_no_rev_view); + auto empty_view = seqan3::detail::modmer_view{too_short_text | gapped_kmer_view, + too_short_text | rev_gapped_kmer_view, + 2}; + EXPECT_TRUE(std::ranges::empty(empty_view)); + auto empty_view2 = too_short_text | gapped_kmer_view | modmer_no_rev_view; + EXPECT_TRUE(std::ranges::empty(empty_view2)); + EXPECT_RANGE_EQ(result3_gapped, (seqan3::detail::modmer_view{text3 | gapped_kmer_view, + text3 | rev_gapped_kmer_view, + 2})); + EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | modmer_no_rev_view); +} + +TEST_F(modmer_test, combinability) +{ + auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); + EXPECT_RANGE_EQ(result3_ungapped_no_rev_stop, text3 | stop_at_t | kmer_view | modmer_no_rev_view); + EXPECT_RANGE_EQ(result3_gapped_no_rev_stop, text3 | stop_at_t | gapped_kmer_view | modmer_no_rev_view); + + EXPECT_RANGE_EQ(result3_ungapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | kmer_view, + text3 | stop_at_t | rev_kmer_view, + 2})); + EXPECT_RANGE_EQ(result3_gapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | gapped_kmer_view, + text3 | stop_at_t | rev_gapped_kmer_view, + 2})); + + auto start_at_a = std::views::drop(6); + EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, + text3 | start_at_a | rev_kmer_view, + 2})); + EXPECT_RANGE_EQ(result3_gapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | gapped_kmer_view, + text3 | start_at_a | rev_gapped_kmer_view, + 2})); +} + +TEST_F(modmer_test, two_ranges_unequal_size) +{ + EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 2}), std::invalid_argument); +} From 8f8b3ef48f28c43028a947eb4b672f13e3f976c0 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Wed, 24 Nov 2021 10:58:18 +0100 Subject: [PATCH 04/20] [FEATURE] Add modmer_hash. --- include/modmer_hash.hpp | 146 ++++++++++++++++++++++++++++++++++ test/api/CMakeLists.txt | 1 + test/api/modmer_hash_test.cpp | 132 ++++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+) create mode 100644 include/modmer_hash.hpp create mode 100644 test/api/modmer_hash_test.cpp diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp new file mode 100644 index 0000000..b48e1df --- /dev/null +++ b/include/modmer_hash.hpp @@ -0,0 +1,146 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Mitra Darvish + * \brief Provides modmer_hash. + */ + +#pragma once + +#include +#include +#include +#include + +#include "modmer.hpp" + +namespace seqan3 +{ +//!\brief strong_type for the mod_used. +//!\ingroup search_views +struct mod_used : seqan3::detail::strong_type +{ + using seqan3::detail::strong_type::strong_type; +}; +} // namespace seqan3 +namespace seqan3::detail +{ +//!\brief seqan3::views::modmer_hash's range adaptor object type (non-closure). +//!\ingroup search_views +struct modmer_hash_fn +{ + /*!\brief Store the shape and the window size and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used The windows size to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, mod_used const mod_used) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used}; + } + + /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, mod_used const mod_used, seed const seed) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used, seed}; + } + + /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. + * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type + * of the range must model seqan3::semialphabet. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \returns A range of converted elements. + */ + template + constexpr auto operator()(urng_t && urange, + shape const & shape, + mod_used const mod_used, + seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::modmer_hash cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::modmer_hash must model std::ranges::forward_range."); + static_assert(semialphabet>, + "The range parameter to views::modmer_hash must be over elements of seqan3::semialphabet."); + + auto forward_strand = std::forward(urange) | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}); + + auto reverse_strand = std::forward(urange) | seqan3::views::complement + | std::views::reverse + | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}) + | std::views::reverse; + + return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used.get()); + } +}; + +} // namespace seqan3::detail + +/*!\name Alphabet related views + * \{ + */ + +/*!\brief Computes modmers for a range with a given shape, mod_used and seed. + * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is + * omitted in pipe notation] + * \param[in] urange The range being processed. [parameter is omitted in pipe notation] + * \param[in] shape The seqan3::shape that determines how to compute the hash value. + * \param[in] mod_used The mod value to use. + * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. + * \returns A range of `size_t` where each value is the modmer of the resp. window. + * See below for the properties of the returned range. + * \ingroup search_views + * + * \attention + * Be aware of the requirements of the seqan3::views::kmer_hash view. + * + * \experimentalapi + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | + * + * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * + * \hideinitializer + * + * \experimentalapi{Experimental since version 3.1.} + */ +inline constexpr auto modmer_hash = seqan3::detail::modmer_hash_fn{}; + +//!\} diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 9f6eced..733f86c 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -4,3 +4,4 @@ add_api_test (comparison_test.cpp) target_use_datasources (comparison_test FILES example1.fasta) add_api_test (modmer_test.cpp) +add_api_test (modmer_hash_test.cpp) diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp new file mode 100644 index 0000000..f82bce3 --- /dev/null +++ b/test/api/modmer_hash_test.cpp @@ -0,0 +1,132 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" + +#include "modmer_hash.hpp" + +using seqan3::operator""_dna4; +using seqan3::operator""_shape; +using result_t = std::vector; + +using iterator_type = std::ranges::iterator_t() + | modmer_hash(seqan3::ungapped{4}, + seqan3::mod_used{2}, + seqan3::seed{0}))>; + +static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4}; +static constexpr seqan3::shape gapped_shape = 0b1001_shape; +static constexpr auto ungapped_view = modmer_hash(ungapped_shape, + seqan3::mod_used{2}, + seqan3::seed{0}); +static constexpr auto gapped_view = modmer_hash(gapped_shape, + seqan3::mod_used{2}, + seqan3::seed{0}); + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = false; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + result_t expected_range{26, 152, 6, 192, 112}; + + using test_range_t = decltype(text | ungapped_view); + test_range_t test_range = text | ungapped_view; +}; + +using test_type = ::testing::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_type, ); + +template +class modmer_hash_view_properties_test: public ::testing::Test { }; + +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitpacked_sequence, + seqan3::bitpacked_sequence const, + std::list, + std::list const>; + +TYPED_TEST_SUITE(modmer_hash_view_properties_test, underlying_range_types, ); + +class modmer_hash_test : public ::testing::Test +{ +protected: + std::vector text1{"AAAAAA"_dna4}; + result_t result1{0, 0, 0}; // Same result for ungapped and gapped + + std::vector too_short_text{"AC"_dna4}; + + // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa + std::vector text3{"ACGGCGACGTTTAG"_dna4}; + result_t result3_ungapped{26, 152, 6, 192, 112}; // ACGG, GCGA, aacg, taaa, ctaa + result_t result3_gapped{2, 8, 2, 12, 4}; // A--G, G--A, a--g, t--a, c--a - "-" for gap + result_t result3_ungapped_stop{26, 152}; // ACGG, GCGA + result_t result3_gapped_stop{2, 8}; // A--G, G--A + result_t result3_ungapped_start{6, 192, 112}; // For start at second A, aacg, taaa, ctaa + result_t result3_gapped_start{2, 12, 4}; // a--g, t--a, c--a - "-" for gap +}; + +template +void compare_types(adaptor_t v) +{ + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_FALSE(std::ranges::bidirectional_range); + EXPECT_FALSE(std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_FALSE(std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); +} + +TYPED_TEST(modmer_hash_view_properties_test, different_input_ranges) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + result_t ungapped{216, 6, 192, 112}; // TCGA, aacg, taaa, ctaa + result_t gapped{12, 2, 12, 4}; // T--A, a--g, t--a, c--a - "-" for gap + EXPECT_RANGE_EQ(ungapped, text | ungapped_view); + EXPECT_RANGE_EQ(gapped, text | gapped_view); +} + +TEST_F(modmer_hash_test, ungapped) +{ + EXPECT_RANGE_EQ(result1, text1 | ungapped_view); + EXPECT_TRUE(std::ranges::empty(too_short_text | ungapped_view)); + EXPECT_RANGE_EQ(result3_ungapped, text3 | ungapped_view); +} + +TEST_F(modmer_hash_test, gapped) +{ + EXPECT_RANGE_EQ(result1, text1 | gapped_view); + EXPECT_TRUE(std::ranges::empty(too_short_text | gapped_view)); + EXPECT_RANGE_EQ(result3_gapped, text3 | gapped_view); +} + +TEST_F(modmer_hash_test, combinability) +{ + auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); + EXPECT_RANGE_EQ(result3_ungapped_stop, text3 | stop_at_t | ungapped_view); + EXPECT_RANGE_EQ(result3_gapped_stop, text3 | stop_at_t | gapped_view); + + auto start_at_a = std::views::drop(6); + EXPECT_RANGE_EQ(result3_ungapped_start, text3 | start_at_a | ungapped_view); + EXPECT_RANGE_EQ(result3_gapped_start, text3 | start_at_a | gapped_view); +} From c731684a6347f3f01a07a2f0c3bfa00d74355179 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Wed, 24 Nov 2021 11:26:52 +0100 Subject: [PATCH 05/20] [FEATURE] Add modmer as option to speed and coverage. --- include/compare.h | 2 +- include/modmer_hash.hpp | 6 +++--- src/compare.cpp | 8 ++++++++ src/main.cpp | 6 ++++-- test/api/modmer_hash_test.cpp | 6 +++--- test/cli/minions_coverage_test.cpp | 10 +++++++++- test/cli/minions_speed_test.cpp | 10 +++++++++- 7 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/compare.h b/include/compare.h index f4f17af..bdd1757 100644 --- a/include/compare.h +++ b/include/compare.h @@ -18,7 +18,7 @@ inline constexpr static uint64_t adjust_seed(uint8_t const kmer_size, uint64_t c return seed >> (64u - 2u * kmer_size); } -enum methods {kmer = 0, minimiser, strobemer}; +enum methods {kmer = 0, minimiser, modmers, strobemer}; struct minimiser_arguments { diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index b48e1df..523ac9e 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -40,7 +40,7 @@ struct modmer_hash_fn * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, mod_used const mod_used) const + constexpr auto operator()(shape const & shape, window_size const mod_used) const { return seqan3::detail::adaptor_from_functor{*this, shape, mod_used}; } @@ -52,7 +52,7 @@ struct modmer_hash_fn * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, mod_used const mod_used, seed const seed) const + constexpr auto operator()(shape const & shape, window_size const mod_used, seed const seed) const { return seqan3::detail::adaptor_from_functor{*this, shape, mod_used, seed}; } @@ -69,7 +69,7 @@ struct modmer_hash_fn template constexpr auto operator()(urng_t && urange, shape const & shape, - mod_used const mod_used, + window_size const mod_used, seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const { static_assert(std::ranges::viewable_range, diff --git a/src/compare.cpp b/src/compare.cpp index 5477f16..ba86768 100644 --- a/src/compare.cpp +++ b/src/compare.cpp @@ -7,6 +7,7 @@ #include #include "compare.h" +#include "modmer_hash.hpp" /*! \brief Calculate mean and variance of given list. * \param results The vector from which mean and varaince should be calculated of. @@ -248,6 +249,9 @@ void do_comparison(std::vector sequence_files, range_argu case minimiser: compare(sequence_files, seqan3::views::minimiser_hash(args.shape, args.w_size), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; + case modmers: compare(sequence_files, modmer_hash(args.shape, + args.w_size), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + break; case strobemer: std::ranges::empty_view empty{}; if (args.rand & (args.order == 2)) compare, 1>(sequence_files, empty, @@ -274,5 +278,9 @@ void do_coverage(std::filesystem::path sequence_file, range_arguments & args) seqan3::window_size{args.shape.size()}), seqan3::views::minimiser_hash(args.shape, args.w_size), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; + case modmers: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, + seqan3::window_size{args.shape.size()}), modmer_hash(args.shape, + args.w_size), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + break; } } diff --git a/src/main.cpp b/src/main.cpp index a99b08d..550ddda 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,6 +17,8 @@ void string_to_methods(std::string name, methods & m) m = minimiser; else if (name == "strobemer") m = strobemer; + else if (name == "modmer") + m = modmers; }; void read_range_arguments_strobemers(seqan3::argument_parser & parser, range_arguments & args) @@ -55,7 +57,7 @@ int coverage(seqan3::argument_parser & parser) parser.add_option(args.path_out, 'o', "out", "Directory, where output files should be saved."); parser.add_option(args.k_size, 'k', "kmer-size", "Define kmer size."); std::string method{}; - parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser"}); + parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer"}); read_range_arguments_minimiser(parser, args); @@ -92,7 +94,7 @@ int speed(seqan3::argument_parser & parser) parser.add_option(args.path_out, 'o', "out", "Directory, where output files should be saved."); parser.add_option(args.k_size, 'k', "kmer-size", "Define kmer size."); std::string method{}; - parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "strobemer"}); + parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer", "strobemer"}); read_range_arguments_minimiser(parser, args); read_range_arguments_strobemers(parser, args); diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp index f82bce3..60a5fbc 100644 --- a/test/api/modmer_hash_test.cpp +++ b/test/api/modmer_hash_test.cpp @@ -23,16 +23,16 @@ using result_t = std::vector; using iterator_type = std::ranges::iterator_t() | modmer_hash(seqan3::ungapped{4}, - seqan3::mod_used{2}, + seqan3::window_size{2}, seqan3::seed{0}))>; static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4}; static constexpr seqan3::shape gapped_shape = 0b1001_shape; static constexpr auto ungapped_view = modmer_hash(ungapped_shape, - seqan3::mod_used{2}, + seqan3::window_size{2}, seqan3::seed{0}); static constexpr auto gapped_view = modmer_hash(gapped_shape, - seqan3::mod_used{2}, + seqan3::window_size{2}, seqan3::seed{0}); template <> diff --git a/test/cli/minions_coverage_test.cpp b/test/cli/minions_coverage_test.cpp index 892cf99..cbef732 100644 --- a/test/cli/minions_coverage_test.cpp +++ b/test/cli/minions_coverage_test.cpp @@ -38,13 +38,21 @@ TEST_F(cli_test, minimiser) EXPECT_EQ(result.err, std::string{}); } +TEST_F(cli_test, modmer) +{ + cli_test_result result = execute_app("minions coverage --method modmer -k 19 -w 19 ", data("example1.fasta")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{}); + EXPECT_EQ(result.err, std::string{}); +} + TEST_F(cli_test, wrong_method) { cli_test_result result = execute_app("minions coverage --method submer -k 19", data("example1.fasta")); std::string expected { "Error. Incorrect command line input for coverage. Validation failed " - "for option --method: Value submer is not one of [kmer,minimiser].\n" + "for option --method: Value submer is not one of [kmer,minimiser,modmer].\n" }; EXPECT_EQ(result.exit_code, 0); diff --git a/test/cli/minions_speed_test.cpp b/test/cli/minions_speed_test.cpp index 60dccaf..92c676f 100644 --- a/test/cli/minions_speed_test.cpp +++ b/test/cli/minions_speed_test.cpp @@ -30,6 +30,14 @@ TEST_F(cli_test, minimiser) EXPECT_EQ(result.err, std::string{}); } +TEST_F(cli_test, modmer) +{ + cli_test_result result = execute_app("minions speed --method modmer -k 19 -w 19 ", data("example1.fasta")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{}); + EXPECT_EQ(result.err, std::string{}); +} + TEST_F(cli_test, strobemer) { cli_test_result result = execute_app("minions speed --method strobemer -k 19 --w-min 16 --w-max 30 --order 2 --randstrobemers", data("example1.fasta")); @@ -44,7 +52,7 @@ TEST_F(cli_test, wrong_method) std::string expected { "Error. Incorrect command line input for speed. Validation failed " - "for option --method: Value submer is not one of [kmer,minimiser,strobemer].\n" + "for option --method: Value submer is not one of [kmer,minimiser,modmer,strobemer].\n" }; EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); From 57b8c17f2ef4a876452e2c7175f3cdbf53c2bd45 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Tue, 30 Nov 2021 16:36:52 +0100 Subject: [PATCH 06/20] [MISC] Add second mod function. --- include/compare.h | 9 +++- include/modmer.hpp | 87 ++++++++++++++++++------------ include/modmer_hash.hpp | 40 ++++++-------- src/compare.cpp | 28 +++++++--- src/main.cpp | 8 +++ test/api/modmer_hash_test.cpp | 9 ++-- test/api/modmer_test.cpp | 42 +++++++-------- test/cli/minions_coverage_test.cpp | 2 +- 8 files changed, 132 insertions(+), 93 deletions(-) diff --git a/include/compare.h b/include/compare.h index bdd1757..7af4413 100644 --- a/include/compare.h +++ b/include/compare.h @@ -28,6 +28,13 @@ struct minimiser_arguments seqan3::window_size w_size; }; +struct modmer_arguments +{ + // Needed for modmers + uint32_t mod1{17}; + uint32_t mod2{2}; +}; + struct strobemer_arguments { // Needed for strobemers @@ -39,7 +46,7 @@ struct strobemer_arguments unsigned int order; }; -struct range_arguments : minimiser_arguments, strobemer_arguments +struct range_arguments : minimiser_arguments, modmer_arguments, strobemer_arguments { std::filesystem::path path_out{"./"}; diff --git a/include/modmer.hpp b/include/modmer.hpp index c62d97a..354b587 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -71,7 +71,8 @@ class modmer_view : public std::ranges::view_interface class basic_iterator; @@ -95,10 +96,11 @@ class modmer_view : public std::ranges::view_interface //!\cond requires (std::ranges::viewable_range && std::constructible_from>>) //!\endcond - modmer_view(other_urng1_t && urange1, size_t const mod_used) : + modmer_view(other_urng1_t && urange1, size_t const mod_used_1, size_t const mod_used_2) : urange1{std::views::all(std::forward(urange1))}, urange2{default_urng2_t{}}, - mod_used{mod_used} + mod_used_1{mod_used_1}, + mod_used_2{mod_used_2} {} /*!\brief Construct from two views and a given number of values in one window. @@ -124,12 +128,14 @@ class modmer_view : public std::ranges::view_interface //!\cond @@ -156,10 +163,11 @@ class modmer_view : public std::ranges::view_interface && std::constructible_from>) //!\endcond - modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used) : + modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used_1, size_t const mod_used_2) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - mod_used{mod_used} + mod_used_1{mod_used_1}, + mod_used_2{mod_used_2} { if constexpr (second_range_is_given) { @@ -190,7 +198,8 @@ class modmer_view : public std::ranges::view_interface::basic_iterator * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. - * \param[in] mod_used The number of values in one window. + * \param[in] mod_used_1 The number of values in one window. + * \param[in] mod_used_2 The number of values in one window. * * \details * @@ -298,14 +309,17 @@ class modmer_view::basic_iterator basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, urng2_iterator_t urng2_iterator, - size_t mod_used) : + size_t mod_used_1, + size_t mod_used_2) : urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)}, urng2_iterator{std::move(urng2_iterator)}, - mod_used{std::move(mod_used)} + mod_1{mod_used_1}, + mod_2{mod_used_2} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); - mod_used = std::min(mod_used, size); + mod_used_1 = std::min(mod_used_1, size); + mod_used_2 = std::min(mod_used_2, size); first_modmer(); } @@ -385,7 +399,11 @@ class modmer_view::basic_iterator //!\brief Iterator to the rightmost value of one window of the second range. urng2_iterator_t urng2_iterator{}; - size_t mod_used{}; + size_t mod_1{}; + size_t mod_2{}; + +/* size_t mod_used_1{}; + size_t mod_used_2{}; */ //!\brief Advances the window to the next position. void advance() @@ -404,9 +422,6 @@ class modmer_view::basic_iterator //!\brief Increments iterator by 1. void next_unique_modmer() { - if (mod_used == 0u) - return; - while (1) { advance(); @@ -425,7 +440,7 @@ class modmer_view::basic_iterator if constexpr (!second_range_is_given) { - if (*urng1_iterator % mod_used == 0) + if ((*urng1_iterator % mod_1) % mod_2 == 0) { modmer_value = *urng1_iterator; return true; @@ -434,7 +449,7 @@ class modmer_view::basic_iterator else { value_type mod = std::min(*urng1_iterator, *urng2_iterator); - if (mod % mod_used == 0) + if ((mod % mod_1) % mod_2 == 0) { modmer_value = mod; return true; @@ -447,11 +462,11 @@ class modmer_view::basic_iterator //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, size_t const mod_used) -> modmer_view>; +modmer_view(rng1_t &&, size_t const mod_used_1, size_t const mod_used_2) -> modmer_view>; //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used) -> modmer_view, +modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1, size_t const mod_used_2) -> modmer_view, std::views::all_t>; // --------------------------------------------------------------------------------------------------------------------- @@ -464,9 +479,9 @@ modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used) -> modmer_view - constexpr auto operator()(urng1_t && urange1, size_t const mod_used) const + constexpr auto operator()(urng1_t && urange1, size_t const mod_used_1, size_t const mod_used_2) const { static_assert(std::ranges::viewable_range, "The range parameter to views::modmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::modmer must model std::ranges::forward_range."); - if (mod_used == 1) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen mod_used is not valid. " + if (mod_used_1 == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen mod_used_1 is not valid. " "Please choose a value greater than 1 or use two ranges."}; - return modmer_view{urange1, mod_used}; + return modmer_view{urange1, mod_used_1, mod_used_2}; } }; //![adaptor_def] @@ -500,7 +516,8 @@ struct modmer_fn * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used The number of values in one window. + * \param[in] mod_used_1 The number of values in one window. + * \param[in] mod_used_2 The number of values in one window. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index 523ac9e..2605193 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -19,15 +19,6 @@ #include "modmer.hpp" -namespace seqan3 -{ -//!\brief strong_type for the mod_used. -//!\ingroup search_views -struct mod_used : seqan3::detail::strong_type -{ - using seqan3::detail::strong_type::strong_type; -}; -} // namespace seqan3 namespace seqan3::detail { //!\brief seqan3::views::modmer_hash's range adaptor object type (non-closure). @@ -36,40 +27,41 @@ struct modmer_hash_fn { /*!\brief Store the shape and the window size and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used The windows size to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \param[in] mod_used_1 The windows size to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, window_size const mod_used) const + /*constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, uint32_t const mod_used_2) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used}; - } + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, mod_used_2}; + }*/ /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used The size of the window. + * \param[in] mod_used_1 The size of the window. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, window_size const mod_used, seed const seed) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, uint32_t const mod_used_2, seed const seed) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used, seed}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, mod_used_2, seed}; } /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type * of the range must model seqan3::semialphabet. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used The size of the window. + * \param[in] mod_used_1 The size of the window. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. * \returns A range of converted elements. */ template constexpr auto operator()(urng_t && urange, shape const & shape, - window_size const mod_used, + uint32_t const mod_used_1, + uint32_t const mod_used_2, seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const { static_assert(std::ranges::viewable_range, @@ -90,7 +82,7 @@ struct modmer_hash_fn {return i ^ seed.get();}) | std::views::reverse; - return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used.get()); + return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used_1, mod_used_2); } }; @@ -100,12 +92,12 @@ struct modmer_hash_fn * \{ */ -/*!\brief Computes modmers for a range with a given shape, mod_used and seed. +/*!\brief Computes modmers for a range with a given shape, mod_used_1 and seed. * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is * omitted in pipe notation] * \param[in] urange The range being processed. [parameter is omitted in pipe notation] * \param[in] shape The seqan3::shape that determines how to compute the hash value. - * \param[in] mod_used The mod value to use. + * \param[in] mod_used_1 The mod value to use. * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. * \returns A range of `size_t` where each value is the modmer of the resp. window. * See below for the properties of the returned range. diff --git a/src/compare.cpp b/src/compare.cpp index ba86768..3aff89c 100644 --- a/src/compare.cpp +++ b/src/compare.cpp @@ -166,8 +166,8 @@ void compare_cov(std::filesystem::path sequence_file, urng_t kmer_view, urng_t2 std::vector covereage_avg{}; std::ofstream outfile; - seqan3::sequence_file_input> fin{sequence_file}; - for (auto & [seq] : fin) + seqan3::sequence_file_input> fin{sequence_file}; + for (auto & [id, seq] : fin) { auto kmers = seq | kmer_view; auto submers = seq | input_view; @@ -180,6 +180,7 @@ void compare_cov(std::filesystem::path sequence_file, urng_t kmer_view, urng_t2 submers2.push_back(sub); coverage(kmers, submers2, covs, args.shape); + int i{0}; for(auto & elem : covs) { if (elem == 0) @@ -190,11 +191,22 @@ void compare_cov(std::filesystem::path sequence_file, urng_t kmer_view, urng_t2 { if (island > 0) { + if (island >23) + std::cout << i << " " << island << ", " << id<< "\n"; islands.push_back(island); island = 0; } covered++; } + i++; + } + + if (island > 0) + { + if (island >23) + std::cout << i << " " << island << ", " << id<< "\n"; + islands.push_back(island); + island = 0; } covered_percentage.push_back(covered); @@ -247,10 +259,10 @@ void do_comparison(std::vector sequence_files, range_argu case kmer: compare(sequence_files, seqan3::views::kmer_hash(args.shape), "kmer_hash_"+std::to_string(args.k_size), args); break; case minimiser: compare(sequence_files, seqan3::views::minimiser_hash(args.shape, - args.w_size), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + args.w_size, args.seed_se), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; case modmers: compare(sequence_files, modmer_hash(args.shape, - args.w_size), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + args.mod1, args.mod2, args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.mod1) + "_" + std::to_string(args.mod2), args); break; case strobemer: std::ranges::empty_view empty{}; if (args.rand & (args.order == 2)) @@ -275,12 +287,12 @@ void do_coverage(std::filesystem::path sequence_file, range_arguments & args) case kmer: compare_cov(sequence_file, seqan3::views::kmer_hash(args.shape), seqan3::views::kmer_hash(args.shape), "kmer_hash_"+std::to_string(args.k_size), args); break; case minimiser: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, - seqan3::window_size{args.shape.size()}), seqan3::views::minimiser_hash(args.shape, - args.w_size), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + seqan3::window_size{args.shape.size()}, args.seed_se), seqan3::views::minimiser_hash(args.shape, + args.w_size, args.seed_se), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; case modmers: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, - seqan3::window_size{args.shape.size()}), modmer_hash(args.shape, - args.w_size), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); + seqan3::window_size{args.shape.size()}, args.seed_se), modmer_hash(args.shape, + args.mod1, args.mod2, args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.mod1) + "_" + std::to_string(args.mod2), args); break; } } diff --git a/src/main.cpp b/src/main.cpp index 550ddda..eb4fca3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,6 +39,12 @@ void read_range_arguments_minimiser(seqan3::argument_parser & parser, range_argu parser.add_option(se, '\0', "seed", "Define seed."); } +void read_range_arguments_modmer(seqan3::argument_parser & parser, range_arguments & args) +{ + parser.add_option(args.mod1, '\0', "mod1", "Define mod1. Default: 17."); + parser.add_option(args.mod2, '\0', "mod2", "Define mod2. Default: 2."); +} + void parsing(range_arguments & args) { args.w_size = seqan3::window_size{w_size}; @@ -60,6 +66,7 @@ int coverage(seqan3::argument_parser & parser) parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer"}); read_range_arguments_minimiser(parser, args); + read_range_arguments_modmer(parser, args); try { @@ -97,6 +104,7 @@ int speed(seqan3::argument_parser & parser) parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer", "strobemer"}); read_range_arguments_minimiser(parser, args); + read_range_arguments_modmer(parser, args); read_range_arguments_strobemers(parser, args); try diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp index 60a5fbc..93d77fa 100644 --- a/test/api/modmer_hash_test.cpp +++ b/test/api/modmer_hash_test.cpp @@ -23,16 +23,19 @@ using result_t = std::vector; using iterator_type = std::ranges::iterator_t() | modmer_hash(seqan3::ungapped{4}, - seqan3::window_size{2}, + 4, + 2, seqan3::seed{0}))>; static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4}; static constexpr seqan3::shape gapped_shape = 0b1001_shape; static constexpr auto ungapped_view = modmer_hash(ungapped_shape, - seqan3::window_size{2}, + 4, + 2, seqan3::seed{0}); static constexpr auto gapped_view = modmer_hash(gapped_shape, - seqan3::window_size{2}, + 4, + 2, seqan3::seed{0}); template <> diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp index d81f9da..066ca2e 100644 --- a/test/api/modmer_test.cpp +++ b/test/api/modmer_test.cpp @@ -29,7 +29,7 @@ inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_ inline static constexpr auto rev_gapped_kmer_view = seqan3::views::complement | std::views::reverse | seqan3::views::kmer_hash(0b1001_shape) | std::views::reverse; -inline static constexpr auto modmer_no_rev_view = modmer(2); +inline static constexpr auto modmer_no_rev_view = modmer(4, 2); using iterator_type = std::ranges::iterator_t< decltype(std::declval() | kmer_view @@ -39,7 +39,7 @@ using two_ranges_iterator_type = std::ranges::iterator_t< decltype(seqan3::detai | kmer_view, std::declval() | rev_kmer_view, - 2})>; + 4, 2})>; template <> struct iterator_fixture : public ::testing::Test @@ -51,8 +51,8 @@ struct iterator_fixture : public ::testing::Test decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})) vec = text | kmer_view; result_t expected_range{26, 166, 152, 134, 252, 242}; - decltype(modmer(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 2)) test_range = - modmer(vec, 2); + decltype(modmer(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 4, 2)) test_range = + modmer(vec, 4, 2); }; template <> @@ -69,8 +69,8 @@ struct iterator_fixture : public ::testing::Test using reverse_kmer_hash_view_t = decltype(rev_kmer_view(text)); - using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 2}); - test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 2}; + using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 4, 2}); + test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 4, 2}; }; using test_types = ::testing::Types; @@ -135,11 +135,11 @@ TYPED_TEST(modmer_view_properties_test, concepts) auto v = text | kmer_view | modmer_no_rev_view; compare_types(v); - auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 2}; + auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 4, 2}; if constexpr (std::ranges::bidirectional_range) // excludes forward_list { - auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2}; + auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 4, 2}; compare_types(v3); } } @@ -157,20 +157,20 @@ TYPED_TEST(modmer_view_properties_test, different_inputs_kmer_hash) if constexpr (std::ranges::bidirectional_range) // excludes forward_list { - EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2})) ; - EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 2})); + EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 4, 2})) ; + EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 4, 2})); } } TEST_F(modmer_test, ungapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 2})); + EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 4, 2})); EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_no_rev_view); - auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 2}; + auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 4, 2}; EXPECT_TRUE(std::ranges::empty(empty_view)); auto empty_view2 = too_short_text | kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view2)); - EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 2})); + EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 4, 2})); EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | modmer_no_rev_view); } @@ -179,17 +179,17 @@ TEST_F(modmer_test, gapped_kmer_hash) { EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | gapped_kmer_view, text1 | rev_gapped_kmer_view, - 2})); + 4, 2})); EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_no_rev_view); auto empty_view = seqan3::detail::modmer_view{too_short_text | gapped_kmer_view, too_short_text | rev_gapped_kmer_view, - 2}; + 4, 2}; EXPECT_TRUE(std::ranges::empty(empty_view)); auto empty_view2 = too_short_text | gapped_kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view2)); EXPECT_RANGE_EQ(result3_gapped, (seqan3::detail::modmer_view{text3 | gapped_kmer_view, text3 | rev_gapped_kmer_view, - 2})); + 4, 2})); EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | modmer_no_rev_view); } @@ -201,21 +201,21 @@ TEST_F(modmer_test, combinability) EXPECT_RANGE_EQ(result3_ungapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | kmer_view, text3 | stop_at_t | rev_kmer_view, - 2})); + 4, 2})); EXPECT_RANGE_EQ(result3_gapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | gapped_kmer_view, text3 | stop_at_t | rev_gapped_kmer_view, - 2})); + 4, 2})); auto start_at_a = std::views::drop(6); EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, text3 | start_at_a | rev_kmer_view, - 2})); + 4, 2})); EXPECT_RANGE_EQ(result3_gapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | gapped_kmer_view, text3 | start_at_a | rev_gapped_kmer_view, - 2})); + 4, 2})); } TEST_F(modmer_test, two_ranges_unequal_size) { - EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 2}), std::invalid_argument); + EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 4, 2}), std::invalid_argument); } diff --git a/test/cli/minions_coverage_test.cpp b/test/cli/minions_coverage_test.cpp index cbef732..12a5fb5 100644 --- a/test/cli/minions_coverage_test.cpp +++ b/test/cli/minions_coverage_test.cpp @@ -40,7 +40,7 @@ TEST_F(cli_test, minimiser) TEST_F(cli_test, modmer) { - cli_test_result result = execute_app("minions coverage --method modmer -k 19 -w 19 ", data("example1.fasta")); + cli_test_result result = execute_app("minions coverage --method modmer -k 19 --mod1 17 --mod2 2 ", data("example1.fasta")); EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{}); From 9cc9516329380484ec2859f3ddc96b63a2e5d60b Mon Sep 17 00:00:00 2001 From: mitradarja Date: Tue, 7 Dec 2021 15:13:14 +0100 Subject: [PATCH 07/20] [MISC] Modmer has one input parameter. --- include/compare.h | 9 +---- include/modmer.hpp | 62 ++++++++++-------------------- include/modmer_hash.hpp | 13 +++---- src/compare.cpp | 4 +- src/main.cpp | 8 ---- test/api/modmer_hash_test.cpp | 3 -- test/api/modmer_test.cpp | 42 ++++++++++---------- test/cli/minions_coverage_test.cpp | 2 +- 8 files changed, 52 insertions(+), 91 deletions(-) diff --git a/include/compare.h b/include/compare.h index 7af4413..bdd1757 100644 --- a/include/compare.h +++ b/include/compare.h @@ -28,13 +28,6 @@ struct minimiser_arguments seqan3::window_size w_size; }; -struct modmer_arguments -{ - // Needed for modmers - uint32_t mod1{17}; - uint32_t mod2{2}; -}; - struct strobemer_arguments { // Needed for strobemers @@ -46,7 +39,7 @@ struct strobemer_arguments unsigned int order; }; -struct range_arguments : minimiser_arguments, modmer_arguments, strobemer_arguments +struct range_arguments : minimiser_arguments, strobemer_arguments { std::filesystem::path path_out{"./"}; diff --git a/include/modmer.hpp b/include/modmer.hpp index 354b587..e02c4f6 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -72,7 +72,6 @@ class modmer_view : public std::ranges::view_interface class basic_iterator; @@ -97,10 +96,9 @@ class modmer_view : public std::ranges::view_interface //!\cond requires (std::ranges::viewable_range && std::constructible_from>>) //!\endcond - modmer_view(other_urng1_t && urange1, size_t const mod_used_1, size_t const mod_used_2) : + modmer_view(other_urng1_t && urange1, size_t const mod_used_1) : urange1{std::views::all(std::forward(urange1))}, urange2{default_urng2_t{}}, - mod_used_1{mod_used_1}, - mod_used_2{mod_used_2} + mod_used_1{mod_used_1} {} /*!\brief Construct from two views and a given number of values in one window. @@ -129,13 +125,11 @@ class modmer_view : public std::ranges::view_interface //!\cond @@ -163,11 +156,10 @@ class modmer_view : public std::ranges::view_interface && std::constructible_from>) //!\endcond - modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used_1, size_t const mod_used_2) : + modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used_1) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - mod_used_1{mod_used_1}, - mod_used_2{mod_used_2} + mod_used_1{mod_used_1} { if constexpr (second_range_is_given) { @@ -198,8 +190,7 @@ class modmer_view : public std::ranges::view_interface::basic_iterator * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] mod_used_1 The number of values in one window. - * \param[in] mod_used_2 The number of values in one window. * * \details * @@ -309,17 +298,14 @@ class modmer_view::basic_iterator basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, urng2_iterator_t urng2_iterator, - size_t mod_used_1, - size_t mod_used_2) : + size_t mod_used_1) : urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)}, urng2_iterator{std::move(urng2_iterator)}, - mod_1{mod_used_1}, - mod_2{mod_used_2} + mod_1{mod_used_1} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); mod_used_1 = std::min(mod_used_1, size); - mod_used_2 = std::min(mod_used_2, size); first_modmer(); } @@ -400,10 +386,6 @@ class modmer_view::basic_iterator urng2_iterator_t urng2_iterator{}; size_t mod_1{}; - size_t mod_2{}; - -/* size_t mod_used_1{}; - size_t mod_used_2{}; */ //!\brief Advances the window to the next position. void advance() @@ -440,7 +422,7 @@ class modmer_view::basic_iterator if constexpr (!second_range_is_given) { - if ((*urng1_iterator % mod_1) % mod_2 == 0) + if (*urng1_iterator % mod_1 == 0) { modmer_value = *urng1_iterator; return true; @@ -449,7 +431,7 @@ class modmer_view::basic_iterator else { value_type mod = std::min(*urng1_iterator, *urng2_iterator); - if ((mod % mod_1) % mod_2 == 0) + if (mod % mod_1 == 0) { modmer_value = mod; return true; @@ -462,12 +444,12 @@ class modmer_view::basic_iterator //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, size_t const mod_used_1, size_t const mod_used_2) -> modmer_view>; +modmer_view(rng1_t &&, size_t const mod_used_1) -> modmer_view>; //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1, size_t const mod_used_2) -> modmer_view, - std::views::all_t>; +modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1) -> modmer_view, + std::views::all_t>; // --------------------------------------------------------------------------------------------------------------------- // modmer_fn (adaptor definition) @@ -479,9 +461,9 @@ modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1, size_t const mod_used struct modmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. - constexpr auto operator()(size_t const mod_used_1, size_t const mod_used_2) const + constexpr auto operator()(size_t const mod_used_1) const { - return adaptor_from_functor{*this, mod_used_1, mod_used_2}; + return adaptor_from_functor{*this, mod_used_1}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many @@ -490,11 +472,10 @@ struct modmer_fn * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. * \param[in] mod_used_1 The number of values in one window. - * \param[in] mod_used_2 The number of values in one window. * \returns A range of converted values. */ template - constexpr auto operator()(urng1_t && urange1, size_t const mod_used_1, size_t const mod_used_2) const + constexpr auto operator()(urng1_t && urange1, size_t const mod_used_1) const { static_assert(std::ranges::viewable_range, "The range parameter to views::modmer cannot be a temporary of a non-view range."); @@ -505,7 +486,7 @@ struct modmer_fn throw std::invalid_argument{"The chosen mod_used_1 is not valid. " "Please choose a value greater than 1 or use two ranges."}; - return modmer_view{urange1, mod_used_1, mod_used_2}; + return modmer_view{urange1, mod_used_1}; } }; //![adaptor_def] @@ -517,7 +498,6 @@ struct modmer_fn * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] * \param[in] mod_used_1 The number of values in one window. - * \param[in] mod_used_2 The number of values in one window. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index 2605193..c2c750c 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -31,10 +31,10 @@ struct modmer_hash_fn * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. * \returns A range of converted elements. */ - /*constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, uint32_t const mod_used_2) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used_1) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, mod_used_2}; - }*/ + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1}; + } /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. @@ -43,9 +43,9 @@ struct modmer_hash_fn * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, uint32_t const mod_used_2, seed const seed) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, seed const seed) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, mod_used_2, seed}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, seed}; } /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. @@ -61,7 +61,6 @@ struct modmer_hash_fn constexpr auto operator()(urng_t && urange, shape const & shape, uint32_t const mod_used_1, - uint32_t const mod_used_2, seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const { static_assert(std::ranges::viewable_range, @@ -82,7 +81,7 @@ struct modmer_hash_fn {return i ^ seed.get();}) | std::views::reverse; - return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used_1, mod_used_2); + return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used_1); } }; diff --git a/src/compare.cpp b/src/compare.cpp index 3aff89c..895f9c6 100644 --- a/src/compare.cpp +++ b/src/compare.cpp @@ -262,7 +262,7 @@ void do_comparison(std::vector sequence_files, range_argu args.w_size, args.seed_se), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; case modmers: compare(sequence_files, modmer_hash(args.shape, - args.mod1, args.mod2, args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.mod1) + "_" + std::to_string(args.mod2), args); + args.w_size.get(), args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; case strobemer: std::ranges::empty_view empty{}; if (args.rand & (args.order == 2)) @@ -292,7 +292,7 @@ void do_coverage(std::filesystem::path sequence_file, range_arguments & args) break; case modmers: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, seqan3::window_size{args.shape.size()}, args.seed_se), modmer_hash(args.shape, - args.mod1, args.mod2, args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.mod1) + "_" + std::to_string(args.mod2), args); + args.w_size.get(), args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; } } diff --git a/src/main.cpp b/src/main.cpp index eb4fca3..550ddda 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,12 +39,6 @@ void read_range_arguments_minimiser(seqan3::argument_parser & parser, range_argu parser.add_option(se, '\0', "seed", "Define seed."); } -void read_range_arguments_modmer(seqan3::argument_parser & parser, range_arguments & args) -{ - parser.add_option(args.mod1, '\0', "mod1", "Define mod1. Default: 17."); - parser.add_option(args.mod2, '\0', "mod2", "Define mod2. Default: 2."); -} - void parsing(range_arguments & args) { args.w_size = seqan3::window_size{w_size}; @@ -66,7 +60,6 @@ int coverage(seqan3::argument_parser & parser) parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer"}); read_range_arguments_minimiser(parser, args); - read_range_arguments_modmer(parser, args); try { @@ -104,7 +97,6 @@ int speed(seqan3::argument_parser & parser) parser.add_option(method, '\0', "method", "Pick your method.", seqan3::option_spec::required, seqan3::value_list_validator{"kmer", "minimiser", "modmer", "strobemer"}); read_range_arguments_minimiser(parser, args); - read_range_arguments_modmer(parser, args); read_range_arguments_strobemers(parser, args); try diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp index 93d77fa..d9d6618 100644 --- a/test/api/modmer_hash_test.cpp +++ b/test/api/modmer_hash_test.cpp @@ -23,18 +23,15 @@ using result_t = std::vector; using iterator_type = std::ranges::iterator_t() | modmer_hash(seqan3::ungapped{4}, - 4, 2, seqan3::seed{0}))>; static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4}; static constexpr seqan3::shape gapped_shape = 0b1001_shape; static constexpr auto ungapped_view = modmer_hash(ungapped_shape, - 4, 2, seqan3::seed{0}); static constexpr auto gapped_view = modmer_hash(gapped_shape, - 4, 2, seqan3::seed{0}); diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp index 066ca2e..d81f9da 100644 --- a/test/api/modmer_test.cpp +++ b/test/api/modmer_test.cpp @@ -29,7 +29,7 @@ inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_ inline static constexpr auto rev_gapped_kmer_view = seqan3::views::complement | std::views::reverse | seqan3::views::kmer_hash(0b1001_shape) | std::views::reverse; -inline static constexpr auto modmer_no_rev_view = modmer(4, 2); +inline static constexpr auto modmer_no_rev_view = modmer(2); using iterator_type = std::ranges::iterator_t< decltype(std::declval() | kmer_view @@ -39,7 +39,7 @@ using two_ranges_iterator_type = std::ranges::iterator_t< decltype(seqan3::detai | kmer_view, std::declval() | rev_kmer_view, - 4, 2})>; + 2})>; template <> struct iterator_fixture : public ::testing::Test @@ -51,8 +51,8 @@ struct iterator_fixture : public ::testing::Test decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})) vec = text | kmer_view; result_t expected_range{26, 166, 152, 134, 252, 242}; - decltype(modmer(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 4, 2)) test_range = - modmer(vec, 4, 2); + decltype(modmer(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 2)) test_range = + modmer(vec, 2); }; template <> @@ -69,8 +69,8 @@ struct iterator_fixture : public ::testing::Test using reverse_kmer_hash_view_t = decltype(rev_kmer_view(text)); - using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 4, 2}); - test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 4, 2}; + using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 2}); + test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 2}; }; using test_types = ::testing::Types; @@ -135,11 +135,11 @@ TYPED_TEST(modmer_view_properties_test, concepts) auto v = text | kmer_view | modmer_no_rev_view; compare_types(v); - auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 4, 2}; + auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 2}; if constexpr (std::ranges::bidirectional_range) // excludes forward_list { - auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 4, 2}; + auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2}; compare_types(v3); } } @@ -157,20 +157,20 @@ TYPED_TEST(modmer_view_properties_test, different_inputs_kmer_hash) if constexpr (std::ranges::bidirectional_range) // excludes forward_list { - EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 4, 2})) ; - EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 4, 2})); + EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2})) ; + EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 2})); } } TEST_F(modmer_test, ungapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 4, 2})); + EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 2})); EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_no_rev_view); - auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 4, 2}; + auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 2}; EXPECT_TRUE(std::ranges::empty(empty_view)); auto empty_view2 = too_short_text | kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view2)); - EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 4, 2})); + EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 2})); EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | modmer_no_rev_view); } @@ -179,17 +179,17 @@ TEST_F(modmer_test, gapped_kmer_hash) { EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | gapped_kmer_view, text1 | rev_gapped_kmer_view, - 4, 2})); + 2})); EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_no_rev_view); auto empty_view = seqan3::detail::modmer_view{too_short_text | gapped_kmer_view, too_short_text | rev_gapped_kmer_view, - 4, 2}; + 2}; EXPECT_TRUE(std::ranges::empty(empty_view)); auto empty_view2 = too_short_text | gapped_kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view2)); EXPECT_RANGE_EQ(result3_gapped, (seqan3::detail::modmer_view{text3 | gapped_kmer_view, text3 | rev_gapped_kmer_view, - 4, 2})); + 2})); EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | modmer_no_rev_view); } @@ -201,21 +201,21 @@ TEST_F(modmer_test, combinability) EXPECT_RANGE_EQ(result3_ungapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | kmer_view, text3 | stop_at_t | rev_kmer_view, - 4, 2})); + 2})); EXPECT_RANGE_EQ(result3_gapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | gapped_kmer_view, text3 | stop_at_t | rev_gapped_kmer_view, - 4, 2})); + 2})); auto start_at_a = std::views::drop(6); EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, text3 | start_at_a | rev_kmer_view, - 4, 2})); + 2})); EXPECT_RANGE_EQ(result3_gapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | gapped_kmer_view, text3 | start_at_a | rev_gapped_kmer_view, - 4, 2})); + 2})); } TEST_F(modmer_test, two_ranges_unequal_size) { - EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 4, 2}), std::invalid_argument); + EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 2}), std::invalid_argument); } diff --git a/test/cli/minions_coverage_test.cpp b/test/cli/minions_coverage_test.cpp index 12a5fb5..c68f348 100644 --- a/test/cli/minions_coverage_test.cpp +++ b/test/cli/minions_coverage_test.cpp @@ -40,7 +40,7 @@ TEST_F(cli_test, minimiser) TEST_F(cli_test, modmer) { - cli_test_result result = execute_app("minions coverage --method modmer -k 19 --mod1 17 --mod2 2 ", data("example1.fasta")); + cli_test_result result = execute_app("minions coverage --method modmer -k 19 -w 2 ", data("example1.fasta")); EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{}); From 8fd497803756f20175874de2108305e2a76a3683 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Tue, 7 Dec 2021 15:33:25 +0100 Subject: [PATCH 08/20] [MISC] Rename variables. --- include/modmer.hpp | 68 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index e02c4f6..4c352f4 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -71,7 +71,7 @@ class modmer_view : public std::ranges::view_interface class basic_iterator; @@ -95,10 +95,10 @@ class modmer_view : public std::ranges::view_interface //!\cond requires (std::ranges::viewable_range && std::constructible_from>>) //!\endcond - modmer_view(other_urng1_t && urange1, size_t const mod_used_1) : + modmer_view(other_urng1_t && urange1, size_t const mod_used) : urange1{std::views::all(std::forward(urange1))}, urange2{default_urng2_t{}}, - mod_used_1{mod_used_1} + mod_used{mod_used} {} /*!\brief Construct from two views and a given number of values in one window. @@ -124,12 +124,12 @@ class modmer_view : public std::ranges::view_interface //!\cond @@ -156,10 +156,10 @@ class modmer_view : public std::ranges::view_interface && std::constructible_from>) //!\endcond - modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used_1) : + modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - mod_used_1{mod_used_1} + mod_used{mod_used} { if constexpr (second_range_is_given) { @@ -190,7 +190,7 @@ class modmer_view : public std::ranges::view_interface::basic_iterator * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. - * \param[in] mod_used_1 The number of values in one window. + * \param[in] mod_used The number of values in one window. * * \details * @@ -298,14 +298,14 @@ class modmer_view::basic_iterator basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, urng2_iterator_t urng2_iterator, - size_t mod_used_1) : + size_t mod_used) : urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)}, urng2_iterator{std::move(urng2_iterator)}, - mod_1{mod_used_1} + mod{mod_used} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); - mod_used_1 = std::min(mod_used_1, size); + mod_used = std::min(mod_used, size); first_modmer(); } @@ -385,7 +385,7 @@ class modmer_view::basic_iterator //!\brief Iterator to the rightmost value of one window of the second range. urng2_iterator_t urng2_iterator{}; - size_t mod_1{}; + size_t mod{}; //!\brief Advances the window to the next position. void advance() @@ -422,7 +422,7 @@ class modmer_view::basic_iterator if constexpr (!second_range_is_given) { - if (*urng1_iterator % mod_1 == 0) + if (*urng1_iterator % mod == 0) { modmer_value = *urng1_iterator; return true; @@ -430,10 +430,10 @@ class modmer_view::basic_iterator } else { - value_type mod = std::min(*urng1_iterator, *urng2_iterator); - if (mod % mod_1 == 0) + value_type value = std::min(*urng1_iterator, *urng2_iterator); + if (value % mod == 0) { - modmer_value = mod; + modmer_value = value; return true; } } @@ -444,11 +444,11 @@ class modmer_view::basic_iterator //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, size_t const mod_used_1) -> modmer_view>; +modmer_view(rng1_t &&, size_t const mod_used) -> modmer_view>; //!\brief A deduction guide for the view class template. template -modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1) -> modmer_view, +modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used) -> modmer_view, std::views::all_t>; // --------------------------------------------------------------------------------------------------------------------- @@ -461,9 +461,9 @@ modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used_1) -> modmer_view - constexpr auto operator()(urng1_t && urange1, size_t const mod_used_1) const + constexpr auto operator()(urng1_t && urange1, size_t const mod_used) const { static_assert(std::ranges::viewable_range, "The range parameter to views::modmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::modmer must model std::ranges::forward_range."); - if (mod_used_1 == 1) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen mod_used_1 is not valid. " + if (mod_used == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen mod_used is not valid. " "Please choose a value greater than 1 or use two ranges."}; - return modmer_view{urange1, mod_used_1}; + return modmer_view{urange1, mod_used}; } }; //![adaptor_def] @@ -497,7 +497,7 @@ struct modmer_fn * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used_1 The number of values in one window. + * \param[in] mod_used The number of values in one window. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views From c506edacfd4f9b5a9e4b69eb500f445c5f459471 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Thu, 9 Dec 2021 18:45:30 +0100 Subject: [PATCH 09/20] [MISC] Modmer view only takes one range. --- include/modmer.hpp | 125 +++++++-------------------------------- test/api/modmer_test.cpp | 93 ++--------------------------- 2 files changed, 29 insertions(+), 189 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index 4c352f4..2b06b92 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -41,34 +41,19 @@ namespace seqan3::detail * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. */ -template > -class modmer_view : public std::ranges::view_interface> +template +class modmer_view : public std::ranges::view_interface> { private: static_assert(std::ranges::forward_range, "The modmer_view only works on forward_ranges."); - static_assert(std::ranges::forward_range, "The modmer_view only works on forward_ranges."); static_assert(std::totally_ordered>, "The reference type of the underlying range must model std::totally_ordered."); - //!\brief The default argument of the second range. - using default_urng2_t = std::ranges::empty_view; - - //!\brief Boolean variable, which is true, when second range is not of empty type. - static constexpr bool second_range_is_given = !std::same_as; - - static_assert(!second_range_is_given || std::totally_ordered_with, - std::ranges::range_reference_t>, - "The reference types of the underlying ranges must model std::totally_ordered_with."); - //!\brief Whether the given ranges are const_iterable - static constexpr bool const_iterable = seqan3::const_iterable_range && - seqan3::const_iterable_range; + static constexpr bool const_iterable = seqan3::const_iterable_range; //!\brief The first underlying range. urng1_t urange1{}; - //!\brief The second underlying range. - urng2_t urange2{}; //!\brief The number of values in one window. size_t mod_used{}; @@ -84,7 +69,7 @@ class modmer_view : public std::ranges::view_interface && std::default_initializable + requires std::default_initializable = default; //!< Defaulted. modmer_view(modmer_view const & rhs) = default; //!< Defaulted. modmer_view(modmer_view && rhs) = default; //!< Defaulted. @@ -98,7 +83,8 @@ class modmer_view : public std::ranges::view_interface(urange1))}, - urange2{default_urng2_t{}}, mod_used{mod_used} {} - /*!\brief Construct from two views and a given number of values in one window. - * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] mod_used The number of values in one window. - */ - modmer_view(urng1_t urange1, urng2_t urange2, size_t const mod_used) : - urange1{std::move(urange1)}, - urange2{std::move(urange2)}, - mod_used{mod_used} - { - if constexpr (second_range_is_given) - { - if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) - throw std::invalid_argument{"The two ranges do not have the same size."}; - } - } - - /*!\brief Construct from two non-views that can be view-wrapped and a given number of values in one window. - * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible - from urng1_t. - * \tparam other_urng2_t The type of another urange. Must model std::ranges::viewable_range and be constructible - from urng2_t. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] mod_used The number of values in one window. - */ - template - //!\cond - requires (std::ranges::viewable_range && - std::constructible_from> && - std::ranges::viewable_range && - std::constructible_from>) - //!\endcond - modmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const mod_used) : - urange1{std::views::all(std::forward(urange1))}, - urange2{std::views::all(std::forward(urange2))}, - mod_used{mod_used} - { - if constexpr (second_range_is_given) - { - if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) - throw std::invalid_argument{"The two ranges do not have the same size."}; - } - } - //!\} - /*!\name Iterators * \{ */ @@ -189,7 +124,6 @@ class modmer_view : public std::ranges::view_interface +template template -class modmer_view::basic_iterator +class modmer_view::basic_iterator { private: //!\brief The sentinel type of the first underlying range. using urng1_sentinel_t = maybe_const_sentinel_t; //!\brief The iterator type of the first underlying range. using urng1_iterator_t = maybe_const_iterator_t; - //!\brief The iterator type of the second underlying range. - using urng2_iterator_t = maybe_const_iterator_t; template friend class basic_iterator; @@ -278,8 +209,7 @@ class modmer_view::basic_iterator //!\endcond : modmer_value{std::move(it.modmer_value)}, urng1_iterator{std::move(it.urng1_iterator)}, - urng1_sentinel{std::move(it.urng1_sentinel)}, - urng2_iterator{std::move(it.urng2_iterator)} + urng1_sentinel{std::move(it.urng1_sentinel)} {} /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number @@ -297,11 +227,9 @@ class modmer_view::basic_iterator */ basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, - urng2_iterator_t urng2_iterator, size_t mod_used) : urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)}, - urng2_iterator{std::move(urng2_iterator)}, mod{mod_used} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); @@ -318,8 +246,7 @@ class modmer_view::basic_iterator //!\brief Compare to another basic_iterator. friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) { - return (lhs.urng1_iterator == rhs.urng1_iterator) && - (rhs.urng2_iterator == rhs.urng2_iterator); + return (lhs.urng1_iterator == rhs.urng1_iterator); } //!\brief Compare to another basic_iterator. @@ -382,17 +309,16 @@ class modmer_view::basic_iterator urng1_iterator_t urng1_iterator{}; //!brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; - //!\brief Iterator to the rightmost value of one window of the second range. - urng2_iterator_t urng2_iterator{}; size_t mod{}; + size_t distance{}; + //!\brief Advances the window to the next position. void advance() { + distance++; ++urng1_iterator; - if constexpr (second_range_is_given) - ++urng2_iterator; } void first_modmer() @@ -420,22 +346,18 @@ class modmer_view::basic_iterator if (urng1_iterator == urng1_sentinel) return true; - if constexpr (!second_range_is_given) + if (*urng1_iterator % mod == 0) { - if (*urng1_iterator % mod == 0) + if constexpr (measure_distance) { - modmer_value = *urng1_iterator; - return true; + modmer_value = distance; + distance = 0; } - } - else - { - value_type value = std::min(*urng1_iterator, *urng2_iterator); - if (value % mod == 0) + else { - modmer_value = value; - return true; + modmer_value = *urng1_iterator; } + return true; } return false; @@ -446,10 +368,9 @@ class modmer_view::basic_iterator template modmer_view(rng1_t &&, size_t const mod_used) -> modmer_view>; -//!\brief A deduction guide for the view class template. -template -modmer_view(rng1_t &&, rng2_t &&, size_t const mod_used) -> modmer_view, - std::views::all_t>; +template +modmer_view(rng1_t &&, size_t const mod_used) -> modmer_view, m1>; + // --------------------------------------------------------------------------------------------------------------------- // modmer_fn (adaptor definition) diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp index d81f9da..11f76da 100644 --- a/test/api/modmer_test.cpp +++ b/test/api/modmer_test.cpp @@ -22,24 +22,13 @@ using seqan3::operator""_shape; using result_t = std::vector; inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapped{4}); -inline static constexpr auto rev_kmer_view = seqan3::views::complement | std::views::reverse - | kmer_view - | std::views::reverse; inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_shape); -inline static constexpr auto rev_gapped_kmer_view = seqan3::views::complement | std::views::reverse - | seqan3::views::kmer_hash(0b1001_shape) - | std::views::reverse; + inline static constexpr auto modmer_no_rev_view = modmer(2); using iterator_type = std::ranges::iterator_t< decltype(std::declval() | kmer_view | modmer_no_rev_view)>; -using two_ranges_iterator_type = std::ranges::iterator_t< decltype(seqan3::detail::modmer_view{ - std::declval() - | kmer_view, - std::declval() - | rev_kmer_view, - 2})>; template <> struct iterator_fixture : public ::testing::Test @@ -55,25 +44,7 @@ struct iterator_fixture : public ::testing::Test modmer(vec, 2); }; -template <> -struct iterator_fixture : public ::testing::Test -{ - using iterator_tag = std::forward_iterator_tag; - static constexpr bool const_iterable = true; - - seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; - using kmer_hash_view_t = decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})); - - kmer_hash_view_t vec = kmer_view(text); - result_t expected_range{26, 152, 6, 192, 112}; - - using reverse_kmer_hash_view_t = decltype(rev_kmer_view(text)); - - using test_range_t = decltype(seqan3::detail::modmer_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 2}); - test_range_t test_range = seqan3::detail::modmer_view{vec, rev_kmer_view(text), 2}; -}; - -using test_types = ::testing::Types; +using test_types = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_types, ); template @@ -100,16 +71,11 @@ class modmer_test : public ::testing::Test // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa std::vector text3{"ACGGCGACGTTTAG"_dna4}; - result_t result3_ungapped{26, 152, 6, 192, 112}; // ACGG, GCGA, aacg, taaa, ctaa - result_t result3_gapped{2, 8, 2, 12, 4}; // A--G, G--A, a--g, t--a, c--a - "-" for gap result_t result3_ungapped_no_rev{26, 166, 152, 134, 252, 242}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG result_t result3_gapped_no_rev{2, 10, 8, 10, 12, 14}; // A--G, G--G, G--A, G--G, T--A, T--G "-" for gap - result_t result3_ungapped_stop{26, 152}; // ACGG, GCGA result_t result3_ungapped_no_rev_stop{26, 166, 152, 134}; // ACGG, GGCG, GCGA, GACG - result_t result3_gapped_stop{2, 8}; // A--G, G--A result_t result3_gapped_no_rev_stop{2, 10, 8, 10}; - result_t result3_ungapped_start{6, 192, 112}; // For start at second A, aacg, taaa, ctaa - result_t result3_gapped_start{2, 12, 4}; // a--g, t--a, c--a - "-" for gap + result_t result3_ungapped_start{252, 242}; // For start at second A, aacg, taaa, ctaa result_t result3_ungapped_no_rev_start{242}; // For start at second A, TTAG result_t result3_gapped_no_rev_start{14}; // For start at second A, T--G }; @@ -135,61 +101,31 @@ TYPED_TEST(modmer_view_properties_test, concepts) auto v = text | kmer_view | modmer_no_rev_view; compare_types(v); - auto v2 = seqan3::detail::modmer_view{text | kmer_view, text | kmer_view, 2}; - - if constexpr (std::ranges::bidirectional_range) // excludes forward_list - { - auto v3 = seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2}; - compare_types(v3); - } } TYPED_TEST(modmer_view_properties_test, different_inputs_kmer_hash) { TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - result_t ungapped{216, 6, 192, 112}; // TCGA, aacg, taaa, ctaa - result_t gapped{12, 2, 12, 4}; // T--A, a--g, t--a, c--a - "-" for gap result_t ungapped_no_rev{182, 216, 134, 252, 242}; // GTCG, TCGA, GACG, TTTA, TTAG result_t gapped_no_rev{10, 12, 10, 12, 14}; // G--G, T--G, T--A, G--G, T--A, T--G "-" for gap EXPECT_RANGE_EQ(ungapped_no_rev, text | kmer_view | modmer_no_rev_view); EXPECT_RANGE_EQ(gapped_no_rev, text | gapped_kmer_view | modmer_no_rev_view); - - if constexpr (std::ranges::bidirectional_range) // excludes forward_list - { - EXPECT_RANGE_EQ(ungapped, (seqan3::detail::modmer_view{text | kmer_view, text | rev_kmer_view, 2})) ; - EXPECT_RANGE_EQ(gapped, (seqan3::detail::modmer_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 2})); - } } TEST_F(modmer_test, ungapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | kmer_view, text1 | rev_kmer_view, 2})); EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_no_rev_view); - auto empty_view = seqan3::detail::modmer_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 2}; + auto empty_view = too_short_text | kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view)); - auto empty_view2 = too_short_text | kmer_view | modmer_no_rev_view; - EXPECT_TRUE(std::ranges::empty(empty_view2)); - EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::modmer_view{text3 | kmer_view, text3 | rev_kmer_view, 2})); EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | modmer_no_rev_view); - } TEST_F(modmer_test, gapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, (seqan3::detail::modmer_view{text1 | gapped_kmer_view, - text1 | rev_gapped_kmer_view, - 2})); EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_no_rev_view); - auto empty_view = seqan3::detail::modmer_view{too_short_text | gapped_kmer_view, - too_short_text | rev_gapped_kmer_view, - 2}; + auto empty_view = too_short_text | gapped_kmer_view | modmer_no_rev_view; EXPECT_TRUE(std::ranges::empty(empty_view)); - auto empty_view2 = too_short_text | gapped_kmer_view | modmer_no_rev_view; - EXPECT_TRUE(std::ranges::empty(empty_view2)); - EXPECT_RANGE_EQ(result3_gapped, (seqan3::detail::modmer_view{text3 | gapped_kmer_view, - text3 | rev_gapped_kmer_view, - 2})); EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | modmer_no_rev_view); } @@ -199,23 +135,6 @@ TEST_F(modmer_test, combinability) EXPECT_RANGE_EQ(result3_ungapped_no_rev_stop, text3 | stop_at_t | kmer_view | modmer_no_rev_view); EXPECT_RANGE_EQ(result3_gapped_no_rev_stop, text3 | stop_at_t | gapped_kmer_view | modmer_no_rev_view); - EXPECT_RANGE_EQ(result3_ungapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | kmer_view, - text3 | stop_at_t | rev_kmer_view, - 2})); - EXPECT_RANGE_EQ(result3_gapped_stop, (seqan3::detail::modmer_view{text3 | stop_at_t | gapped_kmer_view, - text3 | stop_at_t | rev_gapped_kmer_view, - 2})); - auto start_at_a = std::views::drop(6); - EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, - text3 | start_at_a | rev_kmer_view, - 2})); - EXPECT_RANGE_EQ(result3_gapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | gapped_kmer_view, - text3 | start_at_a | rev_gapped_kmer_view, - 2})); -} - -TEST_F(modmer_test, two_ranges_unequal_size) -{ - EXPECT_THROW((seqan3::detail::modmer_view{text1 | kmer_view, text3 | rev_kmer_view, 2}), std::invalid_argument); + EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, 2})); } From 0d7cd761954279eca6baf42c16b1040c70676481 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Thu, 9 Dec 2021 19:54:16 +0100 Subject: [PATCH 10/20] [FEATURE] Add distance to modmer --- include/modmer.hpp | 4 +-- test/api/modmer_test.cpp | 68 +++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index 2b06b92..8c65249 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -312,7 +312,7 @@ class modmer_view::basic_iterator size_t mod{}; - size_t distance{}; + size_t distance{1}; //!\brief Advances the window to the next position. void advance() @@ -350,7 +350,7 @@ class modmer_view::basic_iterator { if constexpr (measure_distance) { - modmer_value = distance; + modmer_value = distance - 1; distance = 0; } else diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp index 11f76da..e04ccbd 100644 --- a/test/api/modmer_test.cpp +++ b/test/api/modmer_test.cpp @@ -24,11 +24,11 @@ using result_t = std::vector; inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapped{4}); inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_shape); -inline static constexpr auto modmer_no_rev_view = modmer(2); +inline static constexpr auto modmer_view = modmer(2); using iterator_type = std::ranges::iterator_t< decltype(std::declval() | kmer_view - | modmer_no_rev_view)>; + | modmer_view)>; template <> struct iterator_fixture : public ::testing::Test @@ -65,19 +65,23 @@ class modmer_test : public ::testing::Test protected: std::vector text1{"AAAAAA"_dna4}; result_t result1{0, 0, 0}; // Same result for ungapped and gapped + result_t result1_distance{0, 0, 0}; std::vector too_short_text{"AC"_dna4}; // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // CCGT GCCG CGCC TCGC GTCG CGTC ACGT AACG AAAC TAAA CTAA // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa std::vector text3{"ACGGCGACGTTTAG"_dna4}; - result_t result3_ungapped_no_rev{26, 166, 152, 134, 252, 242}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG - result_t result3_gapped_no_rev{2, 10, 8, 10, 12, 14}; // A--G, G--G, G--A, G--G, T--A, T--G "-" for gap - result_t result3_ungapped_no_rev_stop{26, 166, 152, 134}; // ACGG, GGCG, GCGA, GACG - result_t result3_gapped_no_rev_stop{2, 10, 8, 10}; - result_t result3_ungapped_start{252, 242}; // For start at second A, aacg, taaa, ctaa - result_t result3_ungapped_no_rev_start{242}; // For start at second A, TTAG - result_t result3_gapped_no_rev_start{14}; // For start at second A, T--G + result_t result3_ungapped{26, 166, 152, 134, 252, 242}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG + result_t result3_gapped{2, 10, 8, 10, 12, 14}; // A--G, G--G, G--A, G--G, T--A, T--G "-" for gap + result_t result3_distance{0, 1, 0, 1, 3, 0}; + result_t result3_ungapped_stop{26, 166, 152, 134}; // ACGG, GGCG, GCGA, GACG + result_t result3_gapped_stop{2, 10, 8, 10}; + result_t result3_distance_stop{0, 1, 0, 1}; + result_t result3_ungapped_start{252, 242}; // For start at second A, TTTA, TTAG + result_t result3_gapped_start{14}; // For start at second A, T--G + result_t result3_distance_start{3, 0}; }; template @@ -99,7 +103,7 @@ TYPED_TEST(modmer_view_properties_test, concepts) TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - auto v = text | kmer_view | modmer_no_rev_view; + auto v = text | kmer_view | modmer_view; compare_types(v); } @@ -107,34 +111,54 @@ TYPED_TEST(modmer_view_properties_test, different_inputs_kmer_hash) { TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - result_t ungapped_no_rev{182, 216, 134, 252, 242}; // GTCG, TCGA, GACG, TTTA, TTAG - result_t gapped_no_rev{10, 12, 10, 12, 14}; // G--G, T--G, T--A, G--G, T--A, T--G "-" for gap - EXPECT_RANGE_EQ(ungapped_no_rev, text | kmer_view | modmer_no_rev_view); - EXPECT_RANGE_EQ(gapped_no_rev, text | gapped_kmer_view | modmer_no_rev_view); + result_t ungapped{182, 216, 134, 252, 242}; // GTCG, TCGA, GACG, TTTA, TTAG + result_t gapped{10, 12, 10, 12, 14}; // G--G, T--G, T--A, G--G, T--A, T--G "-" for gap + EXPECT_RANGE_EQ(ungapped, text | kmer_view | modmer_view); + EXPECT_RANGE_EQ(gapped, text | gapped_kmer_view | modmer_view); } TEST_F(modmer_test, ungapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_no_rev_view); - auto empty_view = too_short_text | kmer_view | modmer_no_rev_view; + EXPECT_RANGE_EQ(result1, text1 | kmer_view | modmer_view); + auto empty_view = too_short_text | kmer_view | modmer_view; EXPECT_TRUE(std::ranges::empty(empty_view)); - EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | modmer_no_rev_view); + EXPECT_RANGE_EQ(result3_ungapped, text3 | kmer_view | modmer_view); + + auto v1 = text1 | kmer_view; + EXPECT_RANGE_EQ(result1_distance, (seqan3::detail::modmer_view(v1, 2))); + auto v2 = text3 | kmer_view; + EXPECT_RANGE_EQ(result3_distance, (seqan3::detail::modmer_view(v2, 2))); } TEST_F(modmer_test, gapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_no_rev_view); - auto empty_view = too_short_text | gapped_kmer_view | modmer_no_rev_view; + EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | modmer_view); + auto empty_view = too_short_text | gapped_kmer_view | modmer_view; EXPECT_TRUE(std::ranges::empty(empty_view)); - EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | modmer_no_rev_view); + EXPECT_RANGE_EQ(result3_gapped, text3 | gapped_kmer_view | modmer_view); + + auto v1 = text1 | gapped_kmer_view; + EXPECT_RANGE_EQ(result1_distance, (seqan3::detail::modmer_view(v1, 2))); + auto v2 = text3 | gapped_kmer_view; + EXPECT_RANGE_EQ(result3_distance, (seqan3::detail::modmer_view(v2, 2))); } TEST_F(modmer_test, combinability) { auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); - EXPECT_RANGE_EQ(result3_ungapped_no_rev_stop, text3 | stop_at_t | kmer_view | modmer_no_rev_view); - EXPECT_RANGE_EQ(result3_gapped_no_rev_stop, text3 | stop_at_t | gapped_kmer_view | modmer_no_rev_view); + EXPECT_RANGE_EQ(result3_ungapped_stop, text3 | stop_at_t | kmer_view | modmer_view); + EXPECT_RANGE_EQ(result3_gapped_stop, text3 | stop_at_t | gapped_kmer_view | modmer_view); + + auto v1 = text3 | stop_at_t | kmer_view; + auto v2 = text3 | stop_at_t | kmer_view; + EXPECT_RANGE_EQ(result3_distance_stop, (seqan3::detail::modmer_view(v1, 2))); + EXPECT_RANGE_EQ(result3_distance_stop, (seqan3::detail::modmer_view(v2, 2))); auto start_at_a = std::views::drop(6); EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::modmer_view{text3 | start_at_a | kmer_view, 2})); + + auto v3 = text3 | start_at_a | kmer_view; + auto v4 = text3 | start_at_a | gapped_kmer_view; + EXPECT_RANGE_EQ(result3_distance_start, (seqan3::detail::modmer_view(v3, 2))); + EXPECT_RANGE_EQ(result3_distance_start, (seqan3::detail::modmer_view(v4, 2))); } From 3d4da5c5b570e8a57d35ccd538cced0aabe88a50 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Fri, 10 Dec 2021 15:25:40 +0100 Subject: [PATCH 11/20] [FEATURE] Add modmer and minimiser distance. --- include/minimiser_distance.hpp | 583 ++++++++++++++++++++++++++++ include/minimiser_hash_distance.hpp | 174 +++++++++ include/modmer.hpp | 4 - include/modmer_hash.hpp | 5 +- include/modmer_hash_distance.hpp | 139 +++++++ include/shared.hpp | 24 ++ src/compare.cpp | 32 +- test/api/modmer_hash_test.cpp | 25 +- test/cli/minions_coverage_test.cpp | 16 +- 9 files changed, 965 insertions(+), 37 deletions(-) create mode 100644 include/minimiser_distance.hpp create mode 100644 include/minimiser_hash_distance.hpp create mode 100644 include/modmer_hash_distance.hpp create mode 100644 include/shared.hpp diff --git a/include/minimiser_distance.hpp b/include/minimiser_distance.hpp new file mode 100644 index 0000000..7c47e5a --- /dev/null +++ b/include/minimiser_distance.hpp @@ -0,0 +1,583 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Mitra Darvish + * \brief Provides seqan3::views::minimiser_distance. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace seqan3::detail +{ +// --------------------------------------------------------------------------------------------------------------------- +// minimiser_distance_view class +// --------------------------------------------------------------------------------------------------------------------- + +/*!\brief The type returned by seqan3::views::minimiser_distance. + * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must + * model std::totally_ordered. The typical use case is that the reference type is the result of + * seqan3::kmer_hash. + * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference type + * must model std::totally_ordered. If only one range is provided this defaults to + * std::ranges::empty_view. + * \implements std::ranges::view + * \ingroup search_views + * + * \details + * + * See seqan3::views::minimiser_distance for a detailed explanation on minimizers. + * + * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. + * + * \sa seqan3::views::minimiser_distance + */ +template > +class minimiser_distance_view : public std::ranges::view_interface> +{ +private: + static_assert(std::ranges::forward_range, "The minimiser_distance_view only works on forward_ranges."); + static_assert(std::ranges::forward_range, "The minimiser_distance_view only works on forward_ranges."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + + //!\brief The default argument of the second range. + using default_urng2_t = std::ranges::empty_view; + + //!\brief Boolean variable, which is true, when second range is not of empty type. + static constexpr bool second_range_is_given = !std::same_as; + + static_assert(!second_range_is_given || std::totally_ordered_with, + std::ranges::range_reference_t>, + "The reference types of the underlying ranges must model std::totally_ordered_with."); + + //!\brief Whether the given ranges are const_iterable + static constexpr bool const_iterable = seqan3::const_iterable_range && + seqan3::const_iterable_range; + + //!\brief The first underlying range. + urng1_t urange1{}; + //!\brief The second underlying range. + urng2_t urange2{}; + + //!\brief The number of values in one window. + size_t window_size{}; + + template + class basic_iterator; + + //!\brief The sentinel type of the minimiser_distance_view. + using sentinel = std::default_sentinel_t; + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + minimiser_distance_view() = default; //!< Defaulted. + minimiser_distance_view(minimiser_distance_view const & rhs) = default; //!< Defaulted. + minimiser_distance_view(minimiser_distance_view && rhs) = default; //!< Defaulted. + minimiser_distance_view & operator=(minimiser_distance_view const & rhs) = default; //!< Defaulted. + minimiser_distance_view & operator=(minimiser_distance_view && rhs) = default; //!< Defaulted. + ~minimiser_distance_view() = default; //!< Defaulted. + + /*!\brief Construct from a view and a given number of values in one window. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] window_size The number of values in one window. + */ + minimiser_distance_view(urng1_t urange1, size_t const window_size) : + minimiser_distance_view{std::move(urange1), default_urng2_t{}, window_size} + {} + + /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] window_size The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from>>) + //!\endcond + minimiser_distance_view(other_urng1_t && urange1, size_t const window_size) : + urange1{std::views::all(std::forward(urange1))}, + urange2{default_urng2_t{}}, + window_size{window_size} + {} + + /*!\brief Construct from two views and a given number of values in one window. + * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] window_size The number of values in one window. + */ + minimiser_distance_view(urng1_t urange1, urng2_t urange2, size_t const window_size) : + urange1{std::move(urange1)}, + urange2{std::move(urange2)}, + window_size{window_size} + { + if constexpr (second_range_is_given) + { + if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) + throw std::invalid_argument{"The two ranges do not have the same size."}; + } + } + + /*!\brief Construct from two non-views that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \tparam other_urng2_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng2_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] window_size The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from> && + std::ranges::viewable_range && + std::constructible_from>) + //!\endcond + minimiser_distance_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const window_size) : + urange1{std::views::all(std::forward(urange1))}, + urange2{std::views::all(std::forward(urange2))}, + window_size{window_size} + { + if constexpr (second_range_is_given) + { + if (std::ranges::distance(urange1) != std::ranges::distance(urange2)) + throw std::invalid_argument{"The two ranges do not have the same size."}; + } + } + //!\} + + /*!\name Iterators + * \{ + */ + /*!\brief Returns an iterator to the first element of the range. + * \returns Iterator to the first element. + * + * \details + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * Strong exception guarantee. + */ + basic_iterator begin() + { + return {std::ranges::begin(urange1), + std::ranges::end(urange1), + std::ranges::begin(urange2), + window_size}; + } + + //!\copydoc begin() + basic_iterator begin() const + //!\cond + requires const_iterable + //!\endcond + { + return {std::ranges::cbegin(urange1), + std::ranges::cend(urange1), + std::ranges::cbegin(urange2), + window_size}; + } + + /*!\brief Returns an iterator to the element following the last element of the range. + * \returns Iterator to the end. + * + * \details + * + * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * No-throw guarantee. + */ + sentinel end() const + { + return {}; + } + //!\} +}; + +//!\brief Iterator for calculating minimiser_distances. +template +template +class minimiser_distance_view::basic_iterator +{ +private: + //!\brief The sentinel type of the first underlying range. + using urng1_sentinel_t = maybe_const_sentinel_t; + //!\brief The iterator type of the first underlying range. + using urng1_iterator_t = maybe_const_iterator_t; + //!\brief The iterator type of the second underlying range. + using urng2_iterator_t = maybe_const_iterator_t; + + template + friend class basic_iterator; + +public: + /*!\name Associated types + * \{ + */ + //!\brief Type for distances between iterators. + using difference_type = std::ranges::range_difference_t; + //!\brief Value type of this iterator. + using value_type = std::ranges::range_value_t; + //!\brief The pointer type. + using pointer = void; + //!\brief Reference to `value_type`. + using reference = value_type; + //!\brief Tag this class as a forward iterator. + using iterator_category = std::forward_iterator_tag; + //!\brief Tag this class as a forward iterator. + using iterator_concept = iterator_category; + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + basic_iterator() = default; //!< Defaulted. + basic_iterator(basic_iterator const &) = default; //!< Defaulted. + basic_iterator(basic_iterator &&) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. + ~basic_iterator() = default; //!< Defaulted. + + //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. + basic_iterator(basic_iterator const & it) + //!\cond + requires const_range + //!\endcond + : minimiser_distance_value{std::move(it.minimiser_distance_value)}, + urng1_iterator{std::move(it.urng1_iterator)}, + urng1_sentinel{std::move(it.urng1_sentinel)}, + urng2_iterator{std::move(it.urng2_iterator)}, + window_values{std::move(it.window_values)} + {} + + /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number + of values per window. + * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. + * \param[in] window_size The number of values in one window. + * + * \details + * + * Looks at the number of values per window in two ranges, returns the smallest between both as minimiser_distance and + * shifts then by one to repeat this action. If a minimiser_distance in consecutive windows is the same, it is returned only + * once. + */ + basic_iterator(urng1_iterator_t urng1_iterator, + urng1_sentinel_t urng1_sentinel, + urng2_iterator_t urng2_iterator, + size_t window_size) : + urng1_iterator{std::move(urng1_iterator)}, + urng1_sentinel{std::move(urng1_sentinel)}, + urng2_iterator{std::move(urng2_iterator)} + { + size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); + window_size = std::min(window_size, size); + + window_first(window_size); + } + //!\} + + //!\anchor basic_iterator_comparison + //!\name Comparison operators + //!\{ + + //!\brief Compare to another basic_iterator. + friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) + { + return (lhs.urng1_iterator == rhs.urng1_iterator) && + (rhs.urng2_iterator == rhs.urng2_iterator) && + (lhs.window_values.size() == rhs.window_values.size()); + } + + //!\brief Compare to another basic_iterator. + friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the minimiser_distance_view. + friend bool operator==(basic_iterator const & lhs, sentinel const &) + { + return lhs.urng1_iterator == lhs.urng1_sentinel; + } + + //!\brief Compare to the sentinel of the minimiser_distance_view. + friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) + { + return rhs == lhs; + } + + //!\brief Compare to the sentinel of the minimiser_distance_view. + friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the minimiser_distance_view. + friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) + { + return !(lhs == rhs); + } + //!\} + + //!\brief Pre-increment. + basic_iterator & operator++() noexcept + { + next_unique_minimiser_distance(); + return *this; + } + + //!\brief Post-increment. + basic_iterator operator++(int) noexcept + { + basic_iterator tmp{*this}; + next_unique_minimiser_distance(); + return tmp; + } + + //!\brief Return the minimiser_distance. + value_type operator*() const noexcept + { + return minimiser_distance_value; + } + +private: + //!\brief The minimiser_distance value. + value_type minimiser_distance_value{}; + + //!\brief The offset relative to the beginning of the window where the minimizer value is found. + size_t minimiser_distance_position_offset{}; + + //!\brief Iterator to the rightmost value of one window. + urng1_iterator_t urng1_iterator{}; + //!brief Iterator to last element in range. + urng1_sentinel_t urng1_sentinel{}; + //!\brief Iterator to the rightmost value of one window of the second range. + urng2_iterator_t urng2_iterator{}; + + //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current minimiser_distance. + std::deque window_values{}; + + //!\brief Increments iterator by 1. + void next_unique_minimiser_distance() + { + while (!next_minimiser_distance()) {} + } + + //!\brief Returns new window value. + auto window_value() const + { + if constexpr (!second_range_is_given) + return *urng1_iterator; + else + return std::min(*urng1_iterator, *urng2_iterator); + } + + //!\brief Advances the window to the next position. + void advance_window() + { + ++urng1_iterator; + if constexpr (second_range_is_given) + ++urng2_iterator; + } + + //!\brief Calculates minimiser_distances for the first window. + void window_first(size_t const window_size) + { + if (window_size == 0u) + return; + + for (size_t i = 0u; i < window_size - 1u; ++i) + { + window_values.push_back(window_value()); + advance_window(); + } + window_values.push_back(window_value()); + auto minimiser_distance_it = std::ranges::min_element(window_values, std::less_equal{}); + minimiser_distance_value = std::distance(std::begin(window_values), minimiser_distance_it); + minimiser_distance_position_offset = std::distance(std::begin(window_values), minimiser_distance_it); + } + + /*!\brief Calculates the next minimiser_distance value. + * \returns True, if new minimiser_distance is found or end is reached. Otherwise returns false. + * \details + * For the following windows, we remove the first window value (is now not in window_values) and add the new + * value that results from the window shifting. + */ + bool next_minimiser_distance() + { + advance_window(); + if (urng1_iterator == urng1_sentinel) + return true; + + value_type const new_value = window_value(); + + window_values.pop_front(); + window_values.push_back(new_value); + + if (minimiser_distance_position_offset == 0) + { + auto minimiser_distance_it = std::ranges::min_element(window_values, std::less_equal{}); + minimiser_distance_value = std::distance(std::begin(window_values), minimiser_distance_it) ; + minimiser_distance_position_offset = std::distance(std::begin(window_values), minimiser_distance_it); + return true; + } + + if (new_value < minimiser_distance_value) + { + minimiser_distance_value = window_values.size() - 1; + minimiser_distance_position_offset = window_values.size() - 1; + return true; + } + + --minimiser_distance_position_offset; + return false; + } +}; + +//!\brief A deduction guide for the view class template. +template +minimiser_distance_view(rng1_t &&, size_t const window_size) -> minimiser_distance_view>; + +//!\brief A deduction guide for the view class template. +template +minimiser_distance_view(rng1_t &&, rng2_t &&, size_t const window_size) -> minimiser_distance_view, + std::views::all_t>; + +// --------------------------------------------------------------------------------------------------------------------- +// minimiser_distance_fn (adaptor definition) +// --------------------------------------------------------------------------------------------------------------------- + +//![adaptor_def] +//!\brief seqan3::views::minimiser_distance's range adaptor object type (non-closure). +//!\ingroup search_views +struct minimiser_distance_fn +{ + //!\brief Store the number of values in one window and return a range adaptor closure object. + constexpr auto operator()(size_t const window_size) const + { + return adaptor_from_functor{*this, window_size}; + } + + /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many + * values one window contains. + * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] window_size The number of values in one window. + * \returns A range of converted values. + */ + template + constexpr auto operator()(urng1_t && urange1, size_t const window_size) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::minimiser_distance cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::minimiser_distance must model std::ranges::forward_range."); + + if (window_size == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen window_size is not valid. " + "Please choose a value greater than 1 or use two ranges."}; + + return minimiser_distance_view{urange1, window_size}; + } +}; +//![adaptor_def] + +} // namespace seqan3::detail + +/*!\brief Computes minimiser_distances for a range of comparable values. A minimiser_distance is the smallest value in a window. + * \tparam urng_t The type of the first range being processed. See below for requirements. [template + * parameter is omitted in pipe notation] + * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] + * \param[in] window_size The number of values in one window. + * \returns A range of std::totally_ordered where each value is the minimal value for one window. See below for the + * properties of the returned range. + * \ingroup search_views + * + * \details + * + * A minimiser_distance is the smallest value in a window. For example for the following list of hash values + * `[28, 100, 9, 23, 4, 1, 72, 37, 8]` and 4 as `window_size`, the minimiser_distance values are `[9, 4, 1]`. + * + * The minimiser_distance can be calculated for one given range or for two given ranges, where the minimizer is the smallest + * value in both windows. For example for the following list of hash values `[28, 100, 9, 23, 4, 1, 72, 37, 8]` and + * `[30, 2, 11, 101, 199, 73, 34, 900]` and 4 as `window_size`, the minimiser_distance values are `[2, 4, 1]`. + * + * Note that in the interface with the second underlying range the const-iterable property will only be preserved if + * both underlying ranges are const-iterable. + * + * ### Robust Winnowing + * + * In case there are multiple minimal values within one window, the minimum and therefore the minimiser_distance is ambiguous. + * We choose the rightmost value as the minimiser_distance of the window, and when shifting the window, the minimiser_distance is only + * changed if there appears a value that is strictly smaller than the current minimum. This approach is termed + * *robust winnowing* by [Chirag et al.](https://www.biorxiv.org/content/10.1101/2020.02.11.943241v1.full.pdf) + * and is proven to work especially well on repeat regions. + * + * ### Example + * + * \include test/snippet/search/views/minimiser_distance.cpp + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | + * + * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * + * \hideinitializer + * + * \stableapi{Since version 3.1.} + */ +inline constexpr auto minimiser_distance = seqan3::detail::minimiser_distance_fn{}; diff --git a/include/minimiser_hash_distance.hpp b/include/minimiser_hash_distance.hpp new file mode 100644 index 0000000..026afbe --- /dev/null +++ b/include/minimiser_hash_distance.hpp @@ -0,0 +1,174 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Mitra Darvish + * \brief Provides seqan3::views::minimiser_distance_hash. + */ + +#pragma once + +#include +#include +#include +#include + +#include "minimiser_distance.hpp" + + +namespace seqan3::detail +{ +//!\brief seqan3::views::minimiser_distance_hash's range adaptor object type (non-closure). +//!\ingroup search_views +struct minimiser_distance_hash_fn +{ + /*!\brief Store the shape and the window size and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] window_size The windows size to use. + * \throws std::invalid_argument if the size of the shape is greater than the `window_size`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, window_size const window_size) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, window_size}; + } + + /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] window_size The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `window_size`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, window_size const window_size, seed const seed) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, window_size, seed}; + } + + /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. + * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type + * of the range must model seqan3::semialphabet. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] window_size The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `window_size`. + * \returns A range of converted elements. + */ + template + constexpr auto operator()(urng_t && urange, + shape const & shape, + window_size const window_size, + seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::minimiser_distance_hash cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::minimiser_distance_hash must model std::ranges::forward_range."); + static_assert(semialphabet>, + "The range parameter to views::minimiser_distance_hash must be over elements of seqan3::semialphabet."); + + if (shape.size() > window_size.get()) + throw std::invalid_argument{"The size of the shape cannot be greater than the window size."}; + + auto forward_strand = std::forward(urange) | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}); + + auto reverse_strand = std::forward(urange) | seqan3::views::complement + | std::views::reverse + | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}) + | std::views::reverse; + + return minimiser_distance_view(forward_strand, reverse_strand, window_size.get() - shape.size() + 1); + } +}; + +} // namespace seqan3::detail + + +/*!\name Alphabet related views + * \{ + */ + +/*!\brief Computes minimisers for a range with a given shape, window size and seed. + * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is + * omitted in pipe notation] + * \param[in] urange The range being processed. [parameter is omitted in pipe notation] + * \param[in] shape The seqan3::shape that determines how to compute the hash value. + * \param[in] window_size The window size to use. + * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. + * \returns A range of `size_t` where each value is the minimiser of the resp. window. + * See below for the properties of the returned range. + * \ingroup utility_views + * + * \details + * + * A sequence can be presented by a small number of k-mers (minimisers). For a given shape and window size all k-mers + * are determined in the forward strand and the backward strand and only the lexicographically smallest k-mer is + * returned for one window. This process is repeated over every possible window of a sequence. If consecutive windows + * share a minimiser, it is saved only once. + * For example, in the sequence "TAAAGTGCTAAA" for an ungapped shape of length 3 and a window size of 5 the first, + * the second and the last window contain the same minimiser "AAA". + * Because the minimisers of the first two consecutive windows also share the same position, storing this minimiser + * twice is redundant and it is stored only once. The "AAA" minimiser of the last window on the other hand is stored, + * since it is located at an other position than the previous "AAA" minimiser and hence storing the second + * "AAA"-minimiser is not redundant but necessary. + * + * ### Non-lexicographical Minimisers by skewing the hash value with a seed + * + * It might happen that a minimiser changes only slightly when sliding the window over the sequence. For instance, when + * a minimiser starts with a repetition of A’s, then in the next window it is highly likely that the minimiser will + * start with a repetition of A’s as well. Because it is only one A shorter, depending on how long the repetition is + * this might go on for multiple window shifts. Saving these only slightly different minimiser makes no sense because + * they contain no new information about the underlying sequence. + * Additionally, sequences with a repetition of A’s will be seen as more similar to each other than they actually are. + * As [Marçais et al.](https://doi.org/10.1093/bioinformatics/btx235) have shown, randomizing the order of the k-mers + * can solve this problem. Therefore, a random seed is used to XOR all k-mers, thereby randomizing the + * order. The user can change the seed to any other value he or she thinks is useful. A seed of 0 is returning the + * lexicographical order. + * + * \sa seqan3::views::minimiser_view + * + * \attention + * Be aware of the requirements of the seqan3::views::kmer_hash view. + * + * \experimentalapi + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | + * + * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * + * ### Example + * + * \include test/snippet/search/views/minimiser_distance_hash.cpp + * + * \hideinitializer + * + * \experimentalapi{Experimental since version 3.1.} + */ +inline constexpr auto minimiser_hash_distance = seqan3::detail::minimiser_distance_hash_fn{}; + +//!\} diff --git a/include/modmer.hpp b/include/modmer.hpp index 8c65249..13789ea 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -31,9 +31,6 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. - * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference type - * must model std::totally_ordered. If only one range is provided this defaults to - * std::ranges::empty_view. * \implements std::ranges::view * \ingroup search_views * @@ -216,7 +213,6 @@ class modmer_view::basic_iterator of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] mod_used The number of values in one window. * * \details diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index c2c750c..e0054f5 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -16,8 +16,10 @@ #include #include #include +#include #include "modmer.hpp" +#include "shared.hpp" namespace seqan3::detail { @@ -81,7 +83,8 @@ struct modmer_hash_fn {return i ^ seed.get();}) | std::views::reverse; - return seqan3::detail::modmer_view(forward_strand, reverse_strand, mod_used_1); + auto combined_strand = seqan3::views::zip(forward_strand, reverse_strand) | std::views::transform([seed](std::tuple i){return fnv_hash(std::get<0>(i) + std::get<1>(i), seed.get());}); + return seqan3::detail::modmer_view(combined_strand, mod_used_1); } }; diff --git a/include/modmer_hash_distance.hpp b/include/modmer_hash_distance.hpp new file mode 100644 index 0000000..3c5e824 --- /dev/null +++ b/include/modmer_hash_distance.hpp @@ -0,0 +1,139 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Mitra Darvish + * \brief Provides modmer_hash. + */ + +#pragma once + +#include +#include +#include +#include + +#include "modmer.hpp" +#include "shared.hpp" + +namespace seqan3::detail +{ +//!\brief seqan3::views::modmer_hash's range adaptor object type (non-closure). +//!\ingroup search_views +struct modmer_hash_distance_fn +{ + /*!\brief Store the shape and the window size and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used_1 The windows size to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, uint32_t const mod_used_1) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1}; + } + + /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used_1 The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \returns A range of converted elements. + */ + constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, seed const seed) const + { + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, seed}; + } + + /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. + * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type + * of the range must model seqan3::semialphabet. + * \param[in] shape The seqan3::shape to use for hashing. + * \param[in] mod_used_1 The size of the window. + * \param[in] seed The seed to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \returns A range of converted elements. + */ + template + constexpr auto operator()(urng_t && urange, + shape const & shape, + uint32_t const mod_used_1, + seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::modmer_hash cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::modmer_hash must model std::ranges::forward_range."); + static_assert(semialphabet>, + "The range parameter to views::modmer_hash must be over elements of seqan3::semialphabet."); + + auto forward_strand = std::forward(urange) | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}); + + auto reverse_strand = std::forward(urange) | seqan3::views::complement + | std::views::reverse + | seqan3::views::kmer_hash(shape) + | std::views::transform([seed] (uint64_t i) + {return i ^ seed.get();}) + | std::views::reverse; + + auto combined_strand = seqan3::views::zip(forward_strand, reverse_strand) | std::views::transform([seed](std::tuple i){return fnv_hash(std::get<0>(i) + std::get<1>(i), seed.get());}); + return seqan3::detail::modmer_view(combined_strand, mod_used_1); + } +}; + +} // namespace seqan3::detail + +/*!\name Alphabet related views + * \{ + */ + +/*!\brief Computes modmers for a range with a given shape, mod_used_1 and seed. + * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is + * omitted in pipe notation] + * \param[in] urange The range being processed. [parameter is omitted in pipe notation] + * \param[in] shape The seqan3::shape that determines how to compute the hash value. + * \param[in] mod_used_1 The mod value to use. + * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. + * \returns A range of `size_t` where each value is the modmer of the resp. window. + * See below for the properties of the returned range. + * \ingroup search_views + * + * \attention + * Be aware of the requirements of the seqan3::views::kmer_hash view. + * + * \experimentalapi + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | + * + * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * + * \hideinitializer + * + * \experimentalapi{Experimental since version 3.1.} + */ +inline constexpr auto modmer_hash_distance = seqan3::detail::modmer_hash_distance_fn{}; + +//!\} diff --git a/include/shared.hpp b/include/shared.hpp new file mode 100644 index 0000000..33238fa --- /dev/null +++ b/include/shared.hpp @@ -0,0 +1,24 @@ +#pragma once + +// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +uint64_t fnv_hash(uint64_t hash_value, uint64_t seed) +{ + if (seed == 0) + return hash_value; + + constexpr static uint64_t default_offset_basis = 0xcbf29ce484222325; + constexpr static uint64_t prime = 0x100000001b3; + + uint64_t hashed = hash_value; + std::ostringstream os; + os << hash_value; + std::string oss = os.str(); + + for (int i = 0; i < oss.size(); i++) + { + hashed = hashed * prime; + hashed= hashed ^ oss[i]; + } + + return hashed; +} diff --git a/src/compare.cpp b/src/compare.cpp index 895f9c6..f380bf0 100644 --- a/src/compare.cpp +++ b/src/compare.cpp @@ -7,7 +7,9 @@ #include #include "compare.h" +#include "minimiser_hash_distance.hpp" #include "modmer_hash.hpp" +#include "modmer_hash_distance.hpp" /*! \brief Calculate mean and variance of given list. * \param results The vector from which mean and varaince should be calculated of. @@ -252,6 +254,28 @@ void compare_cov(std::filesystem::path sequence_file, urng_t kmer_view, urng_t2 outfile.close(); } +template +void compare_cov2(std::filesystem::path sequence_file, urng_t distance_view, std::string method_name, range_arguments & args) +{ + std::vector coverage{}; + std::vector stdev{}; + std::ofstream outfile; + + seqan3::sequence_file_input> fin{sequence_file}; + for (auto & [seq] : fin) + { + for (auto && hash : seq | distance_view) + coverage.push_back(hash); + } + double mean_coverage, stdev_coverage; + get_mean_and_var(coverage, mean_coverage, stdev_coverage); + + // Store speed and compression + outfile.open(std::string{args.path_out} + method_name + "_coverage.out"); + outfile << "COV\t"<< method_name << "\t" << *std::min_element(coverage.begin(), coverage.end()) << "\t" << mean_coverage << "\t" << stdev_coverage << "\t" << *std::max_element(coverage.begin(), coverage.end()) << "\n"; + outfile.close(); +} + void do_comparison(std::vector sequence_files, range_arguments & args) { switch(args.name) @@ -284,14 +308,10 @@ void do_coverage(std::filesystem::path sequence_file, range_arguments & args) { switch(args.name) { - case kmer: compare_cov(sequence_file, seqan3::views::kmer_hash(args.shape), seqan3::views::kmer_hash(args.shape), "kmer_hash_"+std::to_string(args.k_size), args); - break; - case minimiser: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, - seqan3::window_size{args.shape.size()}, args.seed_se), seqan3::views::minimiser_hash(args.shape, + case minimiser: compare_cov2(sequence_file, minimiser_hash_distance(args.shape, args.w_size, args.seed_se), "minimiser_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; - case modmers: compare_cov(sequence_file, seqan3::views::minimiser_hash(args.shape, - seqan3::window_size{args.shape.size()}, args.seed_se), modmer_hash(args.shape, + case modmers: compare_cov2(sequence_file, modmer_hash_distance(args.shape, args.w_size.get(), args.seed_se), "modmer_hash_" + std::to_string(args.k_size) + "_" + std::to_string(args.w_size.get()), args); break; } diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp index d9d6618..74367ff 100644 --- a/test/api/modmer_hash_test.cpp +++ b/test/api/modmer_hash_test.cpp @@ -42,7 +42,7 @@ struct iterator_fixture : public ::testing::Test static constexpr bool const_iterable = false; seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; - result_t expected_range{26, 152, 6, 192, 112}; + result_t expected_range{27+27, 191+1, 252+192, 242+112}; using test_range_t = decltype(text | ungapped_view); test_range_t test_range = text | ungapped_view; @@ -67,19 +67,16 @@ class modmer_hash_test : public ::testing::Test { protected: std::vector text1{"AAAAAA"_dna4}; - result_t result1{0, 0, 0}; // Same result for ungapped and gapped + result_t result1{}; // Same result for ungapped and gapped std::vector too_short_text{"AC"_dna4}; // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // CCGT GCCG CGCC TCGC GTCG CGTC ACGT AACG AAAC TAAA CTAA // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa std::vector text3{"ACGGCGACGTTTAG"_dna4}; - result_t result3_ungapped{26, 152, 6, 192, 112}; // ACGG, GCGA, aacg, taaa, ctaa - result_t result3_gapped{2, 8, 2, 12, 4}; // A--G, G--A, a--g, t--a, c--a - "-" for gap - result_t result3_ungapped_stop{26, 152}; // ACGG, GCGA - result_t result3_gapped_stop{2, 8}; // A--G, G--A - result_t result3_ungapped_start{6, 192, 112}; // For start at second A, aacg, taaa, ctaa - result_t result3_gapped_start{2, 12, 4}; // a--g, t--a, c--a - "-" for gap + result_t result3_ungapped{27+27, 191+1, 252+192, 242+112}; // ACGT/ACGT, GTTT/AAAC, TTTA/TAAA, TTAG/CTAA + result_t result3_gapped{3+3, 11+1, 12+12, 14+4}; // A--T/A--T, G--T/A--C, T--A/T--A, T--G/C--A - "-" for gap }; template @@ -100,8 +97,8 @@ TYPED_TEST(modmer_hash_view_properties_test, different_input_ranges) { TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - result_t ungapped{216, 6, 192, 112}; // TCGA, aacg, taaa, ctaa - result_t gapped{12, 2, 12, 4}; // T--A, a--g, t--a, c--a - "-" for gap + result_t ungapped{27+27,216+216, 27+27, 191+1, 252+192, 242+112}; // ACGT/ACGT, TCGA/TCGA, ACGT/ACGT, GTTT/AAAC, TTTA/TAAA, TTAG/CTAA + result_t gapped{3+3, 12+12, 3+3, 11+1, 12+12, 14+4}; // A--T/A--T, T--A/T--A, A--T/A--T, G--T/A--C, T--A/T--A, T--G/C--A - "-" for gap EXPECT_RANGE_EQ(ungapped, text | ungapped_view); EXPECT_RANGE_EQ(gapped, text | gapped_view); } @@ -123,10 +120,10 @@ TEST_F(modmer_hash_test, gapped) TEST_F(modmer_hash_test, combinability) { auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); - EXPECT_RANGE_EQ(result3_ungapped_stop, text3 | stop_at_t | ungapped_view); - EXPECT_RANGE_EQ(result3_gapped_stop, text3 | stop_at_t | gapped_view); + EXPECT_RANGE_EQ(result1, text3 | stop_at_t | ungapped_view); + EXPECT_RANGE_EQ(result1, text3 | stop_at_t | gapped_view); auto start_at_a = std::views::drop(6); - EXPECT_RANGE_EQ(result3_ungapped_start, text3 | start_at_a | ungapped_view); - EXPECT_RANGE_EQ(result3_gapped_start, text3 | start_at_a | gapped_view); + EXPECT_RANGE_EQ(result3_ungapped, text3 | start_at_a | ungapped_view); + EXPECT_RANGE_EQ(result3_gapped, text3 | start_at_a | gapped_view); } diff --git a/test/cli/minions_coverage_test.cpp b/test/cli/minions_coverage_test.cpp index c68f348..557b88c 100644 --- a/test/cli/minions_coverage_test.cpp +++ b/test/cli/minions_coverage_test.cpp @@ -14,25 +14,17 @@ TEST_F(cli_test, no_options) EXPECT_EQ(result.err, std::string{}); } -TEST_F(cli_test, kmer) -{ - cli_test_result result = execute_app("minions coverage --method kmer -k 19", data("example1.fasta")); - EXPECT_EQ(result.exit_code, 0); - EXPECT_EQ(result.out, std::string{}); - EXPECT_EQ(result.err, std::string{}); -} - -TEST_F(cli_test, gapped_kmer) +TEST_F(cli_test, minimiser) { - cli_test_result result = execute_app("minions coverage --method kmer --shape 524223", data("example1.fasta")); + cli_test_result result = execute_app("minions coverage --method minimiser -k 19 -w 19 ", data("example1.fasta")); EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{}); } -TEST_F(cli_test, minimiser) +TEST_F(cli_test, gapped_minimiser) { - cli_test_result result = execute_app("minions coverage --method minimiser -k 19 -w 19 ", data("example1.fasta")); + cli_test_result result = execute_app("minions coverage --method minimiser -k 19 -w 19 --shape 524223", data("example1.fasta")); EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{}); From 793c6cd560708ad350e61bd460876b9988baf727 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Fri, 10 Dec 2021 15:39:26 +0100 Subject: [PATCH 12/20] [DOC] Improve documentation. --- include/minimiser_distance.hpp | 51 ++-------------------- include/minimiser_hash_distance.hpp | 66 ++--------------------------- include/modmer.hpp | 9 ++-- include/modmer_hash.hpp | 30 ++++++------- include/modmer_hash_distance.hpp | 28 ++++++------ include/shared.hpp | 9 +++- 6 files changed, 48 insertions(+), 145 deletions(-) diff --git a/include/minimiser_distance.hpp b/include/minimiser_distance.hpp index 7c47e5a..6a463d2 100644 --- a/include/minimiser_distance.hpp +++ b/include/minimiser_distance.hpp @@ -522,8 +522,8 @@ struct minimiser_distance_fn } // namespace seqan3::detail -/*!\brief Computes minimiser_distances for a range of comparable values. A minimiser_distance is the smallest value in a window. - * \tparam urng_t The type of the first range being processed. See below for requirements. [template +/*!\brief Computes the distance of minimiser for a range of comparable values. A minimiser_distance is the smallest value in a window. + * \tparam urng_t The type of the first range being processed. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] * \param[in] window_size The number of values in one window. @@ -533,51 +533,6 @@ struct minimiser_distance_fn * * \details * - * A minimiser_distance is the smallest value in a window. For example for the following list of hash values - * `[28, 100, 9, 23, 4, 1, 72, 37, 8]` and 4 as `window_size`, the minimiser_distance values are `[9, 4, 1]`. - * - * The minimiser_distance can be calculated for one given range or for two given ranges, where the minimizer is the smallest - * value in both windows. For example for the following list of hash values `[28, 100, 9, 23, 4, 1, 72, 37, 8]` and - * `[30, 2, 11, 101, 199, 73, 34, 900]` and 4 as `window_size`, the minimiser_distance values are `[2, 4, 1]`. - * - * Note that in the interface with the second underlying range the const-iterable property will only be preserved if - * both underlying ranges are const-iterable. - * - * ### Robust Winnowing - * - * In case there are multiple minimal values within one window, the minimum and therefore the minimiser_distance is ambiguous. - * We choose the rightmost value as the minimiser_distance of the window, and when shifting the window, the minimiser_distance is only - * changed if there appears a value that is strictly smaller than the current minimum. This approach is termed - * *robust winnowing* by [Chirag et al.](https://www.biorxiv.org/content/10.1101/2020.02.11.943241v1.full.pdf) - * and is proven to work especially well on repeat regions. - * - * ### Example - * - * \include test/snippet/search/views/minimiser_distance.cpp - * - * ### View properties - * - * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | - * |----------------------------------|:----------------------------------:|:--------------------------------:| - * | std::ranges::input_range | *required* | *preserved* | - * | std::ranges::forward_range | *required* | *preserved* | - * | std::ranges::bidirectional_range | | *lost* | - * | std::ranges::random_access_range | | *lost* | - * | std::ranges::contiguous_range | | *lost* | - * | | | | - * | std::ranges::viewable_range | *required* | *guaranteed* | - * | std::ranges::view | | *guaranteed* | - * | std::ranges::sized_range | | *lost* | - * | std::ranges::common_range | | *lost* | - * | std::ranges::output_range | | *lost* | - * | seqan3::const_iterable_range | | *preserved* | - * | | | | - * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | - * - * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. - * - * \hideinitializer - * - * \stableapi{Since version 3.1.} + * For more information look into seqan3::views::minimiser. */ inline constexpr auto minimiser_distance = seqan3::detail::minimiser_distance_fn{}; diff --git a/include/minimiser_hash_distance.hpp b/include/minimiser_hash_distance.hpp index 026afbe..3c8ebb5 100644 --- a/include/minimiser_hash_distance.hpp +++ b/include/minimiser_hash_distance.hpp @@ -96,9 +96,8 @@ struct minimiser_distance_hash_fn * \{ */ -/*!\brief Computes minimisers for a range with a given shape, window size and seed. - * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is - * omitted in pipe notation] +/*!\brief Computes the distance of minimisers for a range with a given shape, window size and seed. + * \tparam urng_t The type of the range being processed. * \param[in] urange The range being processed. [parameter is omitted in pipe notation] * \param[in] shape The seqan3::shape that determines how to compute the hash value. * \param[in] window_size The window size to use. @@ -108,66 +107,7 @@ struct minimiser_distance_hash_fn * \ingroup utility_views * * \details - * - * A sequence can be presented by a small number of k-mers (minimisers). For a given shape and window size all k-mers - * are determined in the forward strand and the backward strand and only the lexicographically smallest k-mer is - * returned for one window. This process is repeated over every possible window of a sequence. If consecutive windows - * share a minimiser, it is saved only once. - * For example, in the sequence "TAAAGTGCTAAA" for an ungapped shape of length 3 and a window size of 5 the first, - * the second and the last window contain the same minimiser "AAA". - * Because the minimisers of the first two consecutive windows also share the same position, storing this minimiser - * twice is redundant and it is stored only once. The "AAA" minimiser of the last window on the other hand is stored, - * since it is located at an other position than the previous "AAA" minimiser and hence storing the second - * "AAA"-minimiser is not redundant but necessary. - * - * ### Non-lexicographical Minimisers by skewing the hash value with a seed - * - * It might happen that a minimiser changes only slightly when sliding the window over the sequence. For instance, when - * a minimiser starts with a repetition of A’s, then in the next window it is highly likely that the minimiser will - * start with a repetition of A’s as well. Because it is only one A shorter, depending on how long the repetition is - * this might go on for multiple window shifts. Saving these only slightly different minimiser makes no sense because - * they contain no new information about the underlying sequence. - * Additionally, sequences with a repetition of A’s will be seen as more similar to each other than they actually are. - * As [Marçais et al.](https://doi.org/10.1093/bioinformatics/btx235) have shown, randomizing the order of the k-mers - * can solve this problem. Therefore, a random seed is used to XOR all k-mers, thereby randomizing the - * order. The user can change the seed to any other value he or she thinks is useful. A seed of 0 is returning the - * lexicographical order. - * - * \sa seqan3::views::minimiser_view - * - * \attention - * Be aware of the requirements of the seqan3::views::kmer_hash view. - * - * \experimentalapi - * - * ### View properties - * - * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | - * |----------------------------------|:----------------------------------:|:--------------------------------:| - * | std::ranges::input_range | *required* | *preserved* | - * | std::ranges::forward_range | *required* | *preserved* | - * | std::ranges::bidirectional_range | | *lost* | - * | std::ranges::random_access_range | | *lost* | - * | std::ranges::contiguous_range | | *lost* | - * | | | | - * | std::ranges::viewable_range | *required* | *guaranteed* | - * | std::ranges::view | | *guaranteed* | - * | std::ranges::sized_range | | *lost* | - * | std::ranges::common_range | | *lost* | - * | std::ranges::output_range | | *lost* | - * | seqan3::const_iterable_range | | *preserved* | - * | | | | - * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | - * - * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. - * - * ### Example - * - * \include test/snippet/search/views/minimiser_distance_hash.cpp - * - * \hideinitializer - * - * \experimentalapi{Experimental since version 3.1.} + * For more information look into seqan3::views::minimiser_hash */ inline constexpr auto minimiser_hash_distance = seqan3::detail::minimiser_distance_hash_fn{}; diff --git a/include/modmer.hpp b/include/modmer.hpp index 13789ea..17ff657 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -31,6 +31,7 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. + * \tparam measure_distance If true, then not the actual modmers are returned, but the distances of the modmers. * \implements std::ranges::view * \ingroup search_views * @@ -306,8 +307,9 @@ class modmer_view::basic_iterator //!brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; + //!brief The mod value used. size_t mod{}; - + //!brief The distance stored. Only relevant, if measure_distance is true. size_t distance{1}; //!\brief Advances the window to the next position. @@ -410,11 +412,12 @@ struct modmer_fn } // namespace seqan3::detail -/*!\brief Computes modmers for a range of comparable values. A modmer is ... +/*!\brief Computes modmers for a range of comparable values. A modmer is a value that fullfills the + condition value % mod_used. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used The number of values in one window. + * \param[in] mod_used The mod value used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index e0054f5..453bcc1 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -29,40 +29,40 @@ struct modmer_hash_fn { /*!\brief Store the shape and the window size and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The windows size to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \param[in] mod_used The mod value to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, uint32_t const mod_used_1) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used}; } /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The size of the window. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, seed const seed) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used, seed const seed) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, seed}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used, seed}; } /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type * of the range must model seqan3::semialphabet. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The size of the window. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ template constexpr auto operator()(urng_t && urange, shape const & shape, - uint32_t const mod_used_1, + uint32_t const mod_used, seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const { static_assert(std::ranges::viewable_range, @@ -82,9 +82,9 @@ struct modmer_hash_fn | std::views::transform([seed] (uint64_t i) {return i ^ seed.get();}) | std::views::reverse; - + // fnv_hash ensures actual randomness. auto combined_strand = seqan3::views::zip(forward_strand, reverse_strand) | std::views::transform([seed](std::tuple i){return fnv_hash(std::get<0>(i) + std::get<1>(i), seed.get());}); - return seqan3::detail::modmer_view(combined_strand, mod_used_1); + return seqan3::detail::modmer_view(combined_strand, mod_used); } }; @@ -94,12 +94,12 @@ struct modmer_hash_fn * \{ */ -/*!\brief Computes modmers for a range with a given shape, mod_used_1 and seed. +/*!\brief Computes modmers for a range with a given shape, mod_used and seed. * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is * omitted in pipe notation] * \param[in] urange The range being processed. [parameter is omitted in pipe notation] * \param[in] shape The seqan3::shape that determines how to compute the hash value. - * \param[in] mod_used_1 The mod value to use. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. * \returns A range of `size_t` where each value is the modmer of the resp. window. * See below for the properties of the returned range. diff --git a/include/modmer_hash_distance.hpp b/include/modmer_hash_distance.hpp index 3c5e824..e0215ad 100644 --- a/include/modmer_hash_distance.hpp +++ b/include/modmer_hash_distance.hpp @@ -28,40 +28,40 @@ struct modmer_hash_distance_fn { /*!\brief Store the shape and the window size and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The windows size to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \param[in] mod_used The mod value to use. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, uint32_t const mod_used_1) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used}; } /*!\brief Store the shape, the window size and the seed and return a range adaptor closure object. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The size of the window. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ - constexpr auto operator()(shape const & shape, uint32_t const mod_used_1, seed const seed) const + constexpr auto operator()(shape const & shape, uint32_t const mod_used, seed const seed) const { - return seqan3::detail::adaptor_from_functor{*this, shape, mod_used_1, seed}; + return seqan3::detail::adaptor_from_functor{*this, shape, mod_used, seed}; } /*!\brief Call the view's constructor with the underlying view, a seqan3::shape and a window size as argument. * \param[in] urange The input range to process. Must model std::ranges::viewable_range and the reference type * of the range must model seqan3::semialphabet. * \param[in] shape The seqan3::shape to use for hashing. - * \param[in] mod_used_1 The size of the window. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed to use. - * \throws std::invalid_argument if the size of the shape is greater than the `mod_used_1`. + * \throws std::invalid_argument if the size of the shape is greater than the `mod_used`. * \returns A range of converted elements. */ template constexpr auto operator()(urng_t && urange, shape const & shape, - uint32_t const mod_used_1, + uint32_t const mod_used, seed const seed = seqan3::seed{0x8F3F73B5CF1C9ADE}) const { static_assert(std::ranges::viewable_range, @@ -83,7 +83,7 @@ struct modmer_hash_distance_fn | std::views::reverse; auto combined_strand = seqan3::views::zip(forward_strand, reverse_strand) | std::views::transform([seed](std::tuple i){return fnv_hash(std::get<0>(i) + std::get<1>(i), seed.get());}); - return seqan3::detail::modmer_view(combined_strand, mod_used_1); + return seqan3::detail::modmer_view(combined_strand, mod_used); } }; @@ -93,12 +93,12 @@ struct modmer_hash_distance_fn * \{ */ -/*!\brief Computes modmers for a range with a given shape, mod_used_1 and seed. +/*!\brief Computes modmers for a range with a given shape, mod_used and seed. * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is * omitted in pipe notation] * \param[in] urange The range being processed. [parameter is omitted in pipe notation] * \param[in] shape The seqan3::shape that determines how to compute the hash value. - * \param[in] mod_used_1 The mod value to use. + * \param[in] mod_used The mod value to use. * \param[in] seed The seed used to skew the hash values. Default: 0x8F3F73B5CF1C9ADE. * \returns A range of `size_t` where each value is the modmer of the resp. window. * See below for the properties of the returned range. diff --git a/include/shared.hpp b/include/shared.hpp index 33238fa..3129f6c 100644 --- a/include/shared.hpp +++ b/include/shared.hpp @@ -1,8 +1,13 @@ #pragma once -// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function -uint64_t fnv_hash(uint64_t hash_value, uint64_t seed) +// +/*! \brief Function that ensures random hashes, based on https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + * \param hash_value The hash_value that should be transformed. + * \param seed The seed. + */ +uint64_t fnv_hash(uint64_t hash_value) { + // If seed is 0, then the hash value is just returned. if (seed == 0) return hash_value; From 96560bc65fb4e76ebf3c27f33f815b2f20b82356 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Mon, 13 Dec 2021 09:17:36 -0500 Subject: [PATCH 13/20] [TEST] Test mininimiser distance. --- include/minimiser_distance.hpp | 17 +- test/api/minimiser_distance_test.cpp | 243 +++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 test/api/minimiser_distance_test.cpp diff --git a/include/minimiser_distance.hpp b/include/minimiser_distance.hpp index 6a463d2..c3651aa 100644 --- a/include/minimiser_distance.hpp +++ b/include/minimiser_distance.hpp @@ -380,6 +380,10 @@ class minimiser_distance_view::basic_iterator private: //!\brief The minimiser_distance value. value_type minimiser_distance_value{}; + //!\brief The minimiser value. + value_type minimiser_value{}; + //!\brief Helper to track distance. + size_t distance{}; //!\brief The offset relative to the beginning of the window where the minimizer value is found. size_t minimiser_distance_position_offset{}; @@ -430,8 +434,10 @@ class minimiser_distance_view::basic_iterator } window_values.push_back(window_value()); auto minimiser_distance_it = std::ranges::min_element(window_values, std::less_equal{}); + minimiser_value = *minimiser_distance_it; minimiser_distance_value = std::distance(std::begin(window_values), minimiser_distance_it); minimiser_distance_position_offset = std::distance(std::begin(window_values), minimiser_distance_it); + distance = window_values.size() - minimiser_distance_value - 1; } /*!\brief Calculates the next minimiser_distance value. @@ -454,18 +460,23 @@ class minimiser_distance_view::basic_iterator if (minimiser_distance_position_offset == 0) { auto minimiser_distance_it = std::ranges::min_element(window_values, std::less_equal{}); - minimiser_distance_value = std::distance(std::begin(window_values), minimiser_distance_it) ; + minimiser_distance_value = std::distance(std::begin(window_values), minimiser_distance_it); minimiser_distance_position_offset = std::distance(std::begin(window_values), minimiser_distance_it); + minimiser_value = *minimiser_distance_it; + distance = window_values.size() - minimiser_distance_value - 1; return true; } - if (new_value < minimiser_distance_value) + if (new_value < minimiser_value) { - minimiser_distance_value = window_values.size() - 1; + minimiser_distance_value = distance; + distance = 0; minimiser_distance_position_offset = window_values.size() - 1; + minimiser_value = new_value; return true; } + distance++; --minimiser_distance_position_offset; return false; } diff --git a/test/api/minimiser_distance_test.cpp b/test/api/minimiser_distance_test.cpp new file mode 100644 index 0000000..8a7f008 --- /dev/null +++ b/test/api/minimiser_distance_test.cpp @@ -0,0 +1,243 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" + +#include "minimiser_distance.hpp" + +using seqan3::operator""_dna4; +using seqan3::operator""_shape; +using result_t = std::vector; + +inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapped{4}); +inline static constexpr auto rev_kmer_view = seqan3::views::complement | std::views::reverse + | kmer_view + | std::views::reverse; +inline static constexpr auto gapped_kmer_view = seqan3::views::kmer_hash(0b1001_shape); +inline static constexpr auto rev_gapped_kmer_view = seqan3::views::complement | std::views::reverse + | seqan3::views::kmer_hash(0b1001_shape) + | std::views::reverse; +inline static constexpr auto minimiser_view1 = minimiser_distance(1); // kmer_size == window_size +inline static constexpr auto minimiser_no_rev_view = minimiser_distance(5); + +using iterator_type = std::ranges::iterator_t< decltype(std::declval() + | kmer_view + | minimiser_no_rev_view)>; +using two_ranges_iterator_type = std::ranges::iterator_t< decltype(seqan3::detail::minimiser_distance_view{ + std::declval() + | kmer_view, + std::declval() + | rev_kmer_view, + 5})>; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = true; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})) vec = text | kmer_view; + result_t expected_range{0, 3, 1}; + + decltype(minimiser_distance(seqan3::views::kmer_hash(text, seqan3::ungapped{4}), 5)) test_range = + minimiser_distance(vec, 5); +}; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = true; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + using kmer_hash_view_t = decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{4})); + + kmer_hash_view_t vec = kmer_view(text); + result_t expected_range{0, 3, 1, 0, 0}; + + using reverse_kmer_hash_view_t = decltype(rev_kmer_view(text)); + + using test_range_t = decltype(seqan3::detail::minimiser_distance_view{kmer_hash_view_t{}, reverse_kmer_hash_view_t{}, 5}); + test_range_t test_range = seqan3::detail::minimiser_distance_view{vec, rev_kmer_view(text), 5}; +}; + +using test_types = ::testing::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_types, ); + +template +class minimiser_view_properties_test: public ::testing::Test { }; + +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitpacked_sequence, + seqan3::bitpacked_sequence const, + std::list, + std::list const, + std::forward_list, + std::forward_list const>; +TYPED_TEST_SUITE(minimiser_view_properties_test, underlying_range_types, ); + +class minimiser_test : public ::testing::Test +{ +protected: + std::vector text1{"AAAAAAAAAAAAAAAAAAA"_dna4}; + std::vector text1_short{"AAAAAA"_dna4}; + result_t result1{4, 4, 4}; // Same result for ungapped and gapped + result_t result1_short{15}; + + std::vector too_short_text{"AC"_dna4}; + + std::vector text3{"ACGGCGACGTTTAG"_dna4}; + result_t result3_ungapped{0, 3, 1, 0, 0}; // ACGG, CGAC, ACGT, aacg, aaac - lowercase for reverse complement + result_t result3_gapped{0, 4, 0, 0, 0}; // A--G, c--c, A--T, a--g, a--c - "-" for gap + result_t result3_ungapped_no_rev{0, 3, 1}; // ACGG, CGAC, ACGT + result_t result3_gapped_no_rev{0, 3, 1}; // A--G, C--C-, A--T "-" for gap + result_t result3_stop{0, 3}; // For stop at first T + result_t result3_gapped_stop{0, 4}; // A--G, c--c + result_t result3_start{2}; // For start at second A, ungapped and gapped the same + result_t result3_ungapped_no_rev_start{0}; // For start at second A + result_t result3_gapped_no_rev_start{0}; // For start at second A +}; + +template +void compare_types(adaptor_t v) +{ + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_FALSE(std::ranges::bidirectional_range); + EXPECT_FALSE(std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_FALSE(std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); +} + +TYPED_TEST(minimiser_view_properties_test, concepts) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + + auto v = text | kmer_view | minimiser_no_rev_view; + compare_types(v); + auto v2 = seqan3::detail::minimiser_distance_view{text | kmer_view, text | kmer_view, 5}; + + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + auto v3 = seqan3::detail::minimiser_distance_view{text | kmer_view, text | rev_kmer_view, 5}; + compare_types(v3); + } +} + +TYPED_TEST(minimiser_view_properties_test, different_inputs_kmer_hash) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + result_t ungapped{0, 3, 1, 0, 0}; // ACGT, CGAC, ACGT, aacg, aaac - lowercase for reverse comp. + result_t gapped{0, 4, 0, 0, 0}; // A--T, c--c, A--T, a--g, a--c - "-" for gap + result_t ungapped_no_rev{0, 3, 1}; // ACGT, CGAC, ACGT + result_t gapped_no_rev{0, 3, 1}; // A--T, C--C, A--T - "-" for gap + EXPECT_RANGE_EQ(ungapped_no_rev, text | kmer_view | minimiser_no_rev_view); + EXPECT_RANGE_EQ(gapped_no_rev, text | gapped_kmer_view | minimiser_no_rev_view); + + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + EXPECT_RANGE_EQ(ungapped, (seqan3::detail::minimiser_distance_view{text | kmer_view, text | rev_kmer_view, 5})) ; + EXPECT_RANGE_EQ(gapped, (seqan3::detail::minimiser_distance_view{text | gapped_kmer_view, text | rev_gapped_kmer_view, 5})); + } +} + +TEST_F(minimiser_test, ungapped_kmer_hash) +{ + EXPECT_RANGE_EQ(result1, (seqan3::detail::minimiser_distance_view{text1 | kmer_view, text1 | rev_kmer_view, 5})); + EXPECT_RANGE_EQ(result1, text1 | kmer_view | minimiser_no_rev_view); + EXPECT_THROW(text1_short | kmer_view | minimiser_view1, std::invalid_argument); + auto empty_view = seqan3::detail::minimiser_distance_view{too_short_text | kmer_view, too_short_text | rev_kmer_view, 5}; + EXPECT_TRUE(std::ranges::empty(empty_view)); + auto empty_view2 = too_short_text | kmer_view | minimiser_no_rev_view; + EXPECT_TRUE(std::ranges::empty(empty_view2)); + EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::minimiser_distance_view{text3 | kmer_view, text3 | rev_kmer_view, 5})); + EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | minimiser_no_rev_view); + +} + +TEST_F(minimiser_test, gapped_kmer_hash) +{ + EXPECT_RANGE_EQ(result1, (seqan3::detail::minimiser_distance_view{text1 | gapped_kmer_view, + text1 | rev_gapped_kmer_view, + 5})); + EXPECT_RANGE_EQ(result1, text1 | gapped_kmer_view | minimiser_no_rev_view); + EXPECT_THROW(text1_short | gapped_kmer_view | minimiser_view1, std::invalid_argument); + auto empty_view = seqan3::detail::minimiser_distance_view{too_short_text | gapped_kmer_view, + too_short_text | rev_gapped_kmer_view, + 5}; + EXPECT_TRUE(std::ranges::empty(empty_view)); + auto empty_view2 = too_short_text | gapped_kmer_view | minimiser_no_rev_view; + EXPECT_TRUE(std::ranges::empty(empty_view2)); + EXPECT_RANGE_EQ(result3_gapped, (seqan3::detail::minimiser_distance_view{text3 | gapped_kmer_view, + text3 | rev_gapped_kmer_view, + 5})); + EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | minimiser_no_rev_view); +} + +TEST_F(minimiser_test, window_too_big) +{ + EXPECT_RANGE_EQ(result1_short, text1 | kmer_view | minimiser_distance(20)); + EXPECT_RANGE_EQ(result1_short, text1 | gapped_kmer_view | minimiser_distance(20)); + EXPECT_RANGE_EQ(result1_short, (seqan3::detail::minimiser_distance_view{text1 | kmer_view, text1 | rev_kmer_view, 20})); + EXPECT_RANGE_EQ(result1_short, (seqan3::detail::minimiser_distance_view{text1 | gapped_kmer_view, + text1 | rev_gapped_kmer_view, + 20})); +} + +TEST_F(minimiser_test, combinability) +{ + auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); + EXPECT_RANGE_EQ(result3_stop, text3 | stop_at_t | kmer_view | minimiser_no_rev_view); + EXPECT_RANGE_EQ(result3_stop, text3 | stop_at_t | gapped_kmer_view | minimiser_no_rev_view); + + EXPECT_RANGE_EQ(result3_stop, (seqan3::detail::minimiser_distance_view{text3 | stop_at_t | kmer_view, + text3 | stop_at_t | rev_kmer_view, + 5})); + EXPECT_RANGE_EQ(result3_gapped_stop, (seqan3::detail::minimiser_distance_view{text3 | stop_at_t | gapped_kmer_view, + text3 | stop_at_t | rev_gapped_kmer_view, + 5})); + + auto start_at_a = std::views::drop(6); + EXPECT_RANGE_EQ(result3_start, (seqan3::detail::minimiser_distance_view{text3 | start_at_a | kmer_view, + text3 | start_at_a | rev_kmer_view, + 5})); + EXPECT_RANGE_EQ(result3_start, (seqan3::detail::minimiser_distance_view{text3 | start_at_a | gapped_kmer_view, + text3 | start_at_a | rev_gapped_kmer_view, + 5})); +} + +/*TEST_F(minimiser_test, non_arithmetic_value) +{ + // just compute the minimizer directly on the alphabet + EXPECT_RANGE_EQ("ACACA"_dna4, text3 | minimiser_no_rev_view); +}*/ + +TEST_F(minimiser_test, two_ranges_unequal_size) +{ + EXPECT_THROW((seqan3::detail::minimiser_distance_view{text1 | kmer_view, text3 | rev_kmer_view, 5}), std::invalid_argument); +} From 0f6bbf1bf73023aad3a66d6356b7446e35cbb2c5 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Mon, 13 Dec 2021 09:18:04 -0500 Subject: [PATCH 14/20] [FIX] Missing stuff. --- include/shared.hpp | 2 +- test/api/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/shared.hpp b/include/shared.hpp index 3129f6c..219d54c 100644 --- a/include/shared.hpp +++ b/include/shared.hpp @@ -5,7 +5,7 @@ * \param hash_value The hash_value that should be transformed. * \param seed The seed. */ -uint64_t fnv_hash(uint64_t hash_value) +uint64_t fnv_hash(uint64_t hash_value, uint64_t seed) { // If seed is 0, then the hash value is just returned. if (seed == 0) diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 733f86c..6591128 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -3,5 +3,7 @@ cmake_minimum_required (VERSION 3.8) add_api_test (comparison_test.cpp) target_use_datasources (comparison_test FILES example1.fasta) +add_api_test (minimiser_distance_test.cpp) + add_api_test (modmer_test.cpp) add_api_test (modmer_hash_test.cpp) From 0ddfbe1072674747e4b8cc5e9c837a21cb373e87 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Mon, 13 Dec 2021 10:14:28 -0500 Subject: [PATCH 15/20] [DOC] Improve documentation. --- include/modmer.hpp | 2 +- include/modmer_hash.hpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index 17ff657..029d440 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -442,6 +442,6 @@ struct modmer_fn * | | | | * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | * - * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * See the views views submodule documentation for detailed descriptions of the view properties. */ inline constexpr auto modmer = seqan3::detail::modmer_fn{}; diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index 453bcc1..6692702 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -108,7 +108,6 @@ struct modmer_hash_fn * \attention * Be aware of the requirements of the seqan3::views::kmer_hash view. * - * \experimentalapi * * ### View properties * @@ -129,11 +128,10 @@ struct modmer_hash_fn * | | | | * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | * - * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * See the views views submodule documentation for detailed descriptions of the view properties. * * \hideinitializer * - * \experimentalapi{Experimental since version 3.1.} */ inline constexpr auto modmer_hash = seqan3::detail::modmer_hash_fn{}; From 41a9aab30b4eaf1c203969bd325e479c56e6fdae Mon Sep 17 00:00:00 2001 From: mitradarja Date: Mon, 13 Dec 2021 10:14:53 -0500 Subject: [PATCH 16/20] [TEST] Test modmer distance. --- include/modmer_hash_distance.hpp | 3 +- test/api/CMakeLists.txt | 1 + test/api/modmer_hash_distance_test.cpp | 130 +++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 test/api/modmer_hash_distance_test.cpp diff --git a/include/modmer_hash_distance.hpp b/include/modmer_hash_distance.hpp index e0215ad..5671543 100644 --- a/include/modmer_hash_distance.hpp +++ b/include/modmer_hash_distance.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include "modmer.hpp" #include "shared.hpp" @@ -93,7 +94,7 @@ struct modmer_hash_distance_fn * \{ */ -/*!\brief Computes modmers for a range with a given shape, mod_used and seed. +/*!\brief Computes the distance of modmers for a range with a given shape, mod_used and seed. * \tparam urng_t The type of the range being processed. See below for requirements. [template parameter is * omitted in pipe notation] * \param[in] urange The range being processed. [parameter is omitted in pipe notation] diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 6591128..73ac436 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -7,3 +7,4 @@ add_api_test (minimiser_distance_test.cpp) add_api_test (modmer_test.cpp) add_api_test (modmer_hash_test.cpp) +add_api_test (modmer_hash_distance_test.cpp) diff --git a/test/api/modmer_hash_distance_test.cpp b/test/api/modmer_hash_distance_test.cpp new file mode 100644 index 0000000..4260f7d --- /dev/null +++ b/test/api/modmer_hash_distance_test.cpp @@ -0,0 +1,130 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" + +#include "modmer_hash_distance.hpp" + +using seqan3::operator""_dna4; +using seqan3::operator""_shape; +using result_t = std::vector; + +using iterator_type = std::ranges::iterator_t() + | modmer_hash_distance(seqan3::ungapped{4}, + 2, + seqan3::seed{0}))>; + +static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4}; +static constexpr seqan3::shape gapped_shape = 0b1001_shape; +static constexpr auto ungapped_view = modmer_hash_distance(ungapped_shape, + 2, + seqan3::seed{0}); +static constexpr auto gapped_view = modmer_hash_distance(gapped_shape, + 2, + seqan3::seed{0}); + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = false; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + result_t expected_range{6, 1, 0, 0}; + + using test_range_t = decltype(text | ungapped_view); + test_range_t test_range = text | ungapped_view; +}; + +using test_type = ::testing::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_type, ); + +template +class modmer_hash_distance_view_properties_test: public ::testing::Test { }; + +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitpacked_sequence, + seqan3::bitpacked_sequence const, + std::list, + std::list const>; + +TYPED_TEST_SUITE(modmer_hash_distance_view_properties_test, underlying_range_types, ); + +class modmer_hash_distance_test : public ::testing::Test +{ +protected: + std::vector text1{"AAAAAA"_dna4}; + result_t result1{}; // Same result for ungapped and gapped + + std::vector too_short_text{"AC"_dna4}; + + // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // CCGT GCCG CGCC TCGC GTCG CGTC ACGT AACG AAAC TAAA CTAA + // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa + std::vector text3{"ACGGCGACGTTTAG"_dna4}; + result_t result3{6, 1, 0, 0}; // ACGT/ACGT, GTTT/AAAC, TTTA/TAAA, TTAG/CTAA // A--T/A--T, G--T/A--C, T--A/T--A, T--G/C--A - "-" for gap + result_t result3_stop{}; + result_t result3_start{0, 1, 0, 0}; +}; + +template +void compare_types(adaptor_t v) +{ + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_FALSE(std::ranges::bidirectional_range); + EXPECT_FALSE(std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_FALSE(std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); +} + +TYPED_TEST(modmer_hash_distance_view_properties_test, different_input_ranges) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + result_t ungapped{0, 2, 2, 1, 0, 0}; // ACGT/ACGT, TCGA/TCGA, ACGT/ACGT, GTTT/AAAC, TTTA/TAAA, TTAG/CTAA + result_t gapped{0, 2, 2, 1, 0, 0}; // A--T/A--T, T--A/T--A, A--T/A--T, G--T/A--C, T--A/T--A, T--G/C--A - "-" for gap + EXPECT_RANGE_EQ(ungapped, text | ungapped_view); + EXPECT_RANGE_EQ(gapped, text | gapped_view); +} + +TEST_F(modmer_hash_distance_test, ungapped) +{ + EXPECT_RANGE_EQ(result1, text1 | ungapped_view); + EXPECT_TRUE(std::ranges::empty(too_short_text | ungapped_view)); + EXPECT_RANGE_EQ(result3, text3 | ungapped_view); +} + +TEST_F(modmer_hash_distance_test, gapped) +{ + EXPECT_RANGE_EQ(result1, text1 | gapped_view); + EXPECT_TRUE(std::ranges::empty(too_short_text | gapped_view)); + EXPECT_RANGE_EQ(result3, text3 | gapped_view); +} + +TEST_F(modmer_hash_distance_test, combinability) +{ + auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); + EXPECT_RANGE_EQ(result3_stop, text3 | stop_at_t | ungapped_view); + EXPECT_RANGE_EQ(result3_stop, text3 | stop_at_t | gapped_view); + + auto start_at_a = std::views::drop(6); + EXPECT_RANGE_EQ(result3_start, text3 | start_at_a | ungapped_view); + EXPECT_RANGE_EQ(result3_start, text3 | start_at_a | gapped_view); +} From 1d92cb95008cbe42da65dcfab973731a8d8f4c73 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Mon, 13 Dec 2021 10:42:41 -0500 Subject: [PATCH 17/20] [DOC] Improve documentation. --- include/modmer.hpp | 8 ++++---- include/modmer_hash_distance.hpp | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index 029d440..c3ae206 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -66,9 +66,9 @@ class modmer_view : public std::ranges::view_interface> /*!\name Constructors, destructor and assignment * \{ */ - modmer_view() - requires std::default_initializable - = default; //!< Defaulted. + /// \cond Workaround_Doxygen + modmer_view() requires std::default_initializable = default; //!< Defaulted. + /// \endcond modmer_view(modmer_view const & rhs) = default; //!< Defaulted. modmer_view(modmer_view && rhs) = default; //!< Defaulted. modmer_view & operator=(modmer_view const & rhs) = default; //!< Defaulted. @@ -236,7 +236,7 @@ class modmer_view::basic_iterator } //!\} - //!\anchor basic_iterator_comparison + //!\anchor basic_iterator_comparison_modmer //!\name Comparison operators //!\{ diff --git a/include/modmer_hash_distance.hpp b/include/modmer_hash_distance.hpp index 5671543..d824a11 100644 --- a/include/modmer_hash_distance.hpp +++ b/include/modmer_hash_distance.hpp @@ -108,7 +108,6 @@ struct modmer_hash_distance_fn * \attention * Be aware of the requirements of the seqan3::views::kmer_hash view. * - * \experimentalapi * * ### View properties * @@ -129,11 +128,10 @@ struct modmer_hash_distance_fn * | | | | * | std::ranges::range_reference_t | seqan3::semialphabet | std::size_t | * - * See the \link views views submodule documentation \endlink for detailed descriptions of the view properties. + * See the views views submodule documentation for detailed descriptions of the view properties. * * \hideinitializer * - * \experimentalapi{Experimental since version 3.1.} */ inline constexpr auto modmer_hash_distance = seqan3::detail::modmer_hash_distance_fn{}; From da0c1f9c03a45bd420ea8da3cd19ef7dea7d7229 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Tue, 4 Jan 2022 19:05:17 +0100 Subject: [PATCH 18/20] [TEST] Increase code coverage. --- include/modmer.hpp | 4 ---- include/modmer_hash.hpp | 4 ++++ include/modmer_hash_distance.hpp | 4 ++++ test/api/minimiser_distance_test.cpp | 4 +++- test/api/modmer_hash_test.cpp | 9 +++++++++ test/api/modmer_test.cpp | 2 +- 6 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/modmer.hpp b/include/modmer.hpp index c3ae206..b4cf1e6 100644 --- a/include/modmer.hpp +++ b/include/modmer.hpp @@ -401,10 +401,6 @@ struct modmer_fn static_assert(std::ranges::forward_range, "The range parameter to views::modmer must model std::ranges::forward_range."); - if (mod_used == 1) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen mod_used is not valid. " - "Please choose a value greater than 1 or use two ranges."}; - return modmer_view{urange1, mod_used}; } }; diff --git a/include/modmer_hash.hpp b/include/modmer_hash.hpp index 6692702..a17114b 100644 --- a/include/modmer_hash.hpp +++ b/include/modmer_hash.hpp @@ -72,6 +72,10 @@ struct modmer_hash_fn static_assert(semialphabet>, "The range parameter to views::modmer_hash must be over elements of seqan3::semialphabet."); + if (mod_used == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen mod_used is not valid. " + "Please choose a value greater than 1."}; + auto forward_strand = std::forward(urange) | seqan3::views::kmer_hash(shape) | std::views::transform([seed] (uint64_t i) {return i ^ seed.get();}); diff --git a/include/modmer_hash_distance.hpp b/include/modmer_hash_distance.hpp index d824a11..c448ba3 100644 --- a/include/modmer_hash_distance.hpp +++ b/include/modmer_hash_distance.hpp @@ -72,6 +72,10 @@ struct modmer_hash_distance_fn static_assert(semialphabet>, "The range parameter to views::modmer_hash must be over elements of seqan3::semialphabet."); + if (mod_used == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen mod_used is not valid. " + "Please choose a value greater than 1."}; + auto forward_strand = std::forward(urange) | seqan3::views::kmer_hash(shape) | std::views::transform([seed] (uint64_t i) {return i ^ seed.get();}); diff --git a/test/api/minimiser_distance_test.cpp b/test/api/minimiser_distance_test.cpp index 8a7f008..9123c93 100644 --- a/test/api/minimiser_distance_test.cpp +++ b/test/api/minimiser_distance_test.cpp @@ -21,7 +21,7 @@ #include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" -#include "minimiser_distance.hpp" +#include "minimiser_hash_distance.hpp" using seqan3::operator""_dna4; using seqan3::operator""_shape; @@ -177,6 +177,7 @@ TEST_F(minimiser_test, ungapped_kmer_hash) EXPECT_TRUE(std::ranges::empty(empty_view2)); EXPECT_RANGE_EQ(result3_ungapped, (seqan3::detail::minimiser_distance_view{text3 | kmer_view, text3 | rev_kmer_view, 5})); EXPECT_RANGE_EQ(result3_ungapped_no_rev, text3 | kmer_view | minimiser_no_rev_view); + EXPECT_THROW((text3 | minimiser_hash_distance(seqan3::ungapped{4}, seqan3::window_size{3})), std::invalid_argument); } @@ -197,6 +198,7 @@ TEST_F(minimiser_test, gapped_kmer_hash) text3 | rev_gapped_kmer_view, 5})); EXPECT_RANGE_EQ(result3_gapped_no_rev, text3 | gapped_kmer_view | minimiser_no_rev_view); + EXPECT_THROW((text3 | minimiser_hash_distance(0b1001_shape, seqan3::window_size{3})), std::invalid_argument); } TEST_F(minimiser_test, window_too_big) diff --git a/test/api/modmer_hash_test.cpp b/test/api/modmer_hash_test.cpp index 74367ff..2294770 100644 --- a/test/api/modmer_hash_test.cpp +++ b/test/api/modmer_hash_test.cpp @@ -31,6 +31,9 @@ static constexpr seqan3::shape gapped_shape = 0b1001_shape; static constexpr auto ungapped_view = modmer_hash(ungapped_shape, 2, seqan3::seed{0}); +static constexpr auto ungapped_3_view = modmer_hash(ungapped_shape, + 3, + seqan3::seed{0}); static constexpr auto gapped_view = modmer_hash(gapped_shape, 2, seqan3::seed{0}); @@ -77,6 +80,7 @@ class modmer_hash_test : public ::testing::Test std::vector text3{"ACGGCGACGTTTAG"_dna4}; result_t result3_ungapped{27+27, 191+1, 252+192, 242+112}; // ACGT/ACGT, GTTT/AAAC, TTTA/TAAA, TTAG/CTAA result_t result3_gapped{3+3, 11+1, 12+12, 14+4}; // A--T/A--T, G--T/A--C, T--A/T--A, T--G/C--A - "-" for gap + result_t result3_ungapped_3{117, 255, 267, 369, 279, 243, 27+27, 117, 191+1, 252+192, 242+112}; }; template @@ -108,6 +112,9 @@ TEST_F(modmer_hash_test, ungapped) EXPECT_RANGE_EQ(result1, text1 | ungapped_view); EXPECT_TRUE(std::ranges::empty(too_short_text | ungapped_view)); EXPECT_RANGE_EQ(result3_ungapped, text3 | ungapped_view); + EXPECT_NO_THROW(text1 | modmer_hash(ungapped_shape, 2)); + EXPECT_RANGE_EQ(result3_ungapped_3, text3 | ungapped_3_view); + EXPECT_THROW((text3 | modmer_hash(ungapped_shape, 1)), std::invalid_argument); } TEST_F(modmer_hash_test, gapped) @@ -115,6 +122,8 @@ TEST_F(modmer_hash_test, gapped) EXPECT_RANGE_EQ(result1, text1 | gapped_view); EXPECT_TRUE(std::ranges::empty(too_short_text | gapped_view)); EXPECT_RANGE_EQ(result3_gapped, text3 | gapped_view); + EXPECT_NO_THROW(text1 | modmer_hash(gapped_shape, 2)); + EXPECT_THROW((text3 | modmer_hash(gapped_shape, 1)), std::invalid_argument); } TEST_F(modmer_hash_test, combinability) diff --git a/test/api/modmer_test.cpp b/test/api/modmer_test.cpp index e04ccbd..7dbbb6b 100644 --- a/test/api/modmer_test.cpp +++ b/test/api/modmer_test.cpp @@ -140,7 +140,7 @@ TEST_F(modmer_test, gapped_kmer_hash) auto v1 = text1 | gapped_kmer_view; EXPECT_RANGE_EQ(result1_distance, (seqan3::detail::modmer_view(v1, 2))); auto v2 = text3 | gapped_kmer_view; - EXPECT_RANGE_EQ(result3_distance, (seqan3::detail::modmer_view(v2, 2))); + EXPECT_RANGE_EQ(result3_distance, (seqan3::detail::modmer_view(v2, 2))); } TEST_F(modmer_test, combinability) From 1ff5c186e76c96aa81c1ed7b42937b3bcb7aa9e1 Mon Sep 17 00:00:00 2001 From: mitradarja Date: Wed, 5 Jan 2022 10:45:06 +0100 Subject: [PATCH 19/20] [TEST] Increase code coverage. --- src/main.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 550ddda..713d8a6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -72,16 +72,8 @@ int coverage(seqan3::argument_parser & parser) return -1; } - try - { - string_to_methods(method, args.name); - do_coverage(sequence_file, args); - } - catch (const std::invalid_argument & e) - { - std::cerr << e.what() << std::endl; - return -1; - } + string_to_methods(method, args.name); + do_coverage(sequence_file, args); return 0; } @@ -110,16 +102,9 @@ int speed(seqan3::argument_parser & parser) return -1; } - try - { - string_to_methods(method, args.name); - do_comparison(sequence_files, args); - } - catch (const std::invalid_argument & e) - { - std::cerr << e.what() << std::endl; - return -1; - } + string_to_methods(method, args.name); + do_comparison(sequence_files, args); + return 0; } From 301fdc9862267a4779ec6940a4e37110864e6cbd Mon Sep 17 00:00:00 2001 From: mitradarja Date: Wed, 5 Jan 2022 10:50:47 +0100 Subject: [PATCH 20/20] [TEST] Increase code coverage. --- test/cli/minions_speed_test.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/cli/minions_speed_test.cpp b/test/cli/minions_speed_test.cpp index 92c676f..3f70ab7 100644 --- a/test/cli/minions_speed_test.cpp +++ b/test/cli/minions_speed_test.cpp @@ -32,7 +32,7 @@ TEST_F(cli_test, minimiser) TEST_F(cli_test, modmer) { - cli_test_result result = execute_app("minions speed --method modmer -k 19 -w 19 ", data("example1.fasta")); + cli_test_result result = execute_app("minions speed --method modmer -k 19 -w 2 ", data("example1.fasta")); EXPECT_EQ(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{}); @@ -46,6 +46,22 @@ TEST_F(cli_test, strobemer) EXPECT_EQ(result.err, std::string{}); } +TEST_F(cli_test, hybridstrobemer) +{ + cli_test_result result = execute_app("minions speed --method strobemer -k 19 --w-min 16 --w-max 30 --order 2 --hybrid", data("example1.fasta")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{}); + EXPECT_EQ(result.err, std::string{}); +} + +TEST_F(cli_test, minstrobers) +{ + cli_test_result result = execute_app("minions speed --method strobemer -k 19 --w-min 16 --w-max 30 --order 2 --minstrobers", data("example1.fasta")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{}); + EXPECT_EQ(result.err, std::string{}); +} + TEST_F(cli_test, wrong_method) { cli_test_result result = execute_app("minions speed --method submer -k 19", data("example1.fasta"));