diff --git a/include/raptor/threshold/threshold.hpp b/include/raptor/threshold/threshold.hpp index f69f713d..2a06acd1 100644 --- a/include/raptor/threshold/threshold.hpp +++ b/include/raptor/threshold/threshold.hpp @@ -28,51 +28,9 @@ class threshold threshold & operator=(threshold &&) = default; ~threshold() = default; - threshold(threshold_parameters const & arguments) - { - uint8_t const kmer_size{arguments.shape.size()}; - size_t const kmers_per_window = arguments.window_size - kmer_size + 1; - - if (!std::isnan(arguments.percentage)) - { - threshold_kind = threshold_kinds::percentage; - threshold_percentage = arguments.percentage; - } - else if (kmers_per_window == 1u) - { - threshold_kind = threshold_kinds::lemma; - size_t const kmer_lemma_minuend = arguments.query_length + 1u; - size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size; - kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 0; - } - else - { - threshold_kind = threshold_kinds::probabilistic; - size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1; - minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window; - maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1; - precomp_correction = precompute_correction(arguments); - precomp_thresholds = precompute_threshold(arguments); - } - } + threshold(threshold_parameters const & arguments); - size_t get(size_t const minimiser_count) const noexcept - { - switch (threshold_kind) - { - case threshold_kinds::lemma: - return kmer_lemma; - case threshold_kinds::percentage: - return static_cast(minimiser_count * threshold_percentage); - default: - { - assert(threshold_kind == threshold_kinds::probabilistic); - size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers) - - minimal_number_of_minimizers; - return precomp_thresholds[index] + precomp_correction[index]; - } - } - } + size_t get(size_t const minimiser_count) const noexcept; private: enum class threshold_kinds diff --git a/src/threshold/CMakeLists.txt b/src/threshold/CMakeLists.txt index 5fdd1509..65905f1d 100644 --- a/src/threshold/CMakeLists.txt +++ b/src/threshold/CMakeLists.txt @@ -1,8 +1,14 @@ cmake_minimum_required (VERSION 3.18) if (NOT TARGET raptor_threshold) - add_library ("raptor_threshold" STATIC multiple_error_model.cpp one_error_model.cpp one_indirect_error_model.cpp - pascal_row.cpp precompute_correction.cpp precompute_threshold.cpp + add_library ("raptor_threshold" STATIC + multiple_error_model.cpp + one_error_model.cpp + one_indirect_error_model.cpp + pascal_row.cpp + precompute_correction.cpp + precompute_threshold.cpp + threshold.cpp ) target_link_libraries ("raptor_threshold" PUBLIC "raptor_interface") diff --git a/src/threshold/threshold.cpp b/src/threshold/threshold.cpp new file mode 100644 index 00000000..1d57418c --- /dev/null +++ b/src/threshold/threshold.cpp @@ -0,0 +1,64 @@ +// -------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md +// -------------------------------------------------------------------------------------------------- + +/*!\file + * \brief Implements raptor::threshold::threshold. + * \author Enrico Seiler + */ + +#include + +namespace raptor::threshold +{ + +threshold::threshold(threshold_parameters const & arguments) +{ + uint8_t const kmer_size{arguments.shape.size()}; + size_t const kmers_per_window = arguments.window_size - kmer_size + 1; + + if (!std::isnan(arguments.percentage)) + { + threshold_kind = threshold_kinds::percentage; + threshold_percentage = arguments.percentage; + } + else if (kmers_per_window == 1u) + { + threshold_kind = threshold_kinds::lemma; + size_t const kmer_lemma_minuend = arguments.query_length + 1u; + size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size; + kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 1u; + } + else + { + threshold_kind = threshold_kinds::probabilistic; + size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1; + minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window; + maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1; + precomp_correction = precompute_correction(arguments); + precomp_thresholds = precompute_threshold(arguments); + } +} + +size_t threshold::get(size_t const minimiser_count) const noexcept +{ + switch (threshold_kind) + { + case threshold_kinds::lemma: + return kmer_lemma; + case threshold_kinds::percentage: + return std::max(1u, minimiser_count * threshold_percentage); + default: + { + assert(threshold_kind == threshold_kinds::probabilistic); + size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers) + - minimal_number_of_minimizers; + return std::max(1u, precomp_thresholds[index] + precomp_correction[index]); + } + } +} + +} // namespace raptor::threshold diff --git a/test/unit/api/CMakeLists.txt b/test/unit/api/CMakeLists.txt index e0d76b7e..f13589d3 100644 --- a/test/unit/api/CMakeLists.txt +++ b/test/unit/api/CMakeLists.txt @@ -11,4 +11,5 @@ raptor_add_unit_test (issue_142.cpp) raptor_add_unit_test (formatted_bytes.cpp) raptor_add_unit_test (index_size.cpp) raptor_add_unit_test (memory_usage.cpp) +raptor_add_unit_test (threshold.cpp) raptor_add_unit_test (validate_shape.cpp) diff --git a/test/unit/api/threshold.cpp b/test/unit/api/threshold.cpp new file mode 100644 index 00000000..c7b4f52e --- /dev/null +++ b/test/unit/api/threshold.cpp @@ -0,0 +1,134 @@ +// -------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md +// -------------------------------------------------------------------------------------------------- + +#include + +#include + +static inline raptor::threshold::threshold_parameters const default_parameters{.window_size = 32, + .shape = seqan3::ungapped{32u}, + .query_length = 250u, + .p_max = 0.15, + .fpr = 0.05, + .tau = 0.9999}; + +TEST(kmer_lemma, no_error) +{ + raptor::threshold::threshold threshold{default_parameters}; + EXPECT_EQ(threshold.get(100u), 219u); + EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); +} + +TEST(kmer_lemma, with_error) +{ + auto threshold_params = default_parameters; + threshold_params.errors = 1u; + raptor::threshold::threshold threshold{threshold_params}; + EXPECT_EQ(threshold.get(100u), 187u); + EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); +} + +TEST(kmer_lemma, minimum) +{ + auto threshold_params = default_parameters; + threshold_params.errors = 10u; + raptor::threshold::threshold threshold{threshold_params}; + EXPECT_EQ(threshold.get(100u), 1u); + EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); +} + +TEST(minimiser, no_error) +{ + auto threshold_params = default_parameters; + threshold_params.window_size = 50u; + raptor::threshold::threshold threshold{threshold_params}; + size_t const min_count{11u}; // 219 / 19 + size_t const max_count{201u}; // 250 - 50 + 1 + + for (size_t i = 0; i < min_count; ++i) + EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i; + + EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); + EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); + + for (size_t i = min_count; i < max_count; ++i) + { + if (i == 59u) // 63 vs 60 + continue; + EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; + } +} + +TEST(minimiser, with_error) +{ + auto threshold_params = default_parameters; + threshold_params.window_size = 50u; + threshold_params.errors = 1u; + raptor::threshold::threshold threshold{threshold_params}; + size_t const min_count{11u}; // 219 / 19 + size_t const max_count{201u}; // 250 - 50 + 1 + + for (size_t i = 0; i < min_count; ++i) + EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i; + + EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); + EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); + + for (size_t i = min_count; i < max_count; ++i) + { + if (i == 59u) // 47 vs 44 + continue; + EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; + } +} + +TEST(minimiser, minimum) +{ + auto threshold_params = default_parameters; + threshold_params.window_size = 50u; + threshold_params.query_length = 75u; + threshold_params.errors = 4u; + raptor::threshold::threshold threshold{threshold_params}; + size_t const min_count{2u}; // 44 / 19 + size_t const max_count{26u}; // 75 - 50 + 1 + + for (size_t i = 0; i <= min_count; ++i) + EXPECT_EQ(threshold.get(i), 1u) << i; + + EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); + EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); + + for (size_t i = min_count; i < max_count; ++i) + EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; +} + +TEST(percentage, 100) +{ + auto threshold_params = default_parameters; + threshold_params.percentage = 1.0; + raptor::threshold::threshold threshold{threshold_params}; + EXPECT_EQ(threshold.get(100u), 100u); + EXPECT_EQ(threshold.get(250u), 250u); +} + +TEST(percentage, 50) +{ + auto threshold_params = default_parameters; + threshold_params.percentage = 0.5; + raptor::threshold::threshold threshold{threshold_params}; + EXPECT_EQ(threshold.get(100u), 50u); + EXPECT_EQ(threshold.get(250u), 125u); +} + +TEST(percentage, 0) +{ + auto threshold_params = default_parameters; + threshold_params.percentage = 0.0; + raptor::threshold::threshold threshold{threshold_params}; + EXPECT_EQ(threshold.get(100u), 1u); + EXPECT_EQ(threshold.get(250u), 1u); +}