generated from seqan/app-template
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #399 from eseiler/misc/threshold
[MISC] Threshold minimum should be 1
- Loading branch information
Showing
5 changed files
with
209 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// -------------------------------------------------------------------------------------------------- | ||
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin | ||
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik | ||
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License | ||
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md | ||
// -------------------------------------------------------------------------------------------------- | ||
|
||
/*!\file | ||
* \brief Implements raptor::threshold::threshold. | ||
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de> | ||
*/ | ||
|
||
#include <raptor/threshold/threshold.hpp> | ||
|
||
namespace raptor::threshold | ||
{ | ||
|
||
threshold::threshold(threshold_parameters const & arguments) | ||
{ | ||
uint8_t const kmer_size{arguments.shape.size()}; | ||
size_t const kmers_per_window = arguments.window_size - kmer_size + 1; | ||
|
||
if (!std::isnan(arguments.percentage)) | ||
{ | ||
threshold_kind = threshold_kinds::percentage; | ||
threshold_percentage = arguments.percentage; | ||
} | ||
else if (kmers_per_window == 1u) | ||
{ | ||
threshold_kind = threshold_kinds::lemma; | ||
size_t const kmer_lemma_minuend = arguments.query_length + 1u; | ||
size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size; | ||
kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 1u; | ||
} | ||
else | ||
{ | ||
threshold_kind = threshold_kinds::probabilistic; | ||
size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1; | ||
minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window; | ||
maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1; | ||
precomp_correction = precompute_correction(arguments); | ||
precomp_thresholds = precompute_threshold(arguments); | ||
} | ||
} | ||
|
||
size_t threshold::get(size_t const minimiser_count) const noexcept | ||
{ | ||
switch (threshold_kind) | ||
{ | ||
case threshold_kinds::lemma: | ||
return kmer_lemma; | ||
case threshold_kinds::percentage: | ||
return std::max<size_t>(1u, minimiser_count * threshold_percentage); | ||
default: | ||
{ | ||
assert(threshold_kind == threshold_kinds::probabilistic); | ||
size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers) | ||
- minimal_number_of_minimizers; | ||
return std::max<size_t>(1u, precomp_thresholds[index] + precomp_correction[index]); | ||
} | ||
} | ||
} | ||
|
||
} // namespace raptor::threshold |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
// -------------------------------------------------------------------------------------------------- | ||
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin | ||
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik | ||
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License | ||
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md | ||
// -------------------------------------------------------------------------------------------------- | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <raptor/threshold/threshold.hpp> | ||
|
||
static inline raptor::threshold::threshold_parameters const default_parameters{.window_size = 32, | ||
.shape = seqan3::ungapped{32u}, | ||
.query_length = 250u, | ||
.p_max = 0.15, | ||
.fpr = 0.05, | ||
.tau = 0.9999}; | ||
|
||
TEST(kmer_lemma, no_error) | ||
{ | ||
raptor::threshold::threshold threshold{default_parameters}; | ||
EXPECT_EQ(threshold.get(100u), 219u); | ||
EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); | ||
} | ||
|
||
TEST(kmer_lemma, with_error) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.errors = 1u; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
EXPECT_EQ(threshold.get(100u), 187u); | ||
EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); | ||
} | ||
|
||
TEST(kmer_lemma, minimum) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.errors = 10u; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
EXPECT_EQ(threshold.get(100u), 1u); | ||
EXPECT_EQ(threshold.get(100u), threshold.get(1000u)); | ||
} | ||
|
||
TEST(minimiser, no_error) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.window_size = 50u; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
size_t const min_count{11u}; // 219 / 19 | ||
size_t const max_count{201u}; // 250 - 50 + 1 | ||
|
||
for (size_t i = 0; i < min_count; ++i) | ||
EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i; | ||
|
||
EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); | ||
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); | ||
|
||
for (size_t i = min_count; i < max_count; ++i) | ||
{ | ||
if (i == 59u) // 63 vs 60 | ||
continue; | ||
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; | ||
} | ||
} | ||
|
||
TEST(minimiser, with_error) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.window_size = 50u; | ||
threshold_params.errors = 1u; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
size_t const min_count{11u}; // 219 / 19 | ||
size_t const max_count{201u}; // 250 - 50 + 1 | ||
|
||
for (size_t i = 0; i < min_count; ++i) | ||
EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i; | ||
|
||
EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); | ||
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); | ||
|
||
for (size_t i = min_count; i < max_count; ++i) | ||
{ | ||
if (i == 59u) // 47 vs 44 | ||
continue; | ||
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; | ||
} | ||
} | ||
|
||
TEST(minimiser, minimum) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.window_size = 50u; | ||
threshold_params.query_length = 75u; | ||
threshold_params.errors = 4u; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
size_t const min_count{2u}; // 44 / 19 | ||
size_t const max_count{26u}; // 75 - 50 + 1 | ||
|
||
for (size_t i = 0; i <= min_count; ++i) | ||
EXPECT_EQ(threshold.get(i), 1u) << i; | ||
|
||
EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u)); | ||
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u)); | ||
|
||
for (size_t i = min_count; i < max_count; ++i) | ||
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i; | ||
} | ||
|
||
TEST(percentage, 100) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.percentage = 1.0; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
EXPECT_EQ(threshold.get(100u), 100u); | ||
EXPECT_EQ(threshold.get(250u), 250u); | ||
} | ||
|
||
TEST(percentage, 50) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.percentage = 0.5; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
EXPECT_EQ(threshold.get(100u), 50u); | ||
EXPECT_EQ(threshold.get(250u), 125u); | ||
} | ||
|
||
TEST(percentage, 0) | ||
{ | ||
auto threshold_params = default_parameters; | ||
threshold_params.percentage = 0.0; | ||
raptor::threshold::threshold threshold{threshold_params}; | ||
EXPECT_EQ(threshold.get(100u), 1u); | ||
EXPECT_EQ(threshold.get(250u), 1u); | ||
} |
b530372
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
raptor – ./
raptor-git-main-seqan.vercel.app
seqan-raptor.vercel.app
raptor-seqan.vercel.app