Skip to content

Commit

Permalink
Merge pull request #399 from eseiler/misc/threshold
Browse files Browse the repository at this point in the history
[MISC] Threshold minimum should be 1
  • Loading branch information
eseiler committed Oct 31, 2023
2 parents 0a787da + a475c13 commit b530372
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 46 deletions.
46 changes: 2 additions & 44 deletions include/raptor/threshold/threshold.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,51 +28,9 @@ class threshold
threshold & operator=(threshold &&) = default;
~threshold() = default;

threshold(threshold_parameters const & arguments)
{
uint8_t const kmer_size{arguments.shape.size()};
size_t const kmers_per_window = arguments.window_size - kmer_size + 1;

if (!std::isnan(arguments.percentage))
{
threshold_kind = threshold_kinds::percentage;
threshold_percentage = arguments.percentage;
}
else if (kmers_per_window == 1u)
{
threshold_kind = threshold_kinds::lemma;
size_t const kmer_lemma_minuend = arguments.query_length + 1u;
size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size;
kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 0;
}
else
{
threshold_kind = threshold_kinds::probabilistic;
size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1;
minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window;
maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1;
precomp_correction = precompute_correction(arguments);
precomp_thresholds = precompute_threshold(arguments);
}
}
threshold(threshold_parameters const & arguments);

size_t get(size_t const minimiser_count) const noexcept
{
switch (threshold_kind)
{
case threshold_kinds::lemma:
return kmer_lemma;
case threshold_kinds::percentage:
return static_cast<size_t>(minimiser_count * threshold_percentage);
default:
{
assert(threshold_kind == threshold_kinds::probabilistic);
size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers)
- minimal_number_of_minimizers;
return precomp_thresholds[index] + precomp_correction[index];
}
}
}
size_t get(size_t const minimiser_count) const noexcept;

private:
enum class threshold_kinds
Expand Down
10 changes: 8 additions & 2 deletions src/threshold/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
cmake_minimum_required (VERSION 3.18)

if (NOT TARGET raptor_threshold)
add_library ("raptor_threshold" STATIC multiple_error_model.cpp one_error_model.cpp one_indirect_error_model.cpp
pascal_row.cpp precompute_correction.cpp precompute_threshold.cpp
add_library ("raptor_threshold" STATIC
multiple_error_model.cpp
one_error_model.cpp
one_indirect_error_model.cpp
pascal_row.cpp
precompute_correction.cpp
precompute_threshold.cpp
threshold.cpp
)

target_link_libraries ("raptor_threshold" PUBLIC "raptor_interface")
Expand Down
64 changes: 64 additions & 0 deletions src/threshold/threshold.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

/*!\file
* \brief Implements raptor::threshold::threshold.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#include <raptor/threshold/threshold.hpp>

namespace raptor::threshold
{

threshold::threshold(threshold_parameters const & arguments)
{
uint8_t const kmer_size{arguments.shape.size()};
size_t const kmers_per_window = arguments.window_size - kmer_size + 1;

if (!std::isnan(arguments.percentage))
{
threshold_kind = threshold_kinds::percentage;
threshold_percentage = arguments.percentage;
}
else if (kmers_per_window == 1u)
{
threshold_kind = threshold_kinds::lemma;
size_t const kmer_lemma_minuend = arguments.query_length + 1u;
size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size;
kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 1u;
}
else
{
threshold_kind = threshold_kinds::probabilistic;
size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1;
minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window;
maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1;
precomp_correction = precompute_correction(arguments);
precomp_thresholds = precompute_threshold(arguments);
}
}

size_t threshold::get(size_t const minimiser_count) const noexcept
{
switch (threshold_kind)
{
case threshold_kinds::lemma:
return kmer_lemma;
case threshold_kinds::percentage:
return std::max<size_t>(1u, minimiser_count * threshold_percentage);
default:
{
assert(threshold_kind == threshold_kinds::probabilistic);
size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers)
- minimal_number_of_minimizers;
return std::max<size_t>(1u, precomp_thresholds[index] + precomp_correction[index]);
}
}
}

} // namespace raptor::threshold
1 change: 1 addition & 0 deletions test/unit/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ raptor_add_unit_test (issue_142.cpp)
raptor_add_unit_test (formatted_bytes.cpp)
raptor_add_unit_test (index_size.cpp)
raptor_add_unit_test (memory_usage.cpp)
raptor_add_unit_test (threshold.cpp)
raptor_add_unit_test (validate_shape.cpp)
134 changes: 134 additions & 0 deletions test/unit/api/threshold.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

#include <gtest/gtest.h>

#include <raptor/threshold/threshold.hpp>

static inline raptor::threshold::threshold_parameters const default_parameters{.window_size = 32,
.shape = seqan3::ungapped{32u},
.query_length = 250u,
.p_max = 0.15,
.fpr = 0.05,
.tau = 0.9999};

TEST(kmer_lemma, no_error)
{
raptor::threshold::threshold threshold{default_parameters};
EXPECT_EQ(threshold.get(100u), 219u);
EXPECT_EQ(threshold.get(100u), threshold.get(1000u));
}

TEST(kmer_lemma, with_error)
{
auto threshold_params = default_parameters;
threshold_params.errors = 1u;
raptor::threshold::threshold threshold{threshold_params};
EXPECT_EQ(threshold.get(100u), 187u);
EXPECT_EQ(threshold.get(100u), threshold.get(1000u));
}

TEST(kmer_lemma, minimum)
{
auto threshold_params = default_parameters;
threshold_params.errors = 10u;
raptor::threshold::threshold threshold{threshold_params};
EXPECT_EQ(threshold.get(100u), 1u);
EXPECT_EQ(threshold.get(100u), threshold.get(1000u));
}

TEST(minimiser, no_error)
{
auto threshold_params = default_parameters;
threshold_params.window_size = 50u;
raptor::threshold::threshold threshold{threshold_params};
size_t const min_count{11u}; // 219 / 19
size_t const max_count{201u}; // 250 - 50 + 1

for (size_t i = 0; i < min_count; ++i)
EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i;

EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u));
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u));

for (size_t i = min_count; i < max_count; ++i)
{
if (i == 59u) // 63 vs 60
continue;
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i;
}
}

TEST(minimiser, with_error)
{
auto threshold_params = default_parameters;
threshold_params.window_size = 50u;
threshold_params.errors = 1u;
raptor::threshold::threshold threshold{threshold_params};
size_t const min_count{11u}; // 219 / 19
size_t const max_count{201u}; // 250 - 50 + 1

for (size_t i = 0; i < min_count; ++i)
EXPECT_EQ(threshold.get(i), threshold.get(min_count)) << i;

EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u));
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u));

for (size_t i = min_count; i < max_count; ++i)
{
if (i == 59u) // 47 vs 44
continue;
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i;
}
}

TEST(minimiser, minimum)
{
auto threshold_params = default_parameters;
threshold_params.window_size = 50u;
threshold_params.query_length = 75u;
threshold_params.errors = 4u;
raptor::threshold::threshold threshold{threshold_params};
size_t const min_count{2u}; // 44 / 19
size_t const max_count{26u}; // 75 - 50 + 1

for (size_t i = 0; i <= min_count; ++i)
EXPECT_EQ(threshold.get(i), 1u) << i;

EXPECT_EQ(threshold.get(max_count), threshold.get(max_count + 1u));
EXPECT_EQ(threshold.get(max_count), threshold.get(1000u));

for (size_t i = min_count; i < max_count; ++i)
EXPECT_LE(threshold.get(i), threshold.get(i + 1u)) << i;
}

TEST(percentage, 100)
{
auto threshold_params = default_parameters;
threshold_params.percentage = 1.0;
raptor::threshold::threshold threshold{threshold_params};
EXPECT_EQ(threshold.get(100u), 100u);
EXPECT_EQ(threshold.get(250u), 250u);
}

TEST(percentage, 50)
{
auto threshold_params = default_parameters;
threshold_params.percentage = 0.5;
raptor::threshold::threshold threshold{threshold_params};
EXPECT_EQ(threshold.get(100u), 50u);
EXPECT_EQ(threshold.get(250u), 125u);
}

TEST(percentage, 0)
{
auto threshold_params = default_parameters;
threshold_params.percentage = 0.0;
raptor::threshold::threshold threshold{threshold_params};
EXPECT_EQ(threshold.get(100u), 1u);
EXPECT_EQ(threshold.get(250u), 1u);
}

1 comment on commit b530372

@vercel
Copy link

@vercel vercel bot commented on b530372 Oct 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

raptor – ./

raptor-git-main-seqan.vercel.app
seqan-raptor.vercel.app
raptor-seqan.vercel.app

Please sign in to comment.