Skip to content

Commit

Permalink
Use adaptive bubble score for tumour calling
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cooke committed Nov 29, 2018
1 parent 9f496d3 commit d582cbc
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 5 deletions.
14 changes: 13 additions & 1 deletion src/config/option_collation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,18 @@ auto get_assembler_region_generator_frequency_trigger(const OptionMap& options)
}
}

coretools::LocalReassembler::BubbleScoreSetter
get_assembler_bubble_score_setter(const OptionMap& options) noexcept
{
using namespace octopus::coretools;
if (is_cancer_calling(options)) {
return DepthBasedBubbleScoreSetter {options.at("min-bubble-score").as<double>(),
options.at("min-expected-somatic-frequency").as<float>()};
} else {
return ConstantBubbleScoreSetter {options.at("min-bubble-score").as<double>()};
}
}

class MissingSourceVariantFile : public MissingFileError
{
std::string do_where() const override
Expand Down Expand Up @@ -1051,7 +1063,7 @@ auto make_variant_generator_builder(const OptionMap& options)
reassembler_options.bin_overlap = as_unsigned("max-assemble-region-overlap", options);
reassembler_options.min_kmer_observations = as_unsigned("min-kmer-prune", options);
reassembler_options.max_bubbles = as_unsigned("max-bubbles", options);
reassembler_options.min_bubble_score = options.at("min-bubble-score").as<double>();
reassembler_options.min_bubble_score = get_assembler_bubble_score_setter(options);
reassembler_options.max_variant_size = as_unsigned("max-variant-size", options);
result.set_local_reassembler(std::move(reassembler_options));
}
Expand Down
30 changes: 28 additions & 2 deletions src/core/tools/vargen/local_reassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
#include "utils/mappable_algorithms.hpp"
#include "utils/sequence_utils.hpp"
#include "utils/append.hpp"
#include "utils/global_aligner.hpp"
#include "utils/read_stats.hpp"
#include "io/reference/reference_genome.hpp"
#include "logging/logging.hpp"
#include "utils/global_aligner.hpp"

namespace octopus { namespace coretools {

Expand Down Expand Up @@ -995,7 +996,7 @@ LocalReassembler::try_assemble_region(Assembler& assembler, const NucleotideSequ
if (assembler.is_empty() || assembler.is_all_reference()) {
return status;
}
auto variants = assembler.extract_variants(max_bubbles_, min_bubble_score_);
auto variants = assembler.extract_variants(max_bubbles_, calculate_min_bubble_score(assemble_region));
assembler.clear();
if (!variants.empty()) {
trim_reference(variants);
Expand All @@ -1020,5 +1021,30 @@ LocalReassembler::try_assemble_region(Assembler& assembler, const NucleotideSequ
return status;
}

double LocalReassembler::calculate_min_bubble_score(const GenomicRegion& assemble_region) const
{
ReadBaseCountMap read_counts {};
read_counts.reserve(read_buffer_.size());
for (const auto& p : read_buffer_) {
read_counts.emplace(p.first, count_base_pairs(p.second, assemble_region));
}
return min_bubble_score_(assemble_region, read_counts);
}

double DepthBasedBubbleScoreSetter::operator()(const GenomicRegion& region, const LocalReassembler::ReadBaseCountMap& read_counts) const
{
auto result = min_score_;
for (const auto& p : read_counts) {
const auto mean_depth = p.second / size(region);
result = std::max(result, std::floor(min_allele_frequency_ * mean_depth));
}
return result;
}

DepthBasedBubbleScoreSetter::DepthBasedBubbleScoreSetter(double min_score, double min_allele_frequency)
: min_score_ {min_score}
, min_allele_frequency_ {min_allele_frequency}
{}

} // namespace coretools
} // namespace octopus
25 changes: 23 additions & 2 deletions src/core/tools/vargen/local_reassembler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ namespace coretools {
class LocalReassembler : public VariantGenerator
{
public:
using ReadBaseCountMap = std::unordered_map<SampleName, unsigned>;
using BubbleScoreSetter = std::function<double(const GenomicRegion&, const ReadBaseCountMap&)>;

struct Options
{
ExecutionPolicy execution_policy = ExecutionPolicy::seq;
Expand All @@ -42,7 +45,7 @@ class LocalReassembler : public VariantGenerator
AlignedRead::BaseQuality mask_threshold = 0;
unsigned min_kmer_observations = 1;
unsigned max_bubbles = 10;
double min_bubble_score = 2.0;
BubbleScoreSetter min_bubble_score = [] (const GenomicRegion&, const ReadBaseCountMap&) { return 2.0; };
Variant::MappingDomain::Size max_variant_size = 5000;
};

Expand Down Expand Up @@ -113,7 +116,7 @@ class LocalReassembler : public VariantGenerator
AlignedRead::BaseQuality mask_threshold_;
unsigned min_kmer_observations_;
unsigned max_bubbles_;
double min_bubble_score_;
BubbleScoreSetter min_bubble_score_;
Variant::MappingDomain::Size max_variant_size_;

void prepare_bins(const GenomicRegion& active_region, BinList& bins) const;
Expand All @@ -126,6 +129,24 @@ class LocalReassembler : public VariantGenerator
AssemblerStatus assemble_bin(unsigned kmer_size, const Bin& bin, std::deque<Variant>& result) const;
AssemblerStatus try_assemble_region(Assembler& assembler, const NucleotideSequence& reference_sequence,
const GenomicRegion& reference_region, std::deque<Variant>& result) const;
double calculate_min_bubble_score(const GenomicRegion& assemble_region) const;
};

struct ConstantBubbleScoreSetter
{
double operator()(const GenomicRegion&, const LocalReassembler::ReadBaseCountMap&) const noexcept { return min_score_; }
ConstantBubbleScoreSetter(double min_score) : min_score_ {min_score} {}
private:
double min_score_;
};

struct DepthBasedBubbleScoreSetter
{
double operator()(const GenomicRegion& region, const LocalReassembler::ReadBaseCountMap& read_counts) const;
DepthBasedBubbleScoreSetter(double min_score, double min_allele_frequency);
private:

double min_score_, min_allele_frequency_;
};

} // namespace coretools
Expand Down

0 comments on commit d582cbc

Please sign in to comment.