Skip to content

Commit

Permalink
Switch base-* to allele-* to satisfy change in VCF spec
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cooke committed Jul 3, 2020
1 parent 885382e commit 820eb49
Show file tree
Hide file tree
Showing 17 changed files with 177 additions and 233 deletions.
6 changes: 3 additions & 3 deletions src/core/csr/facets/alleles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ static void insert(std::vector<T>&& src, MappableFlatSet<T>& dst)
Alleles::Alleles(const std::vector<VcfRecord::SampleName>& samples, const std::vector<VcfRecord>& calls)
{
alleles_.reserve(calls.size());
for (const auto& call : calls) {
auto& call_map = alleles_[mapped_region(call)];
for (std::size_t idx {0}; idx < calls.size(); ++idx) {
auto& call_map = alleles_[mapped_region(calls[idx])];
for (const auto& sample : samples) {
call_map[sample] = get_resolved_alleles(call, sample);
call_map[sample] = get_resolved_alleles(calls, idx, sample);
}
}
}
Expand Down
8 changes: 6 additions & 2 deletions src/core/csr/facets/read_assignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,12 @@ ReadAssignments::ReadAssignments(const ReferenceGenome& reference,
}
}
}
for (const auto& call : calls) {
auto alleles = get_called_alleles(call, sample).first;
for (std::size_t call_idx {0}; call_idx < calls.size(); ++call_idx) {
std::vector<Allele> alleles {};
alleles.reserve(calls[call_idx].num_alt() + 1);
for (auto&& allele : get_resolved_alleles(calls, call_idx, sample)) {
if (allele) alleles.push_back(std::move(*allele));
}
auto allele_support = compute_allele_support(alleles, result_.haplotypes.at(sample), sample);
for (auto& allele : alleles) {
result_.alleles[sample].emplace(std::move(allele), std::move(allele_support.at(allele)));
Expand Down
3 changes: 1 addition & 2 deletions src/core/csr/measures/allele_depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ namespace {

bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
{
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
return !(allele == vcfspec::missingValue || allele == deleted_allele);
return !(allele == vcfspec::missingValue || allele == vcfspec::deleteMaskAllele);
}

bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
Expand Down
18 changes: 10 additions & 8 deletions src/core/csr/measures/denovo_contamination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "utils/append.hpp"
#include "is_denovo.hpp"
#include "../facets/samples.hpp"
#include "../facets/alleles.hpp"
#include "../facets/genotypes.hpp"
#include "../facets/pedigree.hpp"
#include "../facets/read_assignments.hpp"
Expand Down Expand Up @@ -66,11 +67,11 @@ void sort_unique(Container& values)
values.erase(std::unique(std::begin(values), std::end(values)), std::end(values));
}

auto get_denovo_alleles(const VcfRecord& denovo, const Trio& trio)
auto get_denovo_alleles(const VcfRecord& denovo, const Trio& trio, const Facet::AlleleMap& alleles)
{
auto parent_alleles = concat(get_called_alleles(denovo, trio.mother()).first,
get_called_alleles(denovo, trio.father()).first);
auto child_alleles = get_called_alleles(denovo, trio.child()).first;
auto parent_alleles = concat(get_called(alleles, denovo, trio.mother()),
get_called(alleles, denovo, trio.father()));
auto child_alleles = get_called(alleles, denovo, trio.child());
sort_unique(parent_alleles); sort_unique(child_alleles);
std::vector<Allele> result {};
result.reserve(child_alleles.size());
Expand Down Expand Up @@ -112,9 +113,9 @@ bool is_parental_haplotype(const Haplotype& haplotype, const Facet::GenotypeMap&
return contains(genotypes.at(trio.mother()), haplotype) || contains(genotypes.at(trio.father()), haplotype);
}

auto get_denovo_haplotypes(const VcfRecord& denovo, const Facet::GenotypeMap& genotypes, const Trio& trio)
auto get_denovo_haplotypes(const VcfRecord& denovo, const Facet::GenotypeMap& genotypes, const Trio& trio, const Facet::AlleleMap& alleles)
{
const auto denovo_alleles = get_denovo_alleles(denovo, trio);
const auto denovo_alleles = get_denovo_alleles(denovo, trio, alleles);
auto result = get_denovo_haplotypes(genotypes, denovo_alleles);
const auto is_parental = [&] (const auto& haplotype) { return is_parental_haplotype(haplotype, genotypes, trio); };
result.erase(std::remove_if(std::begin(result), std::end(result), is_parental), std::end(result));
Expand All @@ -139,11 +140,12 @@ Measure::ResultType DeNovoContamination::do_evaluate(const VcfRecord& call, cons
Optional<ValueType> result {};
if (is_denovo(call, facets)) {
const auto& samples = get_value<Samples>(facets.at("Samples"));
const auto& alleles = get_value<Alleles>(facets.at("Alleles"));
const auto& pedigree = get_value<Pedigree>(facets.at("Pedigree"));
assert(is_trio(samples, pedigree)); // TODO: Implement for general pedigree
const auto trio = *make_trio(samples[find_child_idx(samples, pedigree)], pedigree);
const auto& genotypes = get_value<Genotypes>(facets.at("Genotypes"));
const auto denovo_haplotypes = get_denovo_haplotypes(call, genotypes, trio);
const auto denovo_haplotypes = get_denovo_haplotypes(call, genotypes, trio, alleles);
if (denovo_haplotypes.empty()) {
return result;
}
Expand Down Expand Up @@ -230,7 +232,7 @@ std::string DeNovoContamination::do_describe() const

std::vector<std::string> DeNovoContamination::do_requirements() const
{
std::vector<std::string> result {"Samples", "Genotypes", "ReadAssignments", "Pedigree"};
std::vector<std::string> result {"Samples", "Alleles", "Genotypes", "ReadAssignments", "Pedigree"};
utils::append(IsDenovo(false).requirements(), result);
sort_unique(result);
return result;
Expand Down
3 changes: 1 addition & 2 deletions src/core/csr/measures/duplicate_allele_depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ namespace {

bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
{
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
return !(allele == vcfspec::missingValue || allele == deleted_allele);
return !(allele == vcfspec::missingValue || allele == vcfspec::deleteMaskAllele);
}

bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
Expand Down
3 changes: 1 addition & 2 deletions src/core/csr/measures/duplicate_concordance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ namespace {

bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
{
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
return !(allele == vcfspec::missingValue || allele == deleted_allele);
return !(allele == vcfspec::missingValue || allele == vcfspec::deleteMaskAllele);
}

bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
Expand Down
3 changes: 1 addition & 2 deletions src/core/csr/measures/median_base_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ namespace {

bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
{
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
return !(allele == vcfspec::missingValue || allele == deleted_allele);
return !(allele == vcfspec::missingValue || allele == vcfspec::deleteMaskAllele);
}

bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
Expand Down
30 changes: 16 additions & 14 deletions src/core/csr/measures/median_somatic_mapping_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "utils/maths.hpp"
#include "is_somatic.hpp"
#include "../facets/samples.hpp"
#include "../facets/alleles.hpp"
#include "../facets/genotypes.hpp"
#include "../facets/read_assignments.hpp"

Expand All @@ -40,27 +41,24 @@ Measure::ValueType MedianSomaticMappingQuality::get_value_type() const

namespace {

auto extract_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample)
{
return get_called_alleles(call, sample, ReferencePadPolicy::trim_alt_alleles).first;
}

template <typename Container>
void sort_unique(Container& values)
{
std::sort(std::begin(values), std::end(values));
values.erase(std::unique(std::begin(values), std::end(values)), std::end(values));
}

auto get_somatic_alleles(const VcfRecord& somatic, const std::vector<SampleName>& somatic_samples,
const std::vector<SampleName>& normal_samples)
auto get_somatic_alleles(const VcfRecord& somatic,
const std::vector<SampleName>& somatic_samples,
const std::vector<SampleName>& normal_samples,
const Facet::AlleleMap& alleles)
{
std::vector<Allele> somatic_sample_alleles {}, normal_sample_alleles {};
for (const auto& sample : somatic_samples) {
utils::append(extract_called_alleles(somatic, sample), somatic_sample_alleles);
utils::append(get_called(alleles, somatic, sample), somatic_sample_alleles);
}
for (const auto& sample : normal_samples) {
utils::append(extract_called_alleles(somatic, sample), normal_sample_alleles);
utils::append(get_called(alleles, somatic, sample), normal_sample_alleles);
}
sort_unique(somatic_sample_alleles); sort_unique(normal_sample_alleles);
std::vector<Allele> result {};
Expand Down Expand Up @@ -93,10 +91,13 @@ auto get_somatic_haplotypes(const Facet::GenotypeMap& genotypes, const std::vect
return result;
}

auto get_somatic_haplotypes(const VcfRecord& somatic, const Facet::GenotypeMap& genotypes,
const std::vector<SampleName>& somatic_samples, const std::vector<SampleName>& normal_samples)
auto get_somatic_haplotypes(const VcfRecord& somatic,
const Facet::GenotypeMap& genotypes,
const std::vector<SampleName>& somatic_samples,
const std::vector<SampleName>& normal_samples,
const Facet::AlleleMap& alleles)
{
const auto somatic_alleles = get_somatic_alleles(somatic, somatic_samples, normal_samples);
const auto somatic_alleles = get_somatic_alleles(somatic, somatic_samples, normal_samples, alleles);
return get_somatic_haplotypes(genotypes, somatic_alleles);
}

Expand All @@ -107,6 +108,7 @@ Measure::ResultType MedianSomaticMappingQuality::do_evaluate(const VcfRecord& ca
const auto& samples = get_value<Samples>(facets.at("Samples"));
Array<Optional<ValueType>> result(samples.size());
if (is_somatic(call)) {
const auto& alleles = get_value<Alleles>(facets.at("Alleles"));
const auto somatic_status = boost::get<Array<ValueType>>(IsSomatic(true).evaluate(call, facets));
std::vector<SampleName> somatic_samples {}, normal_samples {};
somatic_samples.reserve(samples.size()); normal_samples.reserve(samples.size());
Expand All @@ -119,7 +121,7 @@ Measure::ResultType MedianSomaticMappingQuality::do_evaluate(const VcfRecord& ca
}
if (somatic_samples.empty() || normal_samples.empty()) return result;
const auto& genotypes = get_value<Genotypes>(facets.at("Genotypes"));
const auto somatic_haplotypes = get_somatic_haplotypes(call, genotypes, somatic_samples, normal_samples);
const auto somatic_haplotypes = get_somatic_haplotypes(call, genotypes, somatic_samples, normal_samples, alleles);
if (somatic_haplotypes.empty()) return result;
const auto& assignments = get_value<ReadAssignments>(facets.at("ReadAssignments")).haplotypes;
for (std::size_t s {0}; s < samples.size(); ++s) {
Expand Down Expand Up @@ -164,7 +166,7 @@ std::string MedianSomaticMappingQuality::do_describe() const

std::vector<std::string> MedianSomaticMappingQuality::do_requirements() const
{
std::vector<std::string> result {"Samples", "Genotypes", "ReadAssignments"};
std::vector<std::string> result {"Samples", "Alleles", "Genotypes", "ReadAssignments"};
utils::append(IsSomatic(true).requirements(), result);
sort_unique(result);
return result;
Expand Down
27 changes: 14 additions & 13 deletions src/core/csr/measures/normal_contamination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "utils/append.hpp"
#include "is_somatic.hpp"
#include "../facets/samples.hpp"
#include "../facets/alleles.hpp"
#include "../facets/genotypes.hpp"
#include "../facets/read_assignments.hpp"

Expand All @@ -38,11 +39,6 @@ Measure::ValueType NormalContamination::get_value_type() const

namespace {

auto extract_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample)
{
return get_called_alleles(call, sample, ReferencePadPolicy::trim_alt_alleles).first;
}

template <typename Container>
void sort_unique(Container& values)
{
Expand All @@ -51,14 +47,15 @@ void sort_unique(Container& values)
}

auto get_somatic_alleles(const VcfRecord& somatic, const std::vector<SampleName>& somatic_samples,
const std::vector<SampleName>& normal_samples)
const std::vector<SampleName>& normal_samples,
const Facet::AlleleMap& alleles)
{
std::vector<Allele> somatic_sample_alleles {}, normal_sample_alleles {};
for (const auto& sample : somatic_samples) {
utils::append(extract_called_alleles(somatic, sample), somatic_sample_alleles);
utils::append(get_called(alleles, somatic, sample), somatic_sample_alleles);
}
for (const auto& sample : normal_samples) {
utils::append(extract_called_alleles(somatic, sample), normal_sample_alleles);
utils::append(get_called(alleles, somatic, sample), normal_sample_alleles);
}
sort_unique(somatic_sample_alleles); sort_unique(normal_sample_alleles);
std::vector<Allele> result {};
Expand Down Expand Up @@ -91,10 +88,13 @@ auto get_somatic_haplotypes(const Facet::GenotypeMap& genotypes, const std::vect
return result;
}

auto get_somatic_haplotypes(const VcfRecord& somatic, const Facet::GenotypeMap& genotypes,
const std::vector<SampleName>& somatic_samples, const std::vector<SampleName>& normal_samples)
auto get_somatic_haplotypes(const VcfRecord& somatic,
const Facet::GenotypeMap& genotypes,
const std::vector<SampleName>& somatic_samples,
const std::vector<SampleName>& normal_samples,
const Facet::AlleleMap& alleles)
{
const auto somatic_alleles = get_somatic_alleles(somatic, somatic_samples, normal_samples);
const auto somatic_alleles = get_somatic_alleles(somatic, somatic_samples, normal_samples, alleles);
return get_somatic_haplotypes(genotypes, somatic_alleles);
}

Expand All @@ -117,6 +117,7 @@ Measure::ResultType NormalContamination::do_evaluate(const VcfRecord& call, cons
if (is_somatic(call)) {
std::size_t contamination {0}, total_overlapped {0};
const auto& samples = get_value<Samples>(facets.at("Samples"));
const auto& alleles = get_value<Alleles>(facets.at("Alleles"));
const auto somatic_status = boost::get<Array<ValueType>>(IsSomatic(true).evaluate(call, facets));
std::vector<SampleName> somatic_samples {}, normal_samples {};
somatic_samples.reserve(samples.size()); normal_samples.reserve(samples.size());
Expand All @@ -128,7 +129,7 @@ Measure::ResultType NormalContamination::do_evaluate(const VcfRecord& call, cons
}
}
const auto& genotypes = get_value<Genotypes>(facets.at("Genotypes"));
const auto somatic_haplotypes = get_somatic_haplotypes(call, genotypes, somatic_samples, normal_samples);
const auto somatic_haplotypes = get_somatic_haplotypes(call, genotypes, somatic_samples, normal_samples, alleles);
const auto& assignments = get_value<ReadAssignments>(facets.at("ReadAssignments")).haplotypes;
Genotype<Haplotype> somatic_genotype {static_cast<unsigned>(somatic_haplotypes.size() + 1)};
for (const auto& haplotype : somatic_haplotypes) {
Expand Down Expand Up @@ -208,7 +209,7 @@ std::string NormalContamination::do_describe() const

std::vector<std::string> NormalContamination::do_requirements() const
{
std::vector<std::string> result {"Samples", "Genotypes", "ReadAssignments"};
std::vector<std::string> result {"Samples", "Alleles", "Genotypes", "ReadAssignments"};
utils::append(IsSomatic(true).requirements(), result);
sort_unique(result);
return result;
Expand Down
3 changes: 1 addition & 2 deletions src/core/csr/measures/strand_bias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ namespace {

bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
{
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
return !(allele == vcfspec::missingValue || allele == deleted_allele);
return !(allele == vcfspec::missingValue || allele == vcfspec::deleteMaskAllele);
}

bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
Expand Down
18 changes: 10 additions & 8 deletions src/core/tools/indel_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,16 @@ IndelProfiler::read_next_data_batch(VcfIterator& first, const VcfIterator& last,
evaluate_support(sample_genotype, reads[sample], result.support);
reads[sample].clear();
reads[sample].shrink_to_fit();
for (const auto& record : records) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(record, sample);
if (has_ref) alleles.erase(std::cbegin(alleles));
alleles.erase(std::remove_if(std::begin(alleles), std::end(alleles),
[] (const auto& allele) { return !is_indel(allele); }),
std::end(alleles));
result.indels.insert(std::begin(alleles), std::end(alleles));
for (std::size_t record_idx {0}; record_idx < records.size(); ++record_idx) {
auto alleles = get_resolved_alleles(records, record_idx, sample);
std::vector<Allele> alt_indels {};
alt_indels.reserve(alleles.size() - 1);
std::for_each(std::next(std::begin(alleles)), std::end(alleles), [&] (auto& allele) {
if (allele && is_indel(*allele)) {
alt_indels.push_back(std::move(*allele));
}
});
result.indels.insert(std::begin(alt_indels), std::end(alt_indels));
}
}
} else {
Expand Down
4 changes: 1 addition & 3 deletions src/core/tools/vargen/vcf_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ namespace {

static bool is_canonical(const VcfRecord::NucleotideSequence& allele)
{
return allele != vcfspec::missingValue
&& std::none_of(std::cbegin(allele), std::cend(allele),
[](const auto base) { return base == vcfspec::deletedBase; });
return allele != vcfspec::missingValue && allele != vcfspec::deleteMaskAllele;
}

template <typename Iterator>
Expand Down
Loading

0 comments on commit 820eb49

Please sign in to comment.