Skip to content

Commit

Permalink
Merge branch 'exp/bamout-tags' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cooke committed Feb 10, 2019
2 parents 987c5a1 + 81384d5 commit c047e96
Show file tree
Hide file tree
Showing 22 changed files with 602 additions and 279 deletions.
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ set(IO_SOURCES
io/read/read_writer.hpp
io/read/read_writer.cpp
io/read/buffered_read_writer.hpp
io/read/buffered_read_writer.cpp
io/read/annotated_aligned_read.hpp
io/read/annotated_aligned_read.cpp

io/variant/htslib_bcf_facade.hpp
io/variant/htslib_bcf_facade.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/basics/aligned_read.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ template <typename Range>
MemoryFootprint footprint(const Range& reads) noexcept
{
return std::accumulate(std::cbegin(reads), std::cend(reads), MemoryFootprint {0},
[] (const MemoryFootprint curr, const AlignedRead& read) noexcept { return curr + footprint(read); });
[] (auto curr, const auto& read) noexcept { return curr + footprint(read); });
}

bool operator==(const AlignedRead& lhs, const AlignedRead& rhs) noexcept;
Expand Down
8 changes: 8 additions & 0 deletions src/basics/cigar_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <array>
#include <ostream>
#include <stdexcept>
#include <sstream>

#include <boost/lexical_cast.hpp>

Expand Down Expand Up @@ -400,6 +401,13 @@ std::ostream& operator<<(std::ostream& os, const CigarString& cigar)
return os;
}

std::string to_string(const CigarString& cigar)
{
std::ostringstream ss {};
ss << cigar;
return ss.str();
}

std::size_t CigarHash::operator()(const CigarOperation& op) const noexcept
{
std::size_t result {};
Expand Down
2 changes: 2 additions & 0 deletions src/basics/cigar_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ std::ostream& operator<<(std::ostream& os, const CigarOperation::Flag& flag);
std::ostream& operator<<(std::ostream& os, const CigarOperation& cigar_operation);
std::ostream& operator<<(std::ostream& os, const CigarString& cigar);

std::string to_string(const CigarString& cigar);

struct CigarHash
{
std::size_t operator()(const CigarOperation& op) const noexcept;
Expand Down
8 changes: 0 additions & 8 deletions src/config/option_collation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2048,14 +2048,6 @@ boost::optional<fs::path> bamout_request(const OptionMap& options)
return boost::none;
}

boost::optional<fs::path> split_bamout_request(const OptionMap& options)
{
if (is_set("split-bamout", options)) {
return resolve_path(options.at("split-bamout").as<fs::path>(), options);
}
return boost::none;
}

bool full_bamouts_requested(const OptionMap& options)
{
return options.at("full-bamout").as<bool>();
Expand Down
1 change: 0 additions & 1 deletion src/config/option_collation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ boost::optional<fs::path> filter_request(const OptionMap& options);
bool annotate_filter_output(const OptionMap& options);

boost::optional<fs::path> bamout_request(const OptionMap& options);
boost::optional<fs::path> split_bamout_request(const OptionMap& options);
bool full_bamouts_requested(const OptionMap& options);

unsigned estimate_max_open_files(const OptionMap& options);
Expand Down
4 changes: 0 additions & 4 deletions src/config/option_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,6 @@ OptionMap parse_options(const int argc, const char** argv)
po::value<fs::path>(),
"Output realigned BAM files")

("split-bamout",
po::value<fs::path>(),
"Output split realigned BAM files")

("full-bamout",
po::bool_switch()->default_value(false),
"Output all reads when producing realigned bam outputs rather than just variant read minibams")
Expand Down
5 changes: 4 additions & 1 deletion src/core/callers/caller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,10 @@ std::vector<VcfRecord> Caller::regenotype(const std::vector<Variant>& variants,
auto assign_and_realign(const std::vector<AlignedRead>& reads, const Genotype<Haplotype>& genotype)
{
auto result = compute_haplotype_support(genotype, reads, {AssignmentConfig::AmbiguousAction::first});
for (auto& p : result) realign_to_reference(p.second, p.first);
for (auto& p : result) {
realign_to_reference(p.second, p.first);
std::sort(std::begin(p.second), std::end(p.second));
}
return result;
}

Expand Down
15 changes: 6 additions & 9 deletions src/core/calling_components.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,9 @@ boost::optional<GenomeCallingComponents::Path> GenomeCallingComponents::bamout()
return components_.bamout;
}

boost::optional<GenomeCallingComponents::Path> GenomeCallingComponents::split_bamout() const
BAMRealigner::Config GenomeCallingComponents::bamout_config() const noexcept
{
return components_.split_bamout;
}

bool GenomeCallingComponents::write_full_bamouts() const noexcept
{
return components_.write_full_bamouts;
return components_.bamout_config;
}

boost::optional<GenomeCallingComponents::Path> GenomeCallingComponents::data_profile() const
Expand Down Expand Up @@ -511,8 +506,7 @@ GenomeCallingComponents::Components::Components(ReferenceGenome&& reference, Rea
, legacy {}
, filter_request {}
, bamout {options::bamout_request(options)}
, split_bamout {options::split_bamout_request(options)}
, write_full_bamouts {options::full_bamouts_requested(options)}
, bamout_config {}
, data_profile {options::data_profile_request(options)}
{
drop_unused_samples(this->samples, this->read_manager);
Expand All @@ -531,6 +525,9 @@ GenomeCallingComponents::Components::Components(ReferenceGenome&& reference, Rea
if (temp_directory) fs::remove_all(*temp_directory);
throw;
}
bamout_config.copy_hom_ref_reads = options::full_bamouts_requested(options);
bamout_config.max_buffer = read_buffer_footprint;
bamout_config.max_threads = num_threads;
}

void GenomeCallingComponents::Components::setup_progress_meter(const options::OptionMap& options)
Expand Down
8 changes: 4 additions & 4 deletions src/core/calling_components.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "readpipe/read_pipe_fwd.hpp"
#include "core/callers/caller_factory.hpp"
#include "core/csr/filters/variant_call_filter_factory.hpp"
#include "core/tools/bam_realigner.hpp"
#include "utils/memory_footprint.hpp"
#include "utils/input_reads_profiler.hpp"
#include "logging/progress_meter.hpp"
Expand Down Expand Up @@ -73,8 +74,7 @@ class GenomeCallingComponents
boost::optional<Path> legacy() const;
boost::optional<Path> filter_request() const;
boost::optional<Path> bamout() const;
boost::optional<Path> split_bamout() const;
bool write_full_bamouts() const noexcept;
BAMRealigner::Config bamout_config() const noexcept;
boost::optional<Path> data_profile() const;

private:
Expand Down Expand Up @@ -112,8 +112,8 @@ class GenomeCallingComponents
bool sites_only;
boost::optional<Path> legacy;
boost::optional<Path> filter_request;
boost::optional<Path> bamout, split_bamout;
bool write_full_bamouts;
boost::optional<Path> bamout;
BAMRealigner::Config bamout_config;
boost::optional<Path> data_profile;
// Components that require temporary directory during construction appear last to make
// exception handling easier.
Expand Down
1 change: 1 addition & 0 deletions src/core/csr/facets/read_assignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ ReadAssignments::ReadAssignments(const ReferenceGenome& reference, const Genotyp
}
for (auto& s : genotype_support) {
safe_realign_to_reference(s.second, s.first);
std::sort(std::begin(s.second), std::end(s.second));
result_.support[sample][s.first] = std::move(s.second);
}
}
Expand Down
49 changes: 2 additions & 47 deletions src/core/octopus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1439,11 +1439,6 @@ bool is_bam_realignment_requested(const GenomeCallingComponents& components)
return static_cast<bool>(components.bamout());
}

bool is_split_bam_realignment_requested(const GenomeCallingComponents& components)
{
return static_cast<bool>(components.split_bamout());
}

bool is_stdout_final_output(const GenomeCallingComponents& components)
{
return (components.filtered_output() && !components.filtered_output()->path()) || !components.output().path();
Expand Down Expand Up @@ -1555,12 +1550,9 @@ void run_bam_realign(GenomeCallingComponents& components)
if (is_bam_realignment_requested(components)) {
if (check_bam_realign(components)) {
components.read_manager().close();
BAMRealigner::Config config {};
config.copy_hom_ref_reads = components.write_full_bamouts();
config.max_buffer = components.read_buffer_footprint();
if (components.read_manager().paths().size() == 1) {
realign(components.read_manager().paths().front(), get_bam_realignment_vcf(components),
*components.bamout(), components.reference(), config);
*components.bamout(), components.reference(), components.bamout_config());
} else {
namespace fs = boost::filesystem;
const auto bamout_directory = *components.bamout();
Expand All @@ -1581,7 +1573,7 @@ void run_bam_realign(GenomeCallingComponents& components)
auto bamout_path = bamout_directory;
bamout_path /= bamin_path.filename();
if (bamin_path != bamout_path) {
realign(bamin_path, get_bam_realignment_vcf(components), bamout_path, components.reference(), config);
realign(bamin_path, get_bam_realignment_vcf(components), bamout_path, components.reference(), components.bamout_config());
} else {
logging::WarningLogger warn_log {};
stream(warn_log) << "Cannot make evidence bam " << bamout_path << " as it is an input bam";
Expand All @@ -1590,43 +1582,6 @@ void run_bam_realign(GenomeCallingComponents& components)
}
}
}
if (is_split_bam_realignment_requested(components)) {
if (check_bam_realign(components)) {
components.read_manager().close();
BAMRealigner::Config config {};
config.copy_hom_ref_reads = components.write_full_bamouts();
config.max_buffer = components.read_buffer_footprint();
if (components.read_manager().paths().size() == 1) {
auto out_paths = get_haplotype_bam_paths(*components.split_bamout(), get_max_ploidy(components));
realign(components.read_manager().paths().front(), get_bam_realignment_vcf(components),
std::move(out_paths), components.reference(), config);
} else {
namespace fs = boost::filesystem;
const auto bamout_directory = *components.split_bamout();
if (fs::exists(bamout_directory)) {
if (!fs::is_directory(bamout_directory)) {
logging::ErrorLogger error_log {};
stream(error_log) << "The given evidence bam directory " << bamout_directory << " is not a directory";
return;
}
} else {
if (!fs::create_directory(bamout_directory)) {
logging::ErrorLogger error_log {};
stream(error_log) << "Failed to create temporary directory " << bamout_directory << " - check permissions";
return;
}
}
const auto realignment_vcf = get_bam_realignment_vcf(components);
for (const auto& bamin_path : components.read_manager().paths()) {
auto bamout_prefix = bamout_directory;
bamout_prefix /= bamin_path.filename().stem();
const auto max_ploidy = get_max_called_ploidy(realignment_vcf, bamin_path);
auto bamout_paths = get_haplotype_bam_paths(bamout_prefix, max_ploidy);
realign(bamin_path, realignment_vcf, std::move(bamout_paths), components.reference(), config);
}
}
}
}
}

void run_data_profiler(GenomeCallingComponents& components)
Expand Down
Loading

0 comments on commit c047e96

Please sign in to comment.