Skip to content

Commit

Permalink
Merge branch 'jdaw/fix-primer-trim' into 'master'
Browse files Browse the repository at this point in the history
Fix primer trimming intervals

See merge request machine-learning/dorado!775
  • Loading branch information
tijyojwad committed Dec 18, 2023
2 parents b1302ae + 149417d commit 3bfb1f0
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 3 deletions.
2 changes: 1 addition & 1 deletion dorado/cli/trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ int trim(int argc, char* argv[]) {
auto hts_writer = pipeline_desc.add_node<HtsWriter>({}, "-", output_mode, trim_writer_threads);

pipeline_desc.add_node<AdapterDetectorNode>({hts_writer}, trim_threads, true,
parser.get<bool>("--no-trim-primers"));
!parser.get<bool>("--no-trim-primers"));

// Create the Pipeline from our description.
std::vector<dorado::stats::StatsReporter> stats_reporters;
Expand Down
6 changes: 4 additions & 2 deletions dorado/demux/AdapterDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

namespace {

const int TRIM_LENGTH = 150;
const int ADAPTER_TRIM_LENGTH = 75;
const int PRIMER_TRIM_LENGTH = 150;

// Create edlib configuration for detecting adapters and primers.
EdlibAlignConfig init_edlib_config_for_adapters() {
Expand Down Expand Up @@ -116,6 +117,7 @@ AdapterScoreResult AdapterDetector::detect(const std::string& seq,
const std::vector<Query>& queries,
AdapterDetector::QueryType query_type) const {
const std::string_view seq_view(seq);
const auto TRIM_LENGTH = (query_type == ADAPTER ? ADAPTER_TRIM_LENGTH : PRIMER_TRIM_LENGTH);
const std::string_view read_front = seq_view.substr(0, TRIM_LENGTH);
int rear_start = std::max(0, int(seq.length()) - TRIM_LENGTH);
const std::string_view read_rear = seq_view.substr(rear_start, TRIM_LENGTH);
Expand All @@ -128,7 +130,7 @@ AdapterScoreResult AdapterDetector::detect(const std::string& seq,
const auto& name = queries[i].name;
const auto& query_seq = queries[i].sequence;
const auto& query_seq_rev = queries[i].sequence_rev;
spdlog::debug("Checking adapter/primer {}", name);
spdlog::trace("Checking adapter/primer {}", name);

auto front_result = edlibAlign(query_seq.data(), int(query_seq.length()), read_front.data(),
int(read_front.length()), placement_config);
Expand Down
2 changes: 2 additions & 0 deletions dorado/demux/Trimmer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,12 @@ std::pair<int, int> Trimmer::determine_trim_interval(const AdapterScoreResult& r
trim_interval.first = 0;
} else {
trim_interval.first = res.front.position.second + 1;
spdlog::trace("Detected front interval adapter/primer - {}", res.front.name);
}
if (res.rear.name == "unclassified" || res.rear.score < score_thres) {
trim_interval.second = seqlen;
} else {
spdlog::trace("Detected rear interval adapter/primer - {}", res.rear.name);
trim_interval.second = res.rear.position.first;
}

Expand Down
10 changes: 10 additions & 0 deletions dorado/read_pipeline/AdapterDetectorNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ void AdapterDetectorNode::process_read(BamPtr& read) {
std::pair<int, int> trim_interval = adapter_trim_interval;
trim_interval.first = std::max(trim_interval.first, primer_trim_interval.first);
trim_interval.second = std::min(trim_interval.second, primer_trim_interval.second);
if (trim_interval.first >= trim_interval.second) {
spdlog::warn("Unexpected adapter/primer trim interval {}-{} for {}",
trim_interval.first, trim_interval.second, seq);
return;
}
read = Trimmer::trim_sequence(std::move(read), trim_interval);
}
}
Expand Down Expand Up @@ -125,6 +130,11 @@ void AdapterDetectorNode::process_read(SimplexRead& read) {
std::pair<int, int> trim_interval = adapter_trim_interval;
trim_interval.first = std::max(trim_interval.first, primer_trim_interval.first);
trim_interval.second = std::min(trim_interval.second, primer_trim_interval.second);
if (trim_interval.first >= trim_interval.second) {
spdlog::warn("Unexpected adapter/primer trim interval {}-{} for {}",
trim_interval.first, trim_interval.second, read.read_common.seq);
return;
}
Trimmer::trim_sequence(read, trim_interval);
}
m_num_records++;
Expand Down

0 comments on commit 3bfb1f0

Please sign in to comment.