From fa5eee594a6a04c44b043405b09fde9e2a01c431 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 15 Dec 2023 14:08:50 -0500 Subject: [PATCH 1/3] Trim CLI to respect --no-trim-primers option --- dorado/cli/trim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dorado/cli/trim.cpp b/dorado/cli/trim.cpp index af3aa319..1a134e9a 100644 --- a/dorado/cli/trim.cpp +++ b/dorado/cli/trim.cpp @@ -133,7 +133,7 @@ int trim(int argc, char* argv[]) { auto hts_writer = pipeline_desc.add_node({}, "-", output_mode, trim_writer_threads); pipeline_desc.add_node({hts_writer}, trim_threads, true, - parser.get("--no-trim-primers")); + !parser.get("--no-trim-primers")); // Create the Pipeline from our description. std::vector stats_reporters; From aaea71c18e1a657c422266c71aaa27be859bb529 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 15 Dec 2023 14:43:52 -0500 Subject: [PATCH 2/3] adjust trim window based on adapter or primer --- dorado/demux/AdapterDetector.cpp | 6 ++++-- dorado/demux/Trimmer.cpp | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dorado/demux/AdapterDetector.cpp b/dorado/demux/AdapterDetector.cpp index ec3d58fe..2c9d1f6a 100644 --- a/dorado/demux/AdapterDetector.cpp +++ b/dorado/demux/AdapterDetector.cpp @@ -18,7 +18,8 @@ namespace { -const int TRIM_LENGTH = 150; +const int ADAPTER_TRIM_LENGTH = 75; +const int PRIMER_TRIM_LENGTH = 150; // Create edlib configuration for detecting adapters and primers. EdlibAlignConfig init_edlib_config_for_adapters() { @@ -116,6 +117,7 @@ AdapterScoreResult AdapterDetector::detect(const std::string& seq, const std::vector& queries, AdapterDetector::QueryType query_type) const { const std::string_view seq_view(seq); + const auto TRIM_LENGTH = (query_type == ADAPTER ? ADAPTER_TRIM_LENGTH : PRIMER_TRIM_LENGTH); const std::string_view read_front = seq_view.substr(0, TRIM_LENGTH); int rear_start = std::max(0, int(seq.length()) - TRIM_LENGTH); const std::string_view read_rear = seq_view.substr(rear_start, TRIM_LENGTH); @@ -128,7 +130,7 @@ AdapterScoreResult AdapterDetector::detect(const std::string& seq, const auto& name = queries[i].name; const auto& query_seq = queries[i].sequence; const auto& query_seq_rev = queries[i].sequence_rev; - spdlog::debug("Checking adapter/primer {}", name); + spdlog::trace("Checking adapter/primer {}", name); auto front_result = edlibAlign(query_seq.data(), int(query_seq.length()), read_front.data(), int(read_front.length()), placement_config); diff --git a/dorado/demux/Trimmer.cpp b/dorado/demux/Trimmer.cpp index 69bee7f5..a4f77c96 100644 --- a/dorado/demux/Trimmer.cpp +++ b/dorado/demux/Trimmer.cpp @@ -79,10 +79,12 @@ std::pair Trimmer::determine_trim_interval(const AdapterScoreResult& r trim_interval.first = 0; } else { trim_interval.first = res.front.position.second + 1; + spdlog::trace("Detected front interval adapter/primer - {}", res.front.name); } if (res.rear.name == "unclassified" || res.rear.score < score_thres) { trim_interval.second = seqlen; } else { + spdlog::trace("Detected rear interval adapter/primer - {}", res.rear.name); trim_interval.second = res.rear.position.first; } From 149417d16ebd1bf74f9e05dc83957feb5c58e778 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 15 Dec 2023 14:56:18 -0500 Subject: [PATCH 3/3] report warning when intervals are unexpected --- dorado/read_pipeline/AdapterDetectorNode.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dorado/read_pipeline/AdapterDetectorNode.cpp b/dorado/read_pipeline/AdapterDetectorNode.cpp index 8ae27302..d2bb89ca 100644 --- a/dorado/read_pipeline/AdapterDetectorNode.cpp +++ b/dorado/read_pipeline/AdapterDetectorNode.cpp @@ -94,6 +94,11 @@ void AdapterDetectorNode::process_read(BamPtr& read) { std::pair trim_interval = adapter_trim_interval; trim_interval.first = std::max(trim_interval.first, primer_trim_interval.first); trim_interval.second = std::min(trim_interval.second, primer_trim_interval.second); + if (trim_interval.first >= trim_interval.second) { + spdlog::warn("Unexpected adapter/primer trim interval {}-{} for {}", + trim_interval.first, trim_interval.second, seq); + return; + } read = Trimmer::trim_sequence(std::move(read), trim_interval); } } @@ -125,6 +130,11 @@ void AdapterDetectorNode::process_read(SimplexRead& read) { std::pair trim_interval = adapter_trim_interval; trim_interval.first = std::max(trim_interval.first, primer_trim_interval.first); trim_interval.second = std::min(trim_interval.second, primer_trim_interval.second); + if (trim_interval.first >= trim_interval.second) { + spdlog::warn("Unexpected adapter/primer trim interval {}-{} for {}", + trim_interval.first, trim_interval.second, read.read_common.seq); + return; + } Trimmer::trim_sequence(read, trim_interval); } m_num_records++;