diff --git a/dorado/read_pipeline/ModBaseCallerNode.cpp b/dorado/read_pipeline/ModBaseCallerNode.cpp index ae56520f..04ee23f5 100644 --- a/dorado/read_pipeline/ModBaseCallerNode.cpp +++ b/dorado/read_pipeline/ModBaseCallerNode.cpp @@ -206,6 +206,11 @@ void ModBaseCallerNode::duplex_mod_call(Message&& message) { auto [moves_offset, target_start, new_move_table] = utils::realign_moves(simplex_seq, duplex_seq, simplex_moves); + // If the alignment has failed, the rest of this duplex mod call cannot be completed in this direction + if (moves_offset == -1 && target_start == -1 && new_move_table.empty()) { + continue; + } + auto signal_len = new_move_table.size() * m_block_stride; auto num_moves = std::accumulate(new_move_table.begin(), new_move_table.end(), 0); auto new_seq = duplex_seq.substr(target_start, num_moves); diff --git a/dorado/utils/sequence_utils.cpp b/dorado/utils/sequence_utils.cpp index b5ae9219..9a76490c 100644 --- a/dorado/utils/sequence_utils.cpp +++ b/dorado/utils/sequence_utils.cpp @@ -239,6 +239,11 @@ std::tuple> realign_moves(const std::string& quer query_sequence, target_sequence); // We are going to compute the overlap between the two reads + const auto failed_realignment = std::make_tuple(-1, -1, std::vector()); + // No overlap was computed, so return the tuple (-1, -1) and an empty vector to indicate that no move table realignment was computed + if (!is_overlap) { + return failed_realignment; + } // Advance the query and target position. ++query_start; ++target_start; @@ -259,6 +264,15 @@ std::tuple> realign_moves(const std::string& quer query_sequence_component.data(), static_cast(query_sequence_component.length()), align_config); + // Check if alignment failed (edlib_result.startLocations is null) + if (edlib_result.startLocations == nullptr) { + // Free the memory allocated by edlibAlign + edlibFreeAlignResult(edlib_result); + + // Return the tuple (-1, -1) and an empty vector to indicate that no move table realignment was computed + return failed_realignment; + } + // Let's keep two cursor positions - one for the new move table and one for the old: int new_move_cursor = 0; int old_move_cursor = 0; diff --git a/dorado/utils/sequence_utils.h b/dorado/utils/sequence_utils.h index 35537b7f..df1f93ac 100644 --- a/dorado/utils/sequence_utils.h +++ b/dorado/utils/sequence_utils.h @@ -47,6 +47,29 @@ class BaseInfo { int count_trailing_chars(const std::string_view adapter, char c); +/** + * @brief Realigns a move table based on a given query sequence and a target sequence. + * + * This function adjusts the moves table to align with the target sequence, accounting + * for any differences between the query and target sequences. It returns a tuple containing + * an offset into the original moves table to account for trimming, a location in the target + * sequence where the realigned sequence starts, and the newly computed move table. + * If the new move table cannot be computed, the function returns a tuple with values (-1, -1) + * and an empty vector. + * + * @param query_sequence The original sequence of moves, representing the initial alignment. + * @param target_sequence The sequence to which the moves need to be realigned. This could + * differ from the query sequence. + * @param moves The original move table as a vector of unsigned 8-bit integers, aligned with + * the query sequence. + * + * @return std::tuple> + * A tuple containing: + * 1. An offset into the old moves table (int), accounting for adjustments made during realignment. + * 2. The start location in the target sequence (int) where the realigned sequence begins. + * 3. The newly computed move table (std::vector). + * If the move table cannot be computed, returns (-1, -1) and an empty vector. + */ std::tuple> realign_moves(const std::string& query_sequence, const std::string& target_sequence, const std::vector& moves); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ec727dc0..7489333e 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,6 +32,7 @@ set(UNIT_TEST_SOURCE_FILES PolyACalculatorTest.cpp ReadFilterNodeTest.cpp ReadTest.cpp + RealignMovesTest.cpp RNASplitTest.cpp ResumeLoaderTest.cpp SampleSheetTests.cpp diff --git a/tests/RealignMovesTest.cpp b/tests/RealignMovesTest.cpp new file mode 100644 index 00000000..95d021ff --- /dev/null +++ b/tests/RealignMovesTest.cpp @@ -0,0 +1,21 @@ +#include "utils/sequence_utils.h" + +#include +#include + +using Slice = at::indexing::Slice; +using namespace dorado; + +#define TEST_GROUP "[utils][realign_moves]" + +TEST_CASE("Realign Moves No Error", TEST_GROUP) { + std::string query_sequence = "ACGTACGTACGTACGTACGTACGTACGTACGT"; // Example query sequence + std::string target_sequence = "ACGTACGTACGTACGTACGTACGTACGTACGT"; // Example target sequence + std::vector moves = { + 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, + 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, + 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}; // Example moves vector + + // Test that calling realign_moves does not throw any exceptions + CHECK_NOTHROW(utils::realign_moves(query_sequence, target_sequence, moves)); +} \ No newline at end of file