Skip to content

Commit

Permalink
Merge branch 'mvella/fix-mod-duplex-segv' into 'master'
Browse files Browse the repository at this point in the history
Fix hemimethylation segfault

See merge request machine-learning/dorado!800

(cherry picked from commit f1b55ea)

a6a203e Solved segfault
ba26224 Solved segfault
e143c83 Handle situation in duplex mod call where a move table realignment fails
95075ea Improved error checking
a7775be Merge branch 'master' into mvella/fix-mod-duplex-segv
d2bb114 Remove test which is causing CI failure due to Minimap bug...
7c62595 Merge branch 'master' into mvella/fix-mod-duplex-segv
52631b6 Addressing MR review
baa4051 Addressing minimap sanitiser fail
3b2986b Addressing minimap sanitiser fail
d90eaf2 Re-enable Metal tests
  • Loading branch information
vellamike authored and tijyojwad committed Jan 17, 2024
1 parent fb06556 commit 0a057bb
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 0 deletions.
5 changes: 5 additions & 0 deletions dorado/read_pipeline/ModBaseCallerNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ void ModBaseCallerNode::duplex_mod_call(Message&& message) {
auto [moves_offset, target_start, new_move_table] =
utils::realign_moves(simplex_seq, duplex_seq, simplex_moves);

// If the alignment has failed, the rest of this duplex mod call cannot be completed in this direction
if (moves_offset == -1 && target_start == -1 && new_move_table.empty()) {
continue;
}

auto signal_len = new_move_table.size() * m_block_stride;
auto num_moves = std::accumulate(new_move_table.begin(), new_move_table.end(), 0);
auto new_seq = duplex_seq.substr(target_start, num_moves);
Expand Down
14 changes: 14 additions & 0 deletions dorado/utils/sequence_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ std::tuple<int, int, std::vector<uint8_t>> realign_moves(const std::string& quer
query_sequence,
target_sequence); // We are going to compute the overlap between the two reads

const auto failed_realignment = std::make_tuple(-1, -1, std::vector<uint8_t>());
// No overlap was computed, so return the tuple (-1, -1) and an empty vector to indicate that no move table realignment was computed
if (!is_overlap) {
return failed_realignment;
}
// Advance the query and target position.
++query_start;
++target_start;
Expand All @@ -259,6 +264,15 @@ std::tuple<int, int, std::vector<uint8_t>> realign_moves(const std::string& quer
query_sequence_component.data(), static_cast<int>(query_sequence_component.length()),
align_config);

// Check if alignment failed (edlib_result.startLocations is null)
if (edlib_result.startLocations == nullptr) {
// Free the memory allocated by edlibAlign
edlibFreeAlignResult(edlib_result);

// Return the tuple (-1, -1) and an empty vector to indicate that no move table realignment was computed
return failed_realignment;
}

// Let's keep two cursor positions - one for the new move table and one for the old:
int new_move_cursor = 0;
int old_move_cursor = 0;
Expand Down
23 changes: 23 additions & 0 deletions dorado/utils/sequence_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ class BaseInfo {

int count_trailing_chars(const std::string_view adapter, char c);

/**
* @brief Realigns a move table based on a given query sequence and a target sequence.
*
* This function adjusts the moves table to align with the target sequence, accounting
* for any differences between the query and target sequences. It returns a tuple containing
* an offset into the original moves table to account for trimming, a location in the target
* sequence where the realigned sequence starts, and the newly computed move table.
* If the new move table cannot be computed, the function returns a tuple with values (-1, -1)
* and an empty vector.
*
* @param query_sequence The original sequence of moves, representing the initial alignment.
* @param target_sequence The sequence to which the moves need to be realigned. This could
* differ from the query sequence.
* @param moves The original move table as a vector of unsigned 8-bit integers, aligned with
* the query sequence.
*
* @return std::tuple<int, int, std::vector<uint8_t>>
* A tuple containing:
* 1. An offset into the old moves table (int), accounting for adjustments made during realignment.
* 2. The start location in the target sequence (int) where the realigned sequence begins.
* 3. The newly computed move table (std::vector<uint8_t>).
* If the move table cannot be computed, returns (-1, -1) and an empty vector.
*/
std::tuple<int, int, std::vector<uint8_t>> realign_moves(const std::string& query_sequence,
const std::string& target_sequence,
const std::vector<uint8_t>& moves);
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ set(UNIT_TEST_SOURCE_FILES
PolyACalculatorTest.cpp
ReadFilterNodeTest.cpp
ReadTest.cpp
RealignMovesTest.cpp
RNASplitTest.cpp
ResumeLoaderTest.cpp
SampleSheetTests.cpp
Expand Down
21 changes: 21 additions & 0 deletions tests/RealignMovesTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "utils/sequence_utils.h"

#include <ATen/ATen.h>
#include <catch2/catch.hpp>

using Slice = at::indexing::Slice;
using namespace dorado;

#define TEST_GROUP "[utils][realign_moves]"

TEST_CASE("Realign Moves No Error", TEST_GROUP) {
std::string query_sequence = "ACGTACGTACGTACGTACGTACGTACGTACGT"; // Example query sequence
std::string target_sequence = "ACGTACGTACGTACGTACGTACGTACGTACGT"; // Example target sequence
std::vector<uint8_t> moves = {
1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,
0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}; // Example moves vector

// Test that calling realign_moves does not throw any exceptions
CHECK_NOTHROW(utils::realign_moves(query_sequence, target_sequence, moves));
}

0 comments on commit 0a057bb

Please sign in to comment.