Skip to content

Commit

Permalink
Merge branch 'master' into mvella/hemimethylation
Browse files Browse the repository at this point in the history
  • Loading branch information
HalfPhoton committed Dec 1, 2023
2 parents e95a23f + d871eb1 commit eb2c235
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 28 deletions.
2 changes: 1 addition & 1 deletion dorado/alignment/Minimap2Aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,4 @@ void Minimap2Aligner::add_tags(bam1_t* record,
bam_aux_append(record, "rl", 'i', sizeof(buf->rep_len), (uint8_t*)&buf->rep_len);
}

} // namespace dorado::alignment
} // namespace dorado::alignment
18 changes: 8 additions & 10 deletions dorado/utils/bam_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ std::string convert_nt16_to_str(uint8_t* bseq, size_t slen) {

namespace dorado::utils {

kstring_t allocate_kstring() {
kstring_t str = {0, 0, NULL};
ks_resize(&str, 1'000'000);
return str;
}

void add_rg_hdr(sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::vector<std::string>& barcode_kits,
Expand Down Expand Up @@ -134,7 +140,7 @@ std::map<std::string, std::string> get_read_group_info(sam_hdr_t* header, const
throw std::runtime_error("no read groups in file");
}

kstring_t rg = {0, 0, NULL};
kstring_t rg = allocate_kstring();
std::map<std::string, std::string> read_group_info;

for (int i = 0; i < num_read_groups; ++i) {
Expand Down Expand Up @@ -228,15 +234,7 @@ std::map<std::string, std::string> extract_pg_keys_from_hdr(const std::string fi
if (!header) {
throw std::runtime_error("Could not open header from file: " + filename);
}
kstring_t val = {0, 0, NULL};
// The kstring is pre-allocated with 1MB of space to work around a Windows DLL
// cross-heap segmentation fault. The ks_resize/ks_free functions from htslib
// are inline functions. When htslib is shipped as a DLL, some of these functions
// are inlined into the DLL code through other htslib APIs. But those same functions
// also get inlined into dorado code when invoked directly. As a result, it's possible
// that an htslib APIs resizes a string using the DLL code. But when a ks_free
// is attempted on it from dorado, there's cross-heap behavior and a segfault occurs.
ks_resize(&val, 1'000'000);
kstring_t val = allocate_kstring();
for (auto& k : keys) {
auto ret = sam_hdr_find_tag_id(header.get(), "PG", NULL, NULL, k.c_str(), &val);
if (ret != 0) {
Expand Down
16 changes: 16 additions & 0 deletions dorado/utils/bam_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <vector>

struct sam_hdr_t;
struct kstring_t;

namespace dorado::utils {

Expand Down Expand Up @@ -154,4 +155,19 @@ uint32_t ref_pos_consumed(uint32_t n_cigar, const uint32_t* cigar, uint32_t quer
*/
std::string cigar2str(uint32_t n_cigar, const uint32_t* cigar);

/*
* Allocate kstring_t which is already resized to hold 1 MB of data.
*
* This is done to work around a Windows DLL cross-heap segmentation fault.
* The ks_resize/ks_free functions from htslib
* are inline functions. When htslib is shipped as a DLL, some of these functions
* are inlined into the DLL code through other htslib APIs. But those same functions
* also get inlined into dorado code when invoked directly. As a result, it's possible
* that an htslib APIs resizes a string using the DLL code. But when a ks_free
* is attempted on it from dorado, there's cross-heap behavior and a segfault occurs.
*
* @return kstring_t struct
*/
kstring_t allocate_kstring();

} // namespace dorado::utils
8 changes: 1 addition & 7 deletions tests/BamUtilsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@ class WrappedKString {
kstring_t m_str = KS_INITIALIZE;

public:
WrappedKString() {
// On Windows |sam_hdr_find_tag_id| lives in a DLL and uses a different heap, but
// |ks_free| is inline so when we call it we crash trying to free unknown memory. To
// work around this we resize the kstring to a big value in our code so no resizing
// happens inside the htslib library.
ks_resize(&m_str, 1'000'000);
}
WrappedKString() { m_str = utils::allocate_kstring(); }
~WrappedKString() { ks_free(&m_str); }

kstring_t *get() { return &m_str; }
Expand Down
21 changes: 18 additions & 3 deletions tests/IndexFileAccessTest.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "alignment/IndexFileAccess.h"

#include "StreamUtils.h"
#include "TestUtils.h"
#include "alignment/Minimap2Index.h"

Expand All @@ -9,6 +10,8 @@

#define TEST_GROUP "[alignment::IndexFileAccess]"

using namespace ont::test_utils::streams;

namespace {

const std::string& valid_reference_file() {
Expand All @@ -29,6 +32,15 @@ const dorado::alignment::Minimap2Options& invalid_options() {
}();
return result;
}

dorado::alignment::IndexLoadResult load_index_no_stderr(
dorado::alignment::IndexFileAccess& cut,
const std::string& file,
const dorado::alignment::Minimap2Options& options) {
SuppressStderr no_stderr{};
return cut.load_index(file, options, 1);
}

} // namespace

namespace dorado::alignment::index_file_access {
Expand All @@ -47,7 +59,7 @@ TEST_CASE(TEST_GROUP " load_index with invalid file return reference_file_not_fo
TEST_CASE(TEST_GROUP " load_index with invalid options returns validation_error", TEST_GROUP) {
IndexFileAccess cut{};

REQUIRE(cut.load_index(valid_reference_file(), invalid_options(), 1) ==
REQUIRE(load_index_no_stderr(cut, valid_reference_file(), invalid_options()) ==
IndexLoadResult::validation_error);
}

Expand All @@ -67,7 +79,7 @@ SCENARIO(TEST_GROUP " Load and retrieve index files", TEST_GROUP) {
}

GIVEN("load_index called with valid file but invalid options") {
cut.load_index(valid_reference_file(), invalid_options(), 1);
load_index_no_stderr(cut, valid_reference_file(), invalid_options());
THEN("is_index_loaded returns false") {
REQUIRE_FALSE(cut.is_index_loaded(valid_reference_file(), invalid_options()));
}
Expand Down Expand Up @@ -163,7 +175,10 @@ SCENARIO(TEST_GROUP " Load and retrieve index files", TEST_GROUP) {
}

TEST_CASE(TEST_GROUP " validate_options with invalid options returns false", TEST_GROUP) {
REQUIRE_FALSE(validate_options(invalid_options()));
bool result{};
SuppressStderr::invoke([&result] { result = validate_options(invalid_options()); });

REQUIRE_FALSE(result);
}

TEST_CASE(TEST_GROUP " validate_options with default options returns true", TEST_GROUP) {
Expand Down
23 changes: 17 additions & 6 deletions tests/Minimap2IndexTest.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "alignment/Minimap2Index.h"

#include "StreamUtils.h"
#include "TestUtils.h"

#include <catch2/catch.hpp>
Expand All @@ -8,6 +9,8 @@

#define TEST_GROUP "[alignment::Minimap2Index]"

using namespace ont::test_utils::streams;

namespace {

class Minimap2IndexTestFixture {
Expand Down Expand Up @@ -68,12 +71,15 @@ TEST_CASE(TEST_GROUP " initialise() with invalid options returns false", TEST_GR
auto invalid_options{dflt_options};
invalid_options.bandwidth = invalid_options.bandwidth_long + 1;

REQUIRE_FALSE(cut.initialise(invalid_options));
bool result{};
SuppressStderr::invoke(
[&result, &invalid_options, &cut] { result = cut.initialise(invalid_options); });

REQUIRE_FALSE(result);
}

TEST_CASE_METHOD(Minimap2IndexTestFixture,
TEST_GROUP
" load() without invalid reference file returns reference_file_not_found",
TEST_GROUP " load() with invalid reference file returns reference_file_not_found",
TEST_GROUP) {
REQUIRE(cut.load("some_reference_file", 1) == IndexLoadResult::reference_file_not_found);
}
Expand All @@ -85,14 +91,19 @@ TEST_CASE_METHOD(Minimap2IndexTestFixture,
}

TEST_CASE_METHOD(Minimap2IndexTestFixture,
TEST_GROUP
" create_compatible_index() with invalid mapping options returns non-null",
TEST_GROUP " create_compatible_index() with invalid mapping options returns null",
TEST_GROUP) {
cut.load(reference_file, 1);
Minimap2Options invalid_compatible_options{dflt_options};
invalid_compatible_options.bandwidth = invalid_compatible_options.bandwidth_long + 1;

REQUIRE(cut.create_compatible_index(invalid_compatible_options) == nullptr);
std::shared_ptr<Minimap2Index> compatible_index{};
{
SuppressStderr suppressed{};
compatible_index = cut.create_compatible_index(invalid_compatible_options);
}

REQUIRE(compatible_index == nullptr);
}

TEST_CASE_METHOD(Minimap2IndexTestFixture,
Expand Down
77 changes: 77 additions & 0 deletions tests/StreamUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#ifdef _WIN32
#include <io.h>
#define NULL_DEVICE "NUL:"
#define Close _close
#define Dup _dup
#define Dup2 _dup2
#define Fileno _fileno
#else
#include <unistd.h>
#define NULL_DEVICE "/dev/null"
#define Close close
#define Dup dup
#define Dup2 dup2
#define Fileno fileno
#endif

#include <stdio.h>

#include <functional>

namespace ont::test_utils::streams {

namespace details {
template <class T>
void ignore(const T &) {}
} // namespace details

inline int suppress_stderr() {
fflush(stderr);
int fd = Dup(Fileno(stderr));
auto old_stream = freopen(NULL_DEVICE, "w", stderr);
details::ignore(old_stream);
return fd;
}

inline void restore_stderr(int fd) {
fflush(stderr);
auto dup_result = Dup2(fd, Fileno(stderr));
details::ignore(dup_result);
Close(fd);
}

inline int suppress_stdout() {
fflush(stdout);
int fd = Dup(Fileno(stdout));
auto old_stream = freopen(NULL_DEVICE, "w", stdout);
details::ignore(old_stream);
return fd;
}

inline void restore_stdout(int fd) {
fflush(stdout);
auto dup_result = Dup2(fd, Fileno(stdout));
details::ignore(dup_result);
Close(fd);
}

/// <summary>
/// RAII helper to provide scoped stderr suppression
/// </summary>
class [[nodiscard]] SuppressStderr final {
const int m_fd;

public:
SuppressStderr() : m_fd(suppress_stderr()) {}
~SuppressStderr() { restore_stderr(m_fd); }

/// <summary>
/// Invoke a function with stderr suppressed
/// </summary>
static void invoke(std::function<void()> func) {
SuppressStderr suppression{};
func();
}
};

} // namespace ont::test_utils::streams
18 changes: 17 additions & 1 deletion tests/test_simple_basecaller_execution.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,44 @@ set batch=384

echo dorado basecaller test stage
%dorado_bin% download --model %model%
if %errorlevel% neq 0 exit /b %errorlevel%
%dorado_bin% basecaller %model% tests/data/pod5 -b %batch% --emit-fastq > ref.fq
if %errorlevel% neq 0 exit /b %errorlevel%
%dorado_bin% basecaller %model% tests/data/pod5 -b %batch% --modified-bases 5mCG --emit-moves --reference ref.fq --emit-sam > calls.sam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado summary test stage
%dorado_bin% summary calls.sam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado aligner test stage
%dorado_bin% aligner ref.fq calls.sam > aligned-calls.bam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado duplex basespace test stage
%dorado_bin% duplex basespace tests/data/basespace/pairs.bam --threads 1 --pairs tests/data/basespace/pairs.txt > calls.bam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado duplex hac complex
%dorado_bin% duplex hac tests/data/duplex/pod5 --threads 1 > $output_dir/duplex_calls.bam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado duplex hac complex with mods
%dorado_bin% duplex hac,5mCG_5hmCG tests/data/duplex/pod5 --threads 1 > $output_dir/duplex_calls_mods.bam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado demux test stage
%dorado_bin% demux tests/data/barcode_demux/double_end_variant/EXP-PBC096_BC83.fastq --threads 1 --kit-name EXP-PBC096 --output-dir ./demux
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado auto model basecaller test stage
%dorado_bin% basecaller %model_speed% tests/data/pod5/dna_r9.4.1_e8/ -b %batch% --emit-fastq > ref.fq
if %errorlevel% neq 0 exit /b %errorlevel%
%dorado_bin% basecaller %model_speed%,5mCG tests/data/pod5/dna_r9.4.1_e8/ -b %batch% --emit-moves --reference ref.fq --emit-sam > calls.sam
if %errorlevel% neq 0 exit /b %errorlevel%

echo dorado auto summary test stage
%dorado_bin% summary calls.sam
if %errorlevel% neq 0 exit /b %errorlevel%

echo finished
echo finished

0 comments on commit eb2c235

Please sign in to comment.