Skip to content

Commit

Permalink
Merge branch 'cherry-pick-307bc744' into 'release-v0.6.0'
Browse files: browse the repository at this point in the history
Merge branch 'DOR-629_clean_custom_sequences' into 'release-v0.6.0'

See merge request machine-learning/dorado!918
  • Loading branch information
tijyojwad committed Mar 26, 2024
2 parents f6b6554 + b3ff151 commit 2919fe0
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 23 deletions.
24 changes: 16 additions & 8 deletions dorado/cli/basecaller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "cli/cli_utils.h"
#include "data_loader/DataLoader.h"
#include "data_loader/ModelFinder.h"
#include "demux/parse_custom_sequences.h"
#include "dorado_version.h"
#include "models/kits.h"
#include "models/models.h"
Expand Down Expand Up @@ -176,14 +177,21 @@ void setup(std::vector<std::string> args,

SamHdrPtr hdr(sam_hdr_init());
cli::add_pg_hdr(hdr.get(), args);
if (custom_kit) {
auto [kit_name, kit_info] = get_custom_barcode_kit_info(*custom_kit);
utils::add_rg_headers_with_barcode_kit(hdr.get(), read_groups, kit_name, kit_info,
sample_sheet.get());
} else if (!barcode_kit.empty()) {
const auto kit_info = get_barcode_kit_info(barcode_kit);
utils::add_rg_headers_with_barcode_kit(hdr.get(), read_groups, barcode_kit, kit_info,
sample_sheet.get());

if (barcode_enabled) {
std::unordered_map<std::string, std::string> custom_barcodes{};
if (custom_barcode_file) {
custom_barcodes = demux::parse_custom_sequences(*custom_barcode_file);
}
if (custom_kit) {
auto [kit_name, kit_info] = get_custom_barcode_kit_info(*custom_kit);
utils::add_rg_headers_with_barcode_kit(hdr.get(), read_groups, kit_name, kit_info,
custom_barcodes, sample_sheet.get());
} else {
const auto kit_info = get_barcode_kit_info(barcode_kit);
utils::add_rg_headers_with_barcode_kit(hdr.get(), read_groups, barcode_kit, kit_info,
custom_barcodes, sample_sheet.get());
}
} else {
utils::add_rg_headers(hdr.get(), read_groups);
}
Expand Down
33 changes: 25 additions & 8 deletions dorado/utils/bam_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,25 @@ void add_barcode_kit_rg_hdrs(sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::string& kit_name,
const barcode_kits::KitInfo& kit_info,
const std::unordered_map<std::string, std::string>& custom_sequences,
const utils::SampleSheet* const sample_sheet) {
const auto& barcode_sequences = barcode_kits::get_barcodes();
// Resolves a barcode name to its sequence. Sequences supplied by the user in
// `custom_sequences` take priority; the table obtained from
// barcode_kits::get_barcodes() is only consulted when no custom entry exists.
// Throws std::runtime_error when the name is present in neither table.
auto get_barcode_sequence =
        [&custom_sequences, barcode_sequences = barcode_kits::get_barcodes()](
                const std::string& barcode_name) {
            // User-specified custom sequences win over the kit table.
            if (const auto custom_entry = custom_sequences.find(barcode_name);
                custom_entry != custom_sequences.end()) {
                return custom_entry->second;
            }
            // Fall back to the table captured from barcode_kits::get_barcodes().
            if (const auto kit_entry = barcode_sequences.find(barcode_name);
                kit_entry != barcode_sequences.end()) {
                return kit_entry->second;
            }
            throw std::runtime_error("Unrecognised barcode name: " + barcode_name);
        };

for (const auto& barcode_name : kit_info.barcodes) {
const auto additional_tags = "\tBC:" + barcode_sequences.at(barcode_name);
const auto additional_tags = "\tBC:" + get_barcode_sequence(barcode_name);
const auto normalized_barcode_name = barcode_kits::normalize_barcode_name(barcode_name);
for (const auto& read_group : read_groups) {
std::string alias;
Expand Down Expand Up @@ -111,13 +126,15 @@ void add_rg_headers(sam_hdr_t* hdr, const std::unordered_map<std::string, ReadGr
}
}

void add_rg_headers_with_barcode_kit(sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::string& kit_name,
const barcode_kits::KitInfo& kit_info,
const utils::SampleSheet* const sample_sheet) {
void add_rg_headers_with_barcode_kit(
sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::string& kit_name,
const barcode_kits::KitInfo& kit_info,
const std::unordered_map<std::string, std::string>& custom_sequences,
const utils::SampleSheet* const sample_sheet) {
add_rg_headers(hdr, read_groups);
add_barcode_kit_rg_hdrs(hdr, read_groups, kit_name, kit_info, sample_sheet);
add_barcode_kit_rg_hdrs(hdr, read_groups, kit_name, kit_info, custom_sequences, sample_sheet);
}

void add_sq_hdr(sam_hdr_t* hdr, const sq_t& seqs) {
Expand Down
12 changes: 7 additions & 5 deletions dorado/utils/bam_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ struct AlignmentOps {

void add_rg_headers(sam_hdr_t* hdr, const std::unordered_map<std::string, ReadGroup>& read_groups);

void add_rg_headers_with_barcode_kit(sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::string& kit_name,
const barcode_kits::KitInfo& kit_info,
const utils::SampleSheet* const sample_sheet);
// Adds the plain read group headers for `read_groups` to `hdr` (via
// add_rg_headers) and then the per-barcode read group headers for the given kit.
//
// hdr              SAM header that receives the read group lines.
// read_groups      Read groups to emit, keyed by string
//                  (presumably the read group ID - confirm against callers).
// kit_name         Name of the barcode kit.
// kit_info         Kit description; its `barcodes` list determines which
//                  barcode entries are generated.
// custom_sequences Barcode name -> sequence overrides. A name found here is
//                  used for the "BC:" tag in preference to the sequence from
//                  barcode_kits::get_barcodes(). May be empty.
// sample_sheet     Optional sample sheet; nullptr is accepted.
//
// Throws std::runtime_error ("Unrecognised barcode name: ...") if a barcode in
// `kit_info.barcodes` is found in neither `custom_sequences` nor the table
// from barcode_kits::get_barcodes().
void add_rg_headers_with_barcode_kit(
sam_hdr_t* hdr,
const std::unordered_map<std::string, ReadGroup>& read_groups,
const std::string& kit_name,
const barcode_kits::KitInfo& kit_info,
const std::unordered_map<std::string, std::string>& custom_sequences,
const utils::SampleSheet* const sample_sheet);

void add_sq_hdr(sam_hdr_t* hdr, const sq_t& seqs);

Expand Down
20 changes: 18 additions & 2 deletions tests/BamUtilsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,14 @@ TEST_CASE("BamUtilsTest: Add read group headers scenarios", TEST_GROUP) {
const std::string KIT_NAME{"SQK-RAB204"};
auto kit_info = dorado::barcode_kits::get_kit_info(KIT_NAME);
dorado::SamHdrPtr sam_header(sam_hdr_init());

std::string const CUSTOM_BARCODE_NAME{"BC01"};
std::string const CUSTOM_BARCODE_SEQUENCE{"AAA"};
std::unordered_map<std::string, std::string> custom_barcodes{
{CUSTOM_BARCODE_NAME, CUSTOM_BARCODE_SEQUENCE}};

dorado::utils::add_rg_headers_with_barcode_kit(sam_header.get(), read_groups, KIT_NAME,
*kit_info, nullptr);
*kit_info, custom_barcodes, nullptr);

// Check the IDs of the groups are all there.
const size_t total_groups = read_groups.size() * (kit_info->barcodes.size() + 1);
Expand All @@ -108,8 +114,18 @@ TEST_CASE("BamUtilsTest: Add read group headers scenarios", TEST_GROUP) {
KIT_NAME, barcode_name);
const auto &barcode_seq = barcode_seqs.at(barcode_name);
CHECK(has_read_group_header(sam_header.get(), full_id.c_str()));
CHECK(get_barcode_tag(sam_header.get(), full_id.c_str()) == barcode_seq);
if (barcode_name != CUSTOM_BARCODE_NAME) {
CHECK(get_barcode_tag(sam_header.get(), full_id.c_str()) == barcode_seq);
}
}

// The custom barcode sequence should be present in the barcode tag
const auto custom_full_id = id + "_" +
dorado::barcode_kits::generate_standard_barcode_name(
KIT_NAME, CUSTOM_BARCODE_NAME);
auto actual_barcode_tag_sequence =
get_barcode_tag(sam_header.get(), custom_full_id.c_str());
CHECK(actual_barcode_tag_sequence == CUSTOM_BARCODE_SEQUENCE);
}
}
}
Expand Down

0 comments on commit 2919fe0

Please sign in to comment.