Skip to content

Commit

Permalink
remove MM/MN/ML for all non-primary alignments
Browse files Browse the repository at this point in the history
  • Loading branch information
tijyojwad committed Dec 20, 2023
1 parent 0fd14a3 commit 71e5bb9
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
2 changes: 1 addition & 1 deletion dorado/alignment/Minimap2Aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ std::vector<BamPtr> Minimap2Aligner::align(bam1_t* irecord, mm_tbuf_t* buf) {
add_sa_tag(record, reg, hits, j, static_cast<int>(l_seq), mm_index, use_hard_clip);

// Remove MM/ML/MN tags from secondary and supplementary alignments when soft clipping is not enabled.
if ((flag & BAM_FSUPPLEMENTARY) && !(mm_map_opts.flag & MM_F_SOFTCLIP)) {
if ((flag & (BAM_FSUPPLEMENTARY | BAM_FSECONDARY)) && !(mm_map_opts.flag & MM_F_SOFTCLIP)) {
if (auto tag = bam_aux_get(record, "MM"); tag != nullptr) {
bam_aux_del(record, tag);
}
Expand Down
30 changes: 22 additions & 8 deletions tests/AlignerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,19 +197,33 @@ TEST_CASE("AlignerTest: Check modbase tags are removed for secondary alignments"
options.soft_clipping = GENERATE(true, false);
dorado::HtsReader reader(query.string(), std::nullopt);
auto bam_records = RunAlignmentPipeline(reader, ref.string(), options, 10);
REQUIRE(bam_records.size() == 2);
REQUIRE(bam_records.size() == 3);

bam1_t* rec = bam_records[1].get();
bam1_t* primary_rec = bam_records[0].get();
bam1_t* secondary_rec = bam_records[1].get();
bam1_t* supplementary_rec = bam_records[2].get();

// Check aux tags.
if (options.soft_clipping) {
CHECK(bam_aux_get(rec, "MM") != nullptr);
CHECK(bam_aux_get(rec, "ML") != nullptr);
CHECK(bam_aux_get(rec, "MN") != nullptr);
CHECK(bam_aux_get(primary_rec, "MM") != nullptr);
CHECK(bam_aux_get(primary_rec, "ML") != nullptr);
CHECK(bam_aux_get(primary_rec, "MN") != nullptr);
CHECK(bam_aux_get(secondary_rec, "MM") != nullptr);
CHECK(bam_aux_get(secondary_rec, "ML") != nullptr);
CHECK(bam_aux_get(secondary_rec, "MN") != nullptr);
CHECK(bam_aux_get(supplementary_rec, "MM") != nullptr);
CHECK(bam_aux_get(supplementary_rec, "ML") != nullptr);
CHECK(bam_aux_get(supplementary_rec, "MN") != nullptr);
} else {
CHECK(bam_aux_get(rec, "MM") == nullptr);
CHECK(bam_aux_get(rec, "ML") == nullptr);
CHECK(bam_aux_get(rec, "MN") == nullptr);
CHECK(bam_aux_get(primary_rec, "MM") != nullptr);
CHECK(bam_aux_get(primary_rec, "ML") != nullptr);
CHECK(bam_aux_get(primary_rec, "MN") != nullptr);
CHECK(bam_aux_get(secondary_rec, "MM") == nullptr);
CHECK(bam_aux_get(secondary_rec, "ML") == nullptr);
CHECK(bam_aux_get(secondary_rec, "MN") == nullptr);
CHECK(bam_aux_get(supplementary_rec, "MM") == nullptr);
CHECK(bam_aux_get(supplementary_rec, "ML") == nullptr);
CHECK(bam_aux_get(supplementary_rec, "MN") == nullptr);
}
}

Expand Down
2 changes: 2 additions & 0 deletions tests/data/aligner_test/supplementary_basecall_target.fa
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
>read1 999
CCCTTTTTGCCCCTTTACGCCGCACCTAGCATATGCGCCTGCCACTTGCAGCTTTCCCGCCCCTGCCCGCCTGCCCACCCCTTGCCGCCTCCTTATGCGCCGCCCCGCCCACCCCCGCCCACCCACCGCCCCACGCTTCCCCCACCCGCCGCCCACGCGCATCGCCCGCCCCCCCCGCCCCACCCGCCCACCGCCCTGTCCAAAGCCCGCCCCACGCCCGGCCCGCGCCCGCCAGCCCCGCCCCGTGGCCGCCGCCCCCGCCCGCCCCCCACCCGCCCCCCTACCTGCCCGCCTGGCCGCCGCCTCCCATCCCACCCGCCACCCGCCCCCTGTGCCCCACTTCCAGCCCGGCCGCCCATCCCCGCCCCCCCCCCCCACCCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCCGCCCGCCCGGCGCCGGCCCCATGCCCCCACCCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCGCCCCACCCGCCCCCACCGGCCCCGCCCGGCGCAGCCCGGCAGCTTCCCCCGCCCACGCCGCCCCCTTGCCAGCCCACCCCGCAGCTCACCCACCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCGCCGCGCACCCGCCCCCGCCCGCTCCCCCCCGCCCACCCGCCCGCCTCGCCCATCTGCTCCCCCCACCCACCCCCGCGCCCATGCGCCCCCCGCCCACCCCGCGCCCACCCGCCCCCCGCCCCACCCACCCACCCCGCCCCGCCCCACTTGCCCCCACGCCCCCGCCCCCCGCCGCCGCCCTCCCACACACGCTTCCCAGCCCGCTTCGCACAGCGCCTACCGCCGCCCCACCCGCCCCTTAAAAGCCACCGGCGCCCGCGCACCCACCCCGCCATCCAGCAGCCAACCAGCCCTTGCTTCCCCATACTTGTAAAGCTTCCCCGCACCCGCCCGCCTTCCACCCACCCACCGCCCCCCCGGCCCGCCGCCCGGCCTCGC
>read3 999
CCCTTTTTGCCCCTTTACGCCGCACCTAGCATATGCGCCTGCCACTTGCAGCTTTCCCGCCCCTGCCCGCCTGCCCACCCCTTGCCGCCTCCTTATGCGCCGCCCCGCCCACCCCCGCCCACCCACCGCCCCACGCTTCCCCCACCCGCCGCCCACGCGCATCGCCCGCCCCCCCCGCCCCACCCGCCCACCGCCCTGTCCAAAGCCCGCCCCACGCCCGGCCCGCGCCCGCCAGCCCCGCCCCGTGGCCGCCGCCCCCGCCCGCCCCCCACCCGCCCCCCTACCTGCCCGCCTGGCCGCCGCCTCCCATCCCACCCGCCACCCGCCCCCTGTGCCCCACTTCCAGCCCGGCCGCCCATCCCCGCCCCCCCCCCCCACCCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCCGCCCGCCCGGCGCCGGCCCCATGCCCCCACCCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCGCCCCACCCGCCCCCACCGGCCCCGCCCGGCGCAGCCCGGCAGCTTCCCCCGCCCACGCCGCCCCCTTGCCAGCCCACCCCGCAGCTCACCCACCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCGCCGCGCACCCGCCCCCGCCCGCTCCCCCCCGCCCACCCGCCCGCCTCGCCCATCTGCTCCCCCCACCCACCCCCGCGCCCATGCGCCCCCCGCCCACCCCGCGCCCACCCGCCCCCCGCCCCACCCACCCACCCCGCCCCGCCCCACTTGCCCCCACGCCCCCGCCCCCCGCCGCCGCCCTCCCACACACGCTTCCCAGCCCGCTTCGCACAGCGCCTACCGCCGCCCCACCCGCCCCTTAAAAGCCACCGGCGCCCGCGCACCCACCCCGCCATCCAGCAGCCAACCAGCCCTTGCTTCCCCATACTTGTAAAGCTTCCCCGCACCCGCCCGCCTTCCACCCACCCACCGCCCCCCCGGCCCGCCGCCCGGCCTCGC
>read2 899
CCGGCCCCGGCCCAGCCTTGCCCGCCTTCTCGAAGCGCCGCCGCGCCTCCAGCCCAGCCCGCCCACCCTCGCCCGCTTGCCCCCGGCTCGCGCTCCAGGCGCCCGGCCCCGGCCCCGCCCGCGCTCTGCCATTTCCACTTCCCACCGGCCAAAGCCCGCCGGCATCCCGCGCGCCCGCACAAGCCAAGCTTCCCCGGCTTGGCGCCATTTAAGCAAAAGCCGCCCGCCAACGCCAGCGCCCGGCGCCCGGCCCGCCACCGGCAGCGCCGCCCTTTCCAGCCCGGCGCTGGCCATCGGCCAGCGCACCACGAAGCCAGCCCACCGCCAGCCCGCCAAAGCCGCTCCCACAAGCCAAAGCCCGCCCACCCCCAGCCACCCATACCCCGCTCGCCCGCCAGCTTCTGTATGCCCACCCAGCGCACTTGCTTTCCACCCCCACCCCCGCCCCGCGCCCGCCCGCCCGCCACCCCTGCCCCGCCCCCCGCCGCTGCCTTACCGCTGCCCCAAAGCGCCCTTCCAGCACTTCCTTAGCCACCCTTTTTAGCTTCACTTCCACCTACCGCTTGCCGCCGGCTGCTTCCCCGCCCATCCCACCGCACCCGCCCGCGCCTTATGCGCACCGCCCGCGCCCGGCCTGCAAGCCAGCTTCGCGCGCCCGCTTCCACCACCCCCGCCAGCCCGCTTCCTGCTTGCCCCCGCCGCACCGCCCACCCACCCCCACCGGCCAGCGCTCATATGCCCCCGCCGCACCAGCACGCCACCGCCCGCCTCCTTCGCCAGCCCGCCCACCCGCCGCCCAGCCACCGCACCCACTCAAGGGGTCGCCTCTCATGCGCATCATGCGCAAAAAAAAAAACGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAA

0 comments on commit 71e5bb9

Please sign in to comment.