diff --git a/dorado/alignment/Minimap2Aligner.cpp b/dorado/alignment/Minimap2Aligner.cpp index b3e492bd..217625e5 100644 --- a/dorado/alignment/Minimap2Aligner.cpp +++ b/dorado/alignment/Minimap2Aligner.cpp @@ -222,6 +222,19 @@ std::vector Minimap2Aligner::align(bam1_t* irecord, mm_tbuf_t* buf) { add_tags(record, aln, seq, buf); add_sa_tag(record, reg, hits, j, static_cast(l_seq), mm_index, use_hard_clip); + // Remove MM/ML/MN tags from secondary and supplementary alignments when soft clipping is not enabled. + if ((flag & (BAM_FSUPPLEMENTARY | BAM_FSECONDARY)) && !(mm_map_opts.flag & MM_F_SOFTCLIP)) { + if (auto tag = bam_aux_get(record, "MM"); tag != nullptr) { + bam_aux_del(record, tag); + } + if (auto tag = bam_aux_get(record, "ML"); tag != nullptr) { + bam_aux_del(record, tag); + } + if (auto tag = bam_aux_get(record, "MN"); tag != nullptr) { + bam_aux_del(record, tag); + } + } + results.push_back(BamPtr(record)); } diff --git a/tests/AlignerTest.cpp b/tests/AlignerTest.cpp index cf0408ff..b0e81026 100644 --- a/tests/AlignerTest.cpp +++ b/tests/AlignerTest.cpp @@ -184,6 +184,49 @@ TEST_CASE("AlignerTest: Check dorado tags are retained", TEST_GROUP) { } } +TEST_CASE("AlignerTest: Check modbase tags are removed for secondary alignments", TEST_GROUP) { + using Catch::Matchers::Contains; + + fs::path aligner_test_dir = fs::path(get_aligner_data_dir()); + auto ref = aligner_test_dir / "supplementary_basecall_target.fa"; + auto query = aligner_test_dir / "basecall.sam"; + + auto options = dorado::alignment::dflt_options; + options.kmer_size = options.window_size = 15; + options.index_batch_size = 1'000'000'000ull; + options.soft_clipping = GENERATE(true, false); + dorado::HtsReader reader(query.string(), std::nullopt); + auto bam_records = RunAlignmentPipeline(reader, ref.string(), options, 10); + REQUIRE(bam_records.size() == 3); + + bam1_t* primary_rec = bam_records[0].get(); + bam1_t* secondary_rec = bam_records[1].get(); + bam1_t* supplementary_rec = bam_records[2].get(); + + // 
Check aux tags. + if (options.soft_clipping) { + CHECK(bam_aux_get(primary_rec, "MM") != nullptr); + CHECK(bam_aux_get(primary_rec, "ML") != nullptr); + CHECK(bam_aux_get(primary_rec, "MN") != nullptr); + CHECK(bam_aux_get(secondary_rec, "MM") != nullptr); + CHECK(bam_aux_get(secondary_rec, "ML") != nullptr); + CHECK(bam_aux_get(secondary_rec, "MN") != nullptr); + CHECK(bam_aux_get(supplementary_rec, "MM") != nullptr); + CHECK(bam_aux_get(supplementary_rec, "ML") != nullptr); + CHECK(bam_aux_get(supplementary_rec, "MN") != nullptr); + } else { + CHECK(bam_aux_get(primary_rec, "MM") != nullptr); + CHECK(bam_aux_get(primary_rec, "ML") != nullptr); + CHECK(bam_aux_get(primary_rec, "MN") != nullptr); + CHECK(bam_aux_get(secondary_rec, "MM") == nullptr); + CHECK(bam_aux_get(secondary_rec, "ML") == nullptr); + CHECK(bam_aux_get(secondary_rec, "MN") == nullptr); + CHECK(bam_aux_get(supplementary_rec, "MM") == nullptr); + CHECK(bam_aux_get(supplementary_rec, "ML") == nullptr); + CHECK(bam_aux_get(supplementary_rec, "MN") == nullptr); + } +} + TEST_CASE("AlignerTest: Verify impact of updated aligner args", TEST_GROUP) { fs::path aligner_test_dir = fs::path(get_aligner_data_dir()); auto ref = aligner_test_dir / "target.fq"; diff --git a/tests/BamUtilsTest.cpp b/tests/BamUtilsTest.cpp index 5b9bf125..db1aa7e6 100644 --- a/tests/BamUtilsTest.cpp +++ b/tests/BamUtilsTest.cpp @@ -35,10 +35,10 @@ TEST_CASE("BamUtilsTest: fetch keys from PG header", TEST_GROUP) { auto keys = utils::extract_pg_keys_from_hdr(sam.string(), {"PN", "CL", "VN"}); CHECK(keys["PN"] == "dorado"); - CHECK(keys["VN"] == "0.2.3+0f041c4+dirty"); + CHECK(keys["VN"] == "0.5.0+5fa4de73+dirty"); CHECK(keys["CL"] == "dorado basecaller dna_r9.4.1_e8_hac@v3.3 ./tests/data/pod5 -x cpu --modified-bases " - "5mCG"); + "5mCG --emit-sam"); } TEST_CASE("BamUtilsTest: add_rg_hdr read group headers", TEST_GROUP) { diff --git a/tests/data/aligner_test/basecall.sam b/tests/data/aligner_test/basecall.sam index 
534d9275..ba9d49bc 100644 --- a/tests/data/aligner_test/basecall.sam +++ b/tests/data/aligner_test/basecall.sam @@ -1,4 +1,4 @@ @HD VN:1.6 SO:unknown -@PG ID:basecaller PN:dorado VN:0.2.3+0f041c4+dirty CL:dorado basecaller dna_r9.4.1_e8_hac@v3.3 ./tests/data/pod5 -x cpu --modified-bases 5mCG -@RG ID:dccc100a-19d5-4440-886b-ed6ba5d9545b_dna_r9.4.1_e8_hac@v3.3 PU:PAK21298 PM:PAPAP48 DT:2022-04-27T16:42:08.827+00:00 PL:ONT DS:basecall_model=dna_r9.4.1_e8_hac@v3.3 runid=dccc100a-19d5-4440-886b-ed6ba5d9545b LB:no_sample SM:no_sample -002bd127-db82-436f-b828-28567c3d505d 4 * 0 0 * * 0 0 CCCTTTTTGCCCCTTTACGCCGCACCTAGCATATGCGCCTGCCACTTGCAGCTTTCCCGCCCCTGCCCGCCTGCCCACCCCTTGCCGCCTCCTTATGCGCCGCCCCGCCCACCCCCGCCCACCCACCGCCCCACGCTTCCCCCACCCGCCGCCCACGCGCATCGCCCGCCCCCCCCGCCCCACCCGCCCACCGCCCTGTCCAAAGCCCGCCCCACGCCCGGCCCGCGCCCGCCAGCCCCGCCCCGTGGCCGCCGCCCCCGCCCGCCCCCCACCCGCCCCCCTACCTGCCCGCCTGGCCGCCGCCTCCCATCCCACCCGCCACCCGCCCCCTGTGCCCCACTTCCAGCCCGGCCGCCCATCCCCGCCCCCCCCCCCCACCCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCCGCCCGCCCGGCGCCGGCCCCATGCCCCCACCCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCGCCCCACCCGCCCCCACCGGCCCCGCCCGGCGCAGCCCGGCAGCTTCCCCCGCCCACGCCGCCCCCTTGCCAGCCCACCCCGCAGCTCACCCACCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCGCCGCGCACCCGCCCCCGCCCGCTCCCCCCCGCCCACCCGCCCGCCTCGCCCATCTGCTCCCCCCACCCACCCCCGCGCCCATGCGCCCCCCGCCCACCCCGCGCCCACCCGCCCCCCGCCCCACCCACCCACCCCGCCCCGCCCCACTTGCCCCCACGCCCCCGCCCCCCGCCGCCGCCCTCCCACACACGCTTCCCAGCCCGCTTCGCACAGCGCCTACCGCCGCCCCACCCGCCCCTTAAAAGCCACCGGCGCCCGCGCACCCACCCCGCCATCCAGCAGCCAACCAGCCCTTGCTTCCCCATACTTGTAAAGCTTCCCCGCACCCGCCCGCCTTCCACCCACCCACCGCCCCCCCGGCCCGCCGCCCGGCCTCGCCCGGCCCCGGCCCAGCCTTGCCCGCCTTCTCGAAGCGCCGCCGCGCCTCCAGCCCAGCCCGCCCACCCTCGCCCGCTTGCCCCCGGCTCGCGCTCCAGGCGCCCGGCCCCGGCCCCGCCCGCGCTCTGCCATTTCCACTTCCCACCGGCCAAAGCCCGCCGGCATCCCGCGCGCCCGCACAAGCCAAGCTTCCCCGGCTTGGCGCCATTTAAGCAAAAGCCGCCCGCCAACGCCAGCGCCCGGCGCCCGGCCCGCCACCGGCAGCGCCGCCCTTTCCAGCCCGGCGCTGGCCATCGGCCAGCGCACCACGAAGCCAGCCCACCGCCAGCCCGCCAAAGCCGCTCCCACAAGCCAAAGCCCGCCCACCCCCAGCCACCCATACCCCGCTCGCCCGCCAGCTTCTGTATGCCCAC
CCAGCGCACTTGCTTTCCACCCCCACCCCCGCCCCGCGCCCGCCCGCCCGCCACCCCTGCCCCGCCCCCCGCCGCTGCCTTACCGCTGCCCCAAAGCGCCCTTCCAGCACTTCCTTAGCCACCCTTTTTAGCTTCACTTCCACCTACCGCTTGCCGCCGGCTGCTTCCCCGCCCATCCCACCGCACCCGCCCGCGCCTTATGCGCACCGCCCGCGCCCGGCCTGCAAGCCAGCTTCGCGCGCCCGCTTCCACCACCCCCGCCAGCCCGCTTCCTGCTTGCCCCCGCCGCACCGCCCACCCACCCCCACCGGCCAGCGCTCATATGCCCCCGCCGCACCAGCACGCCACCGCCCGCCTCCTTCGCCAGCCCGCCCACCCGCCGCCCAGCCACCGCACCCACTCAAGGGGTCGCCTCTCATGCGCATCATGCGCAAAAAAAAAAACGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAA &&'$%./-/0+)()(((*%%%%%%('$&&&&''&''%&))..(&&&&&&%'))))))*('''**)*)(%%&'%$$)-+))((%$%%&')%$%$%%%&%%&('(((,,(.)'&&&)+.0252''()(**/,++*(&%%%$%(('(+,'%%%%*+))(&%'')&&%%+)((')010/-,*+03.(('()2+,%$%**--)&&%'*+,..02420/,**''&$$%'''(**2*+,,'''(-*('(-+)%%%()*,++--+,,-**/..--0/-+&''+**+-//-&(($%$%(())(+--.),+&**''+,*%$$')**,)''%(((+--,+(''$###+,)&&%$##%%%'%%''%''&())*254348:91(+..01+(((,''$*453,*)'&')))*(',///226768741112,(*')'%%&&$&%)++)$%%&'-/.()*)*+%$$$''(-.0---*))+'''&*%'%%'('%&(,-.,)*,,+*&&'()-0+()))'&%(***+(%$$'%&&%&())((&%%&'((,,,,+,/)'%&((*--+*,+)''&()*+*++++'&&&'&&%$#$&%&%)*+++**(('''(****(&(*-143)&&&*-+/126+(''(-+'$'%%&()'*-056.+,,,+)&%&+),-+(((+,,.//00,&&&&)&$%%&&'&&&')*++.03-030,-.2/0/)*1/)(&&(+46<==:99,////-**&&'&)%$&&&'()(''))*-,+++0/0056620/0013,,,--+&%%%%&/264,'(,--,-+,...-/2*(,))))'%%&(+'()((%%''*)'))**)**'''$$%&%%%&/2+)'&%$$%()+.-,,')*''('&&&&(('&(&'&'().+%%%%%'')()'('()&&%$%$%%%$$$%&'&&&++//,+,+,-./0/-(%%$%$###),,('()**'&&(('),,**+-$###$%&(-''*+))&**43-(&(((+,('&&'(*+-./*&&%%-.../2.%%%&''&&'&&%%%%*+,-/.*(&$#$&##%%('')'(*%%&(,---+(()&&'((&&&%(%&&&%$&'&%%'()**)*('&%%&%%''&%%''()((&'-,.//0(',//))))((%###"####$&$$%%&%'''&&&()*'&'+-+(&$$%%%()('%$##$#%$$$$$$$$%'&'%''(''''&%%%&%$$$%&&%%%%%%)('&&&$$$'()+-1(%$$%),,,-44-*&&'('$$%&'&(((%$$%&&%%%$%%#$%&&')0/***+)*(''*'%%%&&&%$$%%%$$%&''(''$#$&('&'(**+)))*-''''.('&%%%&'&%%&1'%%%%&&*--+'(&&)'''/.,*)(('&&*(++'&$$$$()'&&%%&(%%&&*++)&%%%###$())./0--)*''%%%%$##%#$%(.//,)*+),./().+*++))+*)('()+111/0)-,-,($$$%%%%%*,))&%%$&&(*(&&%&&(+'%%(****)+('(-/
)&'&%'((*++.*&'&%$%##&'())(&&$$&'(&#$$$$'))(+*'&'&'%%$%**(('&&%%%&%%&&&&$%%%%()(&&,((('&&%%''&'(&%#"###$%&**.0(((((4'%%%$$%%&),--'&&&(((),,)+-/*)(*++*./,/0154/.1.../++*)'%%&'(&%%&&'((''&%&%%%%%'))')+-...40.)(%'&(*)(&%$$$%%$$$$%&&'()%$$%&%%&&'''-&(*)+0121+)'$$$$%()'&&'')++)+--+(('''%%&&&&&'&%'(+,./,--*'&(%%$$$$$$$#$$$$##$#$%%&&%%$###########$##$$$$$$##########$$$$####%%&&'())*+++++*' qs:i:6 du:f:11.765500 ns:i:47062 ts:i:10 mx:i:2 ch:i:2647 st:Z:2022-04-27T19:56:52.425+00:00 rn:i:12088 fn:Z:single_na24385.pod5 sm:f:38.547 sd:f:78.652 sv:Z:quantile RG:Z:dccc100a-19d5-4440-886b-ed6ba5d9545b_dna_r9.4.1_e8_hac@v3.3 MM:Z:C+m?,7,1,4,9,6,10,4,1,3,7,7,4,8,1,3,0,1,2,7,6,4,7,4,2,2,0,2,5,3,1,1,4,2,8,10,3,1,10,4,14,1,6,16,5,6,4,3,2,2,0,1,12,4,0,4,12,1,6,6,3,2,0,3,6,3,1,13,13,5,0,5,2,5,1,0,3,4,2,7,5,2,2,18,0,3,5,6,0,5,5,13,3,10,4,5,1,1,8,6,1,2,3,1,6,7,0,2,0,7,22,3,2,11,6,2,1,2,2,2,3,7,3,0,1,1,0,9,6,2,5,1,0,3,2,3,3,2,0,11,4,1,3,0,0,2,8,1,4,2,2,2,2,0,2,2,3,1,1,7,0,3,2,3,6,4,3,9,16,1,2,10,14,3,0,2,2,2,9,5,1,4,5,22,2,1,5,7,3,2,0,2,2,2,0,2,6,0,0,2,9,4,8,1,2,12,2,6,1,4,3,2,4,4,5,1,6,6,4,2,1; ML:B:C,117,114,122,77,66,15,106,102,28,39,105,57,179,114,125,46,77,88,244,44,79,207,37,84,75,84,32,78,76,52,102,168,28,112,55,47,89,38,33,25,105,72,130,12,63,80,66,81,33,64,96,44,27,37,104,114,92,2,192,40,39,34,4,64,113,152,4,61,81,87,85,84,42,188,82,31,25,42,88,38,44,75,73,103,82,93,27,13,42,38,34,66,111,41,80,173,125,84,25,76,55,11,136,102,82,111,97,87,163,81,40,2,90,234,58,130,46,87,23,11,74,221,81,116,104,99,91,124,49,147,49,252,119,31,90,36,46,30,232,213,198,142,144,60,73,119,113,202,79,103,75,11,95,12,67,114,59,81,2,30,94,166,66,201,116,128,84,39,89,171,146,68,43,57,56,70,80,94,68,51,33,115,84,126,99,144,60,44,103,59,124,150,53,42,17,87,176,187,18,112,175,95,232,145,241,78,169,198,66,149,92,165,61,190,168,173,63,46,112,202 +@PG ID:basecaller PN:dorado VN:0.5.0+5fa4de73+dirty CL:dorado basecaller dna_r9.4.1_e8_hac@v3.3 ./tests/data/pod5 -x cpu --modified-bases 5mCG --emit-sam +@RG 
ID:a16f403b6a3655419511bf356ce3b40b65abfae4_dna_r9.4.1_e8_hac@v3.3 PU:PAK21298 PM:PAPAP48 DT:2022-04-27T16:47:57.305+00:00 PL:ONT DS:basecall_model=dna_r9.4.1_e8_hac@v3.3 modbase_models=dna_r9.4.1_e8_hac@v3.3_5mCG@v0.1 runid=a16f403b6a3655419511bf356ce3b40b65abfae4 LB:no_sample SM:no_sample +002bd127-db82-436f-b828-28567c3d505d 4 * 0 0 * * 0 0 CCACTTTTTGCCCTTTACGTAAGCACTTGGCATGGCCGCCTTAGCCGCCCCTTGGCTCCCAGCACACATAGCCCATACCAGCCCACCCCTTGGCATGTCCTTATCAGCGCCCTTCCCCGCCCACCCCCCCGCCCCACCCACACCGCCCCGGCCGCTCCCCCACCACCCATAGCCCGCGCGCGTAAAAGCCCGCCCCCCCCCCCCACCCACATGCAGCCCTGTCAAAGCCCGCCCCGTAGGCCCGCGCCCATCAGCCCAGCCCCATTGTGCATGTCCCCCGGCCCGCCCCCCACCCTTCCCCCCCCCACCTGTGCCTGGCCGCCGCCTCCCATCCACCCGCCACCACTTCCCCTGTGCCCCACTTCCAGCCCGGCCCAGCCATCCCCGCCCCCCCCCCCACCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCAGCCCGCCAGGAAAACATCAAGCCCCATGCCCCCACCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCCCCACCCGCCCCCACCGGCCATGGCGCAGCCCGGCAGCTTCCCCGGCCACGCCGCCCCCTTCGCAGCCCACCCCGCAGCCCGCCTGCCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCCGCGCGCACCCGCCCCGCCCGCTCCCCCCCGCCCACCCGCCCTTTCCTTGCATCTGCTCCCCCCACCCCACCCCCGCGCCCATCGCCCCCCGCCCACCCCCGCGCCCACCCCCTCCCTTCCCCTTCCCACCCACCCACCCCCGCCCCGCCCCACTTGCCCCACCCGCCCCCGCCCCCCCCGCGCGCTCCTCCCCACACACGGCTTCTGACCAGCTTCGCTTTAAGCCCTGCAAGCGCTCCCCACCACTTACGTAAAGCCGGTAGCCAGCGCACCCCGCCCGTCATCGGCCATCCAAGCCGGCCGGCCTTCCCCCGACCGCCCCTTAAAGCGCGCCCCGGCACCGTAAGCCCGGCCACCCGCGCCCCCCCCACCCCAGCCCCCCCCCGGCCCGCCGTGGCCTAAGCCCGGCCCCCGGCCCAGCCTTGCCGGCCTTCTCGGCCGCCGGCGCGCCAGCCCAGCCATCCGCCTAGCCAAGCGCCCCGGCCGGCGCTCCAGGCCGGCCCCGGCCGCCGGCCTTAGCCGTTTCACTTCCCACCGGCCAAAGCCCGGCATCATGCTCCTTACCGTAAGCTTCCCCGGCTTAGCCGGCAAGCGAAAACGCCCGCCCGGCGTAGCGCTCCCCGGCCATGACCGCCGGCCGTAGCCCTTTCCGGCCGGCGGCCCCGGCCATCGGCCAAGCGCACCACCGAAATGGCCCCGTGACCCATCTCGGCCGCTCTACGCAAGCCAAAGCCGGCCCACCCCCGGCCACCCGCCCTTGCTTCCTTAGCCAAACACGCATGCCCACCCAGCGCGCGCTCCACCCCCCCCGCCCGCGCCCTTCGCCCGCCACCCCACCAGCGCACCGCCCGCTGCCTTACCGCTTAGCCCCAAAACGCCCTTCCGGCGCGCTTCCTTAGCCACCCGCTTGCAACTAAGCACCGCCTACCGCTTGCCGCCGACGCTACGGCCATCCCACCGCTTCCTTCCGTAACCTGTCGCCTTACCGCCCGCGCCCGGCCTGCAAGCCAACCGG
CCGGCTACCACCCCCGTAGCCCAAAGCCTGCTTGCCCCGGCCGCTGTCCCCGCCCGCCCCCACCCCGGCCAAGCATCATCCTCCTTCGCCCTTTCCTTGCGCTCCCCACCGTAAGCACTGTCATCTTCCATCACCCTTAAACCCAATACTTGCCCCGCTCAAGTTTCGCCATCTCATGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGAATTTTAAAAAAAAAAAAAA ((&'&'++)))'))+'&&%$$%$*--,+'''((&%%%$$$&%$$%$$$&()'''&&*''((%%%%'&(((***&''(())*+--0./.-)'&$$%&&&&$%%%&'&%%%%%%&'((,+&&&&'()(),..-**.//--++'''(&&%&&&(((')*&',,,*+++--*'&&&*,)&%%%')&%$&'))''&())-146321-*2.''&&%$%''((*+*,+))&'*,-,)))),+%%&$$&'&''((()&%%%(,--/,+,+*'%%%$$$#%%$%%+-))&%'(((((,0/,%&&%$%%.+0/-,(%%'$$###$)+-//*,+&'&$$,,,('&$'()-*+)*(&&%%%%+))(%$#$''&%%&&$$%%%&&%'&%%$%'%(+,210//4830(+.010,,'))$(252()'&&'*)))('(((()))((*+*''**'&(&%%%&%$##%%%())*,,)$#$%&.1/**+('&%$$**((())++(&%%&'''*%(&&''(%&)+++('+&%%&%%$%,*''(('((*&$##$&%&%''(+**(((&'''*)'&%(&*&&%$%'('(('%$$%%'(((('()(('&''''&&##$$%)*+*,--*(''''&&&'&%'()+01+'''*,*-014*.--./-,&%&&')*'*/034/--+*('&',)--+&&%'*-/0../-++**(''&%%&'&&%&(),,01410852/0/0,**((---*&&&25<=<655../001.++'&'&*&%,+)'$$%%%%%)(((((1/....---..023333456*)))**'''())047.(('(//,**'(+-.//-,*%&'&&'())*+,+)''&&&$%&)*%$$#$#$$(,*''''&###%&'('&$#%&$$$%&)*)))%$$$###'))*-,*%$%%$#%%$$$'('''*))(((%%%&'$$'('&&%%'%%&&'')'''$$$$'*,,)&%&&*,('+--.+/-''%&%%)(%$%&&%&%%'')'())*'''&()())*--,.//)),+(''(0/-()*'&'&',)'('&$$$+)(%$%&%'((*,-*%%$'(&&&'(*(((&&%&&''%$####%'''&&&%%$&$&&'*(''()&%$%'($'%$$$%$&$$&'()&%$&'''(%%%%$$%&'))*104,..&&%%&&&')&%%%%$$%&%$&'(((())***+(&%&)))&%%$%)$$$#$&%$$%$$$####+*)()*))(%%&&%$&)(&$#%(%$$&%%$$&''&&%&'((&%$$&-'&&*-*))(&%%%*+&+*'&&&$$'''('(*,3.''''&,,(%&''(''(*'&&$$()&$#%'./01**'%&&&%$$%(**&&%%%&'($&$%##%%##$0%$$##$$&'(,*'(%%#%%&+'&%&',(''(&&'%)&%%$$###''&%&%$$%$&$%##$$$##$%%')***''&'**&%)**(+++*(%&*(')*)***0.+***+21111)++++'%%%%%%&$$&%')('&&%%$$&(()**((&&&%%%%&&-''',/((+'%'('))+,-4-*+*(()&%%%%%%(%%$$#$$&&(''(%*++--*&%$%$$$$$$(&%$'&&(%##%&&'())))&&&&&&%&'''&(*--('(('('&%%%&'**()***.((''&&''(*(((%%$%%'(&%%#$###'***41*'&&&*'''''%$$''&&%%%)))(''%%$$##%''&'''''')1-*''&%%&&'(%$%%'('+*&%$$%%'%&&%##*+('''(''&''))+**
''&&%('''%&%$%%$$$$&')(*/*&%&&%%%%*(%%$$$$$$$&(('**&%%%%$$$$$$$$#$$#%&&&&&&'&&&&%%%%%%%%%&&&&&&&&&%#$&$#$%$"###""#$$%$$$$$##" qs:i:6 du:f:11.7655 ns:i:47062 ts:i:10 mx:i:2 ch:i:2647 st:Z:2022-04-27T19:56:52.425+00:00 rn:i:12088 fn:Z:single_na24385.pod5 sm:f:55.9973 sd:f:81.9378 sv:Z:med_mad dx:i:0 RG:Z:a16f403b6a3655419511bf356ce3b40b65abfae4_dna_r9.4.1_e8_hac@v3.3 MN:i:1889 MM:Z:C+m?,6,4,3,27,6,9,9,3,1,13,0,0,0,2,23,3,2,0,16,2,23,1,9,18,8,14,5,6,4,5,15,4,0,4,12,7,6,2,3,5,2,1,5,7,3,9,5,0,5,2,6,0,0,3,3,2,7,5,22,0,3,5,7,0,28,3,11,4,7,0,0,9,5,5,8,1,2,4,2,1,5,1,6,1,4,0,3,2,2,4,0,20,2,1,4,4,6,3,1,1,0,0,8,4,3,1,0,4,3,1,1,3,6,4,6,4,2,1,0,2,2,0,0,4,3,1,1,4,1,0,3,2,2,4,3,4,1,2,4,7,4,9,7,0,0,10,2,0,3,2,8,2,2,4,5,4,0,0,7,5,3,2,1,0,1,6,4,2,3,2,0,2,6,1,7,9,1,4,2,8,8,5,6,17,2; ML:B:C,36,41,12,28,172,174,149,51,21,128,60,30,28,65,73,1,18,15,231,7,6,51,3,20,139,146,184,73,37,52,23,44,88,13,54,0,98,50,21,142,37,22,33,5,7,31,12,2,8,19,37,19,5,8,38,7,33,8,68,150,5,7,4,3,49,24,19,8,9,11,7,0,87,75,14,156,55,104,39,207,25,199,229,57,82,108,11,133,8,57,45,236,1,41,20,161,60,94,6,96,13,47,28,68,41,16,20,134,72,11,124,66,53,4,127,122,89,130,138,1,6,1,160,54,101,58,20,3,39,6,67,62,35,173,63,117,11,103,145,132,16,3,34,26,16,45,1,17,95,11,25,34,0,222,174,11,20,15,14,240,214,71,72,146,16,212,5,30,24,7,8,57,37,68,106,87,39,92,7,135,102,12,91,98,122 diff --git a/tests/data/aligner_test/supplementary_basecall_target.fa b/tests/data/aligner_test/supplementary_basecall_target.fa new file mode 100644 index 00000000..4282443c --- /dev/null +++ b/tests/data/aligner_test/supplementary_basecall_target.fa @@ -0,0 +1,6 @@ +>read1 999 
+CCCTTTTTGCCCCTTTACGCCGCACCTAGCATATGCGCCTGCCACTTGCAGCTTTCCCGCCCCTGCCCGCCTGCCCACCCCTTGCCGCCTCCTTATGCGCCGCCCCGCCCACCCCCGCCCACCCACCGCCCCACGCTTCCCCCACCCGCCGCCCACGCGCATCGCCCGCCCCCCCCGCCCCACCCGCCCACCGCCCTGTCCAAAGCCCGCCCCACGCCCGGCCCGCGCCCGCCAGCCCCGCCCCGTGGCCGCCGCCCCCGCCCGCCCCCCACCCGCCCCCCTACCTGCCCGCCTGGCCGCCGCCTCCCATCCCACCCGCCACCCGCCCCCTGTGCCCCACTTCCAGCCCGGCCGCCCATCCCCGCCCCCCCCCCCCACCCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCCGCCCGCCCGGCGCCGGCCCCATGCCCCCACCCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCGCCCCACCCGCCCCCACCGGCCCCGCCCGGCGCAGCCCGGCAGCTTCCCCCGCCCACGCCGCCCCCTTGCCAGCCCACCCCGCAGCTCACCCACCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCGCCGCGCACCCGCCCCCGCCCGCTCCCCCCCGCCCACCCGCCCGCCTCGCCCATCTGCTCCCCCCACCCACCCCCGCGCCCATGCGCCCCCCGCCCACCCCGCGCCCACCCGCCCCCCGCCCCACCCACCCACCCCGCCCCGCCCCACTTGCCCCCACGCCCCCGCCCCCCGCCGCCGCCCTCCCACACACGCTTCCCAGCCCGCTTCGCACAGCGCCTACCGCCGCCCCACCCGCCCCTTAAAAGCCACCGGCGCCCGCGCACCCACCCCGCCATCCAGCAGCCAACCAGCCCTTGCTTCCCCATACTTGTAAAGCTTCCCCGCACCCGCCCGCCTTCCACCCACCCACCGCCCCCCCGGCCCGCCGCCCGGCCTCGC +>read3 999 
+CCCTTTTTGCCCCTTTACGCCGCACCTAGCATATGCGCCTGCCACTTGCAGCTTTCCCGCCCCTGCCCGCCTGCCCACCCCTTGCCGCCTCCTTATGCGCCGCCCCGCCCACCCCCGCCCACCCACCGCCCCACGCTTCCCCCACCCGCCGCCCACGCGCATCGCCCGCCCCCCCCGCCCCACCCGCCCACCGCCCTGTCCAAAGCCCGCCCCACGCCCGGCCCGCGCCCGCCAGCCCCGCCCCGTGGCCGCCGCCCCCGCCCGCCCCCCACCCGCCCCCCTACCTGCCCGCCTGGCCGCCGCCTCCCATCCCACCCGCCACCCGCCCCCTGTGCCCCACTTCCAGCCCGGCCGCCCATCCCCGCCCCCCCCCCCCACCCACCGCCCCCCGGCCAGCCCCCGCCCCCGCCCCGCCCGCCCGGCGCCGGCCCCATGCCCCCACCCCGCTGCCCCGCGCCCCCGCCCAGCCACACCCACCCCGCCGCCCCACCCGCCCCCACCGGCCCCGCCCGGCGCAGCCCGGCAGCTTCCCCCGCCCACGCCGCCCCCTTGCCAGCCCACCCCGCAGCTCACCCACCAGCCCATCCCGCTCCCCCGGCGCCCCAGCCGCCCGCCCACCCGCCGCGCACCCGCCCCCGCCCGCTCCCCCCCGCCCACCCGCCCGCCTCGCCCATCTGCTCCCCCCACCCACCCCCGCGCCCATGCGCCCCCCGCCCACCCCGCGCCCACCCGCCCCCCGCCCCACCCACCCACCCCGCCCCGCCCCACTTGCCCCCACGCCCCCGCCCCCCGCCGCCGCCCTCCCACACACGCTTCCCAGCCCGCTTCGCACAGCGCCTACCGCCGCCCCACCCGCCCCTTAAAAGCCACCGGCGCCCGCGCACCCACCCCGCCATCCAGCAGCCAACCAGCCCTTGCTTCCCCATACTTGTAAAGCTTCCCCGCACCCGCCCGCCTTCCACCCACCCACCGCCCCCCCGGCCCGCCGCCCGGCCTCGC +>read2 899 +CCGGCCCCGGCCCAGCCTTGCCCGCCTTCTCGAAGCGCCGCCGCGCCTCCAGCCCAGCCCGCCCACCCTCGCCCGCTTGCCCCCGGCTCGCGCTCCAGGCGCCCGGCCCCGGCCCCGCCCGCGCTCTGCCATTTCCACTTCCCACCGGCCAAAGCCCGCCGGCATCCCGCGCGCCCGCACAAGCCAAGCTTCCCCGGCTTGGCGCCATTTAAGCAAAAGCCGCCCGCCAACGCCAGCGCCCGGCGCCCGGCCCGCCACCGGCAGCGCCGCCCTTTCCAGCCCGGCGCTGGCCATCGGCCAGCGCACCACGAAGCCAGCCCACCGCCAGCCCGCCAAAGCCGCTCCCACAAGCCAAAGCCCGCCCACCCCCAGCCACCCATACCCCGCTCGCCCGCCAGCTTCTGTATGCCCACCCAGCGCACTTGCTTTCCACCCCCACCCCCGCCCCGCGCCCGCCCGCCCGCCACCCCTGCCCCGCCCCCCGCCGCTGCCTTACCGCTGCCCCAAAGCGCCCTTCCAGCACTTCCTTAGCCACCCTTTTTAGCTTCACTTCCACCTACCGCTTGCCGCCGGCTGCTTCCCCGCCCATCCCACCGCACCCGCCCGCGCCTTATGCGCACCGCCCGCGCCCGGCCTGCAAGCCAGCTTCGCGCGCCCGCTTCCACCACCCCCGCCAGCCCGCTTCCTGCTTGCCCCCGCCGCACCGCCCACCCACCCCCACCGGCCAGCGCTCATATGCCCCCGCCGCACCAGCACGCCACCGCCCGCCTCCTTCGCCAGCCCGCCCACCCGCCGCCCAGCCACCGCACCCACTCAAGGGGTCGCCTCTCATGCGCATCATGCGCAAAAAAAAAAACGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAA