From b4f9f4814f23903eb79ea49776b234e8569574c3 Mon Sep 17 00:00:00 2001 From: Kevin Dolan Date: Thu, 16 May 2024 17:21:12 +0100 Subject: [PATCH 1/2] Changed qs tag type to float. --- documentation/SAM.md | 2 +- dorado/read_pipeline/messages.cpp | 8 ++++---- dorado/summary/summary.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/documentation/SAM.md b/documentation/SAM.md index a63183d2..0683431f 100644 --- a/documentation/SAM.md +++ b/documentation/SAM.md @@ -25,7 +25,7 @@ | | | | ------ | -----------------------------------------------------------| | RG:Z: | `__` | -| qs:i: | mean basecall qscore rounded to the nearest integer | +| qs:f: | mean basecall qscore | | ts:i: | the number of samples trimmed from the start of the signal | | ns:i: | the basecalled sequence corresponds to the interval `signal[ts : ns]`
the move table maps to the same interval.
note that `ns` reflects trimming (if any) from the rear
of the signal. | | mx:i: | read mux | diff --git a/dorado/read_pipeline/messages.cpp b/dorado/read_pipeline/messages.cpp index dedf62e5..4390d9a2 100644 --- a/dorado/read_pipeline/messages.cpp +++ b/dorado/read_pipeline/messages.cpp @@ -37,8 +37,8 @@ std::string ReadCommon::generate_read_group() const { } void ReadCommon::generate_read_tags(bam1_t *aln, bool emit_moves, bool is_duplex_parent) const { - int qs = static_cast(std::round(calculate_mean_qscore())); - bam_aux_append(aln, "qs", 'i', sizeof(qs), (uint8_t *)&qs); + float qs = calculate_mean_qscore(); + bam_aux_append(aln, "qs", 'f', sizeof(qs), (uint8_t *)&qs); float du = (float)(get_raw_data_samples() + num_trimmed_samples) / (float)sample_rate; bam_aux_append(aln, "du", 'f', sizeof(du), (uint8_t *)&du); @@ -108,8 +108,8 @@ void ReadCommon::generate_read_tags(bam1_t *aln, bool emit_moves, bool is_duplex } void ReadCommon::generate_duplex_read_tags(bam1_t *aln) const { - int qs = static_cast(std::round(calculate_mean_qscore())); - bam_aux_append(aln, "qs", 'i', sizeof(qs), (uint8_t *)&qs); + float qs = calculate_mean_qscore(); + bam_aux_append(aln, "qs", 'f', sizeof(qs), (uint8_t *)&qs); uint32_t duplex = 1; bam_aux_append(aln, "dx", 'i', sizeof(duplex), (uint8_t *)&duplex); diff --git a/dorado/summary/summary.cpp b/dorado/summary/summary.cpp index 6a668e63..58fd463f 100644 --- a/dorado/summary/summary.cpp +++ b/dorado/summary/summary.cpp @@ -168,7 +168,7 @@ bool SummaryData::write_rows_from_reader( auto duration = reader.get_tag("du"); auto seqlen = reader.record->core.l_qseq; - auto mean_qscore = reader.get_tag("qs"); + auto mean_qscore = reader.get_tag("qs"); auto num_samples = reader.get_tag("ns"); auto trim_samples = reader.get_tag("ts"); From 661ddd78561c1eb55a18fde2c091d2bfe3230567 Mon Sep 17 00:00:00 2001 From: Kevin Dolan Date: Fri, 17 May 2024 10:52:37 +0100 Subject: [PATCH 2/2] Fixed test that was broken by qs type change. --- tests/ReadTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ReadTest.cpp b/tests/ReadTest.cpp index 2e84f4ea..8c050b2e 100644 --- a/tests/ReadTest.cpp +++ b/tests/ReadTest.cpp @@ -36,7 +36,7 @@ TEST_CASE(TEST_GROUP ": Test tag generation", TEST_GROUP) { REQUIRE(alignments.size() == 1); bam1_t* aln = alignments[0].get(); - CHECK(bam_aux2i(bam_aux_get(aln, "qs")) == 14); + CHECK(bam_aux2f(bam_aux_get(aln, "qs")) == 14.0f); CHECK(bam_aux2i(bam_aux_get(aln, "ns")) == 4132); CHECK(bam_aux2i(bam_aux_get(aln, "ts")) == 132); CHECK(bam_aux2i(bam_aux_get(aln, "mx")) == 2);