Skip to content

Commit

Permalink
Merge branch 'master' into mvella/fix-mod-duplex-segv
Browse files Browse the repository at this point in the history
  • Loading branch information
vellamike committed Jan 17, 2024
2 parents d2bb114 + 5dcea6a commit 7c62595
Show file tree
Hide file tree
Showing 81 changed files with 1,033 additions and 1,132 deletions.
1 change: 1 addition & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ IncludeCategories:
IncludeIsMainRegex: '(_test|_tests|Tests|Test)?$'
IndentCaseLabels: false
IndentWidth: 4
InsertBraces: true
PointerAlignment: Middle
ReflowComments: false
SortIncludes: true
Expand Down
75 changes: 40 additions & 35 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,59 +187,64 @@ set(LIB_SOURCE_FILES
dorado/api/runner_creation.h
dorado/api/pipeline_creation.cpp
dorado/api/pipeline_creation.h
dorado/read_pipeline/FakeDataLoader.cpp
dorado/read_pipeline/FakeDataLoader.h
dorado/read_pipeline/ReadPipeline.cpp
dorado/read_pipeline/ReadPipeline.h
dorado/read_pipeline/ClientInfo.h
dorado/read_pipeline/DefaultClientInfo.h
dorado/read_pipeline/ScalerNode.cpp
dorado/read_pipeline/ScalerNode.h
dorado/read_pipeline/StereoDuplexEncoderNode.cpp
dorado/read_pipeline/StereoDuplexEncoderNode.h
dorado/read_pipeline/AdapterDetectorNode.cpp
dorado/read_pipeline/AdapterDetectorNode.h
dorado/read_pipeline/AlignerNode.cpp
dorado/read_pipeline/AlignerNode.h
dorado/read_pipeline/BarcodeClassifierNode.cpp
dorado/read_pipeline/BarcodeClassifierNode.h
dorado/read_pipeline/BarcodeDemuxerNode.cpp
dorado/read_pipeline/BarcodeDemuxerNode.h
dorado/read_pipeline/BasecallerNode.cpp
dorado/read_pipeline/BasecallerNode.h
dorado/read_pipeline/ModBaseCallerNode.cpp
dorado/read_pipeline/ModBaseCallerNode.h
dorado/read_pipeline/ReadFilterNode.cpp
dorado/read_pipeline/ReadFilterNode.h
dorado/read_pipeline/ReadToBamTypeNode.cpp
dorado/read_pipeline/ReadToBamTypeNode.h
dorado/read_pipeline/SubreadTaggerNode.cpp
dorado/read_pipeline/SubreadTaggerNode.h
dorado/read_pipeline/BaseSpaceDuplexCallerNode.cpp
dorado/read_pipeline/BaseSpaceDuplexCallerNode.h
dorado/read_pipeline/AlignerNode.cpp
dorado/read_pipeline/AlignerNode.h
dorado/read_pipeline/ClientInfo.h
dorado/read_pipeline/DefaultClientInfo.h
dorado/read_pipeline/DuplexReadTaggingNode.cpp
dorado/read_pipeline/DuplexReadTaggingNode.h
dorado/read_pipeline/FakeDataLoader.cpp
dorado/read_pipeline/FakeDataLoader.h
dorado/read_pipeline/HtsReader.cpp
dorado/read_pipeline/HtsReader.h
dorado/read_pipeline/HtsWriter.cpp
dorado/read_pipeline/HtsWriter.h
dorado/read_pipeline/ProgressTracker.h
dorado/read_pipeline/ResumeLoaderNode.cpp
dorado/read_pipeline/ResumeLoaderNode.h
dorado/read_pipeline/DuplexReadTaggingNode.cpp
dorado/read_pipeline/DuplexReadTaggingNode.h
dorado/read_pipeline/BarcodeClassifierNode.cpp
dorado/read_pipeline/BarcodeClassifierNode.h
dorado/read_pipeline/BarcodeDemuxerNode.cpp
dorado/read_pipeline/BarcodeDemuxerNode.h
dorado/read_pipeline/AdapterDetectorNode.cpp
dorado/read_pipeline/AdapterDetectorNode.h
dorado/read_pipeline/MessageSink.cpp
dorado/read_pipeline/MessageSink.h
dorado/read_pipeline/ModBaseCallerNode.cpp
dorado/read_pipeline/ModBaseCallerNode.h
dorado/read_pipeline/NullNode.h
dorado/read_pipeline/NullNode.cpp
dorado/read_pipeline/PairingNode.cpp
dorado/read_pipeline/PairingNode.h
dorado/read_pipeline/PolyACalculator.cpp
dorado/read_pipeline/PolyACalculator.h
dorado/read_pipeline/PolyACalculatorNode.cpp
dorado/read_pipeline/PolyACalculatorNode.h
dorado/read_pipeline/ProgressTracker.h
dorado/read_pipeline/ReadFilterNode.cpp
dorado/read_pipeline/ReadFilterNode.h
dorado/read_pipeline/ReadPipeline.cpp
dorado/read_pipeline/ReadPipeline.h
dorado/read_pipeline/ReadSplitNode.cpp
dorado/read_pipeline/ReadSplitNode.h
dorado/read_pipeline/ReadToBamTypeNode.cpp
dorado/read_pipeline/ReadToBamTypeNode.h
dorado/read_pipeline/ResumeLoaderNode.cpp
dorado/read_pipeline/ResumeLoaderNode.h
dorado/read_pipeline/ScalerNode.cpp
dorado/read_pipeline/ScalerNode.h
dorado/read_pipeline/StereoDuplexEncoderNode.cpp
dorado/read_pipeline/StereoDuplexEncoderNode.h
dorado/read_pipeline/SubreadTaggerNode.cpp
dorado/read_pipeline/SubreadTaggerNode.h
dorado/read_pipeline/messages.cpp
dorado/read_pipeline/messages.h
dorado/read_pipeline/flush_options.h
dorado/read_pipeline/read_utils.cpp
dorado/read_pipeline/read_utils.h
dorado/read_pipeline/stereo_features.cpp
dorado/read_pipeline/stereo_features.h
dorado/read_pipeline/stitch.cpp
dorado/read_pipeline/stitch.h
dorado/read_pipeline/ReadSplitNode.cpp
dorado/read_pipeline/ReadSplitNode.h
dorado/splitter/DuplexReadSplitter.cpp
dorado/splitter/DuplexReadSplitter.h
dorado/splitter/RNAReadSplitter.cpp
Expand Down
4 changes: 1 addition & 3 deletions DEV.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,5 @@ $ brew link autoconf@2.69

The following additional packages also need to be installed:
```bash
brew install openssl
brew install zstd
brew install libaec
brew install openssl zstd hdf5
```
24 changes: 15 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -341,19 +341,25 @@ Here are a few examples of model complexes:
| fast | Latest compatible **fast** model |
| hac | Latest compatible **hac** model |
| sup | Latest compatible **sup** model |
| hac@latest | Latest compatible **hac** model |
| hac@v4.2.0 | Compatible **hac** model with version `v4.2.0` |
| hac@v3.5 | Compatible **hac** model with version `v3.5.0` |
| hac,5mCG_5hmCG | Latest compatible **hac** model and latest **5mCG_5hmCG** modifications model |
| hac,5mCG_5hmCG@v2 | Latest compatible **hac** model and **5mCG_5hmCG** modifications model with version `v2.0.0` |
| sup,5mCG_5hmCG,6mA | Latest compatible **sup** model and both **5mCG_5hmCG** and **6mA** latest modifications models |
| hac@latest | Latest compatible **hac** simplex basecalling model |
| hac@v4.2.0 | Simplex basecalling **hac** model with version `v4.2.0` |
| hac@v3.5 | Simplex basecalling **hac** model with version `v3.5.0` |
| hac,5mCG_5hmCG | Latest compatible **hac** simplex model and latest **5mCG_5hmCG** modifications model for the chosen basecall model |
| hac,5mCG_5hmCG@v2 | Latest compatible **hac** simplex model and **5mCG_5hmCG** modifications model with version `v2.0.0` |
| sup,5mCG_5hmCG,6mA | Latest compatible **sup** model and latest compatible **5mCG_5hmCG** and **6mA** modifications models |

Automatically selected modification models will always match the base simplex model version selected. Noting the highlighted version changes, for example:
### Modification model versioning

The versioning of modification models is bound to the simplex model. In other words, the modification model version is reset for each new simplex model release.

Automatically selected modification models will always match the base simplex model version and will be the latest compatible version unless a specific version is set by the user. Automatic modification model selection will not allow the mixing of modification models which are bound to different simplex model versions.

Note the highlighted version changes in the example below:

| Model Complex | Description | Models |
| :------------ | :---------- | :---------- |
| sup,5mCG_5hmCG | Latest compatible **sup** model and latest **5mCG_5hmCG** modifications model (`v3.1.0`) | dna_r10.4.1_e8.2_400bps_sup@v4.2.0 <br /> dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v3.1 |
| sup@v4.1,5mCG_5hmCG | Compatible **sup** model with version `v4.1.0` and latest **5mCG_5hmCG** modifications model (`v2.0.0`) | dna_r10.4.1_e8.2_400bps_sup@`v4.1.0`<br />dna_r10.4.1_e8.2_400bps_sup@`v4.1.0`_5mCG_5hmCG@`v2` |
| sup,5mCG_5hmCG | Latest compatible **sup** model and latest **5mCG_5hmCG** modifications model | dna_r10.4.1_e8.2_400bps_sup@`v4.3.0` <br /> dna_r10.4.1_e8.2_400bps_sup@`v4.3.0`_5mCG_5hmCG@`v1` |
| sup@v4.1,5mCG_5hmCG | Compatible **sup** model with version `v4.1.0` and latest **5mCG_5hmCG** modifications model | dna_r10.4.1_e8.2_400bps_sup@`v4.1.0`<br />dna_r10.4.1_e8.2_400bps_sup@`v4.1.0`_5mCG_5hmCG@`v2` |

## Developer quickstart

Expand Down
4 changes: 2 additions & 2 deletions cmake/HDF5.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ else()
set(HDF5_USE_STATIC_LIBRARIES ON)
endif()

find_package(ZLIB QUIET)
find_package(HDF5 COMPONENTS C QUIET)
find_package(ZLIB REQUIRED)
find_package(HDF5 COMPONENTS C REQUIRED)
2 changes: 1 addition & 1 deletion cmake/Pod5.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(POD5_VERSION 0.2.2)
set(POD5_VERSION 0.2.4)
set(POD5_DIR pod5-${POD5_VERSION}-${CMAKE_SYSTEM_NAME})
set(POD5_REPO "https://github.com/nanoporetech/pod5-file-format")
set(POD5_INCLUDE ${DORADO_3RD_PARTY_DOWNLOAD}/${POD5_DIR}/include)
Expand Down
10 changes: 5 additions & 5 deletions documentation/CustomBarcodes.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ The following are all the options that can be defined in the arrangement file.
name = "custom_barcode"
kit = "BC"
mask_1_front = "ATCG"
mask_1_rear = "ATCG"
mask_2_front = "TTAA"
mask_2_rear = "GGCC"
mask1_front = "ATCG"
mask1_rear = "ATCG"
mask2_front = "TTAA"
mask2_rear = "GGCC"
# Barcode sequences
barcode1_pattern = "BC%02i"
Expand All @@ -44,7 +44,7 @@ last_index = 96
min_soft_barcode_threshold = 0.2
min_hard_barcode_threshold = 0.2
min_soft_flank_threshold = 0.3
min_hard_barcode_threshold = 0.3
min_hard_flank_threshold = 0.3
min_barcode_score_dist = 0.1
```

Expand Down
24 changes: 15 additions & 9 deletions dorado/alignment/Minimap2Aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ void add_sa_tag(bam1_t* record,
int32_t hits,
int32_t aln_idx,
int32_t l_seq,
const mm_idx_t* idx,
const bool use_hard_clip) {
const auto clip_char = use_hard_clip ? "H" : "S";
const mm_idx_t* idx) {
std::stringstream ss;
for (int i = 0; i < hits; i++) {
if (i == aln_idx) {
Expand Down Expand Up @@ -50,7 +48,7 @@ void add_sa_tag(bam1_t* record,
ss << r->rs + 1 << ",";
ss << "+-"[r->rev] << ",";
if (clip5) {
ss << clip5 << clip_char;
ss << clip5 << "S";
}
if (num_matches) {
ss << num_matches << "M";
Expand All @@ -62,7 +60,7 @@ void add_sa_tag(bam1_t* record,
ss << num_deletes << "D";
}
if (clip3) {
ss << clip3 << clip_char;
ss << clip3 << "S";
}
ss << "," << r->mapq << "," << (r->blen - r->mlen + r->p->n_ambi) << ";";
}
Expand Down Expand Up @@ -122,8 +120,9 @@ std::vector<BamPtr> Minimap2Aligner::align(bam1_t* irecord, mm_tbuf_t* buf) {
flag |= BAM_FSUPPLEMENTARY;
}

if ((flag & BAM_FSECONDARY) && (mm_map_opts.flag & MM_F_NO_PRINT_2ND))
if ((flag & BAM_FSECONDARY) && (mm_map_opts.flag & MM_F_NO_PRINT_2ND)) {
continue;
}

const bool skip_seq_qual = !(mm_map_opts.flag & MM_F_SOFTCLIP) && (flag & BAM_FSECONDARY) &&
!(mm_map_opts.flag & MM_F_SECONDARY_SEQ);
Expand Down Expand Up @@ -193,10 +192,12 @@ std::vector<BamPtr> Minimap2Aligner::align(bam1_t* irecord, mm_tbuf_t* buf) {
}
if (use_hard_clip) {
l_seq -= clip_len[0] + clip_len[1];
if (seq_tmp)
if (seq_tmp) {
seq_tmp += clip_len[0];
if (qual_tmp)
}
if (qual_tmp) {
qual_tmp += clip_len[0];
}
}

// new output record
Expand All @@ -220,7 +221,12 @@ std::vector<BamPtr> Minimap2Aligner::align(bam1_t* irecord, mm_tbuf_t* buf) {

// Add new tags to match minimap2.
add_tags(record, aln, seq, buf);
add_sa_tag(record, reg, hits, j, static_cast<int>(l_seq), mm_index, use_hard_clip);
if (!skip_seq_qual) {
// Pass the original query length (before any hard clipping) here, because
// the CIGAR string in the SA tag only makes use of soft clips, and for that
// to be correct the unclipped query length is needed.
add_sa_tag(record, reg, hits, j, static_cast<int>(seq.size()), mm_index);
}

// Remove MM/ML/MN tags if secondary alignment and soft clipping is not enabled.
if ((flag & (BAM_FSUPPLEMENTARY | BAM_FSECONDARY)) && !(mm_map_opts.flag & MM_F_SOFTCLIP)) {
Expand Down
9 changes: 0 additions & 9 deletions dorado/api/runner_creation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,6 @@ std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
for (size_t i = 0; i < num_gpu_runners; i++) {
runners.push_back(std::make_unique<basecall::CudaModelRunner>(callers[j]));
}
if (batch_size == 0) {
spdlog::info(" - set batch size for {} to {}", devices[j],
runners.back()->batch_size());
} else {
if (runners.back()->batch_size() != batch_size) {
spdlog::warn("- set batch size for {} to {}", devices[j],
runners.back()->batch_size());
}
}
}
}
#else
Expand Down

0 comments on commit 7c62595

Please sign in to comment.