Skip to content

Commit

Permalink
Merge branch 'duplex_memory' into 'master'
Browse files Browse the repository at this point in the history
Reduce working reads size, in particular for duplex.

See merge request machine-learning/dorado!670
  • Loading branch information
StuartAbercrombie committed Nov 1, 2023
2 parents 54e14ca + 8c43d3c commit 7c1c0f0
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
8 changes: 6 additions & 2 deletions dorado/read_pipeline/BasecallerNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ void BasecallerNode::input_worker_thread() {
// Put the read in the working list
{
// Guard both the working-read set and the derived counters so they
// stay consistent with each other.
std::lock_guard working_reads_lock(m_working_reads_mutex);
// Account for this read's raw signal memory before it becomes
// visible in m_working_reads, so the stat never undercounts.
m_working_reads_signal_bytes +=
get_read_common_data(working_read->read).raw_data.nbytes();
m_working_reads.insert(std::move(working_read));
// Item count is tracked separately (atomic) for cheap stats reads.
++m_working_reads_size;
}
Expand Down Expand Up @@ -169,6 +171,7 @@ void BasecallerNode::working_reads_manager() {
std::unique_lock<std::mutex> working_reads_lock(m_working_reads_mutex);
auto read_iter = m_working_reads.find(working_read);
if (read_iter != m_working_reads.end()) {
// Release this read's contribution to the tracked signal-memory
// total before erasing it; mirrors the += on insertion.
m_working_reads_signal_bytes -= read_common_data.raw_data.nbytes();
m_working_reads.erase(read_iter);
--m_working_reads_size;
} else {
Expand Down Expand Up @@ -282,11 +285,11 @@ namespace {

// Calculates the input queue size: the maximum number of chunks allowed
// to be queued on the chunks_in queue across all model runners.
//
// @param model_runners  The set of runners that will consume the queue.
// @return Sum over runners of (batch size * 2).
size_t CalcMaxChunksIn(const std::vector<Runner> &model_runners) {
// Allow 2 batches per model runner on the chunks_in queue. Kept small
// deliberately to reduce working-read memory, in particular for duplex.
size_t max_chunks_in = 0;
// Allows optimal batch size to be used for every GPU: each runner
// contributes according to its own batch size rather than a global one.
for (auto &runner : model_runners) {
max_chunks_in += runner->batch_size() * 2;
}
return max_chunks_in;
}
Expand Down Expand Up @@ -383,6 +386,7 @@ stats::NamedStats BasecallerNode::sample_stats() const {
stats["call_chunks_ms"] = m_call_chunks_ms;
stats["called_reads_pushed"] = m_called_reads_pushed;
stats["working_reads_items"] = m_working_reads_size;
// Report tracked raw-signal memory in MiB for readability.
stats["working_reads_signal_mb"] = m_working_reads_signal_bytes / (1024 * 1024);
stats["bases_processed"] = m_num_bases_processed;
stats["samples_processed"] = m_num_samples_processed;
return stats;
Expand Down
1 change: 1 addition & 0 deletions dorado/read_pipeline/BasecallerNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class BasecallerNode : public MessageSink {
// Number of reads currently held in m_working_reads.
std::atomic<int64_t> m_working_reads_size = 0;
// Running totals for throughput stats (see sample_stats()).
std::atomic<int64_t> m_num_bases_processed = 0;
std::atomic<int64_t> m_num_samples_processed = 0;
// Total bytes of raw signal held by reads in m_working_reads; updated
// under m_working_reads_mutex alongside insert/erase.
std::atomic<int64_t> m_working_reads_signal_bytes = 0;
};

} // namespace dorado

0 comments on commit 7c1c0f0

Please sign in to comment.