From 6170326f7d3d5133136cc41780c50138b235c298 Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Wed, 31 Jan 2024 15:32:56 +0000 Subject: [PATCH] Fix stream creation for modbase Input tensors are CPU pinned memory, but rather than failing they return a default cuda stream. We want a real stream on the real device, so we need to fetch that from the caller. --- dorado/modbase/ModBaseCaller.h | 1 + dorado/modbase/ModBaseRunner.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dorado/modbase/ModBaseCaller.h b/dorado/modbase/ModBaseCaller.h index 8a476215..c025b1c6 100644 --- a/dorado/modbase/ModBaseCaller.h +++ b/dorado/modbase/ModBaseCaller.h @@ -52,6 +52,7 @@ class ModBaseCaller { std::vector create_input_sig_tensors() const; std::vector create_input_seq_tensors() const; + c10::Device device() const { return m_options.device(); } at::Tensor call_chunks(size_t model_id, at::Tensor& input_sigs, diff --git a/dorado/modbase/ModBaseRunner.cpp b/dorado/modbase/ModBaseRunner.cpp index ee6c6ee9..da4ea134 100644 --- a/dorado/modbase/ModBaseRunner.cpp +++ b/dorado/modbase/ModBaseRunner.cpp @@ -14,12 +14,12 @@ namespace { #if DORADO_CUDA_BUILD -std::vector> get_streams_from_tensors( - const std::vector& tensors) { +std::vector> get_streams_from_caller( + const std::shared_ptr& caller) { std::vector> streams; - for (const auto& tensor : tensors) { - if (tensor.device().is_cuda()) { - streams.push_back(c10::cuda::getStreamFromPool(false, tensor.device().index())); + for (size_t i = 0; i < caller->num_model_callers(); ++i) { + if (caller->device().is_cuda()) { + streams.push_back(c10::cuda::getStreamFromPool(false, caller->device().index())); } else { streams.emplace_back(); } @@ -37,7 +37,7 @@ ModBaseRunner::ModBaseRunner(std::shared_ptr caller) m_input_seqs(m_caller->create_input_seq_tensors()) #if DORADO_CUDA_BUILD , - m_streams(get_streams_from_tensors(m_input_sigs)) + m_streams(get_streams_from_caller(m_caller)) #endif { }