Merge branch 'DOR-466_cuda_oom_during_batch_size_calc' into 'master'
DOR-466 Fix Cuda oom during batch size calc

Closes DOR-466

See merge request machine-learning/dorado!804
hpendry-ont committed Jan 12, 2024
2 parents 5177e87 + bdac076 commit 0fa2c2f
Showing 1 changed file with 4 additions and 0 deletions.

dorado/basecall/CudaCaller.cpp: 4 additions, 0 deletions
@@ -265,6 +265,10 @@ int CudaCaller::determine_batch_size(const CRFModelConfig &model_config,
                 best_time = time;
                 best_batch_size = batch_size;
             }
+
+            // Clear the cache each time. Without this, intermittent cuda memory allocation errors
+            // are seen on windows laptop NVIDIA RTX A5500 Laptop GPU. See JIRA issue DOR-466
+            c10::cuda::CUDACachingAllocator::emptyCache();
         }
     } else {
         spdlog::debug("Maximum safe estimated batch size for {}: {}", m_device, max_batch_size);
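The fix works because a caching allocator retains freed blocks for reuse rather than returning them to the device, so memory held over from earlier batch-size probes can push a later, larger probe over the limit. Below is a minimal, self-contained sketch of that failure mode and the fix, using a toy allocator; it is illustrative only (all names are hypothetical, not Dorado or c10 code), and in PyTorch the analogous call is `torch.cuda.empty_cache()` / `c10::cuda::CUDACachingAllocator::emptyCache()`.

```python
class CachingAllocator:
    """Toy caching allocator: freed blocks are retained in a cache for reuse,
    and both in-use and cached bytes count against device capacity."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.cached = []   # sizes of freed-but-retained blocks
        self.in_use = 0

    def allocate(self, size):
        if size in self.cached:
            # Exact-size reuse from the cache, no new device memory needed.
            self.cached.remove(size)
        elif self.in_use + sum(self.cached) + size > self.capacity:
            raise MemoryError(f"OOM allocating {size}")
        self.in_use += size
        return size

    def free(self, size):
        # Freed memory goes to the cache, not back to the device.
        self.in_use -= size
        self.cached.append(size)

    def empty_cache(self):
        # Analogous to CUDACachingAllocator::emptyCache(): return cached
        # blocks to the device so later allocations can use that memory.
        self.cached.clear()


def probe(allocator, batch_sizes, per_item, clear_between):
    """Try successively larger batch sizes; return the ones that fit."""
    ok = []
    for bs in batch_sizes:
        try:
            buf = allocator.allocate(bs * per_item)
            allocator.free(buf)
            ok.append(bs)
        except MemoryError:
            break
        if clear_between:
            allocator.empty_cache()
    return ok


# Without clearing, blocks cached by earlier probes count against capacity
# and the largest probe OOMs; clearing between probes lets every size fit.
sizes = [64, 128, 256]
print(probe(CachingAllocator(capacity=300), sizes, per_item=1, clear_between=False))  # [64, 128]
print(probe(CachingAllocator(capacity=300), sizes, per_item=1, clear_between=True))   # [64, 128, 256]
```

The trade-off mirrors the real fix: `emptyCache()` between probes costs some reallocation work, but the probe loop runs only once at startup, so correctness of the measured maximum batch size matters more than cache reuse.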
