Merge branch 'DOR-88_multiple_chunk_sizes' into 'master'
DOR-88: Implement multiple chunk sizes in CudaCaller

Closes DOR-88

See merge request machine-learning/dorado!828
GKolling committed Feb 19, 2024
2 parents b4fdb24 + 7880345 commit e65eaf4
Showing 19 changed files with 290 additions and 198 deletions.
12 changes: 6 additions & 6 deletions dorado/api/caller_creation.cpp
@@ -12,14 +12,14 @@ namespace dorado::api {

#if DORADO_CUDA_BUILD
std::shared_ptr<basecall::CudaCaller> create_cuda_caller(
const basecall::CRFModelConfig & model_config,
const basecall::CRFModelConfig& model_config,
int chunk_size,
int batch_size,
const std::string & device,
const std::string& device,
float memory_limit_fraction,
bool exclusive_gpu_access) {
PipelineType pipeline_type) {
return std::make_shared<basecall::CudaCaller>(model_config, chunk_size, batch_size, device,
memory_limit_fraction, exclusive_gpu_access);
memory_limit_fraction, pipeline_type);
}
#elif DORADO_METAL_BUILD
std::shared_ptr<basecall::MetalCaller> create_metal_caller(
@@ -33,9 +33,9 @@ std::shared_ptr<basecall::MetalCaller> create_metal_caller(
#endif

std::shared_ptr<modbase::ModBaseCaller> create_modbase_caller(
const std::vector<std::filesystem::path> & model_paths,
const std::vector<std::filesystem::path>& model_paths,
int batch_size,
const std::string & device) {
const std::string& device) {
return std::make_shared<modbase::ModBaseCaller>(model_paths, batch_size, device);
}

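For orientation, a usage sketch of the updated CUDA factory: the final parameter is now a basecall::PipelineType instead of the old bool exclusive_gpu_access. This is an illustration, not repository code; the enumerator name simplex, the include paths, and the "batch size 0 means auto-select" convention are assumptions, and the real PipelineType values live in basecall/ModelRunnerBase.h, which this diff does not show.

// Illustrative sketch only -- not code from the repository.
#include "api/caller_creation.h"      // include paths are assumptions
#include "basecall/CRFModelConfig.h"

#include <memory>

#if DORADO_CUDA_BUILD
std::shared_ptr<dorado::basecall::CudaCaller> make_caller(
        const dorado::basecall::CRFModelConfig& model_config) {
    return dorado::api::create_cuda_caller(
            model_config,
            10000,                                // requested chunk size (samples)
            0,                                    // batch size (0 = auto-select, assumption)
            "cuda:0",                             // CUDA device string
            1.0f,                                 // memory_limit_fraction
            dorado::api::PipelineType::simplex);  // hypothetical enumerator; replaces the old bool
}
#endif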
13 changes: 8 additions & 5 deletions dorado/api/caller_creation.h
@@ -1,5 +1,7 @@
#pragma once

#include "basecall/ModelRunnerBase.h"

#include <filesystem>
#include <memory>
#include <string>
@@ -21,15 +23,16 @@ class ModBaseCaller;
}

namespace dorado::api {
using dorado::basecall::PipelineType;

#if DORADO_CUDA_BUILD
std::shared_ptr<basecall::CudaCaller> create_cuda_caller(
const basecall::CRFModelConfig & model_config,
const basecall::CRFModelConfig& model_config,
int chunk_size,
int batch_size,
const std::string & device,
const std::string& device,
float memory_limit_fraction,
bool exclusive_gpu_access);
PipelineType pipeline_type);
#elif DORADO_METAL_BUILD
std::shared_ptr<basecall::MetalCaller> create_metal_caller(
const basecall::CRFModelConfig& model_config,
@@ -39,8 +42,8 @@ std::shared_ptr<basecall::MetalCaller> create_metal_caller(
#endif

std::shared_ptr<modbase::ModBaseCaller> create_modbase_caller(
const std::vector<std::filesystem::path> & model_paths,
const std::vector<std::filesystem::path>& model_paths,
int batch_size,
const std::string & device);
const std::string& device);

} // namespace dorado::api
17 changes: 7 additions & 10 deletions dorado/api/pipeline_creation.cpp
@@ -37,7 +37,6 @@ void create_simplex_pipeline(PipelineDescriptor& pipeline_desc,
overlap = adjusted_overlap;
}

const int kBatchTimeoutMS = 100;
std::string model_name =
std::filesystem::canonical(model_config.model_path).filename().string();

@@ -67,9 +66,9 @@ void create_simplex_pipeline(PipelineDescriptor& pipeline_desc,
first_node_handle = scaler_node;
}
current_node_handle = scaler_node;
auto basecaller_node = pipeline_desc.add_node<BasecallerNode>(
{}, std::move(runners), overlap, kBatchTimeoutMS, model_name, 1000, "BasecallerNode",
mean_qscore_start_pos);
auto basecaller_node =
pipeline_desc.add_node<BasecallerNode>({}, std::move(runners), overlap, model_name,
1000, "BasecallerNode", mean_qscore_start_pos);
pipeline_desc.add_node_sink(current_node_handle, basecaller_node);
current_node_handle = basecaller_node;
last_node_handle = basecaller_node;
@@ -127,11 +126,10 @@ void create_stereo_duplex_pipeline(PipelineDescriptor& pipeline_desc,
auto duplex_rg_name = std::string(model_name + "_" + stereo_model_name);
auto stereo_model_stride = stereo_runners.front()->model_stride();
auto adjusted_stereo_overlap = (overlap / stereo_model_stride) * stereo_model_stride;
const int kStereoBatchTimeoutMS = 5000;

auto stereo_basecaller_node = pipeline_desc.add_node<BasecallerNode>(
{}, std::move(stereo_runners), adjusted_stereo_overlap, kStereoBatchTimeoutMS,
duplex_rg_name, 1000, "StereoBasecallerNode", mean_qscore_start_pos);
{}, std::move(stereo_runners), adjusted_stereo_overlap, duplex_rg_name, 1000,
"StereoBasecallerNode", mean_qscore_start_pos);

NodeHandle last_node_handle = stereo_basecaller_node;
if (!modbase_runners.empty()) {
@@ -169,10 +167,9 @@

auto adjusted_simplex_overlap = (overlap / simplex_model_stride) * simplex_model_stride;

const int kSimplexBatchTimeoutMS = 100;
auto basecaller_node = pipeline_desc.add_node<BasecallerNode>(
{splitter_node}, std::move(runners), adjusted_simplex_overlap, kSimplexBatchTimeoutMS,
model_name, 1000, "BasecallerNode", mean_qscore_start_pos);
{splitter_node}, std::move(runners), adjusted_simplex_overlap, model_name, 1000,
"BasecallerNode", mean_qscore_start_pos);

auto scaler_node = pipeline_desc.add_node<ScalerNode>(
{basecaller_node}, model_config.signal_norm_params, basecall::SampleType::DNA, false,
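Worth noting in this file: the per-call-site timeout constants (kBatchTimeoutMS = 100, kStereoBatchTimeoutMS = 5000, kSimplexBatchTimeoutMS = 100) are deleted and BasecallerNode no longer receives a timeout argument, so the batching timeout is presumably chosen inside the node or its runners, plausibly from the PipelineType now threaded through the callers. A purely hypothetical sketch of such a selection follows; the function name, its location, and the enumerator names are assumptions, with only the 100 ms and 5000 ms values taken from the removed constants.

// Hypothetical sketch -- not the actual dorado implementation.
#include "basecall/ModelRunnerBase.h"

namespace dorado::basecall {

inline int default_batch_timeout_ms(PipelineType pipeline_type) {
    // Stereo/duplex call sites previously waited far longer for a full batch
    // (5000 ms) than simplex ones (100 ms); the enumerator name is an assumption.
    return pipeline_type == PipelineType::duplex ? 5000 : 100;
}

}  // namespace dorado::basecall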
30 changes: 12 additions & 18 deletions dorado/api/runner_creation.cpp
@@ -2,12 +2,12 @@

#include "basecall/ModelRunner.h"
#include "basecall/crf_utils.h"
#include "caller_creation.h"
#include "modbase/ModBaseModelConfig.h"

#if DORADO_METAL_BUILD
#include "basecall/MetalModelRunner.h"
#elif DORADO_CUDA_BUILD
#include "basecall/CudaCaller.h"
#include "basecall/CudaModelRunner.h"
#include "utils/cuda_utils.h"
#endif
@@ -27,9 +27,9 @@ std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
size_t batch_size,
size_t chunk_size,
float memory_fraction,
bool guard_gpus) {
PipelineType pipeline_type) {
#ifdef __APPLE__
(void)guard_gpus;
(void)pipeline_type;
#endif

std::vector<basecall::RunnerPtr> runners;
@@ -89,32 +89,28 @@ std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
for (auto device_string : devices) {
futures.push_back(pool.push(create_cuda_caller, model_config, int(chunk_size),
int(batch_size), device_string, memory_fraction,
guard_gpus));
pipeline_type));
}

for (auto& caller : futures) {
callers.push_back(caller.get());
}

for (size_t j = 0; j < num_devices; j++) {
size_t num_batch_dims = callers[j]->num_batch_dims();
for (size_t i = 0; i < num_gpu_runners; i++) {
runners.push_back(std::make_unique<basecall::CudaModelRunner>(callers[j]));
for (size_t batch_dims_idx = 0; batch_dims_idx < num_batch_dims; ++batch_dims_idx) {
runners.push_back(std::make_unique<basecall::CudaModelRunner>(callers[j],
batch_dims_idx));
}
}
}
}
#else
(void)num_gpu_runners;
#endif

#ifndef NDEBUG
auto model_stride = runners.front()->model_stride();
#endif
auto adjusted_chunk_size = runners.front()->chunk_size();
assert(std::all_of(runners.begin(), runners.end(), [&](const auto& runner) {
return runner->model_stride() == model_stride &&
runner->chunk_size() == adjusted_chunk_size;
}));

if (chunk_size != adjusted_chunk_size) {
spdlog::debug("- adjusted chunk size to match model stride: {} -> {}", chunk_size,
adjusted_chunk_size);
@@ -168,17 +164,15 @@ std::vector<modbase::RunnerPtr> create_modbase_runners(
return remora_runners;
}

#if DORADO_GPU_BUILD
#ifndef __APPLE__
#if DORADO_CUDA_BUILD
basecall::RunnerPtr create_basecall_runner(std::shared_ptr<basecall::CudaCaller> caller) {
return std::make_unique<basecall::CudaModelRunner>(std::move(caller));
return std::make_unique<basecall::CudaModelRunner>(std::move(caller), 0);
}
#else
#elif DORADO_METAL_BUILD
basecall::RunnerPtr create_basecall_runner(std::shared_ptr<basecall::MetalCaller> caller) {
return std::make_unique<basecall::MetalModelRunner>(std::move(caller));
}
#endif
#endif

modbase::RunnerPtr create_modbase_runner(std::shared_ptr<modbase::ModBaseCaller> caller) {
return std::make_unique<::dorado::modbase::ModBaseRunner>(std::move(caller));
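The heart of the change is the runner fan-out above: each CudaCaller now reports num_batch_dims(), one entry per chunk-size/batch-size combination it prepared, and one CudaModelRunner is built per (device caller, replica, batch-dims index) triple. A minimal self-contained sketch of that fan-out, with generic Caller and Runner placeholders standing in for dorado's CudaCaller and CudaModelRunner:

// Simplified sketch of the fan-out loop; only num_batch_dims() and the
// (caller, batch_dims_idx) constructor mirror the diff above.
#include <cstddef>
#include <memory>
#include <vector>

template <typename Caller, typename Runner>
std::vector<std::unique_ptr<Runner>> fan_out_runners(
        const std::vector<std::shared_ptr<Caller>>& callers,
        std::size_t num_gpu_runners) {
    std::vector<std::unique_ptr<Runner>> runners;
    for (const auto& caller : callers) {
        const std::size_t num_batch_dims = caller->num_batch_dims();
        for (std::size_t replica = 0; replica < num_gpu_runners; ++replica) {
            for (std::size_t dim = 0; dim < num_batch_dims; ++dim) {
                // One runner per (device caller, replica, chunk-size entry).
                runners.push_back(std::make_unique<Runner>(caller, dim));
            }
        }
    }
    return runners;
}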
25 changes: 4 additions & 21 deletions dorado/api/runner_creation.h
@@ -1,6 +1,7 @@
#pragma once

#include "basecall/ModelRunnerBase.h"
#include "caller_creation.h"
#include "modbase/ModBaseRunner.h"

#include <filesystem>
@@ -9,24 +10,7 @@
#include <utility>
#include <vector>

namespace dorado {

namespace basecall {
struct CRFModelConfig;

#if DORADO_CUDA_BUILD
class CudaCaller;
#elif DORADO_METAL_BUILD
class MetalCaller;
#endif

} // namespace basecall

namespace modbase {
class ModBaseCaller;
}

namespace api {
namespace dorado::api {

std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
const basecall::CRFModelConfig& model_config,
@@ -36,7 +20,7 @@ std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
size_t batch_size,
size_t chunk_size,
float memory_fraction,
bool guard_gpus);
PipelineType pipeline_type);

std::vector<modbase::RunnerPtr> create_modbase_runners(
const std::vector<std::filesystem::path>& remora_models,
@@ -52,5 +36,4 @@ basecall::RunnerPtr create_basecall_runner(std::shared_ptr<basecall::MetalCaller

modbase::RunnerPtr create_modbase_runner(std::shared_ptr<modbase::ModBaseCaller> caller);

} // namespace api
} // namespace dorado
} // namespace dorado::api
2 changes: 2 additions & 0 deletions dorado/basecall/CRFModelConfig.cpp
@@ -340,6 +340,8 @@ bool is_rna_model(const CRFModelConfig &model_config) {
return filename.u8string().rfind("rna", 0) == 0;
}

bool is_duplex_model(const CRFModelConfig &model_config) { return model_config.num_features > 1; }

std::string to_string(const Activation &activation) {
switch (activation) {
case Activation::SWISH:
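The new is_duplex_model() helper simply reports whether the model consumes more than one input feature (num_features > 1). A hypothetical usage sketch of how it could feed the PipelineType plumbing introduced elsewhere in this commit; the wrapper function and the enumerator names are assumptions, not repository code.

// Hypothetical illustration only.
#include "basecall/CRFModelConfig.h"
#include "basecall/ModelRunnerBase.h"   // assumed to define PipelineType

namespace dorado::basecall {

inline PipelineType pipeline_type_for(const CRFModelConfig& config) {
    // is_duplex_model() is true when num_features > 1 (see definition above).
    return is_duplex_model(config) ? PipelineType::duplex : PipelineType::simplex;
}

}  // namespace dorado::basecall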
1 change: 1 addition & 0 deletions dorado/basecall/CRFModelConfig.h
@@ -109,5 +109,6 @@ struct CRFModelConfig {
CRFModelConfig load_crf_model_config(const std::filesystem::path& path);

bool is_rna_model(const CRFModelConfig& model_config);
bool is_duplex_model(const CRFModelConfig& model_config);

} // namespace dorado::basecall
