Skip to content

Commit

Permalink
Merge branch 'INSTX-3807-metal-batch-size-server' into 'master'
Browse files Browse the repository at this point in the history
[INSTX-3807] Allow the memory fraction to be specified in the metal callers

See merge request machine-learning/dorado!843
  • Loading branch information
blawrence-ont committed Feb 13, 2024
2 parents f844f35 + 7e7a6a7 commit 77c5599
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 17 deletions.
10 changes: 7 additions & 3 deletions dorado/api/caller_creation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@ std::shared_ptr<basecall::CudaCaller> create_cuda_caller(
memory_limit_fraction, exclusive_gpu_access);
}
#elif DORADO_METAL_BUILD
std::shared_ptr<basecall::MetalCaller>
create_metal_caller(const basecall::CRFModelConfig& model_config, int chunk_size, int batch_size) {
return std::make_shared<basecall::MetalCaller>(model_config, chunk_size, batch_size);
// Factory for the Metal basecall caller.
//
// Every argument is forwarded, unchanged and in order, to the
// basecall::MetalCaller constructor; no validation is performed here.
//
// model_config          - CRF model configuration handed to the caller.
// chunk_size            - chunk size handed to the caller.
// batch_size            - batch size handed to the caller.
// memory_limit_fraction - memory fraction handed to the caller.
//
// Returns a shared_ptr owning the newly constructed caller.
std::shared_ptr<basecall::MetalCaller> create_metal_caller(
        const basecall::CRFModelConfig& model_config,
        int chunk_size,
        int batch_size,
        float memory_limit_fraction) {
    auto metal_caller = std::make_shared<basecall::MetalCaller>(
            model_config, chunk_size, batch_size, memory_limit_fraction);
    return metal_caller;
}
#endif

Expand Down
7 changes: 5 additions & 2 deletions dorado/api/caller_creation.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ std::shared_ptr<basecall::CudaCaller> create_cuda_caller(
float memory_limit_fraction,
bool exclusive_gpu_access);
#elif DORADO_METAL_BUILD
std::shared_ptr<basecall::MetalCaller>
create_metal_caller(const basecall::CRFModelConfig& model_config, int chunk_size, int batch_size);
// Creates a shared basecall::MetalCaller for the given model configuration.
// chunk_size, batch_size and memory_limit_fraction are passed through to the
// MetalCaller constructor.
// NOTE(review): memory_limit_fraction is presumably expected to lie in (0, 1];
// no range check is visible in this declaration - confirm against callers.
std::shared_ptr<basecall::MetalCaller> create_metal_caller(
const basecall::CRFModelConfig& model_config,
int chunk_size,
int batch_size,
float memory_limit_fraction);
#endif

std::shared_ptr<modbase::ModBaseCaller> create_modbase_caller(
Expand Down
3 changes: 2 additions & 1 deletion dorado/api/runner_creation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ std::pair<std::vector<basecall::RunnerPtr>, size_t> create_basecall_runners(
}
#if DORADO_METAL_BUILD
else if (device == "metal") {
auto caller = create_metal_caller(model_config, int(chunk_size), int(batch_size));
auto caller = create_metal_caller(model_config, int(chunk_size), int(batch_size),
memory_fraction);
for (size_t i = 0; i < num_gpu_runners; i++) {
runners.push_back(std::make_unique<basecall::MetalModelRunner>(caller));
}
Expand Down
17 changes: 12 additions & 5 deletions dorado/basecall/MetalCaller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ struct MetalCaller::NNTask {
uint64_t decode_complete_event_id = static_cast<uint64_t>(0);
};

MetalCaller::MetalCaller(const CRFModelConfig &model_config, int chunk_size, int batch_size)
MetalCaller::MetalCaller(const CRFModelConfig &model_config,
int chunk_size,
int batch_size,
float memory_limit_fraction)
: m_config(model_config) {
ScopedAutoReleasePool autorelease_pool;

Expand Down Expand Up @@ -76,7 +79,8 @@ MetalCaller::MetalCaller(const CRFModelConfig &model_config, int chunk_size, int
model_config.model_path, model_config.out_features.has_value(), model_config.bias);

auto selected_batch_size = (batch_size == 0)
? benchmark_batch_sizes(model_config, state_dict, chunk_size)
? benchmark_batch_sizes(model_config, state_dict, chunk_size,
memory_limit_fraction)
: utils::pad_to(batch_size, MTL_CORE_BATCH_SIZE);
set_chunk_batch_size(model_config, state_dict, chunk_size, selected_batch_size);

Expand Down Expand Up @@ -231,9 +235,12 @@ void MetalCaller::set_chunk_batch_size(const CRFModelConfig &model_config,

int MetalCaller::benchmark_batch_sizes(const CRFModelConfig &model_config,
const std::vector<at::Tensor> &state_dict,
int chunk_size) {
int chunk_size,
float memory_limit_fraction) {
const size_t physical_memory = get_apple_physical_memory_bytes();
spdlog::debug("Physical memory available {} GB", physical_memory / (size_t{1} << 30));
const size_t usable_memory = physical_memory * memory_limit_fraction;
spdlog::debug("Physical/Usable memory available: {}/{} GB", physical_memory / BYTES_PER_GB,
usable_memory / BYTES_PER_GB);

// Constrain the maximum batch size to use about half of the usable memory (physical memory
// scaled by memory_limit_fraction) for decode buffers,
// with neural network GPU buffers and CPU buffers assumed to occupy a subset of the
Expand All @@ -248,7 +255,7 @@ int MetalCaller::benchmark_batch_sizes(const CRFModelConfig &model_config,
static_cast<size_t>(m_states) * sizeof(float)); // Back guides.
spdlog::trace("decode_buffer_size_per_elem {}", decode_buffer_size_per_elem);
const int max_batch_size = static_cast<int>(
std::clamp(utils::pad_to(physical_memory / (2 * decode_buffer_size_per_elem),
std::clamp(utils::pad_to(usable_memory / (2 * decode_buffer_size_per_elem),
static_cast<size_t>(MTL_CORE_BATCH_SIZE)),
static_cast<size_t>(MTL_CORE_BATCH_SIZE),
static_cast<size_t>(MTL_CORE_BATCH_SIZE * get_mtl_device_core_count())));
Expand Down
8 changes: 6 additions & 2 deletions dorado/basecall/MetalCaller.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ namespace dorado::basecall {

class MetalCaller {
public:
MetalCaller(const CRFModelConfig &model_config, int chunk_size, int batch_size);
// Constructs a caller for the given model configuration.
// A batch_size of 0 asks the caller to choose a batch size itself via
// benchmark_batch_sizes(), which receives memory_limit_fraction; any other
// value is passed through utils::pad_to() with MTL_CORE_BATCH_SIZE
// (presumably rounding up to a multiple of it - confirm in utils).
MetalCaller(const CRFModelConfig &model_config,
int chunk_size,
int batch_size,
float memory_limit_fraction);
~MetalCaller();

void call_chunks(at::Tensor &input,
Expand All @@ -40,7 +43,8 @@ class MetalCaller {
int batch_size);
int benchmark_batch_sizes(const CRFModelConfig &model_config,
const std::vector<at::Tensor> &state_dict,
int chunk_size);
int chunk_size,
float memory_limit_fraction);
bool run_scan_kernels(MTL::CommandBuffer *const cb, int try_count);

void start_threads();
Expand Down
4 changes: 0 additions & 4 deletions dorado/utils/memory_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@

#include <array>

namespace {
// Bytes per gigabyte, using the binary definition (2^30 == 1024 * 1024 * 1024).
constexpr size_t BYTES_PER_GB = size_t{1} << 30;
}  // namespace

namespace dorado::utils {

size_t available_host_memory_GB() {
Expand Down
2 changes: 2 additions & 0 deletions dorado/utils/memory_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

namespace dorado::utils {

// Bytes per gigabyte, using the binary definition (2^30 == 1024 * 1024 * 1024).
// Declared inline so every translation unit including this header shares one definition.
inline constexpr size_t BYTES_PER_GB = size_t{1} << 30;

size_t available_host_memory_GB();
size_t total_host_memory_GB();

Expand Down

0 comments on commit 77c5599

Please sign in to comment.