From e90b81ff88432d27237bffac5f6c8df3ef5b9bcb Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Tue, 12 Dec 2023 10:55:11 +0000 Subject: [PATCH 1/5] [INSTX-3094] Use find_program() to find autotools programs We can't assume that these are available when building through Xcode since it doesn't know about brew. Also remove the reconf logic that doesn't appear to be needed. --- cmake/Htslib.cmake | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/cmake/Htslib.cmake b/cmake/Htslib.cmake index 077100a4..f3141130 100644 --- a/cmake/Htslib.cmake +++ b/cmake/Htslib.cmake @@ -11,18 +11,14 @@ if(NOT TARGET htslib) # lazy include guard "INTERFACE_INCLUDE_DIRECTORIES" ${HTSLIB_DIR}) target_link_directories(htslib INTERFACE ${HTSLIB_DIR}) else() - message(STATUS "Building htslib") - set(HTSLIB_DIR ${DORADO_3RD_PARTY_SOURCE}/htslib CACHE STRING - "Path to htslib repo") - set(MAKE_COMMAND make) - set(AUTOCONF_COMMAND autoconf) - execute_process(COMMAND bash -c "autoconf -V | sed 's/.* //; q'" - OUTPUT_VARIABLE AUTOCONF_VERS) - if(AUTOCONF_VERS VERSION_GREATER_EQUAL 2.70) - set(AUTOCONF_COMMAND autoreconf --install) - endif() + message(STATUS "Setting up htslib build") + set(HTSLIB_DIR ${DORADO_3RD_PARTY_SOURCE}/htslib CACHE STRING "Path to htslib repo") set(htslib_PREFIX ${CMAKE_BINARY_DIR}/3rdparty/htslib) + find_program(MAKE_COMMAND make REQUIRED) + find_program(AUTOCONF_COMMAND autoconf REQUIRED) + find_program(AUTOHEADER_COMMAND autoheader REQUIRED) + # The Htslib build apparently requires BUILD_IN_SOURCE=1, which is a problem when # switching between build targets because htscodecs object files aren't regenerated. # To avoid this, copy the source tree to a build-specific directory and do the build there. @@ -42,7 +38,7 @@ if(NOT TARGET htslib) # lazy include guard PREFIX ${HTSLIB_BUILD} SOURCE_DIR ${HTSLIB_BUILD}/htslib BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND autoheader + CONFIGURE_COMMAND ${AUTOHEADER_COMMAND} COMMAND ${AUTOCONF_COMMAND} COMMAND ./configure --disable-bz2 --disable-lzma --disable-libcurl --disable-s3 --disable-gcs ${CONFIGURE_FLAGS} BUILD_COMMAND ${MAKE_COMMAND} install prefix=${htslib_PREFIX} From dd24d0740ff48c08494d3bbd75ba1f772be691e9 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Tue, 12 Dec 2023 11:23:42 +0000 Subject: [PATCH 2/5] [INSTX-3094] Fix precision issues spotted by Xcode Xcode adds some additional flags on top of the ones we set, though it seems these are the only issues that get flagged up. --- dorado/decode/beam_search.cpp | 2 +- dorado/modbase/modbase_encoder.cpp | 2 +- dorado/nn/MetalCRFModel.cpp | 16 +++++++++------- dorado/nn/Runners.cpp | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dorado/decode/beam_search.cpp b/dorado/decode/beam_search.cpp index 7144fa90..b7393c5f 100644 --- a/dorado/decode/beam_search.cpp +++ b/dorado/decode/beam_search.cpp @@ -324,7 +324,7 @@ float beam_search(const T* const scores, // 8 fold unrolled version has the small upside that both loads // can be done with a single ldp instruction. const int kUnroll = 8; - for (int i = new_elem_count / kUnroll; i; --i) { + for (int i = int(new_elem_count) / kUnroll; i; --i) { // True comparison sets lane bits to 0xffffffff, or -1 in two's complement, // which we subtract to increment our counts. float32x4_t scores_x4_a = vld1q_f32(score_ptr); diff --git a/dorado/modbase/modbase_encoder.cpp b/dorado/modbase/modbase_encoder.cpp index 3ccc3b7f..6c28020c 100644 --- a/dorado/modbase/modbase_encoder.cpp +++ b/dorado/modbase/modbase_encoder.cpp @@ -146,7 +146,7 @@ inline std::vector encode_kmer_generic(const std::vector& seq, for (int i = base_st; i < base_en; ++i) { for (size_t kmer_pos = 0; kmer_pos < size_t(kmer_len); ++kmer_pos) { auto base = seq[seq_pos + kmer_pos]; - uint32_t base_oh = (base == -1) ? 0ul : (1ul << (base << 3)); + uint32_t base_oh = (base == -1) ? uint32_t{} : (uint32_t{1} << (base << 3)); // memcpy will be translated to a single 32 bit write. std::memcpy(output_ptr, &base_oh, sizeof(base_oh)); output_ptr += 4; diff --git a/dorado/nn/MetalCRFModel.cpp b/dorado/nn/MetalCRFModel.cpp index ebc714d2..f09f1ef4 100644 --- a/dorado/nn/MetalCRFModel.cpp +++ b/dorado/nn/MetalCRFModel.cpp @@ -495,8 +495,10 @@ struct MetalBlockImpl : Module { lstm_label.back()++; command_buffer = m_command_queue->commandBuffer(); - const int kResBufSize = dtype_bytes * kernel_simd_groups * 2 * kTileSize * kTileSize; - const int kOutBufSize = dtype_bytes * kernel_simd_groups * kTileSize * kTileSize; + const int kResBufSize = + static_cast(dtype_bytes * kernel_simd_groups * 2 * kTileSize * kTileSize); + const int kOutBufSize = + static_cast(dtype_bytes * kernel_simd_groups * kTileSize * kTileSize); const std::vector tg_buffer_lens{kResBufSize, kOutBufSize}; for (const auto &args_lstm : m_args_lstm) { const std::vector buffers{args_lstm.get(), mat_working_mem.get(), @@ -650,11 +652,11 @@ class MetalCaller { static_cast(m_states) * sizeof(int16_t) + // Posts static_cast(m_states) * sizeof(float)); // Back guides. spdlog::debug("decode_buffer_size_per_elem {}", decode_buffer_size_per_elem); - const int max_batch_size = std::clamp( + const int max_batch_size = static_cast(std::clamp( utils::pad_to(physical_memory / (2 * decode_buffer_size_per_elem), static_cast(MTL_CORE_BATCH_SIZE)), static_cast(MTL_CORE_BATCH_SIZE), - static_cast(MTL_CORE_BATCH_SIZE * get_mtl_device_core_count())); + static_cast(MTL_CORE_BATCH_SIZE * get_mtl_device_core_count()))); spdlog::debug("max_batch_size {}", max_batch_size); // Subject to the above memory constraint, impose a minimum batch size @@ -671,8 +673,8 @@ class MetalCaller { static_cast(kNumSmallerSizes); for (int i = 0; i < kNumSmallerSizes; ++i) { const int test_batch_size = - utils::pad_to(min_batch_size + static_cast(i * test_size_increment), - static_cast(MTL_CORE_BATCH_SIZE)); + utils::pad_to(min_batch_size + static_cast(i * test_size_increment), + static_cast(MTL_CORE_BATCH_SIZE)); test_batch_sizes.insert(test_batch_size); } @@ -684,7 +686,7 @@ class MetalCaller { // Iterate through batch size candidates to find the most efficient one. int best_batch_size = -1; - int best_us_per_batch_element = std::numeric_limits::max(); + long long best_us_per_batch_element = std::numeric_limits::max(); for (int batch_size : test_batch_sizes) { spdlog::debug("Trying batch size {}", batch_size); set_chunk_batch_size(model_config, state_dict, benchmark_chunk_size, batch_size); diff --git a/dorado/nn/Runners.cpp b/dorado/nn/Runners.cpp index c4787fe0..94ed34f6 100644 --- a/dorado/nn/Runners.cpp +++ b/dorado/nn/Runners.cpp @@ -61,7 +61,7 @@ std::pair, size_t> create_basecall_runners( #if DORADO_GPU_BUILD #ifdef __APPLE__ else if (device == "metal") { - auto caller = dorado::create_metal_caller(model_config, chunk_size, batch_size); + auto caller = dorado::create_metal_caller(model_config, int(chunk_size), int(batch_size)); for (size_t i = 0; i < num_gpu_runners; i++) { runners.push_back(std::make_shared(caller)); } From b256cbd2e775d22a7ae9293c4449753164030d6f Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Tue, 12 Dec 2023 11:25:22 +0000 Subject: [PATCH 3/5] [INSTX-3094] Remove inline in .cpp file The inline keyword doesn't mean that the code should be inlined: it means that it's OK for multiple instances of this symbol to exist and that the linker can pick an arbitrary one at link time, and I don't want to think what that means for symbols in an anonymous namespace. --- dorado/modbase/modbase_encoder.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dorado/modbase/modbase_encoder.cpp b/dorado/modbase/modbase_encoder.cpp index 6c28020c..5a14f916 100644 --- a/dorado/modbase/modbase_encoder.cpp +++ b/dorado/modbase/modbase_encoder.cpp @@ -129,12 +129,12 @@ int ModBaseEncoder::compute_sample_pos(int base_pos) const { namespace { // Fallback path for non-AVX / kmer lengths not specifically optimised. -inline std::vector encode_kmer_generic(const std::vector& seq, - const std::vector& seq_mappings, - int bases_before, - int bases_after, - int context_samples, - int kmer_len) { +std::vector encode_kmer_generic(const std::vector& seq, + const std::vector& seq_mappings, + int bases_before, + int bases_after, + int context_samples, + int kmer_len) { const size_t seq_len = seq.size() - bases_before - bases_after; std::vector output(kmer_len * utils::BaseInfo::NUM_BASES * context_samples); From 867a5c3348e25da6a97d16bbbd43645e05052a0f Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Tue, 12 Dec 2023 11:58:58 +0000 Subject: [PATCH 4/5] [INSTX-3094] Update minimap2 to fix build errors --- dorado/3rdparty/ont-minimap2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dorado/3rdparty/ont-minimap2 b/dorado/3rdparty/ont-minimap2 index 0ca16202..ffea5e27 160000 --- a/dorado/3rdparty/ont-minimap2 +++ b/dorado/3rdparty/ont-minimap2 @@ -1 +1 @@ -Subproject commit 0ca1620283f30f98e573cbed5e5a6cd061f9a2f2 +Subproject commit ffea5e27a4b46b00e0d100f68fd06032ad9e8caf From 3b681931ab970d09f45ebba2f3156c65359095a0 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Tue, 12 Dec 2023 17:54:50 +0000 Subject: [PATCH 5/5] [INSTX-3094] Bring back autoreconf but only for non-Xcode generators autoreconf fails to find aclocal when running under Xcode, so assume that the old method works (which it seems to on latest brew). --- cmake/Htslib.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/Htslib.cmake b/cmake/Htslib.cmake index f3141130..caabc1a3 100644 --- a/cmake/Htslib.cmake +++ b/cmake/Htslib.cmake @@ -18,6 +18,11 @@ if(NOT TARGET htslib) # lazy include guard find_program(MAKE_COMMAND make REQUIRED) find_program(AUTOCONF_COMMAND autoconf REQUIRED) find_program(AUTOHEADER_COMMAND autoheader REQUIRED) + execute_process(COMMAND bash -c "${AUTOCONF_COMMAND} -V | sed 's/.* //; q'" + OUTPUT_VARIABLE AUTOCONF_VERS) + if (AUTOCONF_VERS VERSION_GREATER_EQUAL 2.70 AND NOT CMAKE_GENERATOR STREQUAL "Xcode") + set(AUTOCONF_COMMAND autoreconf --install) + endif() # The Htslib build apparently requires BUILD_IN_SOURCE=1, which is a problem when # switching between build targets because htscodecs object files aren't regenerated.