From e46732ac26f7874cf7dd5e1aaa262ea25094a1d2 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 13:25:35 +0000 Subject: [PATCH 1/7] Log errors reported by metal This helped diagnose an issue with AIR versions. --- dorado/utils/metal_utils.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp index 860970d5..9c01a056 100644 --- a/dorado/utils/metal_utils.cpp +++ b/dorado/utils/metal_utils.cpp @@ -55,6 +55,17 @@ auto get_library_location() { return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding); } +void report_error(NS::Error *error) { + if (error == nil) { + return; + } + auto safe_c_str = [](NS::String *str) { + return str ? str->cString(NS::ASCIIStringEncoding) : ""; + }; + spdlog::error("code={}, domain={}, description={}", error->code(), safe_c_str(error->domain()), + safe_c_str(error->localizedDescription())); +} + #if !TARGET_OS_IPHONE // Retrieves a single int64_t property associated with the given class/name. @@ -120,7 +131,9 @@ NS::SharedPtr make_cps( if (!default_library) { auto lib_path = get_library_location(); + error = nil; default_library = NS::TransferPtr(device->newLibrary(lib_path, &error)); + report_error(error); if (!default_library) { throw std::runtime_error("Failed to load metallib library."); } @@ -142,8 +155,10 @@ NS::SharedPtr make_cps( } auto kernel_name = NS::String::string(name.c_str(), NS::ASCIIStringEncoding); + error = nil; auto kernel = NS::TransferPtr(default_library->newFunction(kernel_name, constant_vals.get(), &error)); + report_error(error); if (!kernel) { throw std::runtime_error("Failed to find the kernel: " + name); } @@ -154,8 +169,10 @@ NS::SharedPtr make_cps( cp_descriptor->setMaxTotalThreadsPerThreadgroup(*max_total_threads_per_tg); } + error = nil; auto cps = NS::TransferPtr(device->newComputePipelineState( cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error)); + report_error(error); if (!cps) { auto e_code = std::to_string(((int)error->code())); auto e_str = error->domain()->cString(NS::ASCIIStringEncoding); From c2579806daa5d53046af835048c412d25cdd1c61 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 13:34:37 +0000 Subject: [PATCH 2/7] Enable warnings for nn.metal No functional change intended. --- cmake/Metal.cmake | 14 ++++++++++++-- dorado/basecall/metal/nn.metal | 30 +++++++++++++++--------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/cmake/Metal.cmake b/cmake/Metal.cmake index feb0f176..cecea8d7 100644 --- a/cmake/Metal.cmake +++ b/cmake/Metal.cmake @@ -14,7 +14,14 @@ foreach(source ${METAL_SOURCES}) set(air_path "${CMAKE_BINARY_DIR}/${basename}.air") add_custom_command( OUTPUT "${air_path}" - COMMAND xcrun -sdk ${XCRUN_SDK} metal -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}" + COMMAND + xcrun -sdk ${XCRUN_SDK} metal + -Werror + -Wall -Wextra -pedantic + -Wno-c++17-attribute-extensions # [[maybe_unused]] is C++17 + -ffast-math + -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" + -o "${air_path}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${source}" COMMENT "Compiling metal kernels" ) @@ -23,7 +30,10 @@ endforeach() add_custom_command( OUTPUT default.metallib - COMMAND xcrun -sdk ${XCRUN_SDK} metallib ${AIR_FILES} -o ${CMAKE_BINARY_DIR}/lib/default.metallib + COMMAND + xcrun -sdk ${XCRUN_SDK} metallib + ${AIR_FILES} + -o ${CMAKE_BINARY_DIR}/lib/default.metallib DEPENDS ${AIR_FILES} COMMENT "Creating metallib" ) diff --git a/dorado/basecall/metal/nn.metal b/dorado/basecall/metal/nn.metal index 33060453..2715c383 100644 --- a/dorado/basecall/metal/nn.metal +++ b/dorado/basecall/metal/nn.metal @@ -57,12 +57,12 @@ typedef float ftype_out; #define MAX_LAYER_SIZE 512 #define KERNEL_INDEX_INPUTS \ - uint tid [[thread_index_in_threadgroup]], \ - uint gid [[threadgroup_position_in_grid]], \ - uint sid [[simdgroup_index_in_threadgroup]], \ - uint simdgroups [[simdgroups_per_threadgroup]], \ - uint threadgroups [[threadgroups_per_grid]], \ - uint threads [[threads_per_threadgroup]] + [[maybe_unused]] uint tid [[thread_index_in_threadgroup]], \ + [[maybe_unused]] uint gid [[threadgroup_position_in_grid]], \ + [[maybe_unused]] uint sid [[simdgroup_index_in_threadgroup]], \ + [[maybe_unused]] uint simdgroups [[simdgroups_per_threadgroup]], \ + [[maybe_unused]] uint threadgroups [[threadgroups_per_grid]], \ + [[maybe_unused]] uint threads [[threads_per_threadgroup]] struct ScanArgs { int T; @@ -290,12 +290,12 @@ kernel void conv( */ struct RowMajor { - static int inner(int r, int c) { return c; } - static int outer(int r, int c) { return r; } + static int inner(int /* r */, int c) { return c; } + static int outer(int r, int /* c */) { return r; } }; struct ColMajor { - static int inner(int r, int c) { return r; } - static int outer(int r, int c) { return c; } + static int inner(int r, int /* c */) { return r; } + static int outer(int /* r */, int c) { return c; } }; // 2D matrix layouts using 8x8 tiles. Note that RowMajor/ColMajor apply to the order of tiles, *NOT* within tiles // RC == RowMajor: layout RrCc, where: R = row / 8; r = row % 8; C = col / 8; c = col % 8 @@ -321,13 +321,13 @@ struct MatLayoutRowMajor { using TileBlockConst = TileBlock; using TileBlock = TileBlock; using ftype = FTYPE; - static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlock(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE); } - static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlockConst(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE); } - static void zero_initial_state(device FTYPE* const ptr, int, int, int, uint, uint, uint, uint) {} + static void zero_initial_state(device FTYPE* const, int, int, int, uint, uint, uint, uint) {} }; // The memory layout of LSTM input/output matrices matches a contiguous tensor of sizes [T+3, C/8, 8, N/8, 8], @@ -396,10 +396,10 @@ struct MatLayoutLSTM { using TileBlockConst = TileBlock; using TileBlock = TileBlock; using ftype = FTYPE; - static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlock(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE); } - static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlockConst(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE); } From ed426db6fa827bb510ba301d065b45f12945246e Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 13:42:12 +0000 Subject: [PATCH 3/7] Use a more generic warning for C++17 features Some of our CI machines don't like the -attribute part. --- cmake/Metal.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Metal.cmake b/cmake/Metal.cmake index cecea8d7..815199f5 100644 --- a/cmake/Metal.cmake +++ b/cmake/Metal.cmake @@ -18,7 +18,7 @@ foreach(source ${METAL_SOURCES}) xcrun -sdk ${XCRUN_SDK} metal -Werror -Wall -Wextra -pedantic - -Wno-c++17-attribute-extensions # [[maybe_unused]] is C++17 + -Wno-c++17-extensions # [[maybe_unused]] is C++17 -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}" From dce250ea3d320f7bf6f2c38407f7fd812cbd2b81 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 16:59:41 +0000 Subject: [PATCH 4/7] Add missing const --- dorado/utils/metal_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp index 9c01a056..a8611c32 100644 --- a/dorado/utils/metal_utils.cpp +++ b/dorado/utils/metal_utils.cpp @@ -55,7 +55,7 @@ auto get_library_location() { return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding); } -void report_error(NS::Error *error) { +void report_error(const NS::Error *error) { if (error == nil) { return; } From 89c8eb8ecb8a997da06c9ca593f4a4d648b46464 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 17:00:26 +0000 Subject: [PATCH 5/7] Remove duplicate logging of an error --- dorado/utils/metal_utils.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp index a8611c32..2ba8a00e 100644 --- a/dorado/utils/metal_utils.cpp +++ b/dorado/utils/metal_utils.cpp @@ -174,10 +174,7 @@ NS::SharedPtr make_cps( cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error)); report_error(error); if (!cps) { - auto e_code = std::to_string(((int)error->code())); - auto e_str = error->domain()->cString(NS::ASCIIStringEncoding); - throw std::runtime_error("failed to build compute pipeline for " + name + " - " + e_str + - ": error " + e_code); + throw std::runtime_error("Failed to build compute pipeline for " + name); } return cps; From 0fbd77fa743defd4a5ac5f16dc77cb3aa7221a42 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 17:02:18 +0000 Subject: [PATCH 6/7] Add a helper for error checking metal calls The metal calls we make all have the error as the last arg, and hopefully that applies for any we make in the future too. --- dorado/utils/metal_utils.cpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp index 2ba8a00e..54ac3d4a 100644 --- a/dorado/utils/metal_utils.cpp +++ b/dorado/utils/metal_utils.cpp @@ -55,17 +55,25 @@ auto get_library_location() { return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding); } -void report_error(const NS::Error *error) { +void report_error(const NS::Error *error, const char *function) { if (error == nil) { return; } auto safe_c_str = [](NS::String *str) { return str ? str->cString(NS::ASCIIStringEncoding) : ""; }; - spdlog::error("code={}, domain={}, description={}", error->code(), safe_c_str(error->domain()), - safe_c_str(error->localizedDescription())); + spdlog::error("function={}, code={}, domain={}, description={}", function, error->code(), + safe_c_str(error->domain()), safe_c_str(error->localizedDescription())); } +#define wrap_func_with_err(func_with_err, ...) \ + ({ \ + NS::Error *error_ = nil; \ + auto result = func_with_err(__VA_ARGS__, &error_); \ + report_error(error_, #func_with_err); \ + result; \ + }) + #if !TARGET_OS_IPHONE // Retrieves a single int64_t property associated with the given class/name. @@ -126,14 +134,11 @@ NS::SharedPtr make_cps( const std::string &name, const std::vector> &named_constants, const std::optional max_total_threads_per_tg) { - NS::Error *error; auto default_library = NS::TransferPtr(device->newDefaultLibrary()); if (!default_library) { auto lib_path = get_library_location(); - error = nil; - default_library = NS::TransferPtr(device->newLibrary(lib_path, &error)); - report_error(error); + default_library = NS::TransferPtr(wrap_func_with_err(device->newLibrary, lib_path)); if (!default_library) { throw std::runtime_error("Failed to load metallib library."); } @@ -155,10 +160,8 @@ NS::SharedPtr make_cps( } auto kernel_name = NS::String::string(name.c_str(), NS::ASCIIStringEncoding); - error = nil; - auto kernel = - NS::TransferPtr(default_library->newFunction(kernel_name, constant_vals.get(), &error)); - report_error(error); + auto kernel = NS::TransferPtr( + wrap_func_with_err(default_library->newFunction, kernel_name, constant_vals.get())); if (!kernel) { throw std::runtime_error("Failed to find the kernel: " + name); } @@ -169,10 +172,9 @@ NS::SharedPtr make_cps( cp_descriptor->setMaxTotalThreadsPerThreadgroup(*max_total_threads_per_tg); } - error = nil; - auto cps = NS::TransferPtr(device->newComputePipelineState( - cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error)); - report_error(error); + auto cps = + NS::TransferPtr(wrap_func_with_err(device->newComputePipelineState, cp_descriptor.get(), + MTL::PipelineOptionNone, nullptr)); if (!cps) { throw std::runtime_error("Failed to build compute pipeline for " + name); } From c3f58a5445335b54fb9cf6424a8c1a8c1ff89804 Mon Sep 17 00:00:00 2001 From: Ben Lawrence Date: Fri, 8 Mar 2024 17:04:03 +0000 Subject: [PATCH 7/7] Be explicit about the version of metal we're using This doesn't appear to be the cause of the bitcode mismatch, but it doesn't hurt to be consistent. --- cmake/Metal.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Metal.cmake b/cmake/Metal.cmake index 815199f5..f5291232 100644 --- a/cmake/Metal.cmake +++ b/cmake/Metal.cmake @@ -19,6 +19,7 @@ foreach(source ${METAL_SOURCES}) -Werror -Wall -Wextra -pedantic -Wno-c++17-extensions # [[maybe_unused]] is C++17 + -std=metal3.0 -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}"