diff --git a/cmake/Metal.cmake b/cmake/Metal.cmake index feb0f176..f5291232 100644 --- a/cmake/Metal.cmake +++ b/cmake/Metal.cmake @@ -14,7 +14,15 @@ foreach(source ${METAL_SOURCES}) set(air_path "${CMAKE_BINARY_DIR}/${basename}.air") add_custom_command( OUTPUT "${air_path}" - COMMAND xcrun -sdk ${XCRUN_SDK} metal -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}" + COMMAND + xcrun -sdk ${XCRUN_SDK} metal + -Werror + -Wall -Wextra -pedantic + -Wno-c++17-extensions # [[maybe_unused]] is C++17 + -std=metal3.0 + -ffast-math + -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" + -o "${air_path}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${source}" COMMENT "Compiling metal kernels" ) @@ -23,7 +31,10 @@ endforeach() add_custom_command( OUTPUT default.metallib - COMMAND xcrun -sdk ${XCRUN_SDK} metallib ${AIR_FILES} -o ${CMAKE_BINARY_DIR}/lib/default.metallib + COMMAND + xcrun -sdk ${XCRUN_SDK} metallib + ${AIR_FILES} + -o ${CMAKE_BINARY_DIR}/lib/default.metallib DEPENDS ${AIR_FILES} COMMENT "Creating metallib" ) diff --git a/dorado/basecall/metal/nn.metal b/dorado/basecall/metal/nn.metal index 33060453..2715c383 100644 --- a/dorado/basecall/metal/nn.metal +++ b/dorado/basecall/metal/nn.metal @@ -57,12 +57,12 @@ typedef float ftype_out; #define MAX_LAYER_SIZE 512 #define KERNEL_INDEX_INPUTS \ - uint tid [[thread_index_in_threadgroup]], \ - uint gid [[threadgroup_position_in_grid]], \ - uint sid [[simdgroup_index_in_threadgroup]], \ - uint simdgroups [[simdgroups_per_threadgroup]], \ - uint threadgroups [[threadgroups_per_grid]], \ - uint threads [[threads_per_threadgroup]] + [[maybe_unused]] uint tid [[thread_index_in_threadgroup]], \ + [[maybe_unused]] uint gid [[threadgroup_position_in_grid]], \ + [[maybe_unused]] uint sid [[simdgroup_index_in_threadgroup]], \ + [[maybe_unused]] uint simdgroups [[simdgroups_per_threadgroup]], \ + [[maybe_unused]] uint threadgroups [[threadgroups_per_grid]], \ + [[maybe_unused]] uint threads [[threads_per_threadgroup]] struct ScanArgs { int T; @@ -290,12 +290,12 @@ kernel void conv( */ struct RowMajor { - static int inner(int r, int c) { return c; } - static int outer(int r, int c) { return r; } + static int inner(int /* r */, int c) { return c; } + static int outer(int r, int /* c */) { return r; } }; struct ColMajor { - static int inner(int r, int c) { return r; } - static int outer(int r, int c) { return c; } + static int inner(int r, int /* c */) { return r; } + static int outer(int /* r */, int c) { return c; } }; // 2D matrix layouts using 8x8 tiles. Note that RowMajor/ColMajor apply to the order of tiles, *NOT* within tiles // RC == RowMajor: layout RrCc, where: R = row / 8; r = row % 8; C = col / 8; c = col % 8 @@ -321,13 +321,13 @@ struct MatLayoutRowMajor { using TileBlockConst = TileBlock; using TileBlock = TileBlock; using ftype = FTYPE; - static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlock(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE); } - static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlockConst(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE); } - static void zero_initial_state(device FTYPE* const ptr, int, int, int, uint, uint, uint, uint) {} + static void zero_initial_state(device FTYPE* const, int, int, int, uint, uint, uint, uint) {} }; // The memory layout of LSTM input/output matrices matches a contiguous tensor of sizes [T+3, C/8, 8, N/8, 8], @@ -396,10 +396,10 @@ struct MatLayoutLSTM { using TileBlockConst = TileBlock; using TileBlock = TileBlock; using ftype = FTYPE; - static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlock(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE); } - static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) { + static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) { return TileBlockConst(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE); } diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp index 860970d5..54ac3d4a 100644 --- a/dorado/utils/metal_utils.cpp +++ b/dorado/utils/metal_utils.cpp @@ -55,6 +55,25 @@ auto get_library_location() { return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding); } +void report_error(const NS::Error *error, const char *function) { + if (error == nil) { + return; + } + auto safe_c_str = [](NS::String *str) { + return str ? str->cString(NS::ASCIIStringEncoding) : ""; + }; + spdlog::error("function={}, code={}, domain={}, description={}", function, error->code(), + safe_c_str(error->domain()), safe_c_str(error->localizedDescription())); +} + +#define wrap_func_with_err(func_with_err, ...) \ + ({ \ + NS::Error *error_ = nil; \ + auto result = func_with_err(__VA_ARGS__, &error_); \ + report_error(error_, #func_with_err); \ + result; \ + }) + #if !TARGET_OS_IPHONE // Retrieves a single int64_t property associated with the given class/name. @@ -115,12 +134,11 @@ NS::SharedPtr make_cps( const std::string &name, const std::vector> &named_constants, const std::optional max_total_threads_per_tg) { - NS::Error *error; auto default_library = NS::TransferPtr(device->newDefaultLibrary()); if (!default_library) { auto lib_path = get_library_location(); - default_library = NS::TransferPtr(device->newLibrary(lib_path, &error)); + default_library = NS::TransferPtr(wrap_func_with_err(device->newLibrary, lib_path)); if (!default_library) { throw std::runtime_error("Failed to load metallib library."); } @@ -142,8 +160,8 @@ NS::SharedPtr make_cps( } auto kernel_name = NS::String::string(name.c_str(), NS::ASCIIStringEncoding); - auto kernel = - NS::TransferPtr(default_library->newFunction(kernel_name, constant_vals.get(), &error)); + auto kernel = NS::TransferPtr( + wrap_func_with_err(default_library->newFunction, kernel_name, constant_vals.get())); if (!kernel) { throw std::runtime_error("Failed to find the kernel: " + name); } @@ -154,13 +172,11 @@ NS::SharedPtr make_cps( cp_descriptor->setMaxTotalThreadsPerThreadgroup(*max_total_threads_per_tg); } - auto cps = NS::TransferPtr(device->newComputePipelineState( - cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error)); + auto cps = + NS::TransferPtr(wrap_func_with_err(device->newComputePipelineState, cp_descriptor.get(), + MTL::PipelineOptionNone, nullptr)); if (!cps) { - auto e_code = std::to_string(((int)error->code())); - auto e_str = error->domain()->cString(NS::ASCIIStringEncoding); - throw std::runtime_error("failed to build compute pipeline for " + name + " - " + e_str + - ": error " + e_code); + throw std::runtime_error("Failed to build compute pipeline for " + name); } return cps;