Skip to content

Commit

Permalink
Merge branch 'blawrence/metal-warnings' into 'master'
Browse files Browse the repository at this point in the history
Log errors reported by metal + enable warnings

See merge request machine-learning/dorado!880
  • Loading branch information
blawrence-ont committed Mar 15, 2024
2 parents e61cfe4 + c3f58a5 commit e3442ec
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 27 deletions.
15 changes: 13 additions & 2 deletions cmake/Metal.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ foreach(source ${METAL_SOURCES})
set(air_path "${CMAKE_BINARY_DIR}/${basename}.air")
add_custom_command(
OUTPUT "${air_path}"
COMMAND xcrun -sdk ${XCRUN_SDK} metal -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}"
COMMAND
xcrun -sdk ${XCRUN_SDK} metal
-Werror
-Wall -Wextra -pedantic
-Wno-c++17-extensions # [[maybe_unused]] is C++17
-std=metal3.0
-ffast-math
-c "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
-o "${air_path}"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
COMMENT "Compiling metal kernels"
)
Expand All @@ -23,7 +31,10 @@ endforeach()

add_custom_command(
OUTPUT default.metallib
COMMAND xcrun -sdk ${XCRUN_SDK} metallib ${AIR_FILES} -o ${CMAKE_BINARY_DIR}/lib/default.metallib
COMMAND
xcrun -sdk ${XCRUN_SDK} metallib
${AIR_FILES}
-o ${CMAKE_BINARY_DIR}/lib/default.metallib
DEPENDS ${AIR_FILES}
COMMENT "Creating metallib"
)
Expand Down
30 changes: 15 additions & 15 deletions dorado/basecall/metal/nn.metal
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ typedef float ftype_out;

#define MAX_LAYER_SIZE 512
#define KERNEL_INDEX_INPUTS \
uint tid [[thread_index_in_threadgroup]], \
uint gid [[threadgroup_position_in_grid]], \
uint sid [[simdgroup_index_in_threadgroup]], \
uint simdgroups [[simdgroups_per_threadgroup]], \
uint threadgroups [[threadgroups_per_grid]], \
uint threads [[threads_per_threadgroup]]
[[maybe_unused]] uint tid [[thread_index_in_threadgroup]], \
[[maybe_unused]] uint gid [[threadgroup_position_in_grid]], \
[[maybe_unused]] uint sid [[simdgroup_index_in_threadgroup]], \
[[maybe_unused]] uint simdgroups [[simdgroups_per_threadgroup]], \
[[maybe_unused]] uint threadgroups [[threadgroups_per_grid]], \
[[maybe_unused]] uint threads [[threads_per_threadgroup]]

struct ScanArgs {
int T;
Expand Down Expand Up @@ -290,12 +290,12 @@ kernel void conv(
*/

struct RowMajor {
static int inner(int r, int c) { return c; }
static int outer(int r, int c) { return r; }
static int inner(int /* r */, int c) { return c; }
static int outer(int r, int /* c */) { return r; }
};
struct ColMajor {
static int inner(int r, int c) { return r; }
static int outer(int r, int c) { return c; }
static int inner(int r, int /* c */) { return r; }
static int outer(int /* r */, int c) { return c; }
};
// 2D matrix layouts using 8x8 tiles. Note that RowMajor/ColMajor apply to the order of tiles, *NOT* within tiles
// RC == RowMajor: layout RrCc, where: R = row / 8; r = row % 8; C = col / 8; c = col % 8
Expand All @@ -321,13 +321,13 @@ struct MatLayoutRowMajor {
using TileBlockConst = TileBlock<RowMajor, device const FTYPE*, FTYPE>;
using TileBlock = TileBlock<RowMajor, device FTYPE*, FTYPE>;
using ftype = FTYPE;
static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
return TileBlock(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE);
}
static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
return TileBlockConst(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE);
}
static void zero_initial_state(device FTYPE* const ptr, int, int, int, uint, uint, uint, uint) {}
static void zero_initial_state(device FTYPE* const, int, int, int, uint, uint, uint, uint) {}
};

// The memory layout of LSTM input/output matrices matches a contiguous tensor of sizes [T+3, C/8, 8, N/8, 8],
Expand Down Expand Up @@ -396,10 +396,10 @@ struct MatLayoutLSTM {
using TileBlockConst = TileBlock<ColMajor, device const FTYPE*, FTYPE>;
using TileBlock = TileBlock<ColMajor, device FTYPE*, FTYPE>;
using ftype = FTYPE;
static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
return TileBlock(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE);
}
static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
return TileBlockConst(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE);
}

Expand Down
36 changes: 26 additions & 10 deletions dorado/utils/metal_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,25 @@ auto get_library_location() {
return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding);
}

void report_error(const NS::Error *error, const char *function) {
if (error == nil) {
return;
}
auto safe_c_str = [](NS::String *str) {
return str ? str->cString(NS::ASCIIStringEncoding) : "<none>";
};
spdlog::error("function={}, code={}, domain={}, description={}", function, error->code(),
safe_c_str(error->domain()), safe_c_str(error->localizedDescription()));
}

#define wrap_func_with_err(func_with_err, ...) \
({ \
NS::Error *error_ = nil; \
auto result = func_with_err(__VA_ARGS__, &error_); \
report_error(error_, #func_with_err); \
result; \
})

#if !TARGET_OS_IPHONE

// Retrieves a single int64_t property associated with the given class/name.
Expand Down Expand Up @@ -115,12 +134,11 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
const std::string &name,
const std::vector<std::tuple<std::string, MetalConstant>> &named_constants,
const std::optional<int> max_total_threads_per_tg) {
NS::Error *error;
auto default_library = NS::TransferPtr(device->newDefaultLibrary());

if (!default_library) {
auto lib_path = get_library_location();
default_library = NS::TransferPtr(device->newLibrary(lib_path, &error));
default_library = NS::TransferPtr(wrap_func_with_err(device->newLibrary, lib_path));
if (!default_library) {
throw std::runtime_error("Failed to load metallib library.");
}
Expand All @@ -142,8 +160,8 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
}

auto kernel_name = NS::String::string(name.c_str(), NS::ASCIIStringEncoding);
auto kernel =
NS::TransferPtr(default_library->newFunction(kernel_name, constant_vals.get(), &error));
auto kernel = NS::TransferPtr(
wrap_func_with_err(default_library->newFunction, kernel_name, constant_vals.get()));
if (!kernel) {
throw std::runtime_error("Failed to find the kernel: " + name);
}
Expand All @@ -154,13 +172,11 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
cp_descriptor->setMaxTotalThreadsPerThreadgroup(*max_total_threads_per_tg);
}

auto cps = NS::TransferPtr(device->newComputePipelineState(
cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error));
auto cps =
NS::TransferPtr(wrap_func_with_err(device->newComputePipelineState, cp_descriptor.get(),
MTL::PipelineOptionNone, nullptr));
if (!cps) {
auto e_code = std::to_string(((int)error->code()));
auto e_str = error->domain()->cString(NS::ASCIIStringEncoding);
throw std::runtime_error("failed to build compute pipeline for " + name + " - " + e_str +
": error " + e_code);
throw std::runtime_error("Failed to build compute pipeline for " + name);
}

return cps;
Expand Down

0 comments on commit e3442ec

Please sign in to comment.