diff --git a/cmake/Metal.cmake b/cmake/Metal.cmake
index feb0f176..f5291232 100644
--- a/cmake/Metal.cmake
+++ b/cmake/Metal.cmake
@@ -14,7 +14,15 @@ foreach(source ${METAL_SOURCES})
     set(air_path "${CMAKE_BINARY_DIR}/${basename}.air")
     add_custom_command(
         OUTPUT "${air_path}"
-        COMMAND xcrun -sdk ${XCRUN_SDK} metal -ffast-math -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}" -o "${air_path}"
+        COMMAND
+            xcrun -sdk ${XCRUN_SDK} metal
+                -Werror
+                -Wall -Wextra -pedantic
+                -Wno-c++17-extensions # [[maybe_unused]] is C++17
+                -std=metal3.0
+                -ffast-math
+                -c "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
+                -o "${air_path}"
         DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
         COMMENT "Compiling metal kernels"
     )
@@ -23,7 +31,10 @@ endforeach()
 
 add_custom_command(
     OUTPUT default.metallib
-    COMMAND xcrun -sdk ${XCRUN_SDK} metallib ${AIR_FILES} -o ${CMAKE_BINARY_DIR}/lib/default.metallib
+    COMMAND
+        xcrun -sdk ${XCRUN_SDK} metallib
+            ${AIR_FILES}
+            -o ${CMAKE_BINARY_DIR}/lib/default.metallib
     DEPENDS ${AIR_FILES}
     COMMENT "Creating metallib"
 )
diff --git a/dorado/basecall/metal/nn.metal b/dorado/basecall/metal/nn.metal
index 33060453..2715c383 100644
--- a/dorado/basecall/metal/nn.metal
+++ b/dorado/basecall/metal/nn.metal
@@ -57,12 +57,12 @@ typedef float ftype_out;
 
 #define MAX_LAYER_SIZE 512
 #define KERNEL_INDEX_INPUTS \
-    uint tid [[thread_index_in_threadgroup]], \
-    uint gid [[threadgroup_position_in_grid]], \
-    uint sid [[simdgroup_index_in_threadgroup]], \
-    uint simdgroups [[simdgroups_per_threadgroup]], \
-    uint threadgroups [[threadgroups_per_grid]], \
-    uint threads [[threads_per_threadgroup]]
+    [[maybe_unused]] uint tid [[thread_index_in_threadgroup]], \
+    [[maybe_unused]] uint gid [[threadgroup_position_in_grid]], \
+    [[maybe_unused]] uint sid [[simdgroup_index_in_threadgroup]], \
+    [[maybe_unused]] uint simdgroups [[simdgroups_per_threadgroup]], \
+    [[maybe_unused]] uint threadgroups [[threadgroups_per_grid]], \
+    [[maybe_unused]] uint threads [[threads_per_threadgroup]]
 
 struct ScanArgs {
     int T;
@@ -290,12 +290,12 @@ kernel void conv(
 */
 
 struct RowMajor {
-    static int inner(int r, int c) { return c; }
-    static int outer(int r, int c) { return r; }
+    static int inner(int /* r */, int c) { return c; }
+    static int outer(int r, int /* c */) { return r; }
 };
 struct ColMajor {
-    static int inner(int r, int c) { return r; }
-    static int outer(int r, int c) { return c; }
+    static int inner(int r, int /* c */) { return r; }
+    static int outer(int /* r */, int c) { return c; }
 };
 // 2D matrix layouts using 8x8 tiles. Note that RowMajor/ColMajor apply to the order of tiles, *NOT* within tiles
 // RC == RowMajor: layout RrCc, where: R = row / 8; r = row % 8; C = col / 8; c = col % 8
@@ -321,13 +321,13 @@ struct MatLayoutRowMajor {
     using TileBlockConst = TileBlock<RowMajor, device const FTYPE*, FTYPE>;
     using TileBlock = TileBlock<RowMajor, device FTYPE*, FTYPE>;
     using ftype = FTYPE;
-    static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
+    static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
         return TileBlock(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE);
     }
-    static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
+    static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
         return TileBlockConst(ptr, C, t * N + n_blk * SIMD_TILES_N * TILE_SIZE, c_blk * SIMD_TILES_C * TILE_SIZE);
     }
-    static void zero_initial_state(device FTYPE* const ptr, int, int, int, uint, uint, uint, uint) {}
+    static void zero_initial_state(device FTYPE* const, int, int, int, uint, uint, uint, uint) {}
 };
 
 // The memory layout of LSTM input/output matrices matches a contiguous tensor of sizes [T+3, C/8, 8, N/8, 8],
@@ -396,10 +396,10 @@ struct MatLayoutLSTM {
     using TileBlockConst = TileBlock<ColMajor, device const FTYPE*, FTYPE>;
     using TileBlock = TileBlock<ColMajor, device FTYPE*, FTYPE>;
     using ftype = FTYPE;
-    static TileBlock tnc_block(device FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
+    static TileBlock tnc_block(device FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
         return TileBlock(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE);
     }
-    static TileBlockConst tnc_block(device const FTYPE* const ptr, int T, int N, int C, int t, int n_blk, int c_blk) {
+    static TileBlockConst tnc_block(device const FTYPE* const ptr, int /* T */, int N, int C, int t, int n_blk, int c_blk) {
         return TileBlockConst(ptr, N, n_blk * SIMD_TILES_N * TILE_SIZE, (t+T_OFFSET) * C + c_blk * SIMD_TILES_C * TILE_SIZE);
     }
 
diff --git a/dorado/utils/metal_utils.cpp b/dorado/utils/metal_utils.cpp
index 860970d5..54ac3d4a 100644
--- a/dorado/utils/metal_utils.cpp
+++ b/dorado/utils/metal_utils.cpp
@@ -55,6 +55,25 @@ auto get_library_location() {
     return NS::String::string(fspath.c_str(), NS::ASCIIStringEncoding);
 }
 
+void report_error(const NS::Error *error, const char *function) {
+    if (error == nil) {
+        return;
+    }
+    auto safe_c_str = [](NS::String *str) {
+        return str ? str->cString(NS::ASCIIStringEncoding) : "<none>";
+    };
+    spdlog::error("function={}, code={}, domain={}, description={}", function, error->code(),
+                  safe_c_str(error->domain()), safe_c_str(error->localizedDescription()));
+}
+
+#define wrap_func_with_err(func_with_err, ...)             \
+    ({                                                     \
+        NS::Error *error_ = nil;                           \
+        auto result = func_with_err(__VA_ARGS__, &error_); \
+        report_error(error_, #func_with_err);              \
+        result;                                            \
+    })
+
 #if !TARGET_OS_IPHONE
 
 // Retrieves a single int64_t property associated with the given class/name.
@@ -115,12 +134,11 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
         const std::string &name,
         const std::vector<std::tuple<std::string, MetalConstant>> &named_constants,
         const std::optional<int> max_total_threads_per_tg) {
-    NS::Error *error;
     auto default_library = NS::TransferPtr(device->newDefaultLibrary());
 
     if (!default_library) {
         auto lib_path = get_library_location();
-        default_library = NS::TransferPtr(device->newLibrary(lib_path, &error));
+        default_library = NS::TransferPtr(wrap_func_with_err(device->newLibrary, lib_path));
         if (!default_library) {
             throw std::runtime_error("Failed to load metallib library.");
         }
@@ -142,8 +160,8 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
     }
 
     auto kernel_name = NS::String::string(name.c_str(), NS::ASCIIStringEncoding);
-    auto kernel =
-            NS::TransferPtr(default_library->newFunction(kernel_name, constant_vals.get(), &error));
+    auto kernel = NS::TransferPtr(
+            wrap_func_with_err(default_library->newFunction, kernel_name, constant_vals.get()));
     if (!kernel) {
         throw std::runtime_error("Failed to find the kernel: " + name);
     }
@@ -154,13 +172,11 @@ NS::SharedPtr<MTL::ComputePipelineState> make_cps(
         cp_descriptor->setMaxTotalThreadsPerThreadgroup(*max_total_threads_per_tg);
     }
 
-    auto cps = NS::TransferPtr(device->newComputePipelineState(
-            cp_descriptor.get(), MTL::PipelineOptionNone, nullptr, &error));
+    auto cps =
+            NS::TransferPtr(wrap_func_with_err(device->newComputePipelineState, cp_descriptor.get(),
+                                               MTL::PipelineOptionNone, nullptr));
     if (!cps) {
-        auto e_code = std::to_string(((int)error->code()));
-        auto e_str = error->domain()->cString(NS::ASCIIStringEncoding);
-        throw std::runtime_error("failed to build compute pipeline for " + name + " - " + e_str +
-                                 ": error " + e_code);
+        throw std::runtime_error("Failed to build compute pipeline for " + name);
     }
 
     return cps;