diff --git a/CMakeLists.txt b/CMakeLists.txt index ad08c72d1ae..33ae1f2533b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -602,15 +602,23 @@ if(EXECUTORCH_BUILD_CORTEX_M) list(APPEND _executorch_backends coretex_m_backend) endif() -if(EXECUTORCH_BUILD_CUDA) - # Build common AOTI functionality (required for CUDA) +# Build common AOTI functionality if needed by CUDA or Metal backends +if(EXECUTORCH_BUILD_CUDA OR EXECUTORCH_BUILD_METAL) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/aoti) +endif() + +if(EXECUTORCH_BUILD_CUDA) # Build CUDA-specific AOTI functionality add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cuda) # Add aoti_cuda to backends - it already depends on aoti_common list(APPEND _executorch_backends aoti_cuda) endif() +if(EXECUTORCH_BUILD_METAL) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/metal) + list(APPEND _executorch_backends metal_backend) +endif() + if(EXECUTORCH_BUILD_EXTENSION_APPLE) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple) endif() diff --git a/backends/aoti/CMakeLists.txt b/backends/aoti/CMakeLists.txt index 845144af50f..5306c8ad74b 100644 --- a/backends/aoti/CMakeLists.txt +++ b/backends/aoti/CMakeLists.txt @@ -38,7 +38,11 @@ target_include_directories( ) target_compile_options(aoti_common PUBLIC -fexceptions -frtti -fPIC) # Ensure symbols are exported properly -target_link_options(aoti_common PUBLIC -Wl,--export-dynamic) +if(APPLE) + target_link_options(aoti_common PUBLIC -Wl,-export_dynamic) +else() + target_link_options(aoti_common PUBLIC -Wl,--export-dynamic) +endif() # Link against ExecuTorch libraries and standard libraries target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS}) diff --git a/backends/apple/metal/CMakeLists.txt b/backends/apple/metal/CMakeLists.txt new file mode 100644 index 00000000000..3d1ec39801a --- /dev/null +++ b/backends/apple/metal/CMakeLists.txt @@ -0,0 +1,111 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +# +# Build AOTI Metal backend for runtime. +# +# ### Editing this file ### +# +# This file should be formatted with +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +# It should also be cmake-lint clean. +# +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() + +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) +# Use full torch package to get library paths, but only link specific libraries +find_package_torch() + +set(_aoti_metal_sources + runtime/metal_backend.cpp + runtime/shims/memory.cpp + runtime/shims/et_metal.mm + runtime/shims/et_metal_ops.mm + runtime/shims/shim_mps.mm + runtime/shims/tensor_attribute.cpp + runtime/shims/utils.cpp +) + +add_library(metal_backend STATIC ${_aoti_metal_sources}) +target_include_directories( + metal_backend + PUBLIC $ $ + # PyTorch AOTI headers from ExecuTorch's torch detection + ${TORCH_INCLUDE_DIRS} +) + +# Link Metal framework +find_library(METAL_LIBRARY Metal REQUIRED) +find_library(FOUNDATION_LIBRARY Foundation REQUIRED) +find_library(METALPERFORMANCESHADERS_LIBRARY MetalPerformanceShaders REQUIRED) +find_library( + METALPERFORMANCESHADERSGRAPH_LIBRARY MetalPerformanceShadersGraph REQUIRED +) +target_link_libraries( + metal_backend + PUBLIC ${METAL_LIBRARY} ${FOUNDATION_LIBRARY} + ${METALPERFORMANCESHADERS_LIBRARY} + ${METALPERFORMANCESHADERSGRAPH_LIBRARY} +) + +target_compile_options(metal_backend PUBLIC -fexceptions -frtti -fPIC) + +target_link_options(metal_backend PUBLIC -Wl,-export_dynamic) + +# Find PyTorch's OpenMP library specifically for libtorch-less AOTI +get_torch_base_path(TORCH_BASE_PATH) +find_library( + TORCH_OMP_LIBRARY + NAMES omp libomp + PATHS "${TORCH_BASE_PATH}/lib" + NO_DEFAULT_PATH +) + +if(TORCH_OMP_LIBRARY) + 
message(STATUS "Found PyTorch OpenMP library: ${TORCH_OMP_LIBRARY}") + # Get the directory containing the OpenMP library for rpath + get_filename_component(TORCH_OMP_LIB_DIR ${TORCH_OMP_LIBRARY} DIRECTORY) + message(STATUS "OpenMP library directory: ${TORCH_OMP_LIB_DIR}") +else() + message( + WARNING "PyTorch OpenMP library not found, may cause runtime linking issues" + ) +endif() + +# Link against appropriate backends and standard libraries +target_link_libraries( + metal_backend PUBLIC aoti_common extension_tensor ${CMAKE_DL_LIBS} + ${TORCH_OMP_LIBRARY} +) + +# Set rpath for OpenMP library to avoid runtime linking issues +if(TORCH_OMP_LIBRARY AND TORCH_OMP_LIB_DIR) + # Add the OpenMP library directory to the rpath + set_target_properties( + metal_backend PROPERTIES BUILD_RPATH "${TORCH_OMP_LIB_DIR}" + INSTALL_RPATH "${TORCH_OMP_LIB_DIR}" + ) + # Also try common OpenMP library locations + target_link_options( + metal_backend PUBLIC -Wl,-rpath,${TORCH_OMP_LIB_DIR} + -Wl,-rpath,/usr/local/opt/libomp/lib + -Wl,-rpath,/opt/homebrew/opt/libomp/lib + ) + message(STATUS "Added rpath for OpenMP library: ${TORCH_OMP_LIB_DIR}") +endif() + +executorch_target_link_options_shared_lib(metal_backend) +install( + TARGETS metal_backend + EXPORT ExecuTorchTargets + DESTINATION lib +) diff --git a/backends/apple/metal/runtime/metal_backend.cpp b/backends/apple/metal/runtime/metal_backend.cpp new file mode 100644 index 00000000000..33699c51c45 --- /dev/null +++ b/backends/apple/metal/runtime/metal_backend.cpp @@ -0,0 +1,546 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Include AOTI common headers (from aoti_common library) +#include +#include + +// Include our Metal-specific shim layer headers +#include +#include +#include +#include +#include + +namespace executorch { +namespace backends { +namespace metal { + +using namespace std; +using namespace aoti; + +using executorch::aten::ScalarType; +using executorch::runtime::ArrayRef; +using executorch::runtime::Backend; +using executorch::runtime::BackendExecutionContext; +using executorch::runtime::BackendInitContext; +using executorch::runtime::CompileSpec; +using executorch::runtime::DelegateHandle; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::NamedDataMap; +using executorch::runtime::Result; +using executorch::runtime::Span; +using executorch::runtime::etensor::Tensor; + +class MetalBackend final : public ::executorch::runtime::BackendInterface { + public: + // Once in program + MetalBackend() { + ET_LOG(Info, "MetalBackend ctor"); + } + + bool is_available() const override { + return 1; + } + + // Once per loaded binary blob + Result init( + BackendInitContext& context, + FreeableBuffer* processed, // This will be a empty buffer + ArrayRef compile_specs // This will be my empty list + ) const override { + ET_LOG(Info, "MetalBackend::init - Starting initialization"); + + std::string method_name; + for (const CompileSpec& spec : compile_specs) { + if (std::strcmp(spec.key, "method_name") == 0) { + method_name.assign( + static_cast(spec.value.buffer), + spec.value.nbytes); // no nullptr guarantee, so pass size + break; + } + } + + std::string so_blob_key = + method_name.empty() ? 
"so_blob" : method_name + "_so_blob"; + ET_LOG(Info, "MetalBackend::init - so_blob_key: %s", so_blob_key.c_str()); + + const NamedDataMap* named_data_map = context.get_named_data_map(); + ET_LOG(Info, "MetalBackend::init - Got named data map: %p", named_data_map); + + ET_LOG( + Info, + "MetalBackend::init - Looking for blob key: %s", + so_blob_key.c_str()); + + Result aoti_metal_buffer = + named_data_map->get_data(so_blob_key.c_str()); + ET_LOG(Info, "MetalBackend::init - Got buffer result"); + + if (!aoti_metal_buffer.ok()) { + ET_LOG( + Error, + "MetalBackend::init - Failed to get buffer for key %s: 0x%x", + so_blob_key.c_str(), + aoti_metal_buffer.error()); + return Error::InvalidArgument; + } + + ET_LOG( + Info, + "MetalBackend::init - Buffer is OK, size: %zu", + aoti_metal_buffer->size()); + + if (aoti_metal_buffer->data() == nullptr) { + ET_LOG(Error, "MetalBackend::init - Buffer data is null"); + return Error::InvalidArgument; + } + + ET_LOG( + Info, + "MetalBackend::init - Buffer data pointer: %p", + aoti_metal_buffer->data()); + + // Generate dynamic temporary file path + filesystem::path temp_dir = filesystem::temp_directory_path(); + filesystem::path so_path = + temp_dir / (so_blob_key + to_string(getpid()) + ".so"); + + // Create a temporary file + ET_LOG( + Info, "MetalBackend::init - Creating temp file: %s", so_path.c_str()); + std::ofstream outfile(so_path.c_str(), std::ios::binary); + + if (!outfile.is_open()) { + ET_LOG(Error, "MetalBackend::init - Failed to create temp file"); + return Error::AccessFailed; + } + ET_LOG(Info, "MetalBackend::init - Temp file created successfully"); + + // Write the ELF buffer to the temporary file + size_t buffer_size = aoti_metal_buffer->size(); + ET_LOG( + Info, + "MetalBackend::init - About to write %zu bytes to file", + buffer_size); + + // Write the buffer directly, not using sizeof(void*) multiplication + outfile.write((char*)aoti_metal_buffer->data(), buffer_size); + + if (outfile.bad()) { + ET_LOG(Error, 
"MetalBackend::init - File write failed"); + return Error::AccessFailed; + } + ET_LOG(Info, "MetalBackend::init - Buffer written successfully"); + + // Finish writing the file to disk + outfile.close(); + ET_LOG(Info, "MetalBackend::init - File closed successfully"); + + // Load the ELF using dlopen + void* so_handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (so_handle == nullptr) { + std::cout << dlerror() << std::endl; + return Error::AccessFailed; + } + + processed->Free(); + + AOTInductorModelContainerCreateWithDevice = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerCreateWithDevice")); + if (AOTInductorModelContainerCreateWithDevice == nullptr) { + perror("dlsym1"); + return Error::AccessFailed; + } + AOTInductorModelContainerDelete = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerDelete")); + if (AOTInductorModelContainerDelete == nullptr) { + perror("dlsym2"); + return Error::AccessFailed; + } + AOTInductorModelContainerGetNumInputs = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerGetNumInputs")); + if (AOTInductorModelContainerGetNumInputs == nullptr) { + perror("dlsym3"); + return Error::AccessFailed; + } + + AOTInductorModelContainerGetNumConstants = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerGetNumConstants")); + if (AOTInductorModelContainerGetNumConstants == nullptr) { + perror("dlsym AOTInductorModelContainerGetNumConstants"); + return Error::AccessFailed; + } + + AOTInductorModelContainerGetInputName = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerGetInputName")); + if (AOTInductorModelContainerGetInputName == nullptr) { + perror("dlsym AOTInductorModelContainerGetInputName"); + return Error::AccessFailed; + } + + AOTInductorModelContainerGetNumOutputs = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerGetNumOutputs")); + if (AOTInductorModelContainerGetNumOutputs == nullptr) { + perror("dlsym4"); + return Error::AccessFailed; 
+ } + AOTInductorModelContainerRun = + reinterpret_cast( + dlsym(so_handle, "AOTInductorModelContainerRun")); + if (AOTInductorModelContainerRun == nullptr) { + perror("dlsym5"); + return Error::AccessFailed; + } + + AOTInductorModelContainerHandle container_handle = nullptr; + ET_LOG( + Info, + "MetalBackend::init - About to create AOTI container with device='mps'"); + + AOTIRuntimeError err = AOTInductorModelContainerCreateWithDevice( + &container_handle, 1, "mps", nullptr); + ET_LOG( + Info, + "MetalBackend::init - AOTInductorModelContainerCreateWithDevice returned err=%d", + err); + + if (err != Error::Ok) { + ET_LOG( + Error, + "Failed to initialize AOTInductorModelContainer with error %d", + err); + return err; + } + ET_LOG( + Info, + "Successfully initialized container_handle = %p", + container_handle); + + AOTIDelegateHandle* handle = new AOTIDelegateHandle(); + handle->so_handle = so_handle; + handle->container_handle = container_handle; + return (DelegateHandle*)handle; // Return the handle post-processing + } + + // Once per execution + Error execute( + BackendExecutionContext& context, + DelegateHandle* handle_, + Span args) const override { + ET_LOG(Debug, "MetalBackend execute"); + + AOTIDelegateHandle* handle = (AOTIDelegateHandle*)handle_; + + ET_LOG(Debug, "MetalBackend Handle generated"); + + size_t n_inputs; + AOTInductorModelContainerGetNumInputs(handle->container_handle, &n_inputs); + + size_t n_outputs; + AOTInductorModelContainerGetNumOutputs( + handle->container_handle, &n_outputs); + + ET_LOG(Debug, "MetalBackend n_outputs %zd generated", n_outputs); + + if (n_inputs + n_outputs != args.size()) { + ET_LOG( + Error, + "number of user input %zd and output %zd generated from AOT Inductor does not match ET runner's %zd. 
Exit.", + n_inputs, + n_outputs, + args.size()); + return Error::InvalidArgument; + } + + ET_LOG( + Debug, + "number of user input %zd and output %zd generated from AOT Inductor matches ET runner's %zd.", + n_inputs, + n_outputs, + args.size()); + + // NOTE: ExecutorTorch tensors are always on CPU/host memory + // We need to create GPU copies for Metal kernel execution + std::vector gpu_inputs( + n_inputs); // GPU copies for kernel execution + std::vector gpu_outputs( + n_outputs); // GPU tensors for kernel output + + ET_LOG(Debug, "MetalBackend input/output vectors generated"); + + // Process input tensors: ExecutorTorch provides CPU tensors, create GPU + // copies + for (int i = 0; i < n_inputs; i++) { + ET_LOG(Debug, "Processing input %d from args to inputs vector", i); + ET_LOG( + Debug, "is %d input a tensor input? %d", i, int(args[i]->isTensor())); + + // Get tensor dimensions and properties from ExecutorTorch CPU tensor + auto cpu_tensor = &(args[i]->toTensor()); + auto sizes = cpu_tensor->sizes(); + auto scalar_type = cpu_tensor->scalar_type(); + ET_LOG( + Debug, + "MetalBackend input %d scalar_type=%d", + i, + static_cast(scalar_type)); + + // Create GPU tensor with same shape + std::vector sizes_vec(sizes.begin(), sizes.end()); + + AOTITensorHandle gpu_input_handle; + Error create_err = aoti_torch_empty_strided( + sizes_vec.size(), + sizes_vec.data(), + nullptr, // use default strides + static_cast(scalar_type), + 2, // device_type = mps + 0, // device_index = 0 + &gpu_input_handle); + + if (create_err != Error::Ok) { + ET_LOG(Error, "Failed to create GPU tensor for input %d", i); + return Error::Internal; + } + + // Log the created GPU tensor scalar type + auto gpu_tensor = reinterpret_cast( + gpu_input_handle); + ET_LOG( + Debug, + "MetalBackend created GPU tensor %d scalar_type=%d", + i, + static_cast(gpu_tensor->scalar_type())); + + gpu_inputs[i] = gpu_input_handle; + + // Log the CPU tensor data before copying to GPU + void* cpu_data = 
cpu_tensor->mutable_data_ptr(); + if (cpu_data && cpu_tensor->numel() > 0) { + float* cpu_float_data = (float*)cpu_data; + ET_LOG( + Debug, + "CPU input %d data before copy: [%.3f, %.3f, %.3f, ...] (numel=%zd)", + i, + cpu_float_data[0], + cpu_float_data[1], + cpu_float_data[2], + cpu_tensor->numel()); + } + + // Copy data from CPU to GPU + Error copy_err = aoti_torch_copy_(gpu_inputs[i], cpu_tensor, 0); + if (copy_err != Error::Ok) { + ET_LOG(Error, "Failed to copy input %d from CPU to GPU", i); + return Error::Internal; + } + + // Log the GPU tensor scalar type after copy + auto gpu_tensor_after = + reinterpret_cast( + gpu_inputs[i]); + ET_LOG( + Debug, + "MetalBackend GPU tensor %d scalar_type after copy=%d", + i, + static_cast(gpu_tensor_after->scalar_type())); + + ET_LOG(Debug, "Successfully copied input %d from CPU to GPU", i); + } + + ET_LOG(Debug, "MetalBackend GPU inputs generated"); + + // Process output tensors: create GPU counterparts for ExecutorTorch CPU + // tensors + for (int i = 0; i < n_outputs; i++) { + // Get output tensor dimensions from ExecutorTorch CPU tensor + auto cpu_output_tensor = &(args[i + n_inputs]->toTensor()); + auto sizes = cpu_output_tensor->sizes(); + auto scalar_type = cpu_output_tensor->scalar_type(); + ET_LOG( + Debug, + "MetalBackend output %d scalar_type=%d", + i, + static_cast(scalar_type)); + + // Create GPU tensor with same shape for kernel output + std::vector sizes_vec(sizes.begin(), sizes.end()); + + AOTITensorHandle gpu_output_handle; + Error create_err = aoti_torch_empty_strided( + sizes_vec.size(), + sizes_vec.data(), + nullptr, // use default strides + static_cast(scalar_type), + 2, // device_type = mps + 0, // device_index = 0 + &gpu_output_handle); + + if (create_err != Error::Ok) { + ET_LOG(Error, "Failed to create GPU tensor for output %d", i); + return Error::Internal; + } + + gpu_outputs[i] = gpu_output_handle; + ET_LOG(Debug, "Created GPU output tensor %d", i); + } + + ET_LOG(Debug, "MetalBackend output 
generated"); + + // Log tensor handles before passing to AOTI container + ET_LOG(Debug, "Passing to AOTInductorModelContainerRun:"); + for (int i = 0; i < n_inputs; i++) { + void* gpu_input_data = gpu_inputs[i]->mutable_data_ptr(); + ET_LOG( + Debug, + " gpu_inputs[%d] = %p, data_ptr = %p", + i, + gpu_inputs[i], + gpu_input_data); + } + for (int i = 0; i < n_outputs; i++) { + void* gpu_output_data = gpu_outputs[i]->mutable_data_ptr(); + ET_LOG( + Debug, + " gpu_outputs[%d] = %p, data_ptr = %p", + i, + gpu_outputs[i], + gpu_output_data); + } + + // Run AOTI container with GPU tensors + AOTIRuntimeError error = AOTInductorModelContainerRun( + handle->container_handle, + gpu_inputs.data(), // Use GPU input tensors + n_inputs, + gpu_outputs.data(), // Use GPU output tensors + n_outputs, + nullptr, // stream handle: nullptr selects the default Metal stream + nullptr); // proxy_executor_handle can remain nullptr + + if (error != Error::Ok) { + ET_LOG( + Error, + "AOTInductorModelContainerRun failed with error code %d", + error); + return Error::Internal; + } + + // Ensure all GPU work is completed before reading results + try { + synchronize_metal_stream(); + } catch (const std::exception& e) { + ET_LOG( + Error, + "Failed to synchronize Metal stream after kernel execution: %s", + e.what()); + return Error::Internal; + } catch (...) 
{ + ET_LOG( + Error, + "Failed to synchronize Metal stream after kernel execution: unknown exception"); + return Error::Internal; + } + + ET_LOG(Debug, "MetalBackend running done and synchronized"); + + // Copy GPU output results back to CPU output tensors + for (int i = 0; i < n_outputs; i++) { + auto cpu_output_tensor = &(args[i + n_inputs]->toTensor()); + Error copy_err = aoti_torch_copy_(cpu_output_tensor, gpu_outputs[i], 0); + if (copy_err != Error::Ok) { + ET_LOG(Error, "Failed to copy GPU output %d back to CPU", i); + return Error::Internal; + } + ET_LOG(Debug, "Copied GPU output %d back to CPU", i); + } + + // Clean up GPU tensors that we created (ExecutorTorch tensors are always + // CPU, so all GPU tensors are our copies) + for (int i = 0; i < n_inputs; i++) { + // All GPU input tensors were created by us, delete them + aoti_torch_delete_tensor_object(gpu_inputs[i]); + } + + for (int i = 0; i < n_outputs; i++) { + // All GPU output tensors were created by us, delete them + aoti_torch_delete_tensor_object(gpu_outputs[i]); + } + + ET_LOG(Debug, "MetalBackend execution completed successfully"); + + return Error::Ok; + } + + void destroy(DelegateHandle* handle_) const override { + AOTIDelegateHandle* handle = (AOTIDelegateHandle*)handle_; + + // Delete the container BEFORE closing the shared library + if (handle->container_handle != nullptr) { + AOTIRuntimeError delete_result = + AOTInductorModelContainerDelete(handle->container_handle); + if (delete_result != Error::Ok) { + ET_LOG( + Error, + "AOTInductorModelContainerDelete failed with error code %d", + delete_result); + } + } + + // Now close the shared library + if (handle->so_handle != nullptr) { + dlclose(handle->so_handle); + } + + free(handle); + cleanup_memory(); + executorch::backends::aoti::cleanup_tensor_metadata(); + ET_LOG(Debug, "MetalBackend handle %p destroy", handle_); + } +}; + +} // namespace metal + +namespace { +auto cls = metal::MetalBackend(); +executorch::runtime::Backend 
backend{"MetalBackend", &cls}; +static executorch::runtime::Error success_with_compiler = + register_backend(backend); +} // namespace + +} // namespace backends +} // namespace executorch diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index 04e84622589..b2b09091014 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -152,6 +152,9 @@ define_overridable_option( define_overridable_option( EXECUTORCH_BUILD_CUDA "Build the CUDA backend" BOOL OFF ) +define_overridable_option( + EXECUTORCH_BUILD_METAL "Build the AOTI Metal backend" BOOL OFF +) define_overridable_option( EXECUTORCH_BUILD_VGF "Build the Arm VGF backend" BOOL OFF ) @@ -389,6 +392,10 @@ check_required_options_on( IF_ON EXECUTORCH_BUILD_CUDA REQUIRES EXECUTORCH_BUILD_EXTENSION_TENSOR ) +check_required_options_on( + IF_ON EXECUTORCH_BUILD_METAL REQUIRES EXECUTORCH_BUILD_EXTENSION_TENSOR +) + if(NOT EXISTS ${EXECUTORCH_PAL_DEFAULT_FILE_PATH}) message( FATAL_ERROR