diff --git a/CMakeLists.txt b/CMakeLists.txt index 95e33fb109e..e83d8ea11b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -817,6 +817,10 @@ endif() if(EXECUTORCH_BUILD_VULKAN) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan) endif() +if(EXECUTORCH_BUILD_VGF) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm) +endif() + if(EXECUTORCH_BUILD_ANDROID_JNI) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android) diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt index b5e76e778a5..11f61c0dfee 100644 --- a/backends/arm/CMakeLists.txt +++ b/backends/arm/CMakeLists.txt @@ -12,13 +12,17 @@ if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) endif() -add_compile_options("-Wall" "-Werror") - include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10) add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS) + +# bare metal backend builds +if(EXECUTORCH_BUILD_ARM_BAREMETAL) + +add_compile_options("-Wall" "-Werror") + # Third-party folder and Ethos-U driver inclued set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party") set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include") @@ -36,3 +40,47 @@ target_include_directories( target_include_directories( executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR} ) + +# end config for bare metal builds +endif() + + +# VGF backend builds +if(EXECUTORCH_BUILD_VGF) + +# include libvgf +set(LIBVGF_PATH "${EXECUTORCH_ROOT}/examples/arm/ethos-u-scratch/ml-sdk-for-vulkan-manifest/sw/vgf-lib/") + +set(VULKAN_THIRD_PARTY_PATH ${EXECUTORCH_ROOT}/backends/vulkan/third-party) +set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers/include) +set(VOLK_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/volk) + +set(LIBVGF_STATIC "${LIBVGF_PATH}/build/src/libvgf.a") +set(LIBVGF_INCLUDE "${LIBVGF_PATH}/include/") + +add_library(vgf STATIC IMPORTED) +set_property( TARGET vgf PROPERTY IMPORTED_LOCATION "${LIBVGF_STATIC}" ) +target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}") + +# Add backend delegate for VGF +set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp + backends/arm/runtime/VGFSetup.cpp ) + +# vgf backend +list(TRANSFORM _vgf_backend_sources PREPEND "${EXECUTORCH_ROOT}/") +add_library(vgf_backend ${_vgf_backend_sources}) +target_include_directories( + vgf_backend PUBLIC + ${_common_include_directories} + ${VULKAN_HEADERS_PATH} + ${VOLK_HEADERS_PATH} +) +target_compile_options(vgf_backend PRIVATE -DUSE_VULKAN_WRAPPER -DUSE_VULKAN_VOLK) + + +target_link_libraries(vgf_backend PRIVATE executorch_core) +target_link_libraries(vgf_backend PRIVATE vgf) +executorch_target_link_options_shared_lib(vgf_backend) + +# end config for VGF builds +endif() diff --git a/backends/arm/runtime/VGFBackend.cpp b/backends/arm/runtime/VGFBackend.cpp new file mode 100644 index 00000000000..ea4f4286eb9 --- /dev/null +++ b/backends/arm/runtime/VGFBackend.cpp @@ -0,0 +1,361 @@ +/* + * Copyright 2025 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +using namespace std; + +#include +#include +#include + +using executorch::aten::Tensor; +using executorch::runtime::ArrayRef; +using executorch::runtime::Backend; +using executorch::runtime::BackendExecutionContext; +using executorch::runtime::BackendInitContext; +using executorch::runtime::CompileSpec; +using executorch::runtime::DelegateHandle; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::Result; + +// We use the platform and runtime environment provided by the Vulkan delegate +#include + +// Dependencies for processing VGF files into Vulkan calls +#include +#include + +#include + +namespace executorch { +namespace backends { +namespace vgf { + +/* + * Simple function to populate function pointers for the relevant Tensor + * and DataGraph extension APIs. + */ +VkResult vkml_load_extensions(VkDevice const* device) { + // Note: + // We no longer load entry points manually, e.g. via + // (PFN_vkCreateTensorARM)vkGetDeviceProcAddr(*device, "vkCreateTensorARM"); + // we just verify that the function pointers have been populated by the loader + if (vkCreateTensorARM && vkDestroyTensorARM && vkCreateTensorViewARM && + vkDestroyTensorViewARM && vkGetTensorMemoryRequirementsARM && + vkBindTensorMemoryARM && vkCreateDataGraphPipelinesARM && + vkCmdDispatchDataGraphARM && vkCreateDataGraphPipelineSessionARM) { + ET_LOG(Info, "VKML Extensions loaded"); + return VK_SUCCESS; + } + ET_LOG(Error, "Failed to load VKML extensions"); + return VK_ERROR_UNKNOWN; +} + +/* + * Fetch vulkan basic objects - intended to be replaced with a shared + * device setup with the Vulkan backend. + */ +VkResult vkml_allocate_basics( + VkInstance* instance, + VkPhysicalDevice* physical_device, + VkDevice* device, + VkQueue* queue, + VkCommandPool* command_pool); + +void vkml_free_basics( + VkInstance* instance, + VkDevice* device, + VkCommandPool* command_pool) { + vkDestroyCommandPool(*device, *command_pool, nullptr); + // Note: These primitives are used by the emulation layer for vulkan + // object allocation; the vulkan objects are freed in library + // shutdown, so we can't yet destroy these here without causing + // a crash there.
// vkDestroyDevice(*device, nullptr); + // vkDestroyInstance(*instance, nullptr); +} + +class VGFBackend final : public ::executorch::runtime::BackendInterface { + public: + VGFBackend() { + VkResult result; + + // Fetch basic vulkan objects once + result = vkml_allocate_basics( + &vk_instance, + &vk_physical_device, + &vk_device, + &vk_queue, + &vk_command_pool); + if (result != VK_SUCCESS) { + ET_LOG( + Error, "Failed to initialize the Vulkan device, error 0x%08X", result); + return; + } + + // Query the device to ensure it has needed extensions + result = vkml_load_extensions(&vk_device); + if (result != VK_SUCCESS) { + ET_LOG( + Error, + "Failed to verify VKML extensions needed, error 0x%08X", + result); + return; + } + } + ~VGFBackend() { + vkml_free_basics(&vk_instance, &vk_device, &vk_command_pool); + } + + bool is_available() const override { + VkResult result; + + ET_LOG(Info, "Checking VGFBackend is available"); + // Query the device prepared in constructor for needed extensions + result = vkml_load_extensions(&vk_device); + if (result != VK_SUCCESS) + return false; + + return true; + } + + Result<DelegateHandle*> init( + BackendInitContext& context, + FreeableBuffer* processed, + ArrayRef<CompileSpec> compile_specs) const override { + ET_LOG(Info, "Entered VGF init"); + + const char* vgf_data = reinterpret_cast<const char*>(processed->data()); + + MemoryAllocator* allocator = context.get_runtime_allocator(); + VgfRepr* repr = allocator->allocateInstance<VgfRepr>(); + new (repr) VgfRepr( + vk_instance, vk_physical_device, vk_device, vk_queue, vk_command_pool); + + auto valid_vgf = repr->process_vgf(vgf_data, compile_specs); + if (!valid_vgf) { + ET_LOG(Error, "Failed to process VGF blob."); + return Error::Internal; + } + + return repr; + } + + Error execute( + ET_UNUSED BackendExecutionContext& context, + DelegateHandle* handle, + EValue** args) const override { + VgfRepr* repr = static_cast<VgfRepr*>(handle); + + // Copy all inputs from EValue to VkDeviceMemory + for (int i = 0; i < repr->IOs.size(); i++) { + if (!args[i]->isTensor()) { + ET_LOG( + Error, + "Expected EValue %d to be tensor, got %d", + i, + static_cast<int>(args[i]->tag)); + return Error::InvalidArgument; + } + + Tensor* tensor = &args[i]->toTensor(); + IO* io = &repr->IOs[i]; + + // skip non-inputs + if (!io->is_input) + continue; + + size_t io_size = accumulate( + io->size.begin(), io->size.end(), io->elt_size, std::multiplies<>()); + + void* data; + if (!repr->map_io(io, &data)) { + ET_LOG(Error, "Failed to map Vulkan IO memory"); + return Error::Internal; + } + memcpy(data, tensor->mutable_data_ptr(), io_size); + repr->unmap_io(io); + } + + // Execute the workload + if (!repr->execute_vgf()) { + ET_LOG(Error, "Failed to execute the VGF representation"); + return Error::Internal; + } + + // Copy all outputs from VkDeviceMemory to EValue + for (int i = 0; i < repr->IOs.size(); i++) { + if (!args[i]->isTensor()) { + ET_LOG( + Error, + "Expected EValue %d to be tensor, got %d", + i, + static_cast<int>(args[i]->tag)); + return Error::InvalidArgument; + } + Tensor* tensor = &args[i]->toTensor(); + IO* io = &repr->IOs[i]; + + // skip non-outputs + if (io->is_input) + continue; + + size_t io_size = accumulate( + io->size.begin(), io->size.end(), io->elt_size, std::multiplies<>()); + + void* data; + if (!repr->map_io(io, &data)) { + ET_LOG(Error, "Failed to map Vulkan IO memory"); + return Error::Internal; + } + memcpy(tensor->mutable_data_ptr(), data, io_size); + repr->unmap_io(io); + } + + return Error::Ok; + } + + void destroy(DelegateHandle* handle) const override { + VgfRepr* repr
= static_cast(handle); + repr->~VgfRepr(); + } + + private: + VkInstance vk_instance; + VkPhysicalDevice vk_physical_device; + VkDevice vk_device; + VkQueue vk_queue; + VkCommandPool vk_command_pool; +}; + +namespace { +auto cls = VGFBackend(); +Backend backend{"VgfBackend", &cls}; +static auto success_with_compiler = register_backend(backend); +} // namespace + +VkResult vkml_allocate_basics( + VkInstance* instance, + VkPhysicalDevice* physical_device, + VkDevice* device, + VkQueue* queue, + VkCommandPool* command_pool) { + const char* dev_exts[] = {"VK_ARM_tensors", "VK_ARM_data_graph"}; + VkResult result; + + if (VK_SUCCESS != volkInitialize()) { + ET_LOG(Error, "Volk failed to initialize"); + } + + VkApplicationInfo app_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "VGF", + .applicationVersion = 0, + .pEngineName = "executorch", + .engineVersion = 0, + .apiVersion = VK_API_VERSION_1_3, + }; + VkInstanceCreateInfo instance_info{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &app_info, + 0, + nullptr, + 0, + nullptr}; + result = vkCreateInstance(&instance_info, nullptr, instance); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create VkInstance"); + return result; + } + volkLoadInstance(*instance); + + // Pick first GPU + uint32_t gpu_count = 0; + vkEnumeratePhysicalDevices(*instance, &gpu_count, nullptr); + if (gpu_count == 0) { + ET_LOG(Error, "Found no suitable devices"); + return VK_ERROR_UNKNOWN; + } + vector gpus(gpu_count); + result = vkEnumeratePhysicalDevices(*instance, &gpu_count, gpus.data()); + *physical_device = gpus[0]; + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to select physical device"); + return result; + } + + // Find suitable queue family + uint32_t qf_count; + vkGetPhysicalDeviceQueueFamilyProperties( + *physical_device, &qf_count, nullptr); + vector qps(qf_count); + vkGetPhysicalDeviceQueueFamilyProperties( + *physical_device, &qf_count, qps.data()); + uint32_t qf = UINT32_MAX; + for (uint32_t i = 0; i < qf_count; ++i) { + if (qps[i].queueFlags & + (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_DATA_GRAPH_BIT_ARM)) { + qf = i; + break; + } + } + if (qf == UINT32_MAX) { + ET_LOG(Error, "Failed to find suitable queue"); + return VK_ERROR_UNKNOWN; + } + + // Device with ML tensor extension + float qp = 1.0f; + VkDeviceQueueCreateInfo queue_info{ + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = qf, + .queueCount = 1, + .pQueuePriorities = &qp, + }; + + VkDeviceCreateInfo dci{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, nullptr}; + dci.queueCreateInfoCount = 1; + dci.pQueueCreateInfos = &queue_info; + dci.enabledExtensionCount = 2; + dci.ppEnabledExtensionNames = dev_exts; + result = vkCreateDevice(*physical_device, &dci, nullptr, device); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create VkDevice"); + return result; + } + // Load the device with volk and populate function pointers + volkLoadDevice(*device); + + vkGetDeviceQueue(*device, qf, 0, queue); + + VkCommandPoolCreateInfo poolInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = qf, + }; + result = vkCreateCommandPool(*device, &poolInfo, nullptr, command_pool); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create VkCommandPool"); + return result; + } + + return result; +} + +} // namespace vgf +} // namespace backends +} // namespace executorch diff --git 
a/backends/arm/runtime/VGFSetup.cpp b/backends/arm/runtime/VGFSetup.cpp new file mode 100644 index 00000000000..18c9dbc9727 --- /dev/null +++ b/backends/arm/runtime/VGFSetup.cpp @@ -0,0 +1,780 @@ +/* + * Copyright 2025 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +/* + * VGF functions which prepare a graph for execution by allocating the + * appropriate vulkan structures. + */ + +#include + +#include +#include + +using namespace mlsdk; + +namespace executorch { +namespace backends { +namespace vgf { + +/* static function to map format to byte count */ +static uint32_t get_format_size(VkFormat format); + +// Debug function to inspect memory properties +static string memory_flags_to_string(VkMemoryPropertyFlags flags) { + if (flags == 0) + return "0"; + + vector parts; +#define TRY_FLAG(f) \ + if (flags & (f)) { \ + parts.emplace_back(#f); \ + flags &= ~(f); \ + } + + TRY_FLAG(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + TRY_FLAG(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + TRY_FLAG(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + TRY_FLAG(VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + TRY_FLAG(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) +#ifdef VK_MEMORY_PROPERTY_PROTECTED_BIT + TRY_FLAG(VK_MEMORY_PROPERTY_PROTECTED_BIT) +#endif +#undef TRY_FLAG + + if (flags) { + // any leftover bits we didn’t name + ostringstream hex; + hex << "0x" << std::hex << flags; + parts.emplace_back(hex.str()); + } + + ostringstream joined; + for (size_t i = 0; i < parts.size(); ++i) { + if (i) + joined << " | "; + joined << parts[i]; + } + return joined.str(); +} + +/** + * Tensor free helper function + */ +void free_tensor( + VkDevice device, + VkTensorViewARM tensor_view, + VkTensorARM tensor, + VkDeviceMemory memory) { + vkDestroyTensorViewARM(device, tensor_view, nullptr); + vkDestroyTensorARM(device, tensor, nullptr); + vkFreeMemory(device, memory, nullptr); +} + +/** + * Tensor allocation helper function + */ +VkResult allocate_tensor( + VkPhysicalDevice physical, + VkDevice device, + VkFormat format, + uint32_t shape_size, + const int64_t* shape, + uint32_t stride_size, + const int64_t* stride, + VkTensorDescriptionARM* description, + VkTensorViewARM* tensor_view, + VkTensorARM* tensor, + VkDeviceMemory* memory) { + VkResult result; + + *description = VkTensorDescriptionARM{ + .sType = VK_STRUCTURE_TYPE_TENSOR_DESCRIPTION_ARM, + .pNext = nullptr, + .tiling = VK_TENSOR_TILING_LINEAR_ARM, + .format = format, + .dimensionCount = shape_size, + .pDimensions = shape, + // Note: stride_data of 0's causes size==0, null means stride==size + .pStrides = (0 == stride_size ? 
nullptr : stride), + .usage = VK_TENSOR_USAGE_SHADER_BIT_ARM | + VK_TENSOR_USAGE_TRANSFER_SRC_BIT_ARM | + VK_TENSOR_USAGE_TRANSFER_DST_BIT_ARM | + VK_TENSOR_USAGE_DATA_GRAPH_BIT_ARM, + }; + const VkTensorCreateInfoARM create_info = { + .sType = VK_STRUCTURE_TYPE_TENSOR_CREATE_INFO_ARM, + .pNext = nullptr, + .flags = 0, + .pDescription = description, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + + result = vkCreateTensorARM(device, &create_info, nullptr, tensor); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to CreateTensor, error %d", result); + return result; + } + + // Get backing memory requirements + const VkTensorMemoryRequirementsInfoARM memory_requirements_info = { + .sType = VK_STRUCTURE_TYPE_TENSOR_MEMORY_REQUIREMENTS_INFO_ARM, + .pNext = nullptr, + .tensor = *tensor, + }; + VkMemoryRequirements2 memory_requirements = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = nullptr, + }; + vkGetTensorMemoryRequirementsARM( + device, &memory_requirements_info, &memory_requirements); + + VkPhysicalDeviceMemoryProperties memProps; + vkGetPhysicalDeviceMemoryProperties(physical, &memProps); + + // Allocate memory + uint32_t memory_type = 0; + for (size_t j = 0; j < 31; ++j) { + if (memory_requirements.memoryRequirements.memoryTypeBits & (0x1 << j)) { + memory_type = j; + uint32_t aims = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + if ((memProps.memoryTypes[j].propertyFlags & aims) == aims) + break; + } + } + const VkMemoryAllocateInfo allocate_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = memory_requirements.memoryRequirements.size, + .memoryTypeIndex = memory_type}; + + vkAllocateMemory(device, &allocate_info, nullptr, memory); + + // Bind tensor to memory + const VkBindTensorMemoryInfoARM bind_info = { + .sType = VK_STRUCTURE_TYPE_BIND_TENSOR_MEMORY_INFO_ARM, + .pNext = nullptr, + .tensor = *tensor, + .memory = *memory, + .memoryOffset = 0, + }; + vkBindTensorMemoryARM(device, 1, &bind_info); + + VkTensorViewCreateInfoARM tensor_view_info = { + .sType = VK_STRUCTURE_TYPE_TENSOR_VIEW_CREATE_INFO_ARM, + .pNext = nullptr, + .flags = 0, + .tensor = *tensor, + .format = format, + }; + VkResult res_tv = + vkCreateTensorViewARM(device, &tensor_view_info, nullptr, tensor_view); + ET_LOG(Info, " tensor view (success %d)", res_tv == VK_SUCCESS); + + return res_tv; +} + +static void debug_print_sequence( + unique_ptr& sequence_decoder) { + ET_LOG(Info, "VGF Sequences:"); + for (int i = 0; i < sequence_decoder->modelSequenceTableSize(); i++) { + ET_LOG( + Info, + " Sequence(%d) '%s':", + i, + string(sequence_decoder->getSegmentName(i)).c_str()); + auto dispatch_shape = sequence_decoder->getSegmentDispatchShape(i); + ET_LOG( + Info, + " dispatch shape %d %d %d", + dispatch_shape[0], + dispatch_shape[1], + dispatch_shape[2]); + ET_LOG( + Info, + " is graph? 
%d", + vgflib::ModuleType::GRAPH == sequence_decoder->getSegmentType(i)); + ET_LOG( + Info, + " module index %d", + sequence_decoder->getSegmentModuleIndex(i)); + auto input_names = sequence_decoder->getModelSequenceInputNamesHandle(); + ET_LOG( + Info, " names (%ld):", sequence_decoder->getNamesSize(input_names)); + for (int j = 0; j < sequence_decoder->getNamesSize(input_names); j++) { + ET_LOG( + Info, + " %d: %s", + i, + string(sequence_decoder->getName(input_names, i)).c_str()); + } + } +} + +static void debug_print_resources( + unique_ptr& resource_decoder) { + ET_LOG(Info, "Resources:"); + for (int i = 0; i < resource_decoder->size(); i++) { + ET_LOG(Info, " MRT entry %d", i); + if (!resource_decoder->getDescriptorType(i).has_value()) { + ET_LOG(Info, " DescriptorType NONE"); + } else { + ET_LOG( + Info, + " DescriptorType %u, is tensor? %d", + resource_decoder->getDescriptorType(i).value(), + resource_decoder->getDescriptorType(i).value() == + VK_DESCRIPTOR_TYPE_TENSOR_ARM); + } + ET_LOG( + Info, + " VkFormat %u from vgf format %u", + vgflib::ToVkFormat(resource_decoder->getVkFormat(i)), + resource_decoder->getVkFormat(i)); + switch (resource_decoder->getCategory(i)) { + case vgflib::ResourceCategory::INPUT: + case vgflib::ResourceCategory::OUTPUT: { + ET_LOG(Info, " Category INPUT/OUTPUT"); + // Get tensor shape and strides + auto shape = resource_decoder->getTensorShape(i); + const vector the_shape(shape.begin(), shape.end()); + auto stride = resource_decoder->getTensorStride(i); + const vector the_stride(stride.begin(), stride.end()); + ET_LOG( + Info, + " rank %ld, stride rank %ld", + the_shape.size(), + the_stride.size()); + for (int j = 0; j < the_shape.size(); j++) { + ET_LOG(Info, " %d: dim %ld", j, the_shape[j]); + } + // Allocate a tensor with bound memory + break; + } + case vgflib::ResourceCategory::INTERMEDIATE: + ET_LOG(Info, " Category INTERMEDIATE"); + break; + case vgflib::ResourceCategory::CONSTANT: + ET_LOG(Info, " Category CONSTANT"); + break; + default: + ET_LOG(Info, " Category UNKNOWN"); + break; + } + } +} + +static void debug_print_modules( + unique_ptr& module_decoder) { + ET_LOG(Info, "VGF Modules:"); + for (int i = 0; i < module_decoder->size(); i++) { + auto name = string(module_decoder->getModuleName(i)); + auto entrypoint = string(module_decoder->getModuleEntryPoint(i)); + auto type = module_decoder->getModuleType(i); + auto spirv = module_decoder->getModuleCode(i); + ET_LOG(Info, " Module(%d) '%s':", i, name.c_str()); + ET_LOG( + Info, + " is graph? 
%d", + vgflib::ModuleType::GRAPH == module_decoder->getModuleType(i)); + ET_LOG(Info, " entrypoint '%s'", entrypoint.c_str()); + ET_LOG(Info, " has spirv %d", module_decoder->hasSPIRV(i)); + ET_LOG( + Info, " code size %lu", spirv.size()); // read the .begin() to .end() + } +} + +bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef specs) { + ET_LOG(Info, "Preparing VGF as Vulkan objects"); + + VkResult result; + + // Prepare temporary decoders + unique_ptr header_decoder = + vgflib::CreateHeaderDecoder(vgf_data); + unique_ptr sequence_decoder = + vgflib::CreateModelSequenceTableDecoder( + vgf_data + header_decoder->GetModelSequenceTableOffset()); + unique_ptr module_decoder = + vgflib::CreateModuleTableDecoder( + vgf_data + header_decoder->GetModuleTableOffset()); + unique_ptr resource_decoder = + vgflib::CreateModelResourceTableDecoder( + vgf_data + header_decoder->GetModelResourceTableOffset()); + unique_ptr constant_decoder = + vgflib::CreateConstantDecoder( + vgf_data + header_decoder->GetConstantsOffset()); + // Check the VGF decoders + if (not(header_decoder && module_decoder && sequence_decoder && + resource_decoder && constant_decoder && header_decoder->IsValid() && + header_decoder->CheckVersion())) { + ET_LOG(Error, "Failed to process VGF file internalsr"); + return false; + } + + // Parse the sequences in the VGF (while there can be multiple sequences of + // COMPUTE and GRAPH segments in the sequence, we currently expect a single + // GRAPH segment to be present. + debug_print_sequence(sequence_decoder); + if (sequence_decoder->modelSequenceTableSize() != 1) { + ET_LOG(Error, "Expected sequence length 1"); + return false; + } + if (sequence_decoder->getSegmentType(0) != vgflib::ModuleType::GRAPH) { + ET_LOG(Error, "Expected segment to be of type GRAPH"); + return false; + } + + // Extract first segment and it's associated module + debug_print_modules(module_decoder); + auto segment_name = string(sequence_decoder->getSegmentName(0)); + auto segment_module = sequence_decoder->getSegmentModuleIndex(0); + + auto segment_m_name = string(module_decoder->getModuleName(segment_module)); + auto segment_m_entrypoint = + string(module_decoder->getModuleEntryPoint(segment_module)); + auto segment_m_spirv = module_decoder->getModuleCode(segment_module); + + // Build a shader from the module + VkShaderModuleCreateInfo smci{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = segment_m_spirv.size() * sizeof(uint32_t), + .pCode = segment_m_spirv.begin(), + }; + result = vkCreateShaderModule(vk_device, &smci, nullptr, &vk_shader); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to load shader from segment %d", segment_module); + return false; + } + + // Record our shader and entrypoint string + vector> shader_modules; + shader_modules.push_back({vk_shader, segment_m_entrypoint}); + + // Load our resource (tensors, constants) into their appropriate Vk objects + vector descriptors; + vector> resources; + vector constants; + + int IO_count = resource_decoder->size(); + for (int i = 0; i < IO_count; i++) { + auto resource_type = resource_decoder->getDescriptorType(i).value_or(0); + auto resource_format = vgflib::ToVkFormat(resource_decoder->getVkFormat(i)); + + // Get tensor shape and strides + auto shape = resource_decoder->getTensorShape(i); + auto stride = resource_decoder->getTensorStride(i); + + switch (resource_decoder->getCategory(i)) { + case vgflib::ResourceCategory::INPUT: + case vgflib::ResourceCategory::OUTPUT: { + // Expect 
IO to be a tensor type + if (resource_type != VK_DESCRIPTOR_TYPE_TENSOR_ARM) { + ET_LOG( + Error, + "Expected tensor type descriptor %u got %u", + VK_DESCRIPTOR_TYPE_TENSOR_ARM, + resource_type); + return false; + } + + // Allocate a tensor with backing memory + VkTensorARM tensor; + VkTensorViewARM tensor_view; + VkDeviceMemory tensor_memory; + VkTensorDescriptionARM tensor_description; + result = allocate_tensor( + vk_physical, + vk_device, + vgflib::ToVkFormat(resource_decoder->getVkFormat(i)), + static_cast(shape.size()), + shape.begin(), + static_cast(stride.size()), + stride.begin(), + &tensor_description, + &tensor_view, + &tensor, + &tensor_memory); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to allocate tensor for VGF resource %d", i); + return false; + } + size_t e_size = get_format_size( + vgflib::ToVkFormat(resource_decoder->getVkFormat(i))); + if (0 == e_size) { + ET_LOG(Error, "failed to get element size of VkFormat"); + return false; + } + + bool is_in = + resource_decoder->getCategory(i) == vgflib::ResourceCategory::INPUT; + IOs.push_back( + IO{vector(shape.begin(), shape.end()), + vector(stride.begin(), stride.end()), + e_size, + tensor, + tensor_view, + tensor_memory, + is_in}); + resources.push_back({tensor, tensor_view}); + descriptors.push_back(tensor_description); + break; + } + case vgflib::ResourceCategory::CONSTANT: + // Constants just need a descriptor + descriptors.push_back(VkTensorDescriptionARM{ + .sType = VK_STRUCTURE_TYPE_TENSOR_DESCRIPTION_ARM, + .pNext = nullptr, + .tiling = VK_TENSOR_TILING_LINEAR_ARM, + .format = vgflib::ToVkFormat(resource_decoder->getVkFormat(i)), + .dimensionCount = static_cast(shape.size()), + .pDimensions = shape.begin(), + // Note: stride_data of 0's causes size==0, null means stride==size + .pStrides = (0 == stride.size() ? 
nullptr : stride.begin()), + .usage = VK_TENSOR_USAGE_DATA_GRAPH_BIT_ARM, + }); + break; + case vgflib::ResourceCategory::INTERMEDIATE: + ET_LOG(Error, "Unsupported resource category INTERMEDIATE"); + return false; + default: + ET_LOG(Info, "Unsupported resource category UNKNOWN"); + return false; + } + } + + // Constants table - mapping of shader bindings to MRT's and their descriptors + for (int i = 0; i < constant_decoder->size(); i++) { + auto mrt_i = constant_decoder->getConstantMrtIndex(i); + auto constant_data = constant_decoder->getConstant(i); + constants.push_back(VkDataGraphPipelineConstantARM{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_CONSTANT_ARM, + .pNext = &descriptors[mrt_i], + .id = mrt_i, + .pConstantData = constant_data.begin(), + }); + } + + // Prepare our layout bindings from the segment's information + vector layout_bindings; + vector data_graph_resources; + + auto set_count = sequence_decoder->getSegmentDescriptorSetInfosSize(0); + for (uint32_t d_idx = 0; d_idx < set_count; d_idx++) { + auto handle = sequence_decoder->getDescriptorBindingSlotsHandle(0, d_idx); + auto binding_count = sequence_decoder->getBindingsSize(handle); + for (int binding = 0; binding < binding_count; binding++) { + auto binding_index = + sequence_decoder->getBindingSlotBinding(handle, binding); + auto MRT_index = + sequence_decoder->getBindingSlotMrtIndex(handle, binding); + auto MRT_type = resource_decoder->getDescriptorType(MRT_index).value(); + + const VkDescriptorSetLayoutBinding layout_binding{ + .binding = binding_index, + .descriptorType = vgflib::ToVkDescriptorType(MRT_type), + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr, + }; + layout_bindings.push_back(layout_binding); + + const VkDataGraphPipelineResourceInfoARM resource{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_RESOURCE_INFO_ARM, + // Note: we populate the resource_descriptors 1:1 with the MRT table, + // so can directly use that index into the resource_descriptors + .pNext = &descriptors[MRT_index], + .descriptorSet = d_idx, + .binding = binding_index, + .arrayElement = 0, + }; + data_graph_resources.push_back(resource); + } + } + + // create fixed layout for this module + const VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(layout_bindings.size()), + layout_bindings.data(), + }; + result = + vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &vk_layout); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create descriptor layout"); + return false; + } + + // Create descriptor pool and descriptors for pipeline + const VkDescriptorPoolCreateInfo descriptor_pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .maxSets = static_cast(set_count), + .poolSizeCount = 0, + .pPoolSizes = nullptr, + }; + result = vkCreateDescriptorPool( + vk_device, &descriptor_pool_info, nullptr, &vk_descriptor_pool); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create descriptor pool"); + return false; + } + + const VkDescriptorSetAllocateInfo descriptor_set_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = vk_descriptor_pool, + .descriptorSetCount = static_cast(set_count), + .pSetLayouts = &vk_layout, + }; + + // Alloc descriptor sets + // currently, as we require modelSequenceTableSize to == 1 + // we can only get one 
descriptor set. + vector descriptor_sets; + descriptor_sets.resize(1); + result = vkAllocateDescriptorSets( + vk_device, &descriptor_set_info, descriptor_sets.data()); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to allocate descriptor sets"); + return false; + } + + // write descriptor updates for every input + auto input_slots = sequence_decoder->getSegmentInputBindingSlotsHandle(0); + auto input_size = sequence_decoder->getBindingsSize(input_slots); + for (uint32_t i = 0; i < input_size; i++) { + auto binding = sequence_decoder->getBindingSlotBinding(input_slots, i); + auto mrt_i = sequence_decoder->getBindingSlotMrtIndex(input_slots, i); + + VkWriteDescriptorSetTensorARM write_desc = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_TENSOR_ARM, + .pNext = nullptr, + .tensorViewCount = 1, + .pTensorViews = &get<1>(resources[i]), + }; + VkWriteDescriptorSet desc_set = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = &write_desc, + .dstSet = descriptor_sets[0], + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_TENSOR_ARM, + .pImageInfo = nullptr, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + vkUpdateDescriptorSets(vk_device, 1, &desc_set, 0, nullptr); + } + + // write descriptor updates for every output + auto output_slots = sequence_decoder->getSegmentOutputBindingSlotsHandle(0); + auto output_size = sequence_decoder->getBindingsSize(output_slots); + for (uint32_t i = 0; i < output_size; i++) { + auto binding = sequence_decoder->getBindingSlotBinding(output_slots, i); + auto mrt_i = sequence_decoder->getBindingSlotMrtIndex(output_slots, i); + + VkWriteDescriptorSetTensorARM write_desc = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_TENSOR_ARM, + .pNext = nullptr, + .tensorViewCount = 1, + .pTensorViews = &get<1>(resources[i + input_size]), + }; + VkWriteDescriptorSet desc_set = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = &write_desc, + .dstSet = descriptor_sets[0], + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_TENSOR_ARM, + .pImageInfo = nullptr, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + vkUpdateDescriptorSets(vk_device, 1, &desc_set, 0, nullptr); + } + + // create our pipeline + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = &vk_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + result = vkCreatePipelineLayout( + vk_device, &pipeline_layout_info, nullptr, &vk_pipeline_layout); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create pipeline layout"); + return false; + } + + // Shader Module Create + VkDataGraphPipelineShaderModuleCreateInfoARM shader_info{ + .sType = + VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_SHADER_MODULE_CREATE_INFO_ARM, + .pNext = nullptr, + .module = get<0>(shader_modules[0]), + .pName = get<1>(shader_modules[0]).c_str(), + .pSpecializationInfo = nullptr, + .constantCount = static_cast(constants.size()), + .pConstants = constants.data(), + }; + + // Prepare Graph Pipeline + VkDataGraphPipelineCreateInfoARM graph_pipeline_info{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_CREATE_INFO_ARM, + .pNext = &shader_info, + .flags = VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT | + VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR, + .layout = vk_pipeline_layout, + 
.resourceInfoCount = static_cast<uint32_t>(data_graph_resources.size()), + .pResourceInfos = data_graph_resources.data(), + }; + + result = vkCreateDataGraphPipelinesARM( + vk_device, // device + VK_NULL_HANDLE, // deferredOperation + VK_NULL_HANDLE, // VkPipelineCache + 1, // createInfoCount + &graph_pipeline_info, // pCreateInfos + nullptr, // pAllocator + &vk_pipeline // pPipelines (VkPipeline*) + ); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create DataGraphPipeline"); + return false; + } + + // prepare the graph pipeline session + VkDataGraphPipelineSessionCreateInfoARM pipeline_session_info{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_SESSION_CREATE_INFO_ARM, + .pNext = nullptr, + .flags = 0, + .dataGraphPipeline = vk_pipeline, + }; + result = vkCreateDataGraphPipelineSessionARM( + vk_device, &pipeline_session_info, nullptr, &vk_session); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to create DataGraphPipelineSession"); + return false; + } + + // Allocate command buffer + VkCommandBufferAllocateInfo allocate_info{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = vk_command_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1}; + result = vkAllocateCommandBuffers(vk_device, &allocate_info, &vk_execute_cmd); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to allocate command buffers"); + return false; + } + + // Populate command once with our dispatch information + VkCommandBufferBeginInfo beginInfo{ + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + vkBeginCommandBuffer(vk_execute_cmd, &beginInfo); + + // bind pipeline + descriptor set + vkCmdBindPipeline( + vk_execute_cmd, VK_PIPELINE_BIND_POINT_DATA_GRAPH_ARM, vk_pipeline); + + vkCmdBindDescriptorSets( + vk_execute_cmd, + VK_PIPELINE_BIND_POINT_DATA_GRAPH_ARM, + vk_pipeline_layout, + 0, // first set + 1, + descriptor_sets.data(), // descriptor set count + pointer + 0, + nullptr // no dynamic offsets + ); + + // Dispatch the graph command + vkCmdDispatchDataGraphARM(vk_execute_cmd, vk_session, nullptr); + + // end the command buffer + vkEndCommandBuffer(vk_execute_cmd); + + return true; +} + +bool VgfRepr::execute_vgf() { + ET_LOG(Info, "Executing vgf"); + + // Submit & wait for idle + VkSubmitInfo submit{VK_STRUCTURE_TYPE_SUBMIT_INFO}; + submit.commandBufferCount = 1; + submit.pCommandBuffers = &vk_execute_cmd; + VkResult result = vkQueueSubmit(vk_queue, 1, &submit, VK_NULL_HANDLE); + if (result != VK_SUCCESS) { + ET_LOG(Error, "VGF/VkCommandBuffer command submission failed"); + return false; + } + vkQueueWaitIdle(vk_queue); + + return true; +} + +void VgfRepr::free_vgf() { + vkFreeCommandBuffers(vk_device, vk_command_pool, 1, &vk_execute_cmd); + vkDestroyDataGraphPipelineSessionARM(vk_device, vk_session, nullptr); + vkDestroyPipeline(vk_device, vk_pipeline, nullptr); + vkDestroyPipelineLayout(vk_device, vk_pipeline_layout, nullptr); + vkDestroyDescriptorPool(vk_device, vk_descriptor_pool, nullptr); + vkDestroyDescriptorSetLayout(vk_device, vk_layout, nullptr); + vkDestroyShaderModule(vk_device, vk_shader, nullptr); + for (int i = 0; i < IOs.size(); i++) { + free_tensor( + vk_device, IOs[i].tensor_view, IOs[i].tensor, IOs[i].tensor_memory); + } +} + +static uint32_t get_format_size(VkFormat format) { + // Note: While this is a small subset of VkFormat, this supports all base + // types for tensors coming from the compiler flow. Tensor formats only + // specify a single element type.
+ switch (format) { + case VK_FORMAT_R8_BOOL_ARM: + case VK_FORMAT_R8_UINT: + case VK_FORMAT_R8_SINT: + return 1; + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16_SFLOAT: + return 2; + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_SFLOAT: + return 4; + case VK_FORMAT_R64_SINT: + return 8; + default: + ET_LOG(Error, "Unknown tensor format"); + return 0; + } +} + +} // namespace vgf +} // namespace backends +} // namespace executorch diff --git a/backends/arm/runtime/VGFSetup.h b/backends/arm/runtime/VGFSetup.h new file mode 100644 index 00000000000..29fc287865e --- /dev/null +++ b/backends/arm/runtime/VGFSetup.h @@ -0,0 +1,119 @@ +/* + * Copyright 2025 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +using namespace std; + +#include + +using executorch::runtime::ArrayRef; +using executorch::runtime::CompileSpec; + +// We use the platform and runtime environment provided by the Vulkan delegate +#include + +namespace executorch { +namespace backends { +namespace vgf { + +class VgfRepr; + +/* + * Info about IOs used during execution + */ +typedef struct IO { + vector size; + vector stride; + size_t elt_size; + VkTensorARM tensor; + VkTensorViewARM tensor_view; + VkDeviceMemory tensor_memory; + bool is_input; +} IO; + +/* + * In memory, and in-vulkan-object representation of the loaded + * VGF graph - ready to be dispatched based on provided inputs. + */ +class VgfRepr { + public: + VgfRepr( + VkInstance inst, + VkPhysicalDevice phys, + VkDevice dev, + VkQueue queue, + VkCommandPool pool) + : vk_instance(inst), + vk_physical(phys), + vk_device(dev), + vk_queue(queue), + vk_command_pool(pool) {} + + /* + * Process a VGF ready for execution, allocate necessary Vulkan objects. + */ + bool process_vgf(const char* vgf_data, ArrayRef specs); + + /* + * Execute the VGF we've previously processed. + */ + bool execute_vgf(); + + /* + * Free any allocations made in process_vgf. 
*/ + void free_vgf(); + + /* + * inputs and outputs from the VGF - these are memory mapped and populated + * with the EValues coming from the backend execute call + */ + vector<IO> IOs; + + bool map_io(IO* io, void** handle) { + VkResult result = + vkMapMemory(vk_device, io->tensor_memory, 0, VK_WHOLE_SIZE, 0, handle); + if (result != VK_SUCCESS) { + ET_LOG(Error, "Failed to map Vulkan IO memory"); + return false; + } + return true; + } + + void unmap_io(IO* io) { + vkUnmapMemory(vk_device, io->tensor_memory); + } + + ~VgfRepr() { + free_vgf(); + } + + private: + // Basic Vulkan objects passed to us and re-used + VkInstance vk_instance; + VkPhysicalDevice vk_physical; + VkDevice vk_device; + VkQueue vk_queue; + VkCommandPool vk_command_pool; + + // per-VgfRepr-instance objects allocated in process_vgf, used (can be more + // than once) in execute_vgf + VkCommandBuffer vk_execute_cmd = VK_NULL_HANDLE; + VkDataGraphPipelineSessionARM vk_session = VK_NULL_HANDLE; + VkPipeline vk_pipeline = VK_NULL_HANDLE; + VkPipelineLayout vk_pipeline_layout = VK_NULL_HANDLE; + VkDescriptorPool vk_descriptor_pool; + VkDescriptorSetLayout vk_layout; + VkShaderModule vk_shader; + // Note: the vector of tensor memory is stored in IOs above +}; + +} // namespace vgf +} // namespace backends +} // namespace executorch diff --git a/backends/vulkan/CMakeLists.txt b/backends/vulkan/CMakeLists.txt index cb1b8a06afd..0b805aef5f4 100644 --- a/backends/vulkan/CMakeLists.txt +++ b/backends/vulkan/CMakeLists.txt @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -127,11 +128,18 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$") set(VULKAN_RUNNER_SRCS ${_executor_runner__srcs}) list(TRANSFORM VULKAN_RUNNER_SRCS PREPEND "${EXECUTORCH_ROOT}/") + set(VGF_BACKEND ) + if(EXECUTORCH_BUILD_VGF) + set(VGF_BACKEND vgf_backend) + endif() + add_executable(vulkan_executor_runner ${VULKAN_RUNNER_SRCS}) target_link_libraries( vulkan_executor_runner ${_executor_runner_libs} vulkan_schema vulkan_backend + ${VGF_BACKEND} ) + target_compile_options(vulkan_executor_runner PUBLIC ${VULKAN_CXX_FLAGS}) endif() diff --git a/backends/vulkan/runtime/graph/containers/Types.h b/backends/vulkan/runtime/graph/containers/Types.h index 5840d1695ee..48232179e06 100644 --- a/backends/vulkan/runtime/graph/containers/Types.h +++ b/backends/vulkan/runtime/graph/containers/Types.h @@ -1,6 +1,7 @@ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. + * Copyright 2025 Arm Limited and/or its affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree.
@@ -8,6 +9,7 @@ #pragma once +#include #include namespace vkcompute { diff --git a/backends/vulkan/third-party/Vulkan-Headers b/backends/vulkan/third-party/Vulkan-Headers index 0c5928795a6..10739e8e00a 160000 --- a/backends/vulkan/third-party/Vulkan-Headers +++ b/backends/vulkan/third-party/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 0c5928795a66e93f65e5e68a36d8daa79a209dc2 +Subproject commit 10739e8e00a7b6f74d22dd0a547f1406ff1f5eb9 diff --git a/backends/vulkan/third-party/volk b/backends/vulkan/third-party/volk index b3bc21e584f..49ba6858c13 160000 --- a/backends/vulkan/third-party/volk +++ b/backends/vulkan/third-party/volk @@ -1 +1 @@ -Subproject commit b3bc21e584f97400b6884cb2a541a56c6a5ddba3 +Subproject commit 49ba6858c13516019d699d94c31d5814025dd005 diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index 551c69bc93e..06558b85460 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -145,6 +145,9 @@ define_overridable_option( define_overridable_option( EXECUTORCH_BUILD_CORTEX_M "Build the Cortex-M backend" BOOL OFF ) +define_overridable_option( + EXECUTORCH_BUILD_VGF "Build the Arm VGF backend" BOOL OFF +) define_overridable_option( EXECUTORCH_COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." BOOL OFF
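
Build note: a minimal configure sketch for exercising the new EXECUTORCH_BUILD_VGF option together with the existing Vulkan backend. The flags and the vulkan_executor_runner target come from this patch; the build directory name, running from the repository root, and having libvgf already built at the LIBVGF_PATH location referenced in backends/arm/CMakeLists.txt are assumptions, not part of the change.

  # Sketch only: enable the Arm VGF delegate alongside the Vulkan backend
  cmake -B cmake-out -DEXECUTORCH_BUILD_VULKAN=ON -DEXECUTORCH_BUILD_VGF=ON .
  # Build the runner, which links vgf_backend when EXECUTORCH_BUILD_VGF is ON
  cmake --build cmake-out --target vulkan_executor_runner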