From 5ba68454f851a92db7e56d29854b63c4762e1b3a Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Wed, 13 Nov 2024 16:58:35 -0800 Subject: [PATCH] Use Linear tiling by default for executorch vulkan tensor images (#6838) Summary: This diff changes the default image layout for a tensor from TILING_OPTIMAL to TILING_LINEAR. Linear tiling helps improve memory utilization by minimizing texture padding and gives better control over texture caching. The q_8w_linear op shader and dispatch settings are modified to take advantage of the linear texture layout. Reviewed By: nathanaelsee Differential Revision: D65912644 --- backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl | 5 ++++- .../vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp | 8 ++++++-- backends/vulkan/runtime/vk_api/memory/Image.cpp | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl index f679732ddb3..f18adf1e889 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl @@ -118,7 +118,10 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) { } void main() { - const u16vec3 out_pos = u16vec3(gl_GlobalInvocationID); + const u16vec3 out_pos = u16vec3( + gl_GlobalInvocationID.x / (out_limits.y * out_limits.z), + (gl_GlobalInvocationID.x / out_limits.z) % out_limits.y, + gl_GlobalInvocationID.x % out_limits.z); if (any(greaterThanEqual(out_pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp index cb3bafbb81b..a78ac0519c4 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp @@ -109,11 +109,15 @@ void add_q_8w_linear_node( graph.sizes_ubo(mat1_W_packed)}); } + // set global work group size to be 1 dimensional + const utils::uvec3
wg_size = { + static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1}; + graph.execute_nodes().emplace_back(new DispatchNode( graph, VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out_W_packed), - graph.create_local_wg_size(out_W_packed), + wg_size, + graph.create_local_wg_size(wg_size), // Inputs and Outputs {{out_W_packed, vkapi::MemoryAccessType::WRITE}, {{mat1_W_packed, q_mat2, scales}, vkapi::MemoryAccessType::READ}}, diff --git a/backends/vulkan/runtime/vk_api/memory/Image.cpp b/backends/vulkan/runtime/vk_api/memory/Image.cpp index 503938c4067..108befaeb5c 100644 --- a/backends/vulkan/runtime/vk_api/memory/Image.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Image.cpp @@ -156,7 +156,7 @@ VulkanImage::VulkanImage( 1u, // mipLevels 1u, // arrayLayers VK_SAMPLE_COUNT_1_BIT, // samples - VK_IMAGE_TILING_OPTIMAL, // tiling + VK_IMAGE_TILING_LINEAR, // tiling image_properties_.image_usage, // usage VK_SHARING_MODE_EXCLUSIVE, // sharingMode 0u, // queueFamilyIndexCount