diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl index f679732ddb3..f18adf1e889 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl @@ -118,7 +118,10 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) { } void main() { - const u16vec3 out_pos = u16vec3(gl_GlobalInvocationID); + const u16vec3 out_pos = u16vec3( + gl_GlobalInvocationID.x / (out_limits.y * out_limits.z), + (gl_GlobalInvocationID.x / out_limits.z) % out_limits.y, + gl_GlobalInvocationID.x % out_limits.z); if (any(greaterThanEqual(out_pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp index cb3bafbb81b..a78ac0519c4 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp @@ -109,11 +109,15 @@ void add_q_8w_linear_node( graph.sizes_ubo(mat1_W_packed)}); } + // set global work group size to be 1 dimensional + const utils::uvec3 wg_size = { + static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1}; + graph.execute_nodes().emplace_back(new DispatchNode( graph, VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out_W_packed), - graph.create_local_wg_size(out_W_packed), + wg_size, + graph.create_local_wg_size(wg_size), // Inputs and Outputs {{out_W_packed, vkapi::MemoryAccessType::WRITE}, {{mat1_W_packed, q_mat2, scales}, vkapi::MemoryAccessType::READ}}, diff --git a/backends/vulkan/runtime/vk_api/memory/Image.cpp b/backends/vulkan/runtime/vk_api/memory/Image.cpp index 503938c4067..108befaeb5c 100644 --- a/backends/vulkan/runtime/vk_api/memory/Image.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Image.cpp @@ -156,7 +156,7 @@ VulkanImage::VulkanImage( 1u, // mipLevels 1u, // arrayLayers VK_SAMPLE_COUNT_1_BIT, // samples - VK_IMAGE_TILING_OPTIMAL, // 
tiling + VK_IMAGE_TILING_LINEAR, // tiling image_properties_.image_usage, // usage VK_SHARING_MODE_EXCLUSIVE, // sharingMode 0u, // queueFamilyIndexCount