From 30027762da8ea7f82ea57d01faa293159081c7e5 Mon Sep 17 00:00:00 2001
From: Vivek Trivedi
Date: Tue, 21 Oct 2025 15:59:56 -0700
Subject: [PATCH] Minor perf improvements to quantized mat mul shader. (#15261)

Summary: The diff includes minor performance improvements to the quantized matrix multiplication shader.

Reviewed By: SS-JIA

Differential Revision: D84998542
---
 .../vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl
index 88b054e2cb2..0e02e075bf1 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl
@@ -69,15 +69,14 @@ void main() {
       sums[r][${c}] = VEC4_T(0.0);
   }
 
+  const int in_row_txstride = div4(in_sizes.x);
+
   for (int pos = 0, txpos = 0;
-       pos < in_sizes.x;
+       txpos < in_row_txstride;
        pos += 4, txpos += 1) {
 
     T mat1[TILE_ROWS][4];
 
-    $if IN_STORAGE == "buffer":
-      uint in_row_txstride = div4(in_sizes.x);
-
     // Preload input tensor
     for (int i = 0; i < TILE_ROWS; i++) {
       $if IN_STORAGE == "buffer":