From f28125c526cf47b258f0c06c0f7285587ec69d97 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Tue, 22 Oct 2024 04:24:54 -0700 Subject: [PATCH] Reduced int precision for texture coordinates in q_linear op, to reduce shader register pressure. (#6354) Summary: This diff reduces the integer precision used for texture coordinates in the q_linear op's texture-based multiplication implementation. **Context:** The texture size in a single dimension, even on most high-end GPUs, is limited to 65535, so a ushort is sufficient for storing texture coordinates. Reviewed By: nathanaelsee, SS-JIA Differential Revision: D64191093 --- .../vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl index 02cae3ed980..624878a17cc 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl @@ -89,13 +89,15 @@ void main() { #else // USING_TEXTURE +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require + VEC4_T q_8w_linear(const ivec3 out_pos, const int K) { - ivec3 mat1_pos = ivec3(0, out_pos.yz); - ivec3 qmat2_pos = ivec3(0, out_pos.x * 4, 0); + u16vec3 mat1_pos = u16vec3(0, out_pos.yz); + u16vec3 qmat2_pos = u16vec3(0, out_pos.x * 4, 0); VEC4_T outtex = VEC4_T(0); - const ivec3 scales_pos = ivec3(out_pos.x, 0, 0); + const u16vec3 scales_pos = u16vec3(out_pos.x, 0, 0); const VEC4_T scales = load_texel(t_scales, scales_pos); for (int i = 0; i < K; i += 4) { @@ -104,11 +106,11 @@ VEC4_T q_8w_linear(const ivec3 out_pos, const int K) { const VEC4_T sums = VEC4_T( dot(mat1_tex, load_texel(t_qmat2, qmat2_pos) * scales.x), dot(mat1_tex, - load_texel(t_qmat2, qmat2_pos + ivec3(0, 1, 0)) * scales.y), + load_texel(t_qmat2, qmat2_pos + u16vec3(0, 1, 0)) * scales.y), dot(mat1_tex, - load_texel(t_qmat2, qmat2_pos + ivec3(0, 2, 0)) * scales.z), + 
load_texel(t_qmat2, qmat2_pos + u16vec3(0, 2, 0)) * scales.z), dot(mat1_tex, - load_texel(t_qmat2, qmat2_pos + ivec3(0, 3, 0)) * scales.w)); + load_texel(t_qmat2, qmat2_pos + u16vec3(0, 3, 0)) * scales.w)); outtex += sums;