diff --git a/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl index 911a2f37ce9..2104f7d796d 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl @@ -24,7 +24,7 @@ ${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")} $if HAS_BIAS: ${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")} ${layout_declare_ubo(B, "ivec4", "out_sizes")} -${layout_declare_ubo(B, "ivec3", "out_logical_limits")} +${layout_declare_ubo(B, "ivec3", "out_limits")} ${layout_declare_ubo(B, "ivec4", "out_axis_map")} ${layout_declare_ubo(B, "ivec4", "mat1_sizes")} ${layout_declare_ubo(B, "ivec4", "mat1_axis_map")} @@ -63,11 +63,11 @@ vec4 get_bias_texel_W_packed(ivec3 logical_pos) { } #endif // HAS_BIAS -vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) { +vec4 matmul_naive_k_dim_packed(const ivec3 out_lpos) { ivec3 mat1_pos; mat1_pos[mat1_axis_map.x] = 0; - mat1_pos[mat1_axis_map.y] = out_mpos.y; - mat1_pos[mat1_axis_map.z] = out_mpos.z; + mat1_pos[mat1_axis_map.y] = out_lpos.y; + mat1_pos[mat1_axis_map.z] = out_lpos.z; #ifdef MAT2_IS_TRANSPOSED const int mat2_k_axis = mat2_axis_map.x; const int mat2_row_axis = mat2_axis_map.y; @@ -88,9 +88,9 @@ vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) { // latency. Surprisingly, this doesn't translate to mat1_pos. ivec3 mat2_pos = ivec3(0); mat2_pos[mat2_k_axis] = i; - mat2_pos[mat2_row_axis] = out_mpos.x * 4 + r; + mat2_pos[mat2_row_axis] = out_lpos.x * 4 + r; #ifndef MAT2_IS_TRANSPOSED - mat2_pos[mat2_axis_map.z] = out_mpos.z; + mat2_pos[mat2_axis_map.z] = out_lpos.z; #endif // MAT2_IS_TRANSPOSED sums[r] = dot(mat1_tex, texelFetch(mat2_tensor, mat2_pos, 0)); } @@ -103,16 +103,16 @@ vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) { return texel; } -vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) { +vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_lpos) { ivec3 mat1_pos; mat1_pos[mat1_axis_map.x] = 0; - mat1_pos[mat1_axis_map.y] = out_mpos.y; - mat1_pos[mat1_axis_map.z] = out_mpos.z; + mat1_pos[mat1_axis_map.y] = out_lpos.y; + mat1_pos[mat1_axis_map.z] = out_lpos.z; ivec3 mat2_pos; - mat2_pos[mat2_axis_map.x] = out_mpos.x; + mat2_pos[mat2_axis_map.x] = out_lpos.x; mat2_pos[mat2_axis_map.y] = 0; - mat2_pos[mat2_axis_map.z] = out_mpos.z; + mat2_pos[mat2_axis_map.z] = out_lpos.z; ivec3 mat2_pos_offset = ivec3(0); mat2_pos_offset[mat2_axis_map.y] = 1; @@ -131,9 +131,9 @@ vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) { // On-demand construction of mat2_pos appears to provide the lowest // latency. Surprisingly, this doesn't translate to mat1_pos. 
ivec3 mat2_pos = ivec3(0); - mat2_pos[mat2_axis_map.x] = out_mpos.x; + mat2_pos[mat2_axis_map.x] = out_lpos.x; mat2_pos[mat2_axis_map.y] = 4 * i + r; - mat2_pos[mat2_axis_map.z] = out_mpos.z; + mat2_pos[mat2_axis_map.z] = out_lpos.z; vec4 mat1_comp_vec = vec4(mat1_tex[r]); texel = fma(mat1_comp_vec, texelFetch(mat2_tensor, mat2_pos, 0), texel); @@ -144,8 +144,8 @@ vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) { } void main() { - const ivec3 out_mpos = ivec3(gl_GlobalInvocationID); - if (any(greaterThanEqual(out_mpos, out_logical_limits))) { + const ivec3 out_lpos = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(out_lpos, out_limits))) { return; } @@ -153,24 +153,22 @@ void main() { #ifdef MAT2_IS_TRANSPOSED if (mat2_packed_dim == W_DIM) { - texel = matmul_naive_k_dim_packed(out_mpos); + texel = matmul_naive_k_dim_packed(out_lpos); } else { - texel = matmul_naive_k_dim_packed_row_dim_packed(out_mpos); + texel = matmul_naive_k_dim_packed_row_dim_packed(out_lpos); } #else if (mat2_packed_dim == W_DIM) { - texel = matmul_naive_k_dim_packed_row_dim_packed(out_mpos); + texel = matmul_naive_k_dim_packed_row_dim_packed(out_lpos); } else { - texel = matmul_naive_k_dim_packed(out_mpos); + texel = matmul_naive_k_dim_packed(out_lpos); } #endif // MAT2_IS_TRANSPOSED #ifdef HAS_BIAS - vec4 bias_texel = get_bias_texel_W_packed(out_mpos); + vec4 bias_texel = get_bias_texel_W_packed(out_lpos); texel = beta * bias_texel + alpha * texel; #endif // HAS_BIAS - ivec3 out_pos = to_texture_pos(out_mpos, out_axis_map); - - imageStore(out_tensor, out_pos, texel); + imageStore(out_tensor, lpos_to_pos(out_lpos, out_axis_map), texel); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl b/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl index 3c0024713fa..ad794d6db49 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl @@ -158,7 +158,7 @@ FloatMatrix matmul_partial(const ivec4 out_idx_tl) { // void write_results_C_packed(const ivec4 out_idx_tl, FloatMatrix results) { - ivec3 out_pos = to_texture_pos( + ivec3 out_pos = tidx_to_pos( out_idx_tl, out_sizes, out_axis_map, out_packed_dim); for (int tile_c = 0; diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl index 62474534137..bf68ea2d9a8 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl @@ -36,28 +36,26 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int packed_dim = C_DIM; void main() { - // pos is physical (x, y, z), as global workgroup uses image extents - const ivec3 pos = ivec3(gl_GlobalInvocationID); - // physical pos (x, y, z) -> logical (w, c, h, n) output - const ivec4 idx = to_tensor_idx(pos, out_sizes, out_axis_map, packed_dim); + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim); - if (any(greaterThanEqual(idx, out_sizes))) { + if (any(greaterThanEqual(tidx, out_sizes))) { return; } // broadcast on logical sizes - ivec4 in_idx = broadcast_indices(idx, in_sizes); + ivec4 in_idx = broadcast_indices(tidx, in_sizes); VEC4_T in_texel = VEC4_T(load_texel( t_in, // read axis mapped texel - to_texture_pos(in_idx, in_sizes, in_axis_map, packed_dim))); + tidx_to_pos(in_idx, in_sizes, in_axis_map, packed_dim))); // broadcast on logical sizes 
- ivec4 other_idx = broadcast_indices(idx, other_sizes); + ivec4 other_idx = broadcast_indices(tidx, other_sizes); VEC4_T other_texel = VEC4_T(load_texel( t_other, // read axis mapped texel - to_texture_pos(other_idx, other_sizes, other_axis_map, packed_dim))); + tidx_to_pos(other_idx, other_sizes, other_axis_map, packed_dim))); // Check boolean broadcast flags; we use ivec2 instead of bvec2 for alignment. if (broadcast_params.x > 0) { @@ -68,6 +66,6 @@ void main() { } imageStore(t_out, - to_texture_pos(idx, out_sizes, out_axis_map, packed_dim), + tidx_to_pos(tidx, out_sizes, out_axis_map, packed_dim), VEC4_T(op(in_texel, other_texel, alpha))); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl index 58796879e85..201b4d17262 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl @@ -23,13 +23,13 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM; void main() { - int out_id = int(gl_GlobalInvocationID.x); - if (out_id >= numel) { + int nchwi = int(gl_GlobalInvocationID.x); + if (nchwi >= numel) { return; } - ivec4 t_in_idx = from_nchw_buffer_i(out_id, in_sizes); - const int in_id = to_buffer_id(t_in_idx, in_strides); + ivec4 in_tidx = nchwi_to_tidx(nchwi, in_sizes); + const int in_bufi = tidx_to_bufi(in_tidx, in_strides); - nchw_buf[out_id] = t_in[in_id]; + nchw_buf[nchwi] = t_in[in_bufi]; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl index 18202e4a51f..49ce76423d5 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl @@ -53,7 +53,7 @@ void main() { } // Map tensor_idx to normal buffer_i - const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim); + const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim); // Compute modified tensor_idx by inverting the CPU function const int N = original_sizes.w; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl index 493a614ee81..4e8bff94947 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl @@ -53,7 +53,7 @@ void main() { } // Map tensor_idx to normal buffer_i - const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim); + const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim); // Compute modified tensor_idx by inverting the CPU function const int N = original_sizes.w; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl index d2978ffe7e6..df8589e737f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl @@ -53,7 +53,7 @@ void main() { } // Map tensor_idx to normal buffer_i - const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim); + const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim); // Compute modified tensor_idx by inverting the CPU function const int N = original_sizes.w; diff --git 
a/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl b/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl index 32ca5d2f064..1a3fef2b310 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl @@ -29,9 +29,9 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int packed_dim = C_DIM; void main() { - const ivec3 out_pos = ivec3(gl_GlobalInvocationID); - const ivec4 out_idx = to_tensor_idx(out_pos, sizes, out_axis_map, packed_dim); - if (any(greaterThanEqual(out_idx, sizes))) { + const ivec3 out_lpos = ivec3(gl_GlobalInvocationID); + const ivec4 out_tidx = lpos_to_tidx(out_lpos, sizes, out_axis_map.w, packed_dim); + if (any(greaterThanEqual(out_tidx, sizes))) { return; } VEC4_T out_texel; @@ -39,13 +39,13 @@ void main() { // Consider optimizing via W-packing format for t_in and t_weight. for (int i = 0; i < 4; ++i) { // Read input tensor for embedding index. - const ivec3 in_pos = to_texture_pos(ivec3(out_idx.y, out_idx.z * 4 + i, out_idx.w / 4), in_axis_map); - const int in_texel_elem = load_texel(t_in, in_pos)[out_idx.w % 4]; + const ivec3 in_pos = lpos_to_pos(ivec3(out_tidx.y, out_tidx.z * 4 + i, out_tidx.w / 4), in_axis_map); + const int in_texel_elem = load_texel(t_in, in_pos)[out_tidx.w % 4]; // Read weight tensor for embedding. - const ivec3 weight_pos = to_texture_pos(ivec3(out_idx.x, in_texel_elem, 0), weight_axis_map); + const ivec3 weight_pos = lpos_to_pos(ivec3(out_tidx.x, in_texel_elem, 0), weight_axis_map); out_texel[i] = load_texel(t_weight, weight_pos).x; } - imageStore(t_out, out_pos, out_texel); + imageStore(t_out, lpos_to_pos(out_lpos, out_axis_map), out_texel); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl index 1e88ffd5975..be3901799f8 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl @@ -31,7 +31,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int packed_dim = C_DIM; void write_out_texel(VEC4_T texel, ivec4 tensor_idx) { - const ivec4 buf_indices = get_texel_nchw_buffer_ixs( + const ivec4 buf_indices = tidx_to_nchwi( tensor_idx, sizes, packed_dim); @@ -51,13 +51,13 @@ void write_out_texel(VEC4_T texel, ivec4 tensor_idx) { } void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - const ivec4 tensor_idx = to_tensor_idx(pos, sizes, axis_map, packed_dim); + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + const ivec4 tidx = lpos_to_tidx(lpos, sizes, axis_map.w, packed_dim); - if (any(greaterThanEqual(tensor_idx, sizes))) { + if (any(greaterThanEqual(tidx, sizes))) { return; } - const VEC4_T intex = load_texel(t_in, pos); - write_out_texel(intex, tensor_idx); + const VEC4_T intex = load_texel(t_in, lpos_to_pos(lpos, axis_map)); + write_out_texel(intex, tidx); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl b/backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl index ba60000f3d4..76ec540838c 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl @@ -34,18 +34,18 @@ void main() { } const ivec4 idx = to_tensor_idx(out_pos, out_sizes, packed_dim); - const ivec4 buffer_ixs = get_texel_nchw_buffer_ixs(idx, out_sizes, packed_dim); + const ivec4 buffer_ixs = tidx_to_nchwi(idx, 
out_sizes, packed_dim); VEC4_T out_texel; for (int i = 0; i < 4; ++i) { - const ivec4 out_idx = from_nchw_buffer_i(buffer_ixs[i], out_sizes); - int out_channel = out_idx.z; + const ivec4 out_tidx = nchwi_to_tidx(buffer_ixs[i], out_sizes); + int out_channel = out_tidx.z; int in_channel = texelFetch(t_idx, ivec3(out_channel, 0, 0), 0).x; - ivec4 in_idx = out_idx; - in_idx.z = in_channel; + ivec4 in_tidx = out_tidx; + in_tidx.z = in_channel; - ivec4 in_elem_pos = to_texture_elem_pos(in_idx, in_sizes, packed_dim); + ivec4 in_elem_pos = to_texture_elem_pos(in_tidx, in_sizes, packed_dim); VEC4_T in_texel = texelFetch(t_in, in_elem_pos.xyz, 0); diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h index 4eed38d9ea1..73df7cfccc2 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h +++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h @@ -9,27 +9,34 @@ #ifndef INDEXING_UTILS_H #define INDEXING_UTILS_H -// Width Dim Index, assuming (W, H, C, N) order -#define W_DIM 0 -// Height, assuming (W, H, C, N) order -#define H_DIM 1 -// Channels, assuming (W, H, C, N) order -#define C_DIM 2 - /* - * Describes which texture axis the "batches" dimension runs along in a 4D - * texture. + * The functions defined in this header file use the following shorthand to + * represent tensor related data structures. * - * Currently it is set to 2 since we represent batches by concatenating along - * the channels dim, which has index 2 in (W, H, C, N) order and maps to the - * depth dimension of a texture, which also corresponds to index 2 in (x, y, z) - * order. + * tidx - ivec4 tensor indices, listed in WHCN order. + * + * pos - ivec3 texel position, used to fetch from an image texture via the + * texelFetch(image, pos, lod) GLSL function. + * posi - ivec4 texel element position. It is the same as pos, except with an + * additional component of the index of an element within the texel. + * lpos - ivec3 logical position, listed in WHC order. This is a permutation of + * texture position based on a tensor's axis_map. lpos.x is the position + * component that corresponds to the tensor's width dimension, lpos.y is + * the position component that corresponds to the tensor's height dim, + * and so on. + * + * bufi - int index into a GPU buffer that backs a tensor. + * nchwi - int index into a staging buffer for a tensor. The data in the + * staging buffer is stored in contiguous data layout, irrespective of + * the tensor's strides. */ -#define BATCH_AXIS 2 -// -// Basic Indexing Utility Macros and Functions -// +// Width Dim Index, assuming WHCN order +#define W_DIM 0 +// Height, assuming WHCN order +#define H_DIM 1 +// Channels, assuming WHCN order +#define C_DIM 2 /* * Fast division by 4 using bit shifting @@ -39,7 +46,7 @@ /* * Divides input and rounds up to 4 */ -#define divup4(x) ((x + 3) / 4) +#define divup4(x) ((x + 3) >> 2) /* * Aligns input to the next multiple of 4 @@ -47,8 +54,8 @@ #define alignup4(x) ((x + 3) & -4) /* - * Input: (W, H, C, N) strides of a tensor - * Returns: the WHCN index of the fastest moving dimension + * Find the packed dimension of a tensor given its strides. The packed dimension + * is the "fastest moving" dimension which will have a stride of 1. 
*/ int find_packed_dim(const ivec4 strides) { int packed_dim = 0; for (int i = 0; i <= 3; i++) { if (strides[i] == 1) { packed_dim = i; break; } } return packed_dim; } /* - * Return the elements of a texture position such that the first element is the - * texture coordinate corresponding to the width dimension, the second element - * is the texture coordinate corresponding to the height dimension, and the - * third element is the texture coordinate corresponding to the channels - * dimension. - */ -ivec3 get_logical_pos(const ivec3 pos, const ivec4 axis_map) { - return ivec3(pos[axis_map.x], pos[axis_map.y], pos[axis_map.z]); -} - -// -// (w, h, c, n) Tensor Index <-> Contiguous Buffer Index Conversion -// - -/* - * Input: (w, h, c, n) tensor index, (W, H, C, N) sizes of a tensor, which dim - * is packed along a texel - * Output: A ivec4 containing the buffer indices corresponding to each texel - * element. + * Get the staging buffer indices that contain the data of the texel that + * corresponds to the provided tensor index. Since each texel has 4 elements, + * 4 buffer indices will be retrieved. */ -ivec4 get_texel_nchw_buffer_ixs(ivec4 idx, ivec4 sizes, int packed_dim) { +ivec4 tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes, const int packed_dim) { ivec4 strides = ivec4(1, sizes.x, sizes.x * sizes.y, sizes.x * sizes.y * sizes.z); - int base_i = idx.x * strides.x + idx.y * strides.y + idx.z * strides.z + - idx.w * strides.w; + int base_i = tidx.x * strides.x + tidx.y * strides.y + tidx.z * strides.z + + tidx.w * strides.w; return base_i + ivec4(0, 1, 2, 3) * strides[packed_dim]; } -/* - * Input: Index into a tensor's data buffer, (W, H, C, N) sizes of a tensor - * Returns: The WCHN index of the tensor that corresponds to the specified - * buffer index, assuming the buffer has contiguous memory layout - */ -ivec4 from_nchw_buffer_i(int buf_i, ivec4 sizes) { +ivec4 nchwi_to_tidx(const int nchwi, const ivec4 sizes) { return ivec4( - buf_i % sizes.x, - (buf_i / (sizes.x)) % sizes.y, - (buf_i / (sizes.x * sizes.y)) % sizes.z, - (buf_i / (sizes.x * sizes.y * sizes.z))); + nchwi % sizes.x, + (nchwi / (sizes.x)) % sizes.y, + (nchwi / (sizes.x * sizes.y)) % sizes.z, + (nchwi / (sizes.x * sizes.y * sizes.z))); } -int to_nchw_buffer_i(const ivec4 tensor_idx, const ivec4 sizes) { - return tensor_idx.w * sizes.x * sizes.y * sizes.z + - tensor_idx.z * sizes.x * sizes.y + tensor_idx.y * sizes.x + tensor_idx.x; +int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) { + return tidx.w * sizes.x * sizes.y * sizes.z + tidx.z * sizes.x * sizes.y + + tidx.y * sizes.x + tidx.x; } -/* - * Input: Texel buffer index, (W, H, C, N) strides of a tensor, which dim is - * packed along a texel - * Returns: The (w, h, c, n) tensor index corresponding to the buffer element - */ -ivec4 to_tensor_idx(int buffer_id, const ivec4 strides, const int packed_dim) { +// TODO(ssjia): make this function use dim order so that it can work with any +// dim order. Currently it assumes that the dim order is contiguous, except for +// the packed dim.
+ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const int packed_dim) { ivec4 idx; for (int i = 3; i >= 0; i--) { if (i != packed_dim) { - idx[i] = buffer_id / strides[i]; - buffer_id %= strides[i]; + idx[i] = bufi / strides[i]; + bufi %= strides[i]; } } - idx[packed_dim] = buffer_id; + idx[packed_dim] = bufi; return idx; } -/* - * Input: Texel buffer index, (W, H, C, N) strides of a tensor - * Returns: The (w, h, c, n) tensor index corresponding to the buffer element - * - * This is a convenience overload of the above function. If the packed dim is - * not known, it can be found by finding the first dimension with a stride of 1. - * However, this process adds some overhead, so if performance is a concern then - * the above function should be used instead so that the packed dim is provided. - */ -ivec4 to_tensor_idx(int buffer_id, const ivec4 strides) { +// Convenience overload of the above function, which will determine the packed +// dim from the strides automatically so it doesn't have to be passed in as a +// function argument. +ivec4 bufi_to_tidx(const int bufi, const ivec4 strides) { int packed_dim = find_packed_dim(strides); - return to_tensor_idx(buffer_id, strides, packed_dim); + return bufi_to_tidx(bufi, strides, packed_dim); +} + +int tidx_to_bufi(const ivec4 tidx, ivec4 strides) { + return tidx.x * strides.x + tidx.y * strides.y + tidx.z * strides.z + + tidx.w * strides.w; } +ivec4 lpos_to_tidx( + ivec3 lpos, + ivec4 sizes, + const int batch_inner_dim, + const int packed_dim) { + // Align packed dim to next multiple of 4 to account for texel padding + sizes[packed_dim] = alignup4(sizes[packed_dim]); + // Moving 1 texel along the packed dim traverses 4 tensor elements + lpos[packed_dim] *= 4; + + ivec4 tidx = ivec4(lpos, 0); + + if (sizes.w > 1) { + tidx.w = tidx[batch_inner_dim] / sizes[batch_inner_dim]; + tidx[batch_inner_dim] %= sizes[batch_inner_dim]; + } + return tidx; +} + +ivec3 tidx_to_lpos( + ivec4 tidx, + ivec4 sizes, + const int batch_inner_dim, + const int packed_dim) { + // Align packed dim to next multiple of 4 to account for texel padding + sizes[packed_dim] = alignup4(sizes[packed_dim]); + + ivec3 lpos = tidx.xyz; + + // Adjust batch inner dim by batch index if needed + if (sizes.w > 1) { + lpos[batch_inner_dim] += tidx.w * sizes[batch_inner_dim]; + } + // Fast division by 4, since moving 1 texel along the packed dim traverses 4 + // tensor elements. + lpos[packed_dim] >>= 2; + return lpos; +} + +ivec3 tidx_to_pos( + ivec4 tidx, + ivec4 sizes, + const ivec4 axis_map, + const int packed_dim) { + // Align packed dim to next multiple of 4 to account for texel padding + sizes[packed_dim] = alignup4(sizes[packed_dim]); + + ivec3 pos; + for (int dim = 0; dim < 3; ++dim) { + pos[axis_map[dim]] = tidx[dim]; + } + + // Adjust batch inner dim by batch index if needed + if (sizes.w > 1) { + pos[axis_map[axis_map.w]] += tidx.w * sizes[axis_map.w]; + } + // Fast division by 4, since moving 1 texel along the packed dim traverses 4 + // tensor elements. 
+ pos[axis_map[packed_dim]] >>= 2; + return pos; +} + +ivec4 tidx_to_posi( + ivec4 tidx, + ivec4 sizes, + const ivec4 axis_map, + const int packed_dim) { + return ivec4( + tidx_to_pos(tidx, sizes, axis_map, packed_dim), tidx[packed_dim] % 4); +} + +ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) { + ivec3 pos; + pos[axis_map.x] = lpos.x; + pos[axis_map.y] = lpos.y; + pos[axis_map.z] = lpos.z; + return pos; +} + +#ifdef USING_BUFFER +#define load_texel(buf, idx) buf[idx] +#elif defined(USING_TEXTURE2D) +#define load_texel(im, pos) texelFetch(im, pos.xy, 0) +#else // defined(USING_TEXTURE3D) +#define load_texel(im, pos) texelFetch(im, pos, 0) +#endif + +#ifdef USING_BUFFER +#define write_texel(buf, idx, texel) buf[idx] = texel +#elif defined(USING_TEXTURE2D) +#define write_texel(im, pos, texel) imageStore(im, pos.xy, texel) +#else // defined(USING_TEXTURE3D) +#define write_texel(im, pos, texel) imageStore(im, pos, texel) +#endif + +/************************ + * Deprecated Functions * + ************************/ + +// The functions and macros below are in the process of being deprecated in +// favor of newer indexing functions that account for axis mapping and have more +// explicit function names and updated terminology. + /* - * Input: (w, h, c, n) tensor index, (W, H, C, N) strides of the tensor buffer - * Returns: the buffer index corresponding to the specified tensor index + * Describes which texture axis the "batches" dimension runs along in a 4D + * texture. + * + * Currently it is set to 2 since we represent batches by concatenating along + * the channels dim, which has index 2 in (W, H, C, N) order and maps to the + * depth dimension of a texture, which also corresponds to index 2 in (x, y, z) + * order. */ -int to_buffer_id(const ivec4 tensor_idx, ivec4 strides) { - return tensor_idx.x * strides.x + tensor_idx.y * strides.y + - tensor_idx.z * strides.z + tensor_idx.w * strides.w; -} +#define BATCH_AXIS 2 // // (w, h, c, n) Tensor Index <-> (x, y, z) Texture Position Conversion // @@ -199,42 +287,6 @@ ivec4 to_tensor_idx(ivec3 pos, ivec4 sizes, int packed_dim) { return tensor_idx; } -/* - * Derive (w,h,c,n) tensor indices from (x,y,z) texture position using axis - * mapping. - */ -ivec4 to_tensor_idx( - ivec3 pos, - ivec4 sizes, - const ivec4 axis_map, - const int packed_dim) { - // Align packed dim to next multiple of 4 to account for texel padding - sizes[packed_dim] = alignup4(sizes[packed_dim]); - - // Packed dim contains 4 elements per texel, so moving 1 unit traverses 4 - // elements in the tensor. - pos[axis_map[packed_dim]] *= 4; - - ivec4 tensor_idx; - for (int dim = 0; dim < 3; ++dim) { - tensor_idx[dim] = pos[axis_map[dim]]; - } - - // Early return if batch is 1. Batch index will be 0. - if (sizes.w == 1) { - tensor_idx.w = 0; - return tensor_idx; - } - - // Else, adjust the dim that's concatenated with batch. Note that the axis - // mapping for the batch dim indicates WHCN dim index of the dim that it is - // concatenated with, not a texture axis. - tensor_idx.w = tensor_idx[axis_map[3]] / sizes[axis_map[3]]; - tensor_idx[axis_map[3]] %= sizes[axis_map[3]]; - - return tensor_idx; -} - /* * Input: (w, h, c, n) tensor index, (W, H, C, N) sizes of a tensor, which dim * is packed along a texel @@ -251,34 +303,6 @@ ivec3 to_texture_pos(ivec4 idx, ivec4 sizes, int packed_dim) { return pos; } -/* - * Derive (x,y,z) texture position from (w,h,c,n) tensor indices using axis - * mapping.
- */ -ivec3 to_texture_pos( - const ivec4 idx, - ivec4 sizes, - const ivec4 axis_map, - const int packed_dim) { - // Align packed dim to next multiple of 4 to account for texel padding - sizes[packed_dim] = alignup4(sizes[packed_dim]); - - ivec3 pos; - for (int dim = 0; dim < 3; ++dim) { - pos[axis_map[dim]] = idx[dim]; - } - - // Adjust batch dim if needed - if (sizes.w > 1) { - pos[axis_map[axis_map.w]] += idx.w * sizes[axis_map.w]; - } - - // Adjust packed dim. Moving 1 texel unit along the packed dim traverses 4 - // tensor elements in that dim. - pos[axis_map[packed_dim]] /= 4; - return pos; -} - /* * Input: (w, h, c, n) tensor index, (W, H, C, N) sizes of the tensor, which dim * is packed along a texel @@ -298,71 +322,6 @@ ivec4 to_texture_elem_pos(ivec4 idx, ivec4 sizes, int packed_dim) { return pos; } -/* - * Derive (x,y,z,i) texel element position from the (w,h,c,n) tensor index using - * the axis mapping. - */ -ivec4 to_texture_elem_pos( - const ivec4 idx, - ivec4 sizes, - const ivec4 axis_map, - const int packed_dim) { - // Align packed dim to next multiple of 4 to account for texel padding - sizes[packed_dim] = alignup4(sizes[packed_dim]); - - ivec4 pos; - for (int dim = 0; dim < 3; ++dim) { - pos[axis_map[dim]] = idx[dim]; - } - - // Adjust batch dim if needed - if (sizes.w > 1) { - pos[axis_map[axis_map.w]] += idx.w * sizes[axis_map.w]; - } - - // Adjust packed dim. Moving 1 texel unit along the packed dim traverses 4 - // tensor elements in that dim. - pos[axis_map[packed_dim]] /= 4; - pos.w = idx[packed_dim] % 4; - return pos; -} - -// -// Convert between physical texture position and logical tensor position -// - -/* - * Derive (x,y,z) physical texture position from (w,h,d) logical texture - * position using the axis mapping. - */ -ivec3 to_texture_pos(const ivec3 logical_pos, const ivec4 axis_map) { - ivec3 pos; - pos[axis_map.x] = logical_pos.x; - pos[axis_map.y] = logical_pos.y; - pos[axis_map.z] = logical_pos.z; - return pos; -} - -// -// Texel Access and Storage -// - -#ifdef USING_BUFFER -#define load_texel(buf, idx) buf[idx] -#elif defined(USING_TEXTURE2D) -#define load_texel(im, pos) texelFetch(im, pos.xy, 0) -#else // defined(USING_TEXTURE3D) -#define load_texel(im, pos) texelFetch(im, pos, 0) -#endif - -#ifdef USING_BUFFER -#define write_texel(buf, idx, texel) buf[idx] = texel -#elif defined(USING_TEXTURE2D) -#define write_texel(im, pos, texel) imageStore(im, pos.xy, texel) -#else // defined(USING_TEXTURE3D) -#define write_texel(im, pos, texel) imageStore(im, pos, texel) -#endif - // // Miscellaneous Utility Functions and Macros // diff --git a/backends/vulkan/runtime/graph/ops/glsl/int8_image_to_nchw_noint8.glsl b/backends/vulkan/runtime/graph/ops/glsl/int8_image_to_nchw_noint8.glsl index b8a291fd044..f7133dd0452 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/int8_image_to_nchw_noint8.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/int8_image_to_nchw_noint8.glsl @@ -41,9 +41,9 @@ void main() { int in_buf_idx = 4 * out_buf_idx; [[unroll]] for (int i = 0; i < 4; ++i) { - const ivec4 tensor_idx = from_nchw_buffer_i(in_buf_idx, tensor_sizes); + const ivec4 tidx = nchwi_to_tidx(in_buf_idx, tensor_sizes); const ivec4 texture_pos = to_texture_elem_pos( - tensor_idx, tensor_sizes, packed_dim); + tidx, tensor_sizes, packed_dim); values[i] = load_texel(t_in, texture_pos.xyz)[texture_pos.w]; in_buf_idx++; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/matmul_naive_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/matmul_naive_buffer.glsl index 
25a6a742779..e4064eed2fa 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/matmul_naive_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/matmul_naive_buffer.glsl @@ -32,29 +32,29 @@ ${layout_declare_ubo(9, "int", "out_numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; void main() { - const ivec4 out_idx = ivec4( + const ivec4 out_bufix = ivec4( gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, gl_GlobalInvocationID.z % out_sizes.z, gl_GlobalInvocationID.z / out_sizes.z); - if (any(greaterThanEqual(out_idx, out_sizes))) { + if (any(greaterThanEqual(out_bufix, out_sizes))) { return; } - int mat1_id = to_buffer_id( - ivec4(0, out_idx.y, out_idx.z, out_idx.w), mat1_strides); - int mat2_id = to_buffer_id( - ivec4(out_idx.x, 0, out_idx.z, out_idx.w), mat2_strides); + int mat1_bufi = tidx_to_bufi( + ivec4(0, out_bufix.y, out_bufix.z, out_bufix.w), mat1_strides); + int mat2_bufi = tidx_to_bufi( + ivec4(out_bufix.x, 0, out_bufix.z, out_bufix.w), mat2_strides); T sum = T(0.0); for (int i = 0; i < mat1_sizes.x; ++i) { - sum += t_mat1[mat1_id] * t_mat2[mat2_id]; + sum += t_mat1[mat1_bufi] * t_mat2[mat2_bufi]; - mat1_id += mat1_strides.x; - mat2_id += mat2_strides.y; + mat1_bufi += mat1_strides.x; + mat2_bufi += mat2_strides.y; } - const int out_id = to_buffer_id(out_idx, out_strides); - t_out[out_id] = T(sum); + const int out_bufi = tidx_to_bufi(out_bufix, out_strides); + t_out[out_bufi] = T(sum); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl index d861972f935..ea4e0d300cc 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl @@ -23,13 +23,13 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM; void main() { - int out_id = int(gl_GlobalInvocationID.x); - if (out_id >= numel) { + int out_bufi = int(gl_GlobalInvocationID.x); + if (out_bufi >= numel) { return; } - ivec4 out_idx = to_tensor_idx(out_id, out_strides); - const int in_id = to_nchw_buffer_i(out_idx, out_sizes); + ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides); + const int in_nchwi = tidx_to_nchwi(out_tidx, out_sizes); - t_out[out_id] = nchw_in[in_id]; + t_out[out_bufi] = nchw_in[in_nchwi]; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl index d553ad3624f..b86a59fc234 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl @@ -30,34 +30,34 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; layout(constant_id = 3) const int packed_dim = C_DIM; -VEC4_T read_texel(ivec4 tensor_idx) { - const ivec4 buf_indices = get_texel_nchw_buffer_ixs( - tensor_idx, +VEC4_T read_texel(ivec4 tidx) { + const ivec4 buf_indices = tidx_to_nchwi( + tidx, sizes, packed_dim); VEC4_T texel = VEC4_T(0); - if (tensor_idx[packed_dim] < sizes[packed_dim]) { + if (tidx[packed_dim] < sizes[packed_dim]) { texel.x = SCALAR_T(nchw_in[buf_indices.x]); } - if (tensor_idx[packed_dim] + 1 < sizes[packed_dim]) { + if (tidx[packed_dim] + 1 < sizes[packed_dim]) { texel.y = SCALAR_T(nchw_in[buf_indices.y]); } - if (tensor_idx[packed_dim] + 2 < sizes[packed_dim]) { + if (tidx[packed_dim] + 2 < sizes[packed_dim]) { texel.z = SCALAR_T(nchw_in[buf_indices.z]); } - if (tensor_idx[packed_dim] + 3 < 
sizes[packed_dim]) { + if (tidx[packed_dim] + 3 < sizes[packed_dim]) { texel.w = SCALAR_T(nchw_in[buf_indices.w]); } return texel; } void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - const ivec4 tensor_idx = to_tensor_idx(pos, sizes, axis_map, packed_dim); - if (any(greaterThanEqual(tensor_idx, sizes))) { + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + const ivec4 tidx = lpos_to_tidx(lpos, sizes, axis_map.w, packed_dim); + if (any(greaterThanEqual(tidx, sizes))) { return; } - write_texel(t_out, pos, read_texel(tensor_idx)); + write_texel(t_out, lpos_to_pos(lpos, axis_map), read_texel(tidx)); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8_image_noint8.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8_image_noint8.glsl index 48b2abb2af2..f3a3370f3ba 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8_image_noint8.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8_image_noint8.glsl @@ -35,9 +35,9 @@ int extend_sign(int x) { return x; } -ivec4 read_texel(ivec4 tensor_idx) { - const ivec4 buf_indices = get_texel_nchw_buffer_ixs( - tensor_idx, sizes, packed_dim); +ivec4 read_texel(ivec4 tidx) { + const ivec4 buf_indices = tidx_to_nchwi( + tidx, sizes, packed_dim); int shift = (1 << 8) - 1; ivec4 masks; @@ -52,7 +52,7 @@ ivec4 read_texel(ivec4 tensor_idx) { ivec4 out_tex = ivec4(0); [[unroll]] for (int i = 0; i < 4; ++i) { - if (tensor_idx[packed_dim] + i < sizes[packed_dim]) { + if (tidx[packed_dim] + i < sizes[packed_dim]) { int in_texel = nchw_in[buf_indices[i] / 4]; int extracted_val = (in_texel & masks[i]) >> (8 * (buf_indices[i] % 4)); extracted_val = extend_sign(extracted_val); @@ -64,12 +64,12 @@ ivec4 read_texel(ivec4 tensor_idx) { } void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - const ivec4 tensor_idx = to_tensor_idx(pos, sizes, axis_map, packed_dim); + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + const ivec4 tidx = lpos_to_tidx(lpos, sizes, axis_map.w, packed_dim); - if (any(greaterThanEqual(tensor_idx, sizes))) { + if (any(greaterThanEqual(tidx, sizes))) { return; } - write_texel(t_out, pos, read_texel(tensor_idx)); + write_texel(t_out, lpos_to_pos(lpos, axis_map), read_texel(tidx)); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl index 751d513d59d..d07d45251fb 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl @@ -74,31 +74,31 @@ void main() { for (int kb = 0; kb < k_block; kb++) { scale_pos.x = kb; - const int scale_id = to_buffer_id(scale_pos, scales_strides); - const float scale = float(t_scales_and_zeros[scale_id]); + const int scale_bufi = tidx_to_bufi(scale_pos, scales_strides); + const float scale = float(t_scales_and_zeros[scale_bufi]); zero_pos.x = kb; - const int zero_id = to_buffer_id(zero_pos, scales_strides); - const float zero = float(t_scales_and_zeros[zero_id]) - scale * 8.0; + const int zero_bufi = tidx_to_bufi(zero_pos, scales_strides); + const float zero = float(t_scales_and_zeros[zero_bufi]) - scale * 8.0; for(uint idx = 0; idx < group_size && k < K; idx++, k++) { mat1_pos.x = k; - const int mat1_id = to_buffer_id(mat1_pos, mat1_strides); - const float mat1_val = float(t_mat1[mat1_id]); + const int mat1_bufi = tidx_to_bufi(mat1_pos, mat1_strides); + const float mat1_val = float(t_mat1[mat1_bufi]); mat2_pos.x = k / 2; - const int mat2_id = to_buffer_id(mat2_pos, mat2_strides); + const int mat2_bufi = 
tidx_to_bufi(mat2_pos, mat2_strides); // Bitwise op treats sign bit from int8 as a value bit instead, // since there is no uint8_t datatype - uint mat2_val = (t_mat2[mat2_id] & 0xFF); + uint mat2_val = (t_mat2[mat2_bufi] & 0xFF); mat2_val = (k & 1) == 0 ? mat2_val & mask : (mat2_val >> 4); rc += mat1_val * (scale * float(mat2_val) + zero); } } - const int out_id = to_buffer_id(out_pos, out_strides); - t_out[out_id] = FLOAT_T(rc); + const int out_bufi = tidx_to_bufi(out_pos, out_strides); + t_out[out_bufi] = FLOAT_T(rc); #else // Using texture const uint texel_group_size = group_size / FOUR; diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl index 7557a7b0c3d..a72df89b634 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl @@ -49,14 +49,14 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; #ifdef USING_BUFFER void main() { - const int t_id = int(gl_GlobalInvocationID.x); - if (t_id >= out_numel) { + const int out_bufi = int(gl_GlobalInvocationID.x); + if (out_bufi >= out_numel) { return; } - const ivec4 out_idx = to_tensor_idx(t_id, out_strides, 0); + const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, 0); - t_out[t_id] = q_8w_linear(out_idx, mat1_sizes.x); + t_out[out_bufi] = q_8w_linear(out_tidx, mat1_sizes.x); } #else // USING_TEXTURE diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl index d1562d65762..45e6c3358e8 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl @@ -43,11 +43,11 @@ void main() { // we calculate the source whcn-coordinate amended with offset-ed channel // value. Then we calculate the actual texture position from the // whcn-coordinate. - const ivec4 buf_indices = get_texel_nchw_buffer_ixs(idx, out_sizes, packed_dim); + const ivec4 buf_indices = tidx_to_nchwi(idx, out_sizes, packed_dim); vec4 outex; for (int i=0;i<4;i++) { - ivec4 user_coor = from_nchw_buffer_i(buf_indices[i], out_sizes); + ivec4 user_coor = nchwi_to_tidx(buf_indices[i], out_sizes); int in_channel = user_coor.z; diff --git a/backends/vulkan/runtime/graph/ops/glsl/view.glsl b/backends/vulkan/runtime/graph/ops/glsl/view.glsl index 0b0f587d1d5..8d45e65b396 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/view.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/view.glsl @@ -39,13 +39,13 @@ void main() { // Assume there is a virtual continous buffer in nchw format. From the output // pos, we first calculate the index in the virual buffer, and then calculate // the input position from the indx. - const ivec4 buf_indices = get_texel_nchw_buffer_ixs(out_tensor_idx, out_sizes, out_packed_dim); + const ivec4 buf_indices = tidx_to_nchwi(out_tensor_idx, out_sizes, out_packed_dim); VEC4_T value = VEC4_T(0); // Need to look up the 4 values in the output texel separately. for (int i = 0 ; i < 4; i++) { if (out_tensor_idx[out_packed_dim]++ < out_sizes[out_packed_dim]) { - ivec4 user_coor = from_nchw_buffer_i(buf_indices[i], in_sizes); + ivec4 user_coor = nchwi_to_tidx(buf_indices[i], in_sizes); ivec4 in_pos_elem = to_texture_elem_pos(user_coor, in_sizes, in_packed_dim); VEC4_T intex = texelFetch(t_in, in_pos_elem.xyz, 0); value[i] = intex[in_pos_elem.w];