diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl index a23822765a3..178814a90c3 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl @@ -35,6 +35,8 @@ const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); ${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); +${layout_declare_spec_const(C, "int", "batch_index_function", "0")} + void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); @@ -42,14 +44,20 @@ void main() { return; } - const ivec3 in_pos = pos + src_offset.xyz; + ivec3 in_pos = pos + src_offset.xyz; ivec3 out_pos = pos + dst_offset.xyz; - - // If source channel size is specified compose output z based on channel and batch index if (src_offset.w > 0) { - const int channel_index = in_pos.z % src_offset.w; - const int batch_index = in_pos.z / src_offset.w; - out_pos.z = channel_index + dst_offset.z + batch_index * dst_offset.w; + if (batch_index_function == 1) { + // batch index is calculated using source channel size + const int channel_index = pos.z % src_offset.w; + const int batch_index = pos.z / src_offset.w; + out_pos.z = channel_index + dst_offset.z + batch_index * dst_offset.w; + } else if (batch_index_function == 2) { + // batch index is calculated using destination channel size + const int channel_index = pos.z % dst_offset.w; + const int batch_index = pos.z / dst_offset.w; + in_pos.z = channel_index + src_offset.z + batch_index * src_offset.w; + } } write_texel_lpos( diff --git a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp index 5f172454121..25a0ff9a7f5 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp @@ -80,7 +80,7 @@ void add_cat_default_node( // concatenating channels src_offset[3] = 
is_concat_channel ? in_channel_size : 0; add_copy_offset_node( - graph, input_ref, range, src_offset, dst_offset, out); + graph, input_ref, range, src_offset, dst_offset, out, true, false); dst_offset[dim_xyz_index] += is_concat_channel ? in_channel_size : range[dim_xyz_index]; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 4b09fbe8619..2ecc7400d3e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -25,7 +25,9 @@ void add_copy_offset_node( const ivec3& range, const ivec4& src_offset, const ivec4& dst_offset, - const ValueRef out) { + const ValueRef out, + bool calc_out_pos_using_src_chnl, + bool calc_in_pos_using_dst_chnl) { vTensorPtr t_in = graph.get_tensor(in); vTensorPtr t_out = graph.get_tensor(out); @@ -49,7 +51,11 @@ void add_copy_offset_node( // Parameter buffers {}, // Specialization Constants - {graph.hashed_layout_of(out), graph.hashed_layout_of(in)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(in), + (calc_out_pos_using_src_chnl ? 1 + : calc_in_pos_using_dst_chnl ? 2 + : 0)}, nullptr, {}, { @@ -256,7 +262,8 @@ void add_copy_offset_node( ivec4 src_offset = {src[0], src[1], src[2], 0}; ivec4 dst_offset = {dst[0], dst[1], dst[2], 0}; - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out, false, false); } void copy_offset(ComputeGraph& graph, const std::vector& args) { diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.h b/backends/vulkan/runtime/graph/ops/impl/Copy.h index d4b4c0dcc03..e9388345afa 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.h +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.h @@ -22,13 +22,28 @@ namespace vkcompute { // It is possible to have input and output to point to the same image // object. But when the source range and destination range overlap, the behavior // is undefined. 
+// +// boolean flags calc_out_pos_using_src_chnl and calc_in_pos_using_dst_chnl +// can be used to specify an indexing function in the shader +// If calc_out_pos_using_src_chnl is set to true, channel and batch index will be +// calculated based on source channel size and will be used to determine +// destination texel position. +// +// If calc_in_pos_using_dst_chnl is set to true, channel and batch index will be +// calculated based on destination channel size and will be used to determine +// source texel position. +// +// If both are true, calc_out_pos_using_src_chnl is picked. If both are false, no +// index calculation happens. void add_copy_offset_node( ComputeGraph& graph, const ValueRef in, const utils::ivec3& range, const utils::ivec4& src_offset, const utils::ivec4& dst_offset, - const ValueRef out); + const ValueRef out, + bool calc_out_pos_using_src_chnl, + bool calc_in_pos_using_dst_chnl); // add_copy_packed_dim_offset_node behaves similar to add_copy_node, except that // its used when copying packed dimension, if tensor is width or height packed. 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp index 49daabdcb76..3f4ed4f1090 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp @@ -151,7 +151,8 @@ void add_repeat_node( utils::ivec4 src_offset{0, 0, 0, 0}; utils::ivec4 dst_offset{0, 0, 0, 0}; - add_copy_offset_node(graph, in, running_range, src_offset, dst_offset, out); + add_copy_offset_node( + graph, in, running_range, src_offset, dst_offset, out, false, false); } else { add_repeat_channel_node(graph, in, channel_repeat, out, running_range); @@ -166,7 +167,7 @@ void add_repeat_node( utils::ivec4 dst_offset{i * dim_at(in_sizes), 0, 0, 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[0] = running_range[0] * width_repeat; @@ -180,7 +181,7 @@ void add_repeat_node( utils::ivec4 dst_offset = {0, i * dim_at(in_sizes), 0, 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[1] = running_range[1] * height_repeat; @@ -194,7 +195,7 @@ void add_repeat_node( utils::ivec4 dst_offset = {0, 0, i * running_range[2], 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[2] = running_range[2] * batch_repeat; diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp index ca585f1fb6d..b74317b078e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp @@ -51,7 +51,8 @@ void add_split_with_sizes_default_node( // output tensor's size matches with the split_size. 
vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[0] += range[0]; } @@ -62,7 +63,8 @@ void add_split_with_sizes_default_node( for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[1] += range[1]; } @@ -73,7 +75,8 @@ void add_split_with_sizes_default_node( for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[2] += range[2]; }