From 0db3178ad48b59222d24eea8ad87c24a68c387aa Mon Sep 17 00:00:00 2001
From: ssjia
Date: Wed, 15 Oct 2025 09:07:25 -0700
Subject: [PATCH] [ET-VK] Introduce `TextureMetadata` struct

Title says it all!

Introduce a utility struct `TextureMetadata` to make it easier to pass tensor
metadata to compute shaders, as a direct equivalent to `BufferMetadata`.

Differential Revision: [D84716457](https://our.internmc.facebook.com/intern/diff/D84716457/)

[ghstack-poisoned]
---
 .../vulkan/runtime/api/containers/Tensor.cpp | 64 +++++++++++++++++++
 .../vulkan/runtime/api/containers/Tensor.h   | 40 ++++++++++++
 backends/vulkan/runtime/graph/ComputeGraph.h | 13 ++++
 .../runtime/graph/ops/glsl/indexing.glslh    | 36 +++++++++++
 4 files changed, 153 insertions(+)

diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index d798b203673..5a1c445889e 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -836,6 +836,52 @@ void vTensor::BufferMetadata::update(
   numel = utils::safe_downcast(src_numel);
 }
 
+vTensor::TextureMetadata::TextureMetadata(
+    const std::vector& src_sizes,
+    const TextureLimits& src_logical_limits,
+    const std::vector& src_axis_map,
+    const int32_t src_packed_dim) {
+  update(src_sizes, src_logical_limits, src_axis_map, src_packed_dim);
+}
+
+void vTensor::TextureMetadata::update(
+    const std::vector& src_sizes,
+    const TextureLimits& src_logical_limits,
+    const std::vector& src_axis_map,
+    const int32_t src_packed_dim) {
+  // Convert sizes to flipped and unsqueezed format (fixed to 4 dimensions for
+  // texture)
+  std::vector fu_sizes =
+      flip_and_unsqueeze(src_sizes, kTensorSizes, 0, 4);
+
+  // Copy the first 4 sizes; at() throws if fu_sizes is shorter than that
+  for (int i = 0; i < 4; ++i) {
+    sizes[i] = fu_sizes.at(i);
+  }
+
+  // Copy logical limits (3 elements); the 4th slot is always set to 1
+  logical_limits[0] =
+      utils::safe_downcast(src_logical_limits.limits[0]);
+  logical_limits[1] =
+      utils::safe_downcast(src_logical_limits.limits[1]);
+  logical_limits[2] =
+      utils::safe_downcast(src_logical_limits.limits[2]);
+  logical_limits[3] = 1u;
+
+  // Copy axis map (up to 4 elements), padding with zeros if src_axis_map has
+  // fewer than 4 entries. The index is a size_t so that the comparison with
+  // size() does not mix signed and unsigned operands.
+  for (size_t i = 0; i < 4; ++i) {
+    if (i < src_axis_map.size()) {
+      axis_map[i] = utils::safe_downcast(src_axis_map.at(i));
+    } else {
+      axis_map[i] = 0;
+    }
+  }
+
+  packed_dim = src_packed_dim;
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
@@ -948,6 +994,17 @@ const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
   return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
 }
 
+const vkapi::BufferBindInfo vTensor::texture_meta_ubo() {
+  size_t ubo_nbytes = sizeof(TextureMetadata);
+  // Create the params buffer only on the first request
+  if (!texture_meta_.buffer()) {
+    TextureLimits limits(logical_limits());
+    TextureMetadata data(sizes_, limits, axis_map_, packed_dim_);
+    texture_meta_ = ParamsBuffer(storage_->context_, data);
+  }
+  return vkapi::BufferBindInfo(texture_meta_.buffer(), 0, ubo_nbytes);
+}
+
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case utils::kBuffer:
@@ -1031,6 +1088,13 @@ void vTensor::update_metadata() {
     BufferMetadata data(sizes_, dim_order_, strides_, numel_);
     buffer_meta_.update(data);
   }
+
+  // Refresh the texture metadata buffer only if it has already been created
+  if (texture_meta_.buffer()) {
+    TextureMetadata data(
+        sizes_, uniform_data_->logical_limits, axis_map_, packed_dim_);
+    texture_meta_.update(data);
+  }
 }
 
 void vTensor::check_sizes(const std::vector& sizes) const {
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index d9fc7784cbc..967148b8dbe 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -285,6 +285,34 @@ class vTensor final {
         size_t numel);
   };
 
+  /*
+   * Texture counterpart of BufferMetadata: tensor metadata laid out as
+   * fixed-size int32 arrays so that it can be passed to shaders through
+   * texture_meta_ubo().
+   */
+  struct TextureMetadata {
+    // Sizes after flip_and_unsqueeze() to 4 dimensions
+    int32_t sizes[4];
+    // Elements 0-2 hold the logical limits; element 3 is always set to 1
+    int32_t logical_limits[4];
+    // Axis map entries, zero-padded to 4 elements
+    int32_t axis_map[4];
+    int32_t packed_dim;
+
+    TextureMetadata(
+        const std::vector& sizes,
+        const TextureLimits& logical_limits,
+        const std::vector& axis_map,
+        const int32_t packed_dim);
+
+    // Overwrites every field from the given tensor properties
+    void update(
+        const std::vector& sizes,
+        const TextureLimits& logical_limits,
+        const std::vector& axis_map,
+        const int32_t packed_dim);
+  };
+
  private:
   /*
    * "Core" tensor metadata. They are the minimum amount of information required
@@ -360,6 +388,12 @@ class vTensor final {
    */
   ParamsBuffer buffer_meta_;
 
+  /*
+   * Used to store data for TextureMetadata to pass to shaders as
+   * texture_meta_ubo
+   */
+  ParamsBuffer texture_meta_;
+
   uint32_t uniforms_size_ = 0u;
   uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
   uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -587,6 +621,12 @@ class vTensor final {
 
   const vkapi::BufferBindInfo buffer_meta_ubo();
 
+  /*
+   * Returns bind info for the buffer holding this tensor's TextureMetadata.
+   * The buffer is created on the first call.
+   */
+  const vkapi::BufferBindInfo texture_meta_ubo();
+
  public:
   inline size_t staging_buffer_numel() const {
     return storage_->buffer_len();
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index baa15233a00..dbd5536279c 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -449,6 +449,19 @@ class ComputeGraph final {
     return values_.at(idx).toTensor().buffer_meta_ubo();
   }
 
+  inline vkapi::BufferBindInfo texture_meta_ubo(const ValueRef idx) {
+    return values_.at(idx).toTensor().texture_meta_ubo();
+  }
+
+  // Returns buffer_meta_ubo(idx) when is_buffer_storage(idx) holds, and
+  // texture_meta_ubo(idx) otherwise.
+  inline vkapi::BufferBindInfo meta_ubo(const ValueRef idx) {
+    if (is_buffer_storage(idx)) {
+      return buffer_meta_ubo(idx);
+    }
+    return texture_meta_ubo(idx);
+  }
+
   inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().strides_ubo();
   }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
index 81783422ab4..d5148994e60 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
+++ b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
@@ -81,6 +81,29 @@ bool are_equal(const BufferMetadata meta1, const BufferMetadata meta2) {
   return true;
 }
 
+// True if bufi is >= meta.ndim_numel[1] (presumably the element count).
+bool out_of_bounds(const uint bufi, const BufferMetadata meta) {
+  return bufi >= meta.ndim_numel[1];
+}
+
+//
+// TextureMetadata
+//
+
+// Shader-side view of vTensor::TextureMetadata. NOTE(review): limits is an
+// ivec3 here but 4 ints in C++; presumably relies on std140 padding - confirm.
+struct TextureMetadata {
+  ivec4 sizes;
+  ivec3 limits;
+  ivec4 axis_map;
+  int packed_dim;
+};
+
+// True if any component of pos is >= the matching component of meta.limits.
+bool out_of_bounds(const ivec3 pos, const TextureMetadata meta) {
+  return any(greaterThanEqual(pos, meta.limits));
+}
+
 //
 // TensorIndex
 //
@@ -186,6 +209,8 @@ void clamp_tensor_idx(const BufferMetadata meta, inout TensorIndex tidx) {
 
 #ifdef DEBUG_MODE
 
+#extension GL_EXT_debug_printf : enable
+
 void printTensorIndex(const TensorIndex tidx) {
   debugPrintfEXT(
     "TensorIndex: tidx=[%u %u %u %u %u %u %u %u]\\n",
@@ -211,6 +236,17 @@ void printBufferMetadata(const BufferMetadata meta) {
   );
 }
 
+// Prints every field of meta; %d is used because all fields are signed ints.
+void printTextureMetadata(const TextureMetadata meta) {
+  debugPrintfEXT(
+    "TextureMetadata:\\n sizes=[%d %d %d %d]\\n limits=[%d %d %d]\\n axis_map=[%d %d %d %d]\\n packed_dim=%d\\n",
+    meta.sizes[0], meta.sizes[1], meta.sizes[2], meta.sizes[3],
+    meta.limits[0], meta.limits[1], meta.limits[2],
+    meta.axis_map[0], meta.axis_map[1], meta.axis_map[2], meta.axis_map[3],
+    meta.packed_dim
+  );
+}
+
 #endif
 
 #endif // INDEXING_GLSLH