[ET-VK] Enable Dynamic shape support via tensor virtual and physical resizing #121598

Closed
wants to merge 1 commit
29 changes: 28 additions & 1 deletion aten/src/ATen/native/vulkan/api/Context.h
@@ -205,6 +205,7 @@ class Context final {
class UniformParamsBuffer final {
private:
Context* context_p_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -213,6 +214,7 @@ UniformParamsBuffer final {
template <typename Block>
UniformParamsBuffer(Context* context_p, const Block& block)
: context_p_(context_p),
nbytes_(sizeof(block)),
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}

@@ -231,13 +233,29 @@ UniformParamsBuffer final {
VulkanBuffer& buffer() {
return vulkan_buffer_;
}

template <typename Block>
void update(const Block& block) {
if (sizeof(block) != nbytes_) {
VK_THROW(
"Attempted to update UniformParamsBuffer with data of different size");
}
// Fill the uniform buffer with data in block
{
MemoryMap mapping(vulkan_buffer_, MemoryAccessType::WRITE);
Block* data_ptr = mapping.template data<Block>();

*data_ptr = block;
}
}
};
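
The update() path above is what allows size and extent UBOs to be refreshed in place when a tensor is resized, instead of allocating a fresh uniform buffer each time. A minimal usage sketch (the SizesBlock struct and refresh_sizes helper are illustrative, not part of this change):

struct SizesBlock {
  api::utils::ivec4 data;
};

void refresh_sizes(api::UniformParamsBuffer& params, const api::utils::ivec4& whcn_sizes) {
  SizesBlock block{whcn_sizes};
  // Maps the existing VulkanBuffer for writing and copies the new block in;
  // VK_THROW fires if sizeof(block) does not match the original allocation.
  params.update(block);
}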

class StorageBuffer final {
private:
Context* context_p_;
ScalarType dtype_;
size_t numel_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -249,8 +267,9 @@ class StorageBuffer final {
: context_p_(context_p),
dtype_(dtype),
numel_(numel),
+ nbytes_(element_size(dtype_) * numel_),
vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer(
- element_size(dtype_) * numel_,
+ nbytes_,
gpuonly)) {}

StorageBuffer(const StorageBuffer&) = delete;
@@ -270,6 +289,14 @@ class StorageBuffer final {
inline VulkanBuffer& buffer() {
return vulkan_buffer_;
}

inline size_t numel() {
return numel_;
}

inline size_t nbytes() {
return nbytes_;
}
};

bool available();
8 changes: 8 additions & 0 deletions aten/src/ATen/native/vulkan/api/Resource.h
@@ -151,6 +151,10 @@ class VulkanBuffer final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

operator bool() const {
return (handle_ != VK_NULL_HANDLE);
}
@@ -372,6 +376,10 @@ class VulkanImage final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

inline operator bool() const {
return (handles_.image != VK_NULL_HANDLE);
}
3 changes: 3 additions & 0 deletions aten/src/ATen/native/vulkan/api/ShaderRegistry.h
@@ -12,6 +12,9 @@
#define VK_KERNEL(shader_name) \
::at::native::vulkan::api::shader_registry().get_shader_info(#shader_name)

#define VK_KERNEL_FROM_STR(shader_name_str) \
::at::native::vulkan::api::shader_registry().get_shader_info(shader_name_str)

namespace at {
namespace native {
namespace vulkan {
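
VK_KERNEL resolves a shader from a compile-time identifier by stringizing it, so it cannot be used when the kernel name is assembled at runtime; the new VK_KERNEL_FROM_STR accepts a runtime string. A hedged sketch (the kernel name and variant suffix are illustrative):

std::string kernel_name = "add";
kernel_name += "_texture3d"; // e.g. pick a variant based on storage type
const auto& shader = VK_KERNEL_FROM_STR(kernel_name);
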
155 changes: 140 additions & 15 deletions aten/src/ATen/native/vulkan/api/Tensor.cpp
@@ -318,8 +318,8 @@ api::UniformParamsBuffer make_metadata_uniform(
}

vTensor::BufferMetadata metadata{
- api::utils::make_nchw_uvec4(sizes),
- api::utils::make_nchw_uvec4(strides),
+ api::utils::make_whcn_uvec4(sizes),
+ api::utils::make_whcn_uvec4(strides),
api::utils::safe_downcast<uint32_t>(sizes.size()),
api::utils::safe_downcast<uint32_t>(api::utils::multiply_integers(sizes)),
};
Expand Down Expand Up @@ -347,12 +347,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
- // Vulkan uniform buffer containing sizes and stride info
- metadata_uniform_{make_metadata_uniform(
-     context,
-     gpu_sizes_,
-     gpu_strides_,
-     storage_type)},
+ virtual_extents_(
+     create_image_extents(gpu_sizes_, storage_type, memory_layout)),
+ // Utility Uniform Buffers that can be passed to shaders as arguments
+ metadata_uniform_(),
+ cpu_sizes_uniform_(nullptr),
+ gpu_sizes_uniform_(nullptr),
+ extents_uniform_(nullptr),
// Construct Tensor storage
view_(std::make_shared<vTensorStorage>(
context,
@@ -377,12 +378,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
+ virtual_extents_(
+     create_image_extents(gpu_sizes_, storage_type, memory_layout)),
// Vulkan uniform buffer containing sizes and stride info
- metadata_uniform_{make_metadata_uniform(
-     context,
-     gpu_sizes_,
-     gpu_strides_,
-     storage_type)},
+ metadata_uniform_(),
+ cpu_sizes_uniform_(nullptr),
+ gpu_sizes_uniform_(nullptr),
+ extents_uniform_(nullptr),
// Quantization params
is_quantized_{true},
q_scale_{q_scale},
Expand Down Expand Up @@ -425,10 +427,47 @@ api::VulkanBuffer& vTensor::buffer(
return view_->buffer_;
}

api::VulkanBuffer& vTensor::buffer_metadata() {
if (!metadata_uniform_.buffer()) {
metadata_uniform_ = make_metadata_uniform(
view_->context_, gpu_sizes_, gpu_strides_, storage_type());
}
return metadata_uniform_.buffer();
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
if (!cpu_sizes_uniform_) {
cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(sizes_)));
}
return cpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
if (!gpu_sizes_uniform_) {
gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
}
return gpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
if (!extents_uniform_) {
extents_uniform_.reset(new api::UniformParamsBuffer(
view_->context_,
api::utils::uvec4(
{view_->extents_.data[0],
view_->extents_.data[1],
view_->extents_.data[2],
1u})));
}
return extents_uniform_;
}
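
These getters all follow the same lazy, memoized pattern: the UBO is allocated the first time a shader asks for it, and the cached shared_ptr is handed out on every later call. Because the underlying VkBuffer handle never changes, descriptor sets built against it observe in-place updates made during a later resize. A small sketch of the assumed usage:

void observe_caching(vTensor& tensor) {
  std::shared_ptr<api::UniformParamsBuffer> a = tensor.gpu_sizes_ubo();
  std::shared_ptr<api::UniformParamsBuffer> b = tensor.gpu_sizes_ubo();
  // a and b alias the same UniformParamsBuffer: the first call allocated it,
  // the second returned the cached pointer. A subsequent virtual_resize()
  // rewrites its contents in place, so both observers see the new sizes.
}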

vTensor::BufferMetadata vTensor::get_cpu_buffer_metadata() const {
return {
- api::utils::make_nchw_uvec4(sizes_),
- api::utils::make_nchw_uvec4(strides_),
+ api::utils::make_whcn_uvec4(sizes_),
+ api::utils::make_whcn_uvec4(strides_),
api::utils::safe_downcast<uint32_t>(sizes_.size()),
api::utils::safe_downcast<uint32_t>(
api::utils::multiply_integers(sizes_)),
@@ -473,6 +512,65 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
}
}

void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
[Contributor comment] I see what you mean by updating metadata now

sizes_ = new_sizes;
gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
virtual_extents_ =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);

if (cpu_sizes_uniform_) {
cpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(sizes_));
}

if (gpu_sizes_uniform_) {
gpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(gpu_sizes_));
}

if (extents_uniform_) {
extents_uniform_->update(api::utils::uvec4(
{virtual_extents_.data[0],
virtual_extents_.data[1],
virtual_extents_.data[2],
1u}));
}
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
view_->discard_and_reallocate(
calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
memory_layout_,
dtype_);
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
if (storage_type() == api::StorageType::BUFFER) {
if (gpu_nbytes() > view_->buffer_.mem_size()) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"buffer! reallocate() should be used instead.");
}
} else {
bool valid_resize = true;
if (virtual_extents_.data[0] > view_->extents_.data[0]) {
valid_resize = false;
}
if (virtual_extents_.data[1] > view_->extents_.data[1]) {
valid_resize = false;
}
if (virtual_extents_.data[2] > view_->extents_.data[2]) {
valid_resize = false;
}

if (!valid_resize) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"image texture! reallocate() should be used instead.");
}
}
}
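
Together these two entry points split resizing into a cheap metadata-only path and a full reallocation path. A hedged usage sketch (resize_to and the fits flag are illustrative; the flag mirrors the guards above):

void resize_to(vTensor& t, const std::vector<int64_t>& new_sizes, bool fits_in_existing_memory) {
  if (fits_in_existing_memory) {
    // Only sizes_, gpu_sizes_, virtual_extents_ and any cached UBOs change;
    // the backing image/buffer is untouched.
    t.virtual_resize(new_sizes);
  } else {
    // Discards the old backing resources and allocates storage large
    // enough for the new sizes.
    t.reallocate(new_sizes);
  }
}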

//
// vTensorStorage
//
@@ -569,11 +667,16 @@ vTensorStorage::vTensorStorage(
last_access_{} {}

vTensorStorage::~vTensorStorage() {
flush();
}

void vTensorStorage::flush() {
if (image_) {
context_->register_image_cleanup(image_);
} else if (buffer_) {
context_->register_buffer_cleanup(buffer_);
}
last_access_ = {};
}

void vTensorStorage::transition(
@@ -663,6 +766,28 @@ void add_buffer_barrier(
}
}

void vTensorStorage::discard_and_reallocate(
const std::vector<int64_t>& gpu_sizes,
const api::GPUMemoryLayout gpu_memory_layout,
const api::ScalarType dtype) {
const bool image_owns_memory = image_.owns_memory();
const bool buffer_owns_memory = buffer_.owns_memory();

flush();

extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
image_ = allocate_image(
context_,
extents_,
storage_type_,
api::to_vkformat(dtype),
image_owns_memory);

buffer_length_ = api::utils::multiply_integers(gpu_sizes);
buffer_ = allocate_buffer(
context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
}
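
Note that discard_and_reallocate() captures the owns_memory flags before flush() and re-creates the image and buffer with the same ownership, so a tensor whose memory is bound externally via bind_allocation() stays externally managed after a physical resize. A sketch under that assumption (make_unowned_tensor and pool_allocation are illustrative helpers, not part of this change):

// Assumed: a vTensor created without its own backing memory.
vTensor t = make_unowned_tensor(context, sizes, dtype);
t.reallocate(new_sizes);            // new resources still do not own memory
t.bind_allocation(pool_allocation); // caller re-attaches memory afterwards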

} // namespace vulkan
} // namespace native
} // namespace at