[ET-VK] Enable Dynamic shape support via tensor virtual and physical resizing #121598

Closed
wants to merge 1 commit
29 changes: 28 additions & 1 deletion aten/src/ATen/native/vulkan/api/Context.h
@@ -205,6 +205,7 @@ class Context final {
class UniformParamsBuffer final {
private:
Context* context_p_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -213,6 +214,7 @@ UniformParamsBuffer final {
template <typename Block>
UniformParamsBuffer(Context* context_p, const Block& block)
: context_p_(context_p),
nbytes_(sizeof(block)),
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}

@@ -231,13 +233,29 @@ UniformParamsBuffer final {
VulkanBuffer& buffer() {
return vulkan_buffer_;
}

template <typename Block>
void update(const Block& block) {
if (sizeof(block) != nbytes_) {
VK_THROW(
"Attempted to update UniformParamsBuffer with data of different size");
}
// Fill the uniform buffer with data in block
{
MemoryMap mapping(vulkan_buffer_, MemoryAccessType::WRITE);
Block* data_ptr = mapping.template data<Block>();

*data_ptr = block;
}
}
};
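
The update() path above is what allows size and extent UBOs to be refreshed in place when a tensor is resized, instead of allocating a fresh uniform buffer each time. A minimal usage sketch (the SizesBlock struct and refresh_sizes helper are illustrative, not part of this change):

struct SizesBlock {
  api::utils::ivec4 data;
};

void refresh_sizes(api::UniformParamsBuffer& params, const api::utils::ivec4& whcn_sizes) {
  SizesBlock block{whcn_sizes};
  // Maps the existing VulkanBuffer for writing and copies the new block in;
  // VK_THROW fires if sizeof(block) does not match the original allocation.
  params.update(block);
}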

class StorageBuffer final {
private:
Context* context_p_;
ScalarType dtype_;
size_t numel_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -249,8 +267,9 @@ class StorageBuffer final {
: context_p_(context_p),
dtype_(dtype),
numel_(numel),
+ nbytes_(element_size(dtype_) * numel_),
vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer(
- element_size(dtype_) * numel_,
+ nbytes_,
gpuonly)) {}

StorageBuffer(const StorageBuffer&) = delete;
@@ -270,6 +289,14 @@ class StorageBuffer final {
inline VulkanBuffer& buffer() {
return vulkan_buffer_;
}

inline size_t numel() {
return numel_;
}

inline size_t nbytes() {
return nbytes_;
}
};

bool available();
8 changes: 8 additions & 0 deletions aten/src/ATen/native/vulkan/api/Resource.h
@@ -151,6 +151,10 @@ class VulkanBuffer final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

operator bool() const {
return (handle_ != VK_NULL_HANDLE);
}
@@ -372,6 +376,10 @@ class VulkanImage final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

inline operator bool() const {
return (handles_.image != VK_NULL_HANDLE);
}
3 changes: 3 additions & 0 deletions aten/src/ATen/native/vulkan/api/ShaderRegistry.h
@@ -12,6 +12,9 @@
#define VK_KERNEL(shader_name) \
::at::native::vulkan::api::shader_registry().get_shader_info(#shader_name)

#define VK_KERNEL_FROM_STR(shader_name_str) \
::at::native::vulkan::api::shader_registry().get_shader_info(shader_name_str)

namespace at {
namespace native {
namespace vulkan {
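
VK_KERNEL resolves a shader from a compile-time identifier by stringizing it, so it cannot be used when the kernel name is assembled at runtime; the new VK_KERNEL_FROM_STR accepts a runtime string. A hedged sketch (the kernel name and variant suffix are illustrative):

std::string kernel_name = "add";
kernel_name += "_texture3d"; // e.g. pick a variant based on storage type
const auto& shader = VK_KERNEL_FROM_STR(kernel_name);
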
155 changes: 140 additions & 15 deletions aten/src/ATen/native/vulkan/api/Tensor.cpp
@@ -318,8 +318,8 @@ api::UniformParamsBuffer make_metadata_uniform(
}

vTensor::BufferMetadata metadata{
- api::utils::make_nchw_uvec4(sizes),
- api::utils::make_nchw_uvec4(strides),
+ api::utils::make_whcn_uvec4(sizes),
+ api::utils::make_whcn_uvec4(strides),
api::utils::safe_downcast<uint32_t>(sizes.size()),
api::utils::safe_downcast<uint32_t>(api::utils::multiply_integers(sizes)),
};
Expand Down Expand Up @@ -347,12 +347,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
- // Vulkan uniform buffer containing sizes and stride info
- metadata_uniform_{make_metadata_uniform(
-     context,
-     gpu_sizes_,
-     gpu_strides_,
-     storage_type)},
+ virtual_extents_(
+     create_image_extents(gpu_sizes_, storage_type, memory_layout)),
+ // Utility Uniform Buffers that can be passed to shaders as arguments
+ metadata_uniform_(),
+ cpu_sizes_uniform_(nullptr),
+ gpu_sizes_uniform_(nullptr),
+ extents_uniform_(nullptr),
// Construct Tensor storage
view_(std::make_shared<vTensorStorage>(
context,
@@ -377,12 +378,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
+ virtual_extents_(
+     create_image_extents(gpu_sizes_, storage_type, memory_layout)),
// Vulkan uniform buffer containing sizes and stride info
- metadata_uniform_{make_metadata_uniform(
-     context,
-     gpu_sizes_,
-     gpu_strides_,
-     storage_type)},
+ metadata_uniform_(),
+ cpu_sizes_uniform_(nullptr),
+ gpu_sizes_uniform_(nullptr),
+ extents_uniform_(nullptr),
// Quantization params
is_quantized_{true},
q_scale_{q_scale},
Expand Down Expand Up @@ -425,10 +427,47 @@ api::VulkanBuffer& vTensor::buffer(
return view_->buffer_;
}

api::VulkanBuffer& vTensor::buffer_metadata() {
if (!metadata_uniform_.buffer()) {
metadata_uniform_ = make_metadata_uniform(
view_->context_, gpu_sizes_, gpu_strides_, storage_type());
}
return metadata_uniform_.buffer();
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
if (!cpu_sizes_uniform_) {
cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(sizes_)));
}
return cpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
if (!gpu_sizes_uniform_) {
gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
}
return gpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
if (!extents_uniform_) {
extents_uniform_.reset(new api::UniformParamsBuffer(
view_->context_,
api::utils::uvec4(
{view_->extents_.data[0],
view_->extents_.data[1],
view_->extents_.data[2],
1u})));
}
return extents_uniform_;
}
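
These getters all follow the same lazy, memoized pattern: the UBO is allocated the first time a shader asks for it, and the cached shared_ptr is handed out on every later call. Because the underlying VkBuffer handle never changes, descriptor sets built against it observe in-place updates made during a later resize. A small sketch of the assumed usage:

void observe_caching(vTensor& tensor) {
  std::shared_ptr<api::UniformParamsBuffer> a = tensor.gpu_sizes_ubo();
  std::shared_ptr<api::UniformParamsBuffer> b = tensor.gpu_sizes_ubo();
  // a and b alias the same UniformParamsBuffer: the first call allocated it,
  // the second returned the cached pointer. A subsequent virtual_resize()
  // rewrites its contents in place, so both observers see the new sizes.
}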

vTensor::BufferMetadata vTensor::get_cpu_buffer_metadata() const {
return {
- api::utils::make_nchw_uvec4(sizes_),
- api::utils::make_nchw_uvec4(strides_),
+ api::utils::make_whcn_uvec4(sizes_),
+ api::utils::make_whcn_uvec4(strides_),
api::utils::safe_downcast<uint32_t>(sizes_.size()),
api::utils::safe_downcast<uint32_t>(
api::utils::multiply_integers(sizes_)),
@@ -473,6 +512,65 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
}
}

void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
[Contributor comment] I see what you mean by updating metadata now

sizes_ = new_sizes;
gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
virtual_extents_ =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);

if (cpu_sizes_uniform_) {
cpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(sizes_));
}

if (gpu_sizes_uniform_) {
gpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(gpu_sizes_));
}

if (extents_uniform_) {
extents_uniform_->update(api::utils::uvec4(
{virtual_extents_.data[0],
virtual_extents_.data[1],
virtual_extents_.data[2],
1u}));
}
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
view_->discard_and_reallocate(
calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
memory_layout_,
dtype_);
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
if (storage_type() == api::StorageType::BUFFER) {
if (gpu_nbytes() > view_->buffer_.mem_size()) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"buffer! reallocate() should be used instead.");
}
} else {
bool valid_resize = true;
if (virtual_extents_.data[0] > view_->extents_.data[0]) {
valid_resize = false;
}
if (virtual_extents_.data[1] > view_->extents_.data[1]) {
valid_resize = false;
}
if (virtual_extents_.data[2] > view_->extents_.data[2]) {
valid_resize = false;
}

if (!valid_resize) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"image texture! reallocate() should be used instead.");
}
}
}
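
Together these two entry points split resizing into a cheap metadata-only path and a full reallocation path. A hedged usage sketch (resize_to and the fits flag are illustrative; the flag mirrors the guards above):

void resize_to(vTensor& t, const std::vector<int64_t>& new_sizes, bool fits_in_existing_memory) {
  if (fits_in_existing_memory) {
    // Only sizes_, gpu_sizes_, virtual_extents_ and any cached UBOs change;
    // the backing image/buffer is untouched.
    t.virtual_resize(new_sizes);
  } else {
    // Discards the old backing resources and allocates storage large
    // enough for the new sizes.
    t.reallocate(new_sizes);
  }
}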

//
// vTensorStorage
//
@@ -569,11 +667,16 @@ vTensorStorage::vTensorStorage(
last_access_{} {}

vTensorStorage::~vTensorStorage() {
flush();
}

void vTensorStorage::flush() {
if (image_) {
context_->register_image_cleanup(image_);
} else if (buffer_) {
context_->register_buffer_cleanup(buffer_);
}
last_access_ = {};
}

void vTensorStorage::transition(
@@ -663,6 +766,28 @@ void add_buffer_barrier(
}
}

void vTensorStorage::discard_and_reallocate(
const std::vector<int64_t>& gpu_sizes,
const api::GPUMemoryLayout gpu_memory_layout,
const api::ScalarType dtype) {
const bool image_owns_memory = image_.owns_memory();
const bool buffer_owns_memory = buffer_.owns_memory();

flush();

extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
image_ = allocate_image(
context_,
extents_,
storage_type_,
api::to_vkformat(dtype),
image_owns_memory);

buffer_length_ = api::utils::multiply_integers(gpu_sizes);
buffer_ = allocate_buffer(
context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
}
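
Note that discard_and_reallocate() captures the owns_memory flags before flush() and re-creates the image and buffer with the same ownership, so a tensor whose memory is bound externally via bind_allocation() stays externally managed after a physical resize. A sketch under that assumption (make_unowned_tensor and pool_allocation are illustrative helpers, not part of this change):

// Assumed: a vTensor created without its own backing memory.
vTensor t = make_unowned_tensor(context, sizes, dtype);
t.reallocate(new_sizes);            // new resources still do not own memory
t.bind_allocation(pool_allocation); // caller re-attaches memory afterwards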

} // namespace vulkan
} // namespace native
} // namespace at