Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions backends/vulkan/runtime/api/containers/ParamsBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ class ParamsBuffer final {
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}

// Creates a uniform buffer of `nbytes` bytes through
// vma().create_uniform_buffer() and records that size in nbytes_. No data is
// written by this constructor.
//
// This overload is intentionally NOT a template. It takes no argument that
// depends on a template parameter, so a `template <typename Block>` header
// would make `Block` non-deducible; since constructor templates cannot be
// given explicit template arguments, the overload could never be selected.
//
// NOTE(review): if this class also has a generic constructor taking an
// arbitrary block by const reference, an integer argument whose type is not
// exactly VkDeviceSize may still bind to that one. Confirm call sites pass a
// VkDeviceSize (e.g. via static_cast) when a size is intended.
ParamsBuffer(Context* context_p, const VkDeviceSize nbytes)
    : context_p_(context_p),
      nbytes_(nbytes),
      vulkan_buffer_(
          context_p_->adapter_ptr()->vma().create_uniform_buffer(nbytes)) {}

ParamsBuffer(const ParamsBuffer&);
ParamsBuffer& operator=(const ParamsBuffer&);

Expand All @@ -50,14 +57,11 @@ class ParamsBuffer final {
}

template <typename Block>
void update(const Block& block) {
if (sizeof(block) != nbytes_) {
VK_THROW("Attempted to update ParamsBuffer with data of different size");
}
void update(const Block& block, const uint32_t offset = 0) {
// Fill the uniform buffer with data in block
{
vkapi::MemoryMap mapping(vulkan_buffer_, vkapi::kWrite);
Block* data_ptr = mapping.template data<Block>();
Block* data_ptr = mapping.template data<Block>(offset);

*data_ptr = block;
}
Expand Down
119 changes: 81 additions & 38 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,11 +451,13 @@ vTensor::vTensor(
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
padded_numel_(utils::multiply_integers(padded_sizes_)),
logical_limits_{{0, 0, 0}},
uniforms_(),
// Utility Uniform Buffers that can be passed to shaders as arguments
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
logical_limits_uniform_(),
uniforms_size_(0),
sizes_uniform_offset_(kUniformOffsetUnset),
unsqueezed_strides_offset_(kUniformOffsetUnset),
numel_uniform_offset_(kUniformOffsetUnset),
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Construct Tensor storage
storage_(
context,
Expand Down Expand Up @@ -497,11 +499,13 @@ vTensor::vTensor(
unsqueezed_strides_(),
padded_numel_(utils::multiply_integers(padded_sizes_)),
logical_limits_(),
uniforms_(),
// Utility Uniform Buffers that can be passed to shaders as arguments
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
logical_limits_uniform_(),
uniforms_size_(0),
sizes_uniform_offset_(kUniformOffsetUnset),
unsqueezed_strides_offset_(kUniformOffsetUnset),
numel_uniform_offset_(kUniformOffsetUnset),
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Construct Tensor storage
storage_(context, image) {
set_logical_limits(storage_.image_extents_);
Expand All @@ -522,11 +526,13 @@ vTensor::vTensor(vTensor& other)
other.unsqueezed_strides_.end()},
padded_numel_(other.padded_numel_),
logical_limits_{other.logical_limits_},
uniforms_(),
// Empty initialize Utility Uniform Buffers
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
logical_limits_uniform_(),
uniforms_size_(0),
sizes_uniform_offset_(kUniformOffsetUnset),
unsqueezed_strides_offset_(kUniformOffsetUnset),
numel_uniform_offset_(kUniformOffsetUnset),
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Copy Tensor storage
storage_(other.storage_) {}

Expand All @@ -547,11 +553,13 @@ vTensor::vTensor(
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
padded_numel_(utils::multiply_integers(padded_sizes_)),
logical_limits_(other.logical_limits_),
uniforms_(),
// Empty initialize Utility Uniform Buffers
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
logical_limits_uniform_(),
uniforms_size_(0),
sizes_uniform_offset_(kUniformOffsetUnset),
unsqueezed_strides_offset_(kUniformOffsetUnset),
numel_uniform_offset_(kUniformOffsetUnset),
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Copy Tensor storage
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
VK_CHECK_COND(
Expand Down Expand Up @@ -612,33 +620,66 @@ utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
}

// Returns the buffer bind info used to pass the tensor's sizes to a shader.
//
// In the lines that use uniforms_: the shared uniform buffer is created the
// first time it is found empty; the first call to this accessor reserves a
// kSizePerUniform slot at the current uniforms_size_ (after checking the
// total stays within kMaxUniformBufferSize) and writes
// utils::make_whcn_ivec4(sizes_) at that offset; the returned bind info
// refers to uniforms_ at sizes_uniform_offset_.
//
// NOTE(review): this is a diff hunk rendered without +/- markers; the lines
// that reference sizes_uniform_ appear to be the removed version.
// NOTE(review): confirm ParamsBuffer(context, kMaxUniformBufferSize) selects
// a size-taking constructor rather than a generic "block" overload.
const vkapi::BufferBindInfo vTensor::sizes_ubo() {
if (!sizes_uniform_.buffer()) {
sizes_uniform_ =
ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_));
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
}
return vkapi::BufferBindInfo(sizes_uniform_.buffer());
if (sizes_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
"Uniform data allocation has exceeded Tensor uniform buffer size");
sizes_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
}
return vkapi::BufferBindInfo(uniforms_.buffer(), sizes_uniform_offset_);
}

// Returns the buffer bind info used to pass the tensor's strides to a shader.
//
// In the lines that use uniforms_: the shared uniform buffer is created the
// first time it is found empty; the first call to this accessor reserves a
// kSizePerUniform slot at the current uniforms_size_ (after checking the
// total stays within kMaxUniformBufferSize) and writes
// utils::make_whcn_ivec4(unsqueezed_strides_) at that offset; the returned
// bind info refers to uniforms_ at unsqueezed_strides_offset_.
//
// NOTE(review): this is a diff hunk rendered without +/- markers; the lines
// that reference strides_uniform_ appear to be the removed version.
const vkapi::BufferBindInfo vTensor::strides_ubo() {
if (!strides_uniform_.buffer()) {
strides_uniform_ = ParamsBuffer(
storage_.context_, utils::make_whcn_ivec4(unsqueezed_strides_));
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
}
if (unsqueezed_strides_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
"Uniform data allocation has exceeded Tensor uniform buffer size");
unsqueezed_strides_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_.update(
utils::make_whcn_ivec4(unsqueezed_strides_),
unsqueezed_strides_offset_);
}
return vkapi::BufferBindInfo(strides_uniform_.buffer());
return vkapi::BufferBindInfo(uniforms_.buffer(), unsqueezed_strides_offset_);
}

// Returns the buffer bind info used to pass logical_limits_ to a shader.
//
// In the lines that use uniforms_: the shared uniform buffer is created the
// first time it is found empty; the first call to this accessor reserves a
// kSizePerUniform slot at the current uniforms_size_ (after checking the
// total stays within kMaxUniformBufferSize) and writes logical_limits_ at
// that offset; the returned bind info refers to uniforms_ at
// logical_limits_uniform_offset_.
//
// NOTE(review): this is a diff hunk rendered without +/- markers; the lines
// that reference logical_limits_uniform_ (the ParamsBuffer) appear to be the
// removed version.
const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
if (!logical_limits_uniform_.buffer()) {
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
}
return vkapi::BufferBindInfo(logical_limits_uniform_.buffer());
if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
"Uniform data allocation has exceeded Tensor uniform buffer size");
logical_limits_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
}
return vkapi::BufferBindInfo(
uniforms_.buffer(), logical_limits_uniform_offset_);
}

// Returns the buffer bind info used to pass numel_ to a shader.
//
// In the lines that use uniforms_: the shared uniform buffer is created the
// first time it is found empty; the first call to this accessor reserves a
// kSizePerUniform slot at the current uniforms_size_ (after checking the
// total stays within kMaxUniformBufferSize) and writes numel_ at that
// offset; the returned bind info refers to uniforms_ at
// numel_uniform_offset_.
//
// NOTE(review): this is a diff hunk rendered without +/- markers; the lines
// that reference numel_uniform_ (the ParamsBuffer) appear to be the removed
// version.
const vkapi::BufferBindInfo vTensor::numel_ubo() {
if (!numel_uniform_.buffer()) {
numel_uniform_ = ParamsBuffer(storage_.context_, numel_);
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
}
if (numel_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
"Uniform data allocation has exceeded Tensor uniform buffer size");
numel_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_.update(numel_, numel_uniform_offset_);
}
return vkapi::BufferBindInfo(numel_uniform_.buffer());
return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
}

size_t vTensor::staging_buffer_numel() const {
Expand Down Expand Up @@ -690,17 +731,19 @@ void vTensor::update_metadata() {
set_logical_limits(
calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));

if (sizes_uniform_.buffer()) {
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
if (sizes_uniform_offset_ != kUniformOffsetUnset) {
uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
}
if (strides_uniform_.buffer()) {
strides_uniform_.update(utils::make_whcn_ivec4(unsqueezed_strides_));
if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
uniforms_.update(
utils::make_whcn_ivec4(unsqueezed_strides_),
unsqueezed_strides_offset_);
}
if (numel_uniform_.buffer()) {
numel_uniform_.update(numel_);
if (numel_uniform_offset_ != kUniformOffsetUnset) {
uniforms_.update(numel_, numel_uniform_offset_);
}
if (logical_limits_uniform_.buffer()) {
logical_limits_uniform_.update(logical_limits_);
if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
}
}

Expand Down
30 changes: 22 additions & 8 deletions backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,32 @@ class vTensor final {
TextureLimits logical_limits_;

/*
* Utility GPU buffers that can be passed to shaders in order to convey tensor
* metadata. These buffers will be initialized the first time they are
* accessed via the corresponding *_ubo() function, and their contents will be
* updated whenever virtual_resize() is called.
* Utility GPU buffer that can be passed to shaders in order to convey tensor
* metadata. Uniform buffer will be initialized only the first time a ubo is
* requested. Buffer offsets will be initialized the first time they are
* accessed via the corresponding *_ubo() function. Uniform buffer's contents
* will be updated whenever virtual_resize() is called.
*
* Refer to the comments for the corresponding *_ubo() functions for more
* context about the data contained in each buffer.
*/
ParamsBuffer sizes_uniform_;
ParamsBuffer strides_uniform_;
ParamsBuffer numel_uniform_;
ParamsBuffer logical_limits_uniform_;
ParamsBuffer uniforms_;
uint32_t uniforms_size_;
uint32_t sizes_uniform_offset_;
uint32_t unsqueezed_strides_offset_;
uint32_t numel_uniform_offset_;
uint32_t logical_limits_uniform_offset_;

// Size allocated for each uniform.
// Each uniform is assumed to be a vec of 4 ints to maintain 16-byte alignment.
constexpr static size_t kSizePerUniform = sizeof(utils::ivec4);
// Total size of tensor's uniform buffer
constexpr static size_t kMaxUniformBufferSize =
4 * // we have 4 uniforms that are passed on to shaders
kSizePerUniform;

// Initial value of uniform buffer offsets
constexpr static uint32_t kUniformOffsetUnset = kMaxUniformBufferSize;

vTensorStorage storage_;

Expand Down
6 changes: 4 additions & 2 deletions backends/vulkan/runtime/vk_api/Descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ namespace vkapi {
BufferBindInfo::BufferBindInfo()
: handle(VK_NULL_HANDLE), offset(0u), range(0u) {}

// Builds the bind info for `buffer_p`, optionally starting `offset_p` bytes
// past the buffer's own mem_offset().
//
// The bound range is reduced by `offset_p` so that offset + range does not
// extend beyond the region reported by mem_offset()/mem_range(). Keeping the
// full mem_range() while shifting the offset would let the binding run
// `offset_p` bytes past that region. A mem_range() equal to VK_WHOLE_SIZE is
// passed through unchanged, since it already means "to the end of the buffer".
// With the default offset_p of 0 the result is the same as binding the whole
// buffer.
//
// NOTE(review): assumes offset_p is smaller than buffer_p.mem_range();
// confirm against callers.
BufferBindInfo::BufferBindInfo(
    const VulkanBuffer& buffer_p,
    const uint32_t offset_p)
    : handle(buffer_p.handle()),
      offset(buffer_p.mem_offset() + offset_p),
      range(
          buffer_p.mem_range() == VK_WHOLE_SIZE
              ? VK_WHOLE_SIZE
              : buffer_p.mem_range() - offset_p) {}

//
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/vk_api/Descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct BufferBindInfo final {
VkDeviceSize range;

BufferBindInfo();
BufferBindInfo(const VulkanBuffer& buffer_p);
BufferBindInfo(const VulkanBuffer& buffer_p, const uint32_t offset_p = 0u);
};

struct ParamsBindList final {
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/vk_api/memory/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ class MemoryMap final {

public:
// Returns the data_ pointer reinterpreted as T*. In the form that takes
// `offset`, data_ is first advanced by `offset` through a uint8_t* cast, so
// `offset` is a byte count rather than a count of T elements.
//
// NOTE(review): this is a diff hunk rendered without +/- markers; the
// parameterless signature and its return line appear to be the removed
// version.
// NOTE(review): no bounds or alignment check on `offset` is visible here;
// callers presumably guarantee both.
template <typename T>
T* data() {
return reinterpret_cast<T*>(data_);
T* data(const uint32_t offset = 0) {
return reinterpret_cast<T*>(static_cast<uint8_t*>(data_) + offset);
}

inline size_t nbytes() {
Expand Down
Loading