backends/vulkan/runtime/api/containers/Tensor.cpp (204 changes: 140 additions & 64 deletions)
@@ -13,36 +13,15 @@
 namespace vkcompute {
 namespace api {

-/*
- * Given the strides of a buffer-backed tensor, find the index of the "fastest
- * moving" dimension in WHCN dimension order. If multiple dims have the lowest
- * stride, then the "earlier" dim is assumed to be the fastest moving (width is
- * "earlier" than height).
- */
-int32_t find_fastest_whcn_dim(const std::vector<int64_t>& strides) {
-  if (strides.size() == 0) {
-    return 0;
-  }
-  int32_t fastest_dim = 0;
-  int64_t min_stride = strides.at(0);
-  for (int d = strides.size() - 1; d >= 0; --d) {
-    if (strides.at(d) < min_stride) {
-      fastest_dim = d;
-      min_stride = strides.at(d);
-    }
-  }
-  return (strides.size() - 1 - fastest_dim);
-}
-
 /*
  * Given the strides of a buffer-backed tensor, estimate the equivalent memory
  * layout enum value by identifying the fastest moving dimension.
  */
 utils::GPUMemoryLayout estimate_memory_layout(
-    const std::vector<int64_t>& strides) {
-  int32_t fastest_dim = find_fastest_whcn_dim(strides);
-  if (fastest_dim <= 3) {
-    return utils::GPUMemoryLayout(fastest_dim);
+    const std::vector<int64_t>& dim_order) {
+  int64_t fastest_dim_whcn = dim_order.size() - 1 - dim_order.back();
+  if (fastest_dim_whcn >= 0 && fastest_dim_whcn < 3) {
+    return utils::GPUMemoryLayout(fastest_dim_whcn);
   }

   // TODO(ssjia) find a way to gracefully recover from this case by i.e. adding
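Note: the rewritten `estimate_memory_layout` no longer scans strides; it reads the fastest moving dim straight off the dim order, since `dim_order.back()` is the NCHW index of the innermost dim. A minimal sketch of the conversion, assuming `GPUMemoryLayout` numbers width-, height-, and channels-packed layouts as 0, 1, 2:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Contiguous NCHW: width (NCHW index 3) moves fastest.
  std::vector<int64_t> dim_order = {0, 1, 2, 3};
  // WHCN index = ndim - 1 - NCHW index = 4 - 1 - 3 = 0 -> width-packed.
  assert(dim_order.size() - 1 - dim_order.back() == 0);

  // Channels-last: channels (NCHW index 1) moves fastest.
  dim_order = {0, 2, 3, 1};
  // 4 - 1 - 1 = 2 -> channels-packed.
  assert(dim_order.size() - 1 - dim_order.back() == 2);
  return 0;
}
```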
@@ -51,41 +30,70 @@ utils::GPUMemoryLayout estimate_memory_layout(
   VK_THROW("No compatible GPUMemoryLayout value");
 }

+std::vector<int64_t> calculate_dim_order(
+    const size_t ndim,
+    const utils::GPUMemoryLayout memory_layout) {
+  // Special case for zero dim tensors
+  if (ndim == 0) {
+    return {0};
+  }
+  std::vector<int64_t> dim_order(ndim);
+  int64_t last_dim =
+      ndim - utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
+
+  int64_t cur_dim = 0;
+  for (int d = 0; d < ndim; ++d) {
+    if (d == last_dim) {
+      cur_dim++;
+    }
+    dim_order[d] = cur_dim;
+    cur_dim++;
+  }
+  if (last_dim >= 0) {
+    dim_order[ndim - 1] = last_dim;
+  }
+
+  return dim_order;
+}
+
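Note: `calculate_dim_order` emits all dims in NCHW order except the packed dim, which is rotated to the back so it becomes the fastest moving. A worked trace, assuming `utils::to_packed_dim_nchw_offset` returns 1, 2, 3 for width-, height-, and channels-packed layouts respectively (so `last_dim` is the NCHW index of the packed dim):

```cpp
// ndim = 4, channels-packed => last_dim = 4 - 3 = 1 (the channels dim):
//   d = 0: dim_order[0] = 0
//   d = 1: hits last_dim, cur_dim skips over 1 -> dim_order[1] = 2
//   d = 2: dim_order[2] = 3
//   d = 3: dim_order[3] = 4 (placeholder)
//   fixup: dim_order[3] = last_dim = 1
// result: {0, 2, 3, 1} -- channels becomes the innermost dim.
//
// ndim = 4, width-packed => last_dim = 3, and the fixup is a no-op:
// result: {0, 1, 2, 3} (standard contiguous order).
```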
 std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
-    const utils::GPUMemoryLayout memory_layout) {
+    const std::vector<int64_t>& dim_order) {
   // For zero dim tensors
   if (sizes.size() == 0) {
     return {1};
   }

-  const int64_t dim_offset =
-      utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
-  int64_t last_dim = sizes.size() - dim_offset;
-  if (last_dim < 0) {
-    last_dim = sizes.size() - 1;
-  }
-
   size_t ndim = sizes.size();
   std::vector<int64_t> strides(ndim);

-  const int64_t last_dim_size = sizes.at(last_dim);
-
-  for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
-    strides.at(stride_d) = 1;
-    if (stride_d == last_dim) {
-      continue;
-    }
-    strides.at(stride_d) = last_dim_size;
-    for (int size_d = ndim - 1; size_d > stride_d; size_d--) {
-      if (size_d != last_dim) {
-        strides.at(stride_d) *= sizes.at(size_d);
-      }
-    }
-  }
+  strides[dim_order[ndim - 1]] = 1;
+  for (int32_t i = ndim - 2; i >= 0; --i) {
+    if (sizes[dim_order[i + 1]] == 0) {
+      strides[dim_order[i]] = strides[dim_order[i + 1]];
+    } else {
+      strides[dim_order[i]] =
+          strides[dim_order[i + 1]] * sizes[dim_order[i + 1]];
+    }
+  }

   return strides;
 }

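Note: the new stride computation is the standard innermost-outward running product over the dim order, with a guard that treats size-0 dims as size 1 so strides stay well-formed for empty tensors. A sketch of the expected outputs (`strides_for` is a hypothetical helper mirroring the diff's loop; the zero-dim special case is omitted):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// dim_order lists NCHW dims from outermost (slowest) to innermost (fastest).
std::vector<int64_t> strides_for(
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& dim_order) {
  const size_t ndim = sizes.size();
  std::vector<int64_t> strides(ndim);
  strides[dim_order[ndim - 1]] = 1;
  for (int32_t i = ndim - 2; i >= 0; --i) {
    // max(size, 1) reproduces the size-0 branch in the diff.
    strides[dim_order[i]] = strides[dim_order[i + 1]] *
        std::max<int64_t>(sizes[dim_order[i + 1]], 1);
  }
  return strides;
}

int main() {
  // NCHW sizes {2, 3, 4, 5}:
  assert((strides_for({2, 3, 4, 5}, {0, 1, 2, 3}) ==
          std::vector<int64_t>{60, 20, 5, 1})); // contiguous
  assert((strides_for({2, 3, 4, 5}, {0, 2, 3, 1}) ==
          std::vector<int64_t>{60, 1, 15, 3})); // channels-last
  return 0;
}
```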
+bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
+  int64_t sum = 0;
+  for (size_t i = 0; i < dim_order.size(); ++i) {
+    if (dim_order[i] < 0 || dim_order[i] >= dim_order.size()) {
+      return false;
+    }
+    sum += dim_order[i];
+  }
+  int64_t n = static_cast<int64_t>(dim_order.size() - 1);
+  // Sanity check that the sum of the indices in the vector is equal to the sum
+  // of 0 + 1 + 2 + ... + (ndim - 1)
+  return sum == n * (n + 1) / 2;
+}
+
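Note: the bounds check plus the triangular-number sum is a cheap sanity check rather than an exact permutation test; e.g. `{0, 2, 2, 2}` stays in bounds and sums to 0 + 1 + 2 + 3 = 6, yet is not a valid dim order. Should an exact test ever be needed, a sketch using a seen-bitmask (a hypothetical helper, fine for ndim <= 64):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical strict variant of dim_order_is_valid: also rejects duplicates.
bool dim_order_is_permutation(const std::vector<int64_t>& dim_order) {
  uint64_t seen = 0;
  for (const int64_t d : dim_order) {
    if (d < 0 || d >= static_cast<int64_t>(dim_order.size()) ||
        (seen & (1ull << d))) {
      return false; // out of range, or the same dim listed twice
    }
    seen |= (1ull << d);
  }
  return true;
}
```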
 std::vector<int64_t> unsqueeze_strides(
     const std::vector<int64_t>& strides,
     const int64_t numel) {

@@ -170,7 +178,8 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate tensor size metadata
       sizes_(sizes.begin(), sizes.end()),
-      strides_(calculate_strides(sizes, memory_layout_)),
+      dim_order_(calculate_dim_order(sizes_.size(), memory_layout_)),
+      strides_(calculate_strides(sizes, dim_order_)),
       numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
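Note: `strides_` is now computed from `dim_order_` inside the member init list, which relies on `dim_order_` being declared before `strides_` in the class definition, since C++ initializes members in declaration order rather than init-list order. A sketch of the assumed declaration order (not shown in this diff) in `Tensor.h`:

```cpp
std::vector<int64_t> sizes_;
std::vector<int64_t> dim_order_; // must be declared before strides_
std::vector<int64_t> strides_;   // initialized from dim_order_ above
```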
@@ -189,6 +198,9 @@ vTensor::vTensor(
       padded_sizes_,
       dtype_,
       allocate_memory) {
+  VK_CHECK_COND(
+      dim_order_is_valid(dim_order_), "computed dim order is invalid");
+
   if (storage_type != utils::kBuffer) {
     texture_limits_.limits = utils::ivec3{
         utils::safe_downcast<int32_t>(storage_.image_extents_[0]),
@@ -204,16 +216,39 @@
   }
 }

+vTensor::vTensor(const vTensor& other)
+    : dtype_(other.dtype_),
+      memory_layout_(other.memory_layout_),
+      // Copy tensor size metadata
+      sizes_(other.sizes_.begin(), other.sizes_.end()),
+      dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
+      strides_(other.strides_.begin(), other.strides_.end()),
+      numel_(other.numel_),
+      padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
+      unsqueezed_strides_{
+          other.unsqueezed_strides_.begin(),
+          other.unsqueezed_strides_.end()},
+      padded_numel_(other.padded_numel_),
+      texture_limits_{other.texture_limits_},
+      // Empty initialize Utility Uniform Buffers
+      sizes_uniform_(),
+      strides_uniform_(),
+      numel_uniform_(),
+      texture_limits_uniform_(),
+      // Copy Tensor storage
+      storage_(other.storage_) {}
+
 vTensor::vTensor(
     const vTensor& other,
     const std::vector<int64_t>& sizes,
-    const std::vector<int64_t>& strides,
-    const size_t offset_numel)
+    const std::vector<int64_t>& dim_order,
+    const int64_t offset_numel)
     : dtype_(other.dtype_),
-      memory_layout_(estimate_memory_layout(strides)),
+      memory_layout_(estimate_memory_layout(dim_order)),
       // Copy tensor size metadata
       sizes_(sizes.begin(), sizes.end()),
-      strides_(strides.begin(), strides.end()),
+      dim_order_(dim_order.begin(), dim_order.end()),
+      strides_(calculate_strides(sizes_, dim_order_)),
       numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
@@ -226,6 +261,8 @@ vTensor::vTensor(
       texture_limits_uniform_(),
       // Copy Tensor storage
       storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  VK_CHECK_COND(
+      dim_order_is_valid(dim_order_), "new dim order provided is invalid");
   VK_CHECK_COND(
       offset_numel + numel_ <= other.numel(),
       "Tensor alias cannot access more elements than available in the original"
@@ -339,9 +376,17 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   }
 }

-void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
+void vTensor::update_metadata(
+    const std::vector<int64_t>& new_sizes,
+    const std::vector<int64_t>& new_dim_order) {
   sizes_ = new_sizes;
-  strides_ = calculate_strides(new_sizes, memory_layout_);
+  dim_order_ = new_dim_order;
+  strides_ = calculate_strides(sizes_, dim_order_);
+  // Only update the memory layout for buffer-backed tensors. Strides are
+  // meaningless for texture-backed tensors and do not impact the memory layout.
+  if (storage_type() == utils::kBuffer) {
+    memory_layout_ = estimate_memory_layout(dim_order_);
+  }
   numel_ = utils::multiply_integers(sizes_);

   padded_sizes_ = calculate_padded_sizes(sizes_, memory_layout_);
@@ -373,15 +418,7 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   }
 }

-void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
-  update_size_metadata(new_sizes);
-  storage_.discard_and_reallocate(
-      calculate_padded_sizes(new_sizes, memory_layout_),
-      memory_layout_,
-      dtype_);
-}
-
-void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
+void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
   if (storage_type() != utils::kBuffer) {
     // For texture storage check that the current texture is large enough for
     // the new sizes of the tensor.
@@ -394,10 +431,47 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {

     VK_CHECK_COND(
         valid_resize,
-        "Cannot use virtual resize if new sizes requires a larger texture.");
+        "tensor sizes requires a larger texture than the current one.");
+  } else {
+    // For buffer storage check that the current buffer is large enough for the
+    // new sizes of the tensor.
+    int64_t numel = utils::multiply_integers(sizes);
+    bool valid_resize =
+        numel + storage_.buffer_offset_ <= storage_.buffer_length_;
+    VK_CHECK_COND(
+        valid_resize,
+        "tensor sizes requires a larger buffer than the current one.");
+  }
 }

+void vTensor::virtual_reconfigure(
+    const std::vector<int64_t>& new_sizes,
+    const std::vector<int64_t>& new_dim_order) {
+  VK_CHECK_COND(
+      storage_type() == utils::kBuffer,
+      "virtual_reconfigure is only applicable for buffer backed tensors");
+  VK_CHECK_COND(new_sizes.size() == new_dim_order.size());
+  VK_CHECK_COND(dim_order_is_valid(new_dim_order));
+
-  update_size_metadata(new_sizes);
+  check_sizes(new_sizes);
+  update_metadata(new_sizes, new_dim_order);
+}
+
+void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
+  VK_CHECK_COND(
+      new_sizes.size() == dim_order_.size(),
+      "new sizes cannot modify the dimensionality of the tensor ");
+
+  check_sizes(new_sizes);
+  update_metadata(new_sizes, dim_order_);
+}
+
+void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
+  update_metadata(new_sizes, dim_order_);
+  storage_.discard_and_reallocate(
+      calculate_padded_sizes(new_sizes, memory_layout_),
+      memory_layout_,
+      dtype_);
+}
+
 //
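Note: with this split, `check_sizes` centralizes the capacity checks while the three public entry points differ in what they are allowed to change. A usage sketch (the buffer-backed tensor `t` and its sizes are hypothetical; the signatures are the ones added in this diff):

```cpp
// Buffer-backed tensor t with sizes {2, 3} and contiguous dim order {0, 1}.

// virtual_resize: new sizes, same dim order and dimensionality. Metadata-only,
// so the existing buffer must already be large enough.
t.virtual_resize({2, 2});

// virtual_reconfigure: reinterpret the same buffer with new sizes and a new
// dim order, e.g. a zero-copy transpose: sizes {3, 2} with dim order {1, 0}
// yields strides {1, 3}.
t.virtual_reconfigure({3, 2}, {1, 0});

// reallocate: discard the old buffer and allocate one sized for the new
// sizes, keeping the current dim order.
t.reallocate({4, 8});
```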
@@ -480,6 +554,7 @@ vTensorStorage::vTensorStorage(
       storage_type_{storage_type},
       image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)),
       buffer_length_{utils::multiply_integers(padded_sizes)},
+      buffer_offset_{0},
       image_(allocate_image(
           context_,
           image_extents_,
@@ -496,11 +571,12 @@

 vTensorStorage::vTensorStorage(
     const vTensorStorage& other,
-    const size_t buffer_offset)
+    const int64_t buffer_offset)
     : context_(other.context_),
       storage_type_{other.storage_type_},
       image_extents_(other.image_extents_),
       buffer_length_{other.buffer_length_},
+      buffer_offset_{buffer_offset},
       image_(),
       buffer_(other.buffer_, buffer_offset),
       last_access_{other.last_access_} {