109 changes: 43 additions & 66 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -13,33 +13,15 @@
namespace vkcompute {
namespace api {

/*
* Given the strides of a buffer-backed tensor, estimate the equivalent memory
* layout enum value by identifying the fastest moving dimension.
*/
utils::GPUMemoryLayout estimate_memory_layout(
const std::vector<int64_t>& dim_order) {
int64_t fastest_dim_whcn = dim_order.size() - 1 - dim_order.back();
if (fastest_dim_whcn >= 0 && fastest_dim_whcn < 3) {
return utils::GPUMemoryLayout(fastest_dim_whcn);
}

// TODO(ssjia) find a way to gracefully recover from this case by i.e. adding
// a UNKOWN GPUMemoryLayout. This is not high priority though because we don't
// expect this to ever come up in practice.
VK_THROW("No compatible GPUMemoryLayout value");
}
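For reference, a minimal standalone sketch of the rule the deleted helper above encodes (and which the new `vTensor::estimate_memory_layout()` member further below relies on), assuming the WHCN convention where index 0 is width, 1 is height, and 2 is channels. The name `packed_dim_from_dim_order` is illustrative only and not part of the codebase:

```cpp
#include <cstdint>
#include <vector>

// The packed (fastest-moving) dimension, expressed as a WHCN index, follows
// directly from the last entry of the NCHW dim order.
int32_t packed_dim_from_dim_order(const std::vector<int64_t>& dim_order) {
  const int64_t fastest_dim_whcn =
      static_cast<int64_t>(dim_order.size()) - 1 - dim_order.back();
  // 0 = width, 1 = height, 2 = channels; -1 means no WHC dim is packed.
  return (fastest_dim_whcn >= 0 && fastest_dim_whcn < 3)
      ? static_cast<int32_t>(fastest_dim_whcn)
      : -1;
}

// packed_dim_from_dim_order({0, 1, 2, 3}) == 0  (contiguous NCHW, width-packed)
// packed_dim_from_dim_order({0, 2, 3, 1}) == 2  (channels-packed)
```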

std::vector<int64_t> calculate_dim_order(
const size_t ndim,
const utils::GPUMemoryLayout memory_layout) {
const int32_t packed_dim_whcn_idx) {
// Special case for zero dim tensors
if (ndim == 0) {
return {0};
}
std::vector<int64_t> dim_order(ndim);
int64_t last_dim =
ndim - utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
int64_t last_dim = ndim - 1 - packed_dim_whcn_idx;

int64_t cur_dim = 0;
for (int d = 0; d < ndim; ++d) {
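The rest of the loop body falls outside this hunk. As a rough sketch of the intended result, assuming the packed dim simply moves to the end of an otherwise ascending NCHW dim order (function name illustrative only), this reproduces the examples above in reverse:

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

// Sketch only: build an NCHW dim order in which the packed dim is last,
// i.e. has a stride of 1, keeping the remaining dims in ascending order.
std::vector<int64_t> sketch_dim_order(
    const size_t ndim, const int32_t packed_dim_whcn_idx) {
  std::vector<int64_t> dim_order(ndim);
  std::iota(dim_order.begin(), dim_order.end(), 0);
  const int64_t last_dim =
      static_cast<int64_t>(ndim) - 1 - packed_dim_whcn_idx;
  dim_order.erase(dim_order.begin() + last_dim);
  dim_order.push_back(last_dim);
  return dim_order;
}

// sketch_dim_order(4, 0) -> {0, 1, 2, 3}  (width-packed)
// sketch_dim_order(4, 2) -> {0, 2, 3, 1}  (channels-packed)
```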
@@ -149,7 +131,7 @@ std::vector<int64_t> unsqueeze_strides(

std::vector<int64_t> calculate_padded_sizes(
const std::vector<int64_t>& sizes,
const utils::GPUMemoryLayout memory_layout) {
const int32_t packed_dim_whcn_idx) {
int64_t ndim = sizes.size();
if (ndim == 0) {
ndim = 1;
@@ -163,8 +145,7 @@ std::vector<int64_t> calculate_padded_sizes(
}

// Pad the packed dim to the next multiple of 4.
const int64_t dim_offset =
utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
const int64_t dim_offset = packed_dim_whcn_idx + 1;
const int64_t padded_dim_size = utils::val_at(-dim_offset, sizes);
padded_sizes.at(ndim_up4 - dim_offset) = utils::align_up_4(padded_dim_size);
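A rough standalone sketch of the padding rule, assuming sizes are first unsqueezed to four NCHW dims: `dim_offset = packed_dim_whcn_idx + 1` addresses the packed dim from the end of the sizes vector, and that dim is rounded up so groups of four elements fill whole texels (`align_up_4` written out inline; names illustrative only):

```cpp
#include <cstdint>
#include <vector>

// Sketch of the padding rule for the common 4-dim case.
std::vector<int64_t> sketch_padded_sizes(
    std::vector<int64_t> sizes, const int32_t packed_dim_whcn_idx) {
  while (sizes.size() < 4) {
    sizes.insert(sizes.begin(), 1);  // unsqueeze leading dims to NCHW
  }
  // dim_offset = packed_dim_whcn_idx + 1, counted from the back of sizes.
  const size_t packed = sizes.size() - 1 - packed_dim_whcn_idx;
  sizes[packed] = (sizes[packed] + 3) / 4 * 4;  // align_up_4
  return sizes;
}

// sketch_padded_sizes({2, 3, 5, 7}, /*channels-packed*/ 2) -> {2, 4, 5, 7}
```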

@@ -174,7 +155,7 @@
utils::uvec3 calculate_image_extents(
const std::vector<int64_t>& padded_sizes,
const std::vector<int64_t>& axis_map,
const utils::GPUMemoryLayout memory_layout) {
const int32_t packed_dim_whcn_idx) {
VK_CHECK_COND(padded_sizes.size() == 4);
VK_CHECK_COND(axis_map.size() == 4);

@@ -195,21 +176,8 @@ utils::uvec3 calculate_image_extents(
// Multiply the extents of the batch axis by the batch size.
extents[batch_axis] *= padded_sizes.at(0);

switch (memory_layout) {
case utils::kWidthPacked:
VK_CHECK_COND(extents[axis_map.at(0)] % 4 == 0);
extents[axis_map.at(0)] /= 4;
break;
case utils::kHeightPacked:
VK_CHECK_COND(extents[axis_map.at(1)] % 4 == 0);
extents[axis_map.at(1)] /= 4;
break;
case utils::kChannelsPacked:
VK_CHECK_COND(extents[axis_map.at(2)] % 4 == 0);
extents[axis_map.at(2)] /= 4;
break;
}

VK_CHECK_COND(extents[axis_map.at(packed_dim_whcn_idx)] % 4 == 0);
extents[axis_map.at(packed_dim_whcn_idx)] /= 4;
return extents;
}
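A worked sketch of the unified packed-dim handling, assuming the default axis_map `{0, 1, 2, 2}` (texture X/Y/Z correspond to W/H/C, with batches folded into the channels axis); names are illustrative only:

```cpp
#include <array>
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch only: image extents for a texture-backed tensor with the default
// axis map. padded_sizes is NCHW and already padded along the packed dim.
std::array<uint32_t, 3> sketch_image_extents(
    const std::vector<int64_t>& padded_sizes,
    const int32_t packed_dim_whcn_idx) {
  const std::array<int64_t, 4> axis_map = {0, 1, 2, 2};
  std::array<uint32_t, 3> extents = {
      static_cast<uint32_t>(padded_sizes[3]),   // W
      static_cast<uint32_t>(padded_sizes[2]),   // H
      static_cast<uint32_t>(padded_sizes[1])};  // C
  extents[axis_map[3]] *= static_cast<uint32_t>(padded_sizes[0]);  // batches
  // Four elements along the packed dim share one texel.
  assert(extents[axis_map[packed_dim_whcn_idx]] % 4 == 0);
  extents[axis_map[packed_dim_whcn_idx]] /= 4;
  return extents;
}

// sketch_image_extents({1, 4, 8, 8}, /*channels-packed*/ 2) -> {8, 8, 1}
```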

@@ -285,15 +253,15 @@ vkapi::VulkanBuffer allocate_buffer(
vTensorStorage::vTensorStorage(
Context* const context,
const utils::StorageType storage_type,
const utils::GPUMemoryLayout gpu_memory_layout,
const std::vector<int64_t>& axis_map,
const int32_t packed_dim_whcn_idx,
const std::vector<int64_t>& padded_sizes,
const vkapi::ScalarType dtype,
const bool allocate_memory)
: context_(context),
storage_type_{storage_type},
image_extents_(
calculate_image_extents(padded_sizes, axis_map, gpu_memory_layout)),
calculate_image_extents(padded_sizes, axis_map, packed_dim_whcn_idx)),
buffer_length_{utils::multiply_integers(padded_sizes)},
buffer_offset_{0},
image_(allocate_image(
@@ -408,14 +376,15 @@ vTensor::vTensor(
const utils::GPUMemoryLayout memory_layout,
const bool allocate_memory)
: dtype_(dtype),
memory_layout_(memory_layout),
// Calculate tensor metadata
sizes_(sizes.begin(), sizes.end()),
dim_order_(calculate_dim_order(sizes_.size(), memory_layout_)),
packed_dim_whcn_idx_(
utils::to_packed_dim_whcn_idx<int32_t>(memory_layout)),
dim_order_(calculate_dim_order(sizes_.size(), packed_dim_whcn_idx_)),
axis_map_(default_axis_map()),
strides_(calculate_strides(sizes, dim_order_)),
numel_(utils::multiply_integers(sizes_)),
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
padded_sizes_{calculate_padded_sizes(sizes, packed_dim_whcn_idx_)},
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
padded_numel_(utils::multiply_integers(padded_sizes_)),
logical_limits_{{0, 0, 0}},
@@ -429,8 +398,8 @@ vTensor::vTensor(
storage_(
context,
storage_type,
memory_layout_,
axis_map_,
packed_dim_whcn_idx_,
padded_sizes_,
dtype_,
allocate_memory) {
@@ -451,9 +420,9 @@ vTensor::vTensor(

vTensor::vTensor(const vTensor& other)
: dtype_(other.dtype_),
memory_layout_(other.memory_layout_),
// Copy tensor size metadata
sizes_(other.sizes_.begin(), other.sizes_.end()),
packed_dim_whcn_idx_{other.packed_dim_whcn_idx_},
dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
axis_map_(other.axis_map_.begin(), other.axis_map_.end()),
strides_(other.strides_.begin(), other.strides_.end()),
@@ -479,14 +448,14 @@ vTensor::vTensor(
const std::vector<int64_t>& dim_order,
const int64_t offset_numel)
: dtype_(other.dtype_),
memory_layout_(estimate_memory_layout(dim_order)),
// Copy tensor size metadata
sizes_(sizes.begin(), sizes.end()),
packed_dim_whcn_idx_(other.packed_dim_whcn_idx_),
dim_order_(dim_order.begin(), dim_order.end()),
axis_map_(default_axis_map()),
strides_(calculate_strides(sizes_, dim_order_)),
numel_(utils::multiply_integers(sizes_)),
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
padded_sizes_{calculate_padded_sizes(sizes, packed_dim_whcn_idx_)},
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
padded_numel_(utils::multiply_integers(padded_sizes_)),
logical_limits_(other.logical_limits_),
@@ -542,6 +511,19 @@ void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
}

utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
switch (packed_dim_whcn_idx_) {
case WHCN::kWidthDim:
return utils::kWidthPacked;
case WHCN::kHeightDim:
return utils::kHeightPacked;
case WHCN::kChannelsDim:
return utils::kChannelsPacked;
default:
VK_THROW("Invalid packed dim");
}
}
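A hypothetical usage sketch, kept as commented pseudocode because the exact constructor arguments are assumed rather than taken from this diff: the estimated layout is only guaranteed to reproduce the packed dim of the original tensor, not every detail of its layout metadata (e.g. an axis_map permuted by virtual_transpose).

```cpp
// Hypothetical call site; constructor signature assumed for illustration.
//
// api::vTensor staging(
//     context,
//     tensor.sizes(),
//     tensor.dtype(),
//     tensor.storage_type(),
//     tensor.estimate_memory_layout());
//
// // The round-trip property this function guarantees:
// assert(staging.packed_dim_whcn_idx() == tensor.packed_dim_whcn_idx());
```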

const vkapi::BufferBindInfo vTensor::sizes_ubo() {
if (!sizes_uniform_.buffer()) {
sizes_uniform_ =
@@ -618,21 +600,16 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {

void vTensor::update_metadata() {
strides_ = calculate_strides(sizes_, dim_order_);
// Only update the memory layout for buffer-backed tensors. Strides are
// meaningless for texture-backed tensors and do not impact the memory layout.
if (storage_type() == utils::kBuffer) {
memory_layout_ = estimate_memory_layout(dim_order_);
}
numel_ = utils::multiply_integers(sizes_);

padded_sizes_ = calculate_padded_sizes(sizes_, memory_layout_);
padded_sizes_ = calculate_padded_sizes(sizes_, packed_dim_whcn_idx_);
unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
padded_numel_ = utils::multiply_integers(padded_sizes_);

// Calculate the image extents that would have been used to allocate a texture
// with the current sizes, and use that to set the logical limits.
set_logical_limits(
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_));
calculate_image_extents(padded_sizes_, axis_map_, packed_dim_whcn_idx_));

if (sizes_uniform_.buffer()) {
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
@@ -656,7 +633,7 @@ void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
// For texture storage check that the current texture is large enough for
// the new sizes of the tensor.
utils::uvec3 virtual_extents =
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_);
calculate_image_extents(padded_sizes_, axis_map_, packed_dim_whcn_idx_);

bool valid_resize = virtual_extents[0] <= storage_.image_extents_[0];
valid_resize =
@@ -725,23 +702,23 @@ void transpose_dim_order_inplace(

void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);

const int dim0_whcn = sizes_.size() - 1 - dim0;
const int dim1_whcn = sizes_.size() - 1 - dim1;
if (packed_dim_whcn_idx_ == dim0_whcn) {
packed_dim_whcn_idx_ = dim1_whcn;
} else if (packed_dim_whcn_idx_ == dim1_whcn) {
packed_dim_whcn_idx_ = dim0_whcn;
}

if (storage_type() == utils::kBuffer) {
transpose_dim_order_inplace(dim_order_, dim0, dim1);
} else {
const int dim0_whcn = sizes_.size() - 1 - dim0;
const int dim1_whcn = sizes_.size() - 1 - dim1;
// Cannot transpose batch dimension for texture storage
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);

std::iter_swap(
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);

if (packed_dim_whcn_idx() == dim0_whcn) {
memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
}
if (packed_dim_whcn_idx() == dim1_whcn) {
memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
}
}
update_metadata();
}
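A standalone sketch of the packed-dim bookkeeping above: when two NCHW dims are swapped, the packed dim index (in WHCN terms) follows whichever of the two dims it currently points at. Function and variable names are illustrative only:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

void sketch_transpose_packed_dim(
    std::vector<int64_t>& sizes,
    int32_t& packed_dim_whcn_idx,
    const int64_t dim0,
    const int64_t dim1) {
  std::swap(sizes[dim0], sizes[dim1]);
  const int32_t dim0_whcn = static_cast<int32_t>(sizes.size() - 1 - dim0);
  const int32_t dim1_whcn = static_cast<int32_t>(sizes.size() - 1 - dim1);
  if (packed_dim_whcn_idx == dim0_whcn) {
    packed_dim_whcn_idx = dim1_whcn;
  } else if (packed_dim_whcn_idx == dim1_whcn) {
    packed_dim_whcn_idx = dim0_whcn;
  }
}

// A width-packed (idx 0) NCHW tensor of sizes {1, 3, 8, 16}, transposed over
// H and W (dim0 = 2, dim1 = 3), becomes {1, 3, 16, 8} and is height-packed
// (idx 1) afterwards.
```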
36 changes: 23 additions & 13 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -26,7 +26,7 @@ namespace api {
*/
std::vector<int64_t> calculate_dim_order(
const size_t ndim,
const utils::GPUMemoryLayout memory_layout);
const int32_t packed_dim_whcn_idx);

/*
* Given the sizes of a tensor and the dim order of the tensor (both in NCHW)
@@ -57,15 +57,15 @@ std::vector<int64_t> unsqueeze_strides(
*/
std::vector<int64_t> calculate_padded_sizes(
const std::vector<int64_t>& sizes,
const utils::GPUMemoryLayout memory_layout);
const int32_t packed_dim_whcn_idx);

/*
* Calculate the image extents required of a texture backed tensor.
*/
utils::uvec3 calculate_image_extents(
const std::vector<int64_t>& padded_sizes,
const std::vector<int64_t>& axis_map,
const utils::GPUMemoryLayout memory_layout);
const int32_t packed_dim_whcn_idx);

struct LastAccess {
vkapi::PipelineStageFlags stage;
@@ -89,8 +89,8 @@ class vTensorStorage final {
vTensorStorage(
Context* context,
const utils::StorageType storage_type,
const utils::GPUMemoryLayout gpu_memory_layout,
const std::vector<int64_t>& axis_map,
const int32_t packed_dim_whcn_idx,
const std::vector<int64_t>& padded_sizes,
const vkapi::ScalarType dtype,
const bool allocate_memory = true);
@@ -221,13 +221,14 @@ class vTensor final {

// Whether the tensor has elements of type float, int, etc.
vkapi::ScalarType dtype_;
// Describes which dimension is "tightly packed". For texture backed tensors,
// this describes which dimension is packed along a texel. For buffer backed
// tensors, this describes which dimension has a stride of 1 (i.e. is last in
// the dim order).
utils::GPUMemoryLayout memory_layout_;
// sizes of the tensor in NCHW dimension order
std::vector<int64_t> sizes_;
// Describes which dimension is "tightly packed" using WHCN index (i.e. 0 for
// width, 1 for height, etc.). For texture backed tensors, this describes
// which dimension is packed along a texel. For buffer backed tensors, this
// describes which dimension has a stride of 1 (i.e. is last in the dim
// order).
int32_t packed_dim_whcn_idx_;

/*
* "Layout" metadata. These describe with further detail how tensor data is
@@ -371,12 +372,18 @@ class vTensor final {
return dtype_;
}

inline utils::GPUMemoryLayout gpu_memory_layout() const {
return memory_layout_;
}
/*
* Provide a "best guess" of a memory layout that can be used to construct a
* tensor with similar layout metadata (i.e. strides, axis_map, etc.) as this
* tensor. In some scenarios, the exact layout of the tensor may not be able
* to be replicated due to calling `virtual_*()` functions after construction;
* however, this function will provide a memory layout that will produce the
* same `packed_dim_whcn_idx` as this tensor.
*/
utils::GPUMemoryLayout estimate_memory_layout() const;

inline int32_t packed_dim_whcn_idx() const {
return static_cast<int32_t>(memory_layout_);
return packed_dim_whcn_idx_;
}

inline const std::vector<int64_t>& sizes() const {
@@ -496,6 +503,9 @@ class vTensor final {
*
* This function can only be used for buffer-backed tensors, since texture
* backed buffers cannot change dimensionality or memory layout.
*
* TODO(ssjia): delete this API. prefer functions such as virtual_transpose
* instead.
*/
void virtual_reconfigure(
const std::vector<int64_t>& new_sizes,
5 changes: 3 additions & 2 deletions backends/vulkan/runtime/graph/ComputeGraph.h
@@ -307,8 +307,9 @@ class ComputeGraph final {
.is_view_of(values_.at(base).toConstTensor());
}

inline utils::GPUMemoryLayout memory_layout_of(const ValueRef idx) const {
return values_.at(idx).toConstTensor().gpu_memory_layout();
inline utils::GPUMemoryLayout estimate_memory_layout_of(
const ValueRef idx) const {
return values_.at(idx).toConstTensor().estimate_memory_layout();
}

inline int32_t packed_dim_whcn_idx_of(const ValueRef idx) const {
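A hypothetical op-authoring sketch showing how these two accessors are typically combined, kept as commented pseudocode since the surrounding op boilerplate is omitted; names other than the two accessors are assumptions:

```cpp
// void add_example_op_node(ComputeGraph& graph, ValueRef in, ValueRef out) {
//   // Use the estimated layout only as a hint, e.g. when prepacking a
//   // constant so it matches the input tensor's packed dim.
//   const utils::GPUMemoryLayout layout_hint =
//       graph.estimate_memory_layout_of(in);
//
//   // Compare packed dims directly when validating arguments, since ops
//   // generally only require the packed dims to agree.
//   VK_CHECK_COND(
//       graph.packed_dim_whcn_idx_of(in) == graph.packed_dim_whcn_idx_of(out));
// }
```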
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
@@ -21,7 +21,7 @@ void check_binary_op_args(
const api::vTensor& self,
const api::vTensor& other,
const api::vTensor& out) {
VK_CHECK_COND(check_same_memory_layout(self, other, out));
VK_CHECK_COND(check_same_packed_dim(self, other, out));
std::vector<int64_t> broadcasted_sizes =
calculate_broadcasted_output_size(self, other);
VK_CHECK_COND(out.sizes() == broadcasted_sizes);
@@ -53,7 +53,7 @@ void add_binary_op_node(
const std::string& op_name) {
ValueRef arg1 = prepack_if_tensor_ref(graph, in1);
ValueRef arg2 =
prepack_if_tensor_ref(graph, in2, graph.memory_layout_of(arg1));
prepack_if_tensor_ref(graph, in2, graph.estimate_memory_layout_of(arg1));

vTensorPtr t_in1 = graph.get_tensor(arg1);
vTensorPtr t_in2 = graph.get_tensor(arg2);
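The implementation of `check_same_packed_dim` is not part of this diff; below is a minimal sketch of what such a helper could look like, assuming it only compares packed dim indices (it also assumes the vTensor header from this change is available):

```cpp
// Sketch under the stated assumption; not the actual helper's implementation.
bool sketch_check_same_packed_dim(
    const api::vTensor& t1,
    const api::vTensor& t2,
    const api::vTensor& t3) {
  return t1.packed_dim_whcn_idx() == t2.packed_dim_whcn_idx() &&
      t1.packed_dim_whcn_idx() == t3.packed_dim_whcn_idx();
}
```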
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/impl/Cat.cpp
@@ -25,7 +25,7 @@ void add_cat_default_node(

for (ValueRef input_ref : *input_list) {
vTensorPtr t_in = graph.get_tensor(input_ref);
VK_CHECK_COND(check_memory_layout_is(*t_in, utils::kChannelsPacked));
VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim));
}

int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -222,8 +222,8 @@ ValueRef prepack_weights(
}

void check_conv_args(const api::vTensor& in, const api::vTensor& out) {
VK_CHECK_COND(check_memory_layout_is(in, utils::kChannelsPacked));
VK_CHECK_COND(check_memory_layout_is(out, utils::kChannelsPacked));
VK_CHECK_COND(check_packed_dim_is(in, WHCN::kChannelsDim));
VK_CHECK_COND(check_packed_dim_is(out, WHCN::kChannelsDim));
}

struct Conv2dParams final {