Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,22 @@ class ComputeGraph final {
return {t, staging};
}

/*
 * Convenience overload: create a tensor with the given properties, register
 * it as a graph input, and return a reference to it along with a reference
 * to its associated staging buffer.
 */
inline IOValueRef add_input_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const utils::StorageType storage_type,
    const utils::GPUMemoryLayout memory_layout,
    const int64_t shared_object_idx = -1) {
  const ValueRef tensor_ref = add_tensor(
      sizes, dtype, storage_type, memory_layout, shared_object_idx);
  // Registering the tensor as an input creates its staging buffer.
  const ValueRef staging_ref = set_input_tensor(tensor_ref);
  return {tensor_ref, staging_ref};
}

SharedObject& get_shared_object(const int64_t idx);

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ bitw8_image_to_nchw_nobitw8buffer:
STORAGE: texture3d
DTYPE: int8
generate_variant_forall:
DTYPE:
- VALUE: int8
- VALUE: uint8
STORAGE:
- VALUE: texture2d
- VALUE: texture3d
DTYPE:
- VALUE: int8
- VALUE: uint8
shader_variants:
- NAME: bitw8_image_to_nchw_nobitw8buffer
30 changes: 16 additions & 14 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ ${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_buffer(B, "w", "nchw_out", DTYPE)}
${layout_declare_buffer(B, "w", "buf_out", DTYPE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not TO_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand All @@ -31,23 +33,23 @@ ${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 axis_map = unhash_axis_map(t_layout);
const lowp int packed_dim = unhash_packed_dim(t_layout);

void write_out_texel(VEC4_T texel, ivec4 tensor_idx) {
const ivec4 buf_indices = tidx_to_nchwi(
tensor_idx,
sizes,
packed_dim);
void write_out_texel(VEC4_T texel, ivec4 tidx) {
$if TO_STAGING:
const ivec4 buf_indices = tidx_to_nchwi(tidx, sizes, packed_dim);
$else:
const ivec4 buf_indices = tidx_to_4bufi(tidx, buf_strides, packed_dim);

if (tensor_idx[packed_dim] < sizes[packed_dim]) {
nchw_out[buf_indices.x] = BUF_T(texel.x);
if (tidx[packed_dim] < sizes[packed_dim]) {
buf_out[buf_indices.x] = BUF_T(texel.x);
}
if (tensor_idx[packed_dim] + 1 < sizes[packed_dim]) {
nchw_out[buf_indices.y] = BUF_T(texel.y);
if (tidx[packed_dim] + 1 < sizes[packed_dim]) {
buf_out[buf_indices.y] = BUF_T(texel.y);
}
if (tensor_idx[packed_dim] + 2 < sizes[packed_dim]) {
nchw_out[buf_indices.z] = BUF_T(texel.z);
if (tidx[packed_dim] + 2 < sizes[packed_dim]) {
buf_out[buf_indices.z] = BUF_T(texel.z);
}
if (tensor_idx[packed_dim] + 3 < sizes[packed_dim]) {
nchw_out[buf_indices.w] = BUF_T(texel.w);
if (tidx[packed_dim] + 3 < sizes[packed_dim]) {
buf_out[buf_indices.w] = BUF_T(texel.w);
}
}

Expand Down
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ image_to_nchw:
parameter_names_with_default_values:
DTYPE: float
STORAGE: texture3d
TO_STAGING: True
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int
- VALUE: int8
STORAGE:
- VALUE: texture3d
- VALUE: texture2d
shader_variants:
- NAME: image_to_nchw
- NAME: image_to_nchw_texture3d
- NAME: image_to_nchw_texture2d
STORAGE: texture2d
- NAME: clone_image_to_buffer
TO_STAGING: False
15 changes: 15 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ ivec4 tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes, const int packed_dim) {
return base_i + ivec4(0, 1, 2, 3) * strides[packed_dim];
}

/*
 * Get the buffer indices that contain the data of the texel that corresponds
 * to the provided tensor index. Since a texel has 4 elements, 4 buffer
 * indices will be retrieved. The 4 indices step along the packed dimension's
 * stride, i.e. they address the 4 consecutive elements the texel packs.
 */
ivec4 tidx_to_4bufi(
    const ivec4 tidx,
    const ivec4 strides,
    const int packed_dim) {
  int base_i = tidx.x * strides.x + tidx.y * strides.y + tidx.z * strides.z +
      tidx.w * strides.w;

  return base_i + ivec4(0, 1, 2, 3) * strides[packed_dim];
}

ivec4 nchwi_to_tidx(const int nchwi, const ivec4 sizes) {
return ivec4(
nchwi % sizes.x,
Expand Down
41 changes: 28 additions & 13 deletions backends/vulkan/runtime/graph/ops/glsl/matmul_naive_buffer.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,23 @@ ${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_tensor(0, "w", "t_out", DTYPE, "buffer")}
${layout_declare_tensor(1, "r", "t_mat1", DTYPE, "buffer")}
${layout_declare_tensor(2, "r", "t_mat2", DTYPE, "buffer")}
${layout_declare_ubo(3, "ivec4", "out_sizes")}
${layout_declare_ubo(4, "ivec4", "out_strides")}
${layout_declare_ubo(5, "ivec4", "mat1_sizes")}
${layout_declare_ubo(6, "ivec4", "mat1_strides")}
${layout_declare_ubo(7, "ivec4", "mat2_sizes")}
${layout_declare_ubo(8, "ivec4", "mat2_strides")}
${layout_declare_ubo(9, "int", "out_numel")}
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
${layout_declare_tensor(B, "r", "t_mat1", DTYPE, "buffer")}
${layout_declare_tensor(B, "r", "t_mat2", DTYPE, "buffer")}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "out_strides")}
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
${layout_declare_ubo(B, "ivec4", "mat1_strides")}
${layout_declare_ubo(B, "ivec4", "mat2_sizes")}
${layout_declare_ubo(B, "ivec4", "mat2_strides")}
${layout_declare_ubo(B, "int", "out_numel")}

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

${layout_declare_spec_const(C, "int", "mat2_is_transposed", "0")}

void main() {
const ivec4 out_bufix = ivec4(
gl_GlobalInvocationID.x,
Expand All @@ -44,15 +46,28 @@ void main() {

int mat1_bufi = tidx_to_bufi(
ivec4(0, out_bufix.y, out_bufix.z, out_bufix.w), mat1_strides);
int mat2_bufi = tidx_to_bufi(
ivec4(out_bufix.x, 0, out_bufix.z, out_bufix.w), mat2_strides);
int mat2_bufi;
if (mat2_is_transposed > 0) {
mat2_bufi = tidx_to_bufi(
ivec4(0, out_bufix.x, 0, 0), mat2_strides);
} else {
mat2_bufi = tidx_to_bufi(
ivec4(out_bufix.x, 0, out_bufix.z, out_bufix.w), mat2_strides);
}

int mat2_stride;
if (mat2_is_transposed > 0) {
mat2_stride = mat2_strides.x;
} else {
mat2_stride = mat2_strides.y;
}

T sum = T(0.0);
for (int i = 0; i < mat1_sizes.x; ++i) {
sum += t_mat1[mat1_bufi] * t_mat2[mat2_bufi];

mat1_bufi += mat1_strides.x;
mat2_bufi += mat2_strides.y;
mat2_bufi += mat2_stride;
}

const int out_bufi = tidx_to_bufi(out_bufix, out_strides);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ nchw_to_bitw8_image_nobitw8buffer:
STORAGE: texture3d
DTYPE: int8
generate_variant_forall:
DTYPE:
- VALUE: int8
- VALUE: uint8
STORAGE:
- VALUE: texture2d
- VALUE: texture3d
DTYPE:
- VALUE: int8
- VALUE: uint8
shader_variants:
- NAME: nchw_to_bitw8_image_nobitw8buffer
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ layout(std430) buffer;
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_buffer(B, "r", "buf_in", DTYPE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not FROM_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand All @@ -32,10 +34,10 @@ const lowp ivec4 axis_map = unhash_axis_map(t_layout);
const lowp int packed_dim = unhash_packed_dim(t_layout);

VEC4_T read_texel(ivec4 tidx) {
const ivec4 buf_indices = tidx_to_nchwi(
tidx,
sizes,
packed_dim);
$if FROM_STAGING:
const ivec4 buf_indices = tidx_to_nchwi(tidx, sizes, packed_dim);
$else:
const ivec4 buf_indices = tidx_to_4bufi(tidx, buf_strides, packed_dim);

VEC4_T texel = VEC4_T(0);
if (tidx[packed_dim] < sizes[packed_dim]) {
Expand Down
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ nchw_to_image:
parameter_names_with_default_values:
STORAGE: texture3d
DTYPE: float
FROM_STAGING: True
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int
- VALUE: int8
STORAGE:
- VALUE: texture3d
- VALUE: texture2d
shader_variants:
- NAME: nchw_to_image
- NAME: nchw_to_image_texture3d
- NAME: nchw_to_image_texture2d
STORAGE: texture2d
- NAME: clone_buffer_to_image
FROM_STAGING: False
96 changes: 91 additions & 5 deletions backends/vulkan/runtime/graph/ops/impl/Clone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,28 @@

#include <executorch/backends/vulkan/runtime/graph/Logging.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

// Resize callback for clone nodes: propagate the input tensor's sizes to the
// output tensor when both tensors have the same dimensionality.
void resize_clone_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  (void)extra_args;
  vTensorPtr dst = graph->get_tensor(args[0].refs[0]);
  vTensorPtr src = graph->get_tensor(args[1].refs[0]);
  // TODO: support for when dimensionality doesn't match, i.e. clone is used to
  // implement squeeze.
  if (dst->dim() != src->dim()) {
    return;
  }
  dst->virtual_resize(src->sizes());
}

void add_clone_node(
ComputeGraph& graph,
const ValueRef in,
Expand All @@ -30,14 +46,84 @@ void add_clone_node(
VK_KERNEL_FROM_STR(kernel_name),
graph.create_global_wg_size(out),
graph.create_local_wg_size(out),
{{out, vkapi::MemoryAccessType::WRITE},
{in, vkapi::MemoryAccessType::READ}},
{t_out->logical_limits_ubo()}));
// Inputs and Outputs
{{out, vkapi::kWrite}, {in, vkapi::kRead}},
// Parameter Buffers
{t_out->logical_limits_ubo()},
// Specialization Constants
{},
// Resizing Logic
resize_clone_node));
}

// Record a dispatch that copies a texture-backed tensor into a buffer-backed
// tensor, using the clone_image_to_buffer shader variant.
void add_image_to_buffer_node(
    ComputeGraph& graph,
    const ValueRef image,
    const ValueRef buffer) {
  std::string shader_name = "clone_image_to_buffer";
  add_dtype_suffix(shader_name, graph.dtype_of(image));

  // Workgroup extents are derived from the image, which both sides share.
  const utils::uvec3 global_wg = graph.create_global_wg_size(image);
  graph.execute_nodes().emplace_back(new DispatchNode(
      graph,
      VK_KERNEL_FROM_STR(shader_name),
      global_wg,
      graph.create_local_wg_size(global_wg),
      // Input and Outputs
      {{buffer, vkapi::kWrite}, {image, vkapi::kRead}},
      // Parameter Buffers
      {graph.sizes_ubo(image), graph.strides_ubo(buffer)},
      // Specialization Constants
      {graph.hashed_layout_of(image)},
      // Resizing Logic
      resize_clone_node));
}

// Record a dispatch that copies a buffer-backed tensor into a texture-backed
// tensor, using the clone_buffer_to_image shader variant.
void add_buffer_to_image_node(
    ComputeGraph& graph,
    const ValueRef buffer,
    const ValueRef image) {
  std::string shader_name = "clone_buffer_to_image";
  add_dtype_suffix(shader_name, graph.dtype_of(image));

  // Workgroup extents are derived from the image, which both sides share.
  const utils::uvec3 global_wg = graph.create_global_wg_size(image);
  graph.execute_nodes().emplace_back(new DispatchNode(
      graph,
      VK_KERNEL_FROM_STR(shader_name),
      global_wg,
      graph.create_local_wg_size(global_wg),
      // Input and Outputs
      {{image, vkapi::kWrite}, {buffer, vkapi::kRead}},
      // Parameter Buffers
      {graph.sizes_ubo(image), graph.strides_ubo(buffer)},
      // Specialization Constants
      {graph.hashed_layout_of(image)},
      // Resizing Logic
      resize_clone_node));
}

void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
// The vulkan delegate does not support changing memory format.
return add_clone_node(graph, args[0], args[2]);
const ValueRef src = args[0];
const ValueRef dst = args[2];

const utils::StorageType src_storage = graph.storage_type_of(src);
const utils::StorageType dst_storage = graph.storage_type_of(dst);
if (src_storage == utils::kTexture3D && dst_storage == utils::kTexture3D) {
if (graph.hashed_layout_of(src) == graph.hashed_layout_of(dst)) {
return add_clone_node(graph, src, dst);
} else {
return add_view_node(graph, src, kDummyValueRef, dst);
}
}
if (src_storage == utils::kTexture3D && dst_storage == utils::kBuffer) {
return add_image_to_buffer_node(graph, src, dst);
}
if (src_storage == utils::kBuffer && dst_storage == utils::kTexture3D) {
return add_buffer_to_image_node(graph, src, dst);
}
VK_THROW("Buffer to buffer memory layout transition not supported yet!");
}

// Clone node is not the most efficient implementation for the aten.clone
Expand Down
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/Linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,12 @@ void linear(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef weight = prepack_standard(
graph, weight_data, graph.storage_type_of(out), utils::kWidthPacked);
ValueRef mat2_is_transposed = graph.add_scalar(true);

if (graph.val_is_none(bias)) {
return add_matmul_node(graph, input, weight, out, mat2_is_transposed);
} else {
// Buffer implementation does not yet support biases
VK_CHECK_COND(!graph.is_buffer_storage(out));
return add_addmm_node(
graph,
bias,
Expand Down
Loading
Loading