Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,22 @@ class ComputeGraph final {
return {t, staging};
}

/*
 * Creates a tensor from the given sizes, dtype, storage type, memory layout
 * and (optional) shared object index, then passes it to set_input_tensor().
 * Returns the tensor reference paired with the reference produced by
 * set_input_tensor(), i.e. the staging buffer of the new input.
 */
inline IOValueRef add_input_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const utils::StorageType storage_type,
    const utils::GPUMemoryLayout memory_layout,
    const int64_t shared_object_idx = -1) {
  const ValueRef tensor_ref = add_tensor(
      sizes, dtype, storage_type, memory_layout, shared_object_idx);
  // The tensor must exist before it can be registered as an input.
  const ValueRef staging_ref = set_input_tensor(tensor_ref);
  return IOValueRef{tensor_ref, staging_ref};
}

SharedObject& get_shared_object(const int64_t idx);

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ bitw8_image_to_nchw_nobitw8buffer:
STORAGE: texture3d
DTYPE: int8
generate_variant_forall:
DTYPE:
- VALUE: int8
- VALUE: uint8
STORAGE:
- VALUE: texture2d
- VALUE: texture3d
DTYPE:
- VALUE: int8
- VALUE: uint8
shader_variants:
- NAME: bitw8_image_to_nchw_nobitw8buffer
30 changes: 16 additions & 14 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ ${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_buffer(B, "w", "nchw_out", DTYPE)}
${layout_declare_buffer(B, "w", "buf_out", DTYPE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not TO_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand All @@ -31,23 +33,23 @@ ${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 axis_map = unhash_axis_map(t_layout);
const lowp int packed_dim = unhash_packed_dim(t_layout);

/*
 * Writes the elements of `texel` to `buf_out`. `tidx` is the tensor index of
 * the first element of the texel; the remaining three elements follow along
 * the packed dim. Elements whose index along the packed dim falls outside
 * `sizes` are skipped, so padding in the last texel is never written.
 *
 * When TO_STAGING is set the destination indices are derived from `sizes`;
 * otherwise they are derived from `buf_strides`.
 */
void write_out_texel(VEC4_T texel, ivec4 tidx) {
  $if TO_STAGING:
    const ivec4 buf_indices = tidx_to_nchwi(tidx, sizes, packed_dim);
  $else:
    const ivec4 buf_indices = tidx_to_4bufi(tidx, buf_strides, packed_dim);

  if (tidx[packed_dim] < sizes[packed_dim]) {
    buf_out[buf_indices.x] = BUF_T(texel.x);
  }
  if (tidx[packed_dim] + 1 < sizes[packed_dim]) {
    buf_out[buf_indices.y] = BUF_T(texel.y);
  }
  if (tidx[packed_dim] + 2 < sizes[packed_dim]) {
    buf_out[buf_indices.z] = BUF_T(texel.z);
  }
  if (tidx[packed_dim] + 3 < sizes[packed_dim]) {
    buf_out[buf_indices.w] = BUF_T(texel.w);
  }
}

Expand Down
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ image_to_nchw:
parameter_names_with_default_values:
DTYPE: float
STORAGE: texture3d
TO_STAGING: True
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int
- VALUE: int8
STORAGE:
- VALUE: texture3d
- VALUE: texture2d
shader_variants:
- NAME: image_to_nchw
- NAME: image_to_nchw_texture3d
- NAME: image_to_nchw_texture2d
STORAGE: texture2d
- NAME: clone_image_to_buffer
TO_STAGING: False
15 changes: 15 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ ivec4 tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes, const int packed_dim) {
return base_i + ivec4(0, 1, 2, 3) * strides[packed_dim];
}

/*
 * Compute the buffer indices that hold the data of the texel corresponding to
 * the provided tensor index. A texel has 4 elements, so 4 buffer indices are
 * returned; successive indices are separated by the stride of the packed dim.
 */
ivec4 tidx_to_4bufi(
    const ivec4 tidx,
    const ivec4 strides,
    const int packed_dim) {
  // Per-component product of each tensor index with its stride.
  const ivec4 offsets = tidx * strides;
  const int base_i = offsets.x + offsets.y + offsets.z + offsets.w;

  return ivec4(base_i) + strides[packed_dim] * ivec4(0, 1, 2, 3);
}

ivec4 nchwi_to_tidx(const int nchwi, const ivec4 sizes) {
return ivec4(
nchwi % sizes.x,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ nchw_to_bitw8_image_nobitw8buffer:
STORAGE: texture3d
DTYPE: int8
generate_variant_forall:
DTYPE:
- VALUE: int8
- VALUE: uint8
STORAGE:
- VALUE: texture2d
- VALUE: texture3d
DTYPE:
- VALUE: int8
- VALUE: uint8
shader_variants:
- NAME: nchw_to_bitw8_image_nobitw8buffer
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ layout(std430) buffer;
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_buffer(B, "r", "buf_in", DTYPE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not FROM_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand All @@ -32,10 +34,10 @@ const lowp ivec4 axis_map = unhash_axis_map(t_layout);
const lowp int packed_dim = unhash_packed_dim(t_layout);

VEC4_T read_texel(ivec4 tidx) {
const ivec4 buf_indices = tidx_to_nchwi(
tidx,
sizes,
packed_dim);
$if FROM_STAGING:
const ivec4 buf_indices = tidx_to_nchwi(tidx, sizes, packed_dim);
$else:
const ivec4 buf_indices = tidx_to_4bufi(tidx, buf_strides, packed_dim);

VEC4_T texel = VEC4_T(0);
if (tidx[packed_dim] < sizes[packed_dim]) {
Expand Down
10 changes: 6 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ nchw_to_image:
parameter_names_with_default_values:
STORAGE: texture3d
DTYPE: float
FROM_STAGING: True
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int
- VALUE: int8
STORAGE:
- VALUE: texture3d
- VALUE: texture2d
shader_variants:
- NAME: nchw_to_image
- NAME: nchw_to_image_texture3d
- NAME: nchw_to_image_texture2d
STORAGE: texture2d
- NAME: clone_buffer_to_image
FROM_STAGING: False
96 changes: 91 additions & 5 deletions backends/vulkan/runtime/graph/ops/impl/Clone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,28 @@

#include <executorch/backends/vulkan/runtime/graph/Logging.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

/*
 * Resize callback shared by the clone dispatch nodes. args[0] holds the output
 * tensor and args[1] the input tensor; extra_args is unused. The output takes
 * on the input's sizes only when both tensors have the same number of dims.
 */
void resize_clone_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  (void)extra_args;
  vTensorPtr dst_tensor = graph->get_tensor(args[0].refs[0]);
  vTensorPtr src_tensor = graph->get_tensor(args[1].refs[0]);
  // TODO: support for when dimensionality doesn't match, i.e. clone is used to
  // implement squeeze.
  if (dst_tensor->dim() != src_tensor->dim()) {
    return;
  }
  dst_tensor->virtual_resize(src_tensor->sizes());
}

void add_clone_node(
ComputeGraph& graph,
const ValueRef in,
Expand All @@ -30,14 +46,84 @@ void add_clone_node(
VK_KERNEL_FROM_STR(kernel_name),
graph.create_global_wg_size(out),
graph.create_local_wg_size(out),
{{out, vkapi::MemoryAccessType::WRITE},
{in, vkapi::MemoryAccessType::READ}},
{t_out->logical_limits_ubo()}));
// Inputs and Outputs
{{out, vkapi::kWrite}, {in, vkapi::kRead}},
// Parameter Buffers
{t_out->logical_limits_ubo()},
// Specialization Constants
{},
// Resizing Logic
resize_clone_node));
}

/*
 * Appends a dispatch node that runs the `clone_image_to_buffer` shader,
 * reading from `image` and writing to `buffer`. The shader variant is chosen
 * by the dtype of `image`, and the work group sizes are derived from `image`.
 */
void add_image_to_buffer_node(
    ComputeGraph& graph,
    const ValueRef image,
    const ValueRef buffer) {
  std::string shader_name = "clone_image_to_buffer";
  add_dtype_suffix(shader_name, graph.dtype_of(image));
  vkapi::ShaderInfo shader_info = VK_KERNEL_FROM_STR(shader_name);

  utils::uvec3 global_size = graph.create_global_wg_size(image);
  graph.execute_nodes().emplace_back(new DispatchNode(
      graph,
      shader_info,
      global_size,
      graph.create_local_wg_size(global_size),
      // Bound tensors: destination buffer is written, source image is read
      {{buffer, vkapi::kWrite}, {image, vkapi::kRead}},
      // Uniforms: sizes of the image, then strides of the buffer
      {graph.sizes_ubo(image), graph.strides_ubo(buffer)},
      // Specialization constant: hashed layout of the image
      {graph.hashed_layout_of(image)},
      // Resize callback
      resize_clone_node));
}

/*
 * Appends a dispatch node that runs the `clone_buffer_to_image` shader,
 * reading from `buffer` and writing to `image`. The shader variant is chosen
 * by the dtype of `image`, and the work group sizes are derived from `image`.
 */
void add_buffer_to_image_node(
    ComputeGraph& graph,
    const ValueRef buffer,
    const ValueRef image) {
  std::string shader_name = "clone_buffer_to_image";
  add_dtype_suffix(shader_name, graph.dtype_of(image));
  vkapi::ShaderInfo shader_info = VK_KERNEL_FROM_STR(shader_name);

  utils::uvec3 global_size = graph.create_global_wg_size(image);
  graph.execute_nodes().emplace_back(new DispatchNode(
      graph,
      shader_info,
      global_size,
      graph.create_local_wg_size(global_size),
      // Bound tensors: destination image is written, source buffer is read
      {{image, vkapi::kWrite}, {buffer, vkapi::kRead}},
      // Uniforms: sizes of the image, then strides of the buffer
      {graph.sizes_ubo(image), graph.strides_ubo(buffer)},
      // Specialization constant: hashed layout of the image
      {graph.hashed_layout_of(image)},
      // Resize callback
      resize_clone_node));
}

/*
 * Operator entry point for clone. args[0] is the source tensor and args[2] is
 * the destination tensor; args[1] is not read here.
 *
 * The node that gets added depends on the storage types of the two tensors:
 *  - texture3d -> texture3d with identical hashed layouts: plain clone node
 *  - texture3d -> texture3d with differing hashed layouts: view node
 *  - texture3d -> buffer: image-to-buffer node
 *  - buffer -> texture3d: buffer-to-image node
 * Every other combination throws.
 */
void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef src = args[0];
  const ValueRef dst = args[2];

  const utils::StorageType src_storage = graph.storage_type_of(src);
  const utils::StorageType dst_storage = graph.storage_type_of(dst);
  if (src_storage == utils::kTexture3D && dst_storage == utils::kTexture3D) {
    if (graph.hashed_layout_of(src) == graph.hashed_layout_of(dst)) {
      return add_clone_node(graph, src, dst);
    } else {
      // Layouts differ, so the copy is delegated to the view node; no sizes
      // value is supplied.
      return add_view_node(graph, src, kDummyValueRef, dst);
    }
  }
  if (src_storage == utils::kTexture3D && dst_storage == utils::kBuffer) {
    return add_image_to_buffer_node(graph, src, dst);
  }
  if (src_storage == utils::kBuffer && dst_storage == utils::kTexture3D) {
    return add_buffer_to_image_node(graph, src, dst);
  }
  // NOTE(review): any storage combination not handled above (not only
  // buffer-to-buffer) also ends up here and reports this message.
  VK_THROW("Buffer to buffer memory layout transition not supported yet!");
}

// Clone node is not the most efficient implementation for the aten.clone
Expand Down
2 changes: 2 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/View.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
Expand Down
21 changes: 21 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/View.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

namespace vkcompute {

/*
 * Adds a view node to `graph` that reads from `in` and produces `out`.
 * Declared in this header so that other operator implementations can call it;
 * the clone operator does so and passes kDummyValueRef for `sizes`.
 * NOTE(review): presumably `sizes` carries the requested output sizes and may
 * be a dummy ref when they are not needed - confirm against View.cpp.
 */
void add_view_node(
ComputeGraph& graph,
ValueRef in,
ValueRef sizes,
ValueRef out);

} // namespace vkcompute
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
if (is_bitw8(v_dst.dtype()) && v_dst.storage_type() != utils::kBuffer &&
!int8_buffer_enabled) {
kernel_name = "nchw_to_bitw8_image_nobitw8buffer";
add_dtype_suffix(kernel_name, v_dst);
add_storage_type_suffix(kernel_name, v_dst);
add_dtype_suffix(kernel_name, v_dst);
return VK_KERNEL_FROM_STR(kernel_name);
}

Expand All @@ -41,8 +41,8 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
}

kernel_name = "nchw_to_image";
add_dtype_suffix(kernel_name, v_dst);
add_storage_type_suffix(kernel_name, v_dst);
add_dtype_suffix(kernel_name, v_dst);

return VK_KERNEL_FROM_STR(kernel_name);
}
Expand All @@ -56,8 +56,8 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader(
if (is_bitw8(v_src.dtype()) && v_src.storage_type() != utils::kBuffer &&
!int8_buffer_enabled) {
kernel_name = "bitw8_image_to_nchw_nobitw8buffer";
add_dtype_suffix(kernel_name, v_src);
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);
return VK_KERNEL_FROM_STR(kernel_name);
}

Expand All @@ -68,8 +68,8 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader(
}

kernel_name = "image_to_nchw";
add_dtype_suffix(kernel_name, v_src);
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);

return VK_KERNEL_FROM_STR(kernel_name);
}
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/test/utils/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ void record_bitw8_image_to_nchw_nobitw8buffer_op(
utils::uvec3 global_wg_size = {buffer_len, 1, 1};

std::string kernel_name = "bitw8_image_to_nchw_nobitw8buffer";
add_dtype_suffix(kernel_name, v_src);
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);

context->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name),
Expand Down
Loading
Loading