Skip to content

Commit

Permalink
[pytorch] Add support for "height" and "width" dimension for the "sel…
Browse files Browse the repository at this point in the history
…ect" operator on pytorch vulkan backend (#94612)

Summary: Add support for "height" and "width" dimension for the "select" operator on pytorch vulkan backend.

Test Plan:
```
yipjustin@yipjustin-mbp fbsource % buck run  -c pt.vulkan_full_precision=1  --target-platforms ovr_config//platform/macos:arm64-fbsource //xplat/caffe2:pt_vulkan_api_test_binAppleMac\#macosx-arm64 -- --gtest_filter="*select_3d*"
Downloaded 1/2 artifacts, 1.29 Mbytes, 0.0% cache miss (for updated rules)
Building: finished in 3.7 sec (100%) 450/450 jobs, 2/450 updated
  Total time: 3.8 sec
BUILD SUCCEEDED
Running main() from xplat/third-party/gmock/googletest-1.12.1/googletest/src/gtest_main.cc
Note: Google Test filter = *select_3d*
[==========] Running 9 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 9 tests from VulkanAPITest
[ RUN      ] VulkanAPITest.select_3d_depth_small
[       OK ] VulkanAPITest.select_3d_depth_small (30 ms)
[ RUN      ] VulkanAPITest.select_3d_depth_medium
[       OK ] VulkanAPITest.select_3d_depth_medium (0 ms)
[ RUN      ] VulkanAPITest.select_3d_depth_large
[       OK ] VulkanAPITest.select_3d_depth_large (1 ms)
[ RUN      ] VulkanAPITest.select_3d_height_small
[       OK ] VulkanAPITest.select_3d_height_small (0 ms)
[ RUN      ] VulkanAPITest.select_3d_height_medium
[       OK ] VulkanAPITest.select_3d_height_medium (0 ms)
[ RUN      ] VulkanAPITest.select_3d_height_large
[       OK ] VulkanAPITest.select_3d_height_large (3 ms)
[ RUN      ] VulkanAPITest.select_3d_width_small
[       OK ] VulkanAPITest.select_3d_width_small (0 ms)
[ RUN      ] VulkanAPITest.select_3d_width_medium
[       OK ] VulkanAPITest.select_3d_width_medium (0 ms)
[ RUN      ] VulkanAPITest.select_3d_width_large
[       OK ] VulkanAPITest.select_3d_width_large (1 ms)
[----------] 9 tests from VulkanAPITest (40 ms total)

[----------] Global test environment tear-down
[==========] 9 tests from 1 test suite ran. (40 ms total)
[  PASSED  ] 9 tests.
```

Reviewed By: SS-JIA

Differential Revision: D43020796

Pull Request resolved: #94612
Approved by: https://github.com/SS-JIA
  • Loading branch information
yipjustin authored and pytorchmergebot committed Feb 15, 2023
1 parent fa1ea9f commit f2c2642
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 2 deletions.
40 changes: 40 additions & 0 deletions aten/src/ATen/native/vulkan/glsl/select_height.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
ivec3 size;
int index;
} uBlock;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

// w
const int src_x = pos.x;
// h
const int src_y = uBlock.index;
// c
const int src_z = pos.y;

const vec4 v = texelFetch(uInput, ivec3(src_x, src_y, src_z), 0);

for (int i = 0; i < 4; i++) {
ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0);

// When the C-channel exceeds original block size, exit early
if (new_pos.y >= uBlock.size.y) {
return;
}

imageStore(uOutput, new_pos, vec4(v[i], 0, 0, 0));
}
}
40 changes: 40 additions & 0 deletions aten/src/ATen/native/vulkan/glsl/select_width.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
ivec3 size;
int index;
} uBlock;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

// w
const int src_x = uBlock.index;
// h
const int src_y = pos.x;
// c
const int src_z = pos.y;

const vec4 v = texelFetch(uInput, ivec3(src_x, src_y, src_z), 0);

for (int i = 0; i < 4; i++) {
ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0);

// When the C-channel exceeds original block size, exit early
if (new_pos.y >= uBlock.size.y) {
return;
}

imageStore(uOutput, new_pos, vec4(v[i], 0, 0, 0));
}
}
126 changes: 124 additions & 2 deletions aten/src/ATen/native/vulkan/ops/Select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,125 @@ Tensor select_depth(const Tensor& input_arg, uint32_t index) {
return convert(v_output);
}

Tensor select_height(const Tensor& input_arg, uint32_t index) {
api::Context* const context = api::context();

const Tensor input = input_arg.is_vulkan() ? input_arg : input_arg.vulkan();
const vTensor& v_input = convert(input);
const IntArrayRef v_input_sizes = v_input.sizes();

vTensor v_output{
context,
{v_input_sizes[0], v_input_sizes[2]},
input_arg.scalar_type(),
};

const struct Block final {
uvec3 size; // output texture size
uint32_t index;
} block{v_output.extents(), index};

// Input tensor is a (c, h, w)
// Output tensor is a (c, w)
// In shader, the input texture's coordinate is (w, h, c)
// In shader, the output texture's coordinate is (w, c, 1)
uint32_t w = v_output.extents().data[0u];
uint32_t c = v_output.extents().data[1u];

// Encoding of c-channel is packed into texel, hence we only call ceil(c/4)
// times to minimize invocation and read.
// For the last dimension, it is the selected height. Shader will do a direct
// lookup based on block.index.
uvec3 global_workgroup_size{w, api::utils::div_up(c, 4u), 1};

api::UniformParamsBuffer params(context, block);
api::PipelineBarrier pipeline_barrier{};

context->submit_compute_job(
// shader descriptor
VK_KERNEL(select_height),
// pipeline barrier
pipeline_barrier,
// global work group size
global_workgroup_size,
// local work group size
adaptive_work_group_size(global_workgroup_size),
// fence handle
VK_NULL_HANDLE,
// shader arguments
v_output.image(
pipeline_barrier,
api::PipelineStage::COMPUTE,
api::MemoryAccessType::WRITE),
v_input.image(pipeline_barrier, api::PipelineStage::COMPUTE),
// params buffer
params.buffer());

return convert(v_output);
}

Tensor select_width(const Tensor& input_arg, uint32_t index) {
api::Context* const context = api::context();

const Tensor input = input_arg.is_vulkan() ? input_arg : input_arg.vulkan();
const vTensor& v_input = convert(input);
const IntArrayRef v_input_sizes = v_input.sizes();

vTensor v_output{
context,
{v_input_sizes[0], v_input_sizes[1]},
input_arg.scalar_type(),
};

const struct Block final {
uvec3 size; // output texture size
uint32_t index;
} block{v_output.extents(), index};

// Input tensor is a (c, h, w)
// Output tensor is a (c, h)
// In shader, the input texture's coordinate is (w, h, c)
// In shader, the output texture's coordinate is (h, c, 1)
uint32_t h = v_output.extents().data[0u];
uint32_t c = v_output.extents().data[1u];

// Encoding of c-channel is packed into texel, hence we only call ceil(c/4)
// times to minimize invocation and read.
// For the last dimension, it is the selected width. Shader will do a direct
// lookup based on block.index.
uvec3 global_workgroup_size{h, api::utils::div_up(c, 4u), 1};

api::UniformParamsBuffer params(context, block);
api::PipelineBarrier pipeline_barrier{};

context->submit_compute_job(
// shader descriptor
VK_KERNEL(select_width),
// pipeline barrier
pipeline_barrier,
// global work group size
global_workgroup_size,
// local work group size
adaptive_work_group_size(global_workgroup_size),
// fence handle
VK_NULL_HANDLE,
// shader arguments
v_output.image(
pipeline_barrier,
api::PipelineStage::COMPUTE,
api::MemoryAccessType::WRITE),
v_input.image(pipeline_barrier, api::PipelineStage::COMPUTE),
// params buffer
params.buffer());

return convert(v_output);
}

Tensor select(const Tensor& self, int64_t dim, int64_t index) {
TORCH_CHECK(self.dim() == 3, "Vulkan select only supports 3d tensors!");
TORCH_CHECK(dim == 0, "Vulkan select only supports dim = 0!");
TORCH_CHECK(
0 <= dim && dim <= 2,
"Vulkan select only supports one of the dim (0, 1, 2)");

const int64_t size = self.size(dim);

Expand All @@ -73,7 +189,13 @@ Tensor select(const Tensor& self, int64_t dim, int64_t index) {
index += size;
}

return select_depth(self, index);
if (dim == 0) {
return select_depth(self, index);
} else if (dim == 1) {
return select_height(self, index);
} else {
return select_width(self, index);
}
}

#ifdef USE_VULKAN_API
Expand Down
36 changes: 36 additions & 0 deletions aten/src/ATen/test/vulkan_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2803,6 +2803,42 @@ TEST_F(VulkanAPITest, select_3d_depth_large) {
test_select({100, 1, 144}, 0, 50);
}

TEST_F(VulkanAPITest, select_3d_height_small) {
test_select({1, 1, 1}, 1, 0);
}

TEST_F(VulkanAPITest, select_3d_height_medium) {
test_select({3, 5, 2}, 1, 2);
}

TEST_F(VulkanAPITest, select_3d_height_medium1) {
test_select({16, 16, 5}, 1, 6);
}

TEST_F(VulkanAPITest, select_3d_height_medium2) {
test_select({17, 17, 5}, 1, 6);
}

TEST_F(VulkanAPITest, select_3d_height_large) {
test_select({100, 144, 5}, 1, 50);
}

TEST_F(VulkanAPITest, select_3d_width_small) {
test_select({1, 1, 1}, 2, 0);
}

TEST_F(VulkanAPITest, select_3d_width_medium) {
test_select({3, 5, 3}, 2, 2);
}

TEST_F(VulkanAPITest, select_3d_width_medium2) {
test_select({17, 17, 8}, 2, 6);
}

TEST_F(VulkanAPITest, select_3d_width_large) {
test_select({100, 3, 144}, 2, 50);
}

TEST_F(VulkanAPITest, sigmoid) {
const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
const auto in_vulkan = in_cpu.vulkan();
Expand Down

0 comments on commit f2c2642

Please sign in to comment.