Skip to content

Commit

Permalink
[PyTorch] Add Vulkan support and tests for at::upsample_bilinear2d (#…
Browse files Browse the repository at this point in the history
…98022)

Summary:
Pull Request resolved: #98022

Bilinear upsampling is a [4D tensor upsampling operation](https://pytorch.org/docs/stable/generated/torch.nn.Upsample.html). This commit adds support for the operation on the Vulkan GPU backend.

Test Plan:
1. `buck run --target-platforms ovr_config//platform/macos:arm64-fbsource  //xplat/caffe2:pt_vulkan_api_test_binAppleMac\#macosx-arm64 -c pt.vulkan_full_precision=1` on Apple M1 MacBook
2. Confirm all tests pass with no regression, and the added tests `*upsample_bilinear2d*` pass
2a. All tests P669847383
2b. `upsample_bilinear2d` tests P669866631
3. Overview:

```
...

[ RUN      ] VulkanAPITest.upsample_bilinear2d_align_false_small
[       OK ] VulkanAPITest.upsample_bilinear2d_align_false_small (1 ms)
[ RUN      ] VulkanAPITest.upsample_bilinear2d_align_false_large
[       OK ] VulkanAPITest.upsample_bilinear2d_align_false_large (2 ms)
[ RUN      ] VulkanAPITest.upsample_bilinear2d_align_true_small
[       OK ] VulkanAPITest.upsample_bilinear2d_align_true_small (2 ms)
[ RUN      ] VulkanAPITest.upsample_bilinear2d_align_true_large
[       OK ] VulkanAPITest.upsample_bilinear2d_align_true_large (1 ms)

...

[==========] 209 tests from 1 test suite ran. (6317 ms total)
[  PASSED  ] 201 tests.
[  SKIPPED ] 1 test, listed below:
[  SKIPPED ] VulkanAPITest.querypool_flushed_shader_log
[  FAILED  ] 7 tests, listed below:
[  FAILED  ] VulkanAPITest.cat_dim1_singledepth_success
[  FAILED  ] VulkanAPITest.gru_success
[  FAILED  ] VulkanAPITest.gru_mclareninputs_success
[  FAILED  ] VulkanAPITest.gru_prepack_success
[  FAILED  ] VulkanAPITest.lstm_success
[  FAILED  ] VulkanAPITest.lstm_mclareninputs_success
[  FAILED  ] VulkanAPITest.lstm_prepack_success
```

Reviewed By: SS-JIA

Differential Revision: D43142564

fbshipit-source-id: 39931862c2700e69562565042e2f9e92a262f276
  • Loading branch information
liuk22 authored and facebook-github-bot committed Mar 30, 2023
1 parent c218309 commit 4f73c5e
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
* Output Image
*/
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
* Input Texture (sampled)
*/
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
* Params Buffer
*/
// Uniform parameters read by main() for one upsample dispatch.
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
// Output image extents; main() compares the invocation position against
// .xyz to discard out-of-range invocations (.w is not read by this shader).
ivec4 oextents;
// Upper bound, in (x, y) order, used by main() when clamping the
// interpolated input coordinate.
ivec2 iextents;
// Per-axis (x, y) factor main() multiplies the output texel center by to
// obtain the continuous input coordinate.
vec2 scale;
}
uBlock;

/*
* Local Work Group Size
*/
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
* Upsamples uInput to the uOutput with scale according to uBlock params,
* using the equation for bilinear upsampling/interpolation
* along the height and width plane.
* align_false ~ align_corners=False, it means that each of the 4 output
* corner texels are treated in interpolation as if they were half texel
* offset outwards from the 4 input corner texels, if the two textures
* were overlaid the output texture would be "bigger".
*/
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Valid output texel indices are [0, oextents - 1]. The comparison must be
  // inclusive: an invocation with a component equal to the extent is already
  // outside the image and must neither fetch from uInput nor store to uOutput.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // Map the center of the output texel to a continuous input coordinate
  // (align_corners=False), then clamp it to [0, uBlock.iextents] so that the
  // border texels never interpolate from outside the input and alpha never
  // becomes negative.
  const vec2 pos_interp = clamp(
      ((vec2(pos.xy) + 0.5) * uBlock.scale) - 0.5,
      vec2(0, 0),
      vec2(uBlock.iextents.xy));

  // 4 input texels used for bilinear interpolation, naming by PyTorch
  // Tensor coordinate space where the "top" is x = 0 and "left" is y = 0,
  // Vulkan reversed
  const ivec3 in_pos_topleft =
      ivec3(floor(pos_interp.x), floor(pos_interp.y), pos.z);
  const ivec3 in_pos_bottomleft =
      ivec3(floor(pos_interp.x), ceil(pos_interp.y), pos.z);
  const ivec3 in_pos_topright =
      ivec3(ceil(pos_interp.x), floor(pos_interp.y), pos.z);
  const ivec3 in_pos_bottomright =
      ivec3(ceil(pos_interp.x), ceil(pos_interp.y), pos.z);

  // Fractional offset of the sample point from the floor texel, per axis.
  const vec2 alpha = pos_interp - vec2(in_pos_topleft.xy);

  // Interpolate along x for the floor-y and ceil-y rows ...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ... then along y between the two rows.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
* Output Image
*/
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
* Input Texture (sampled)
*/
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
* Params Buffer
*/
// Uniform parameters read by main() for one upsample dispatch.
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
// Output image extents; main() compares the invocation position against
// .xyz to discard out-of-range invocations and uses .xy - 1 as the divisor
// of the coordinate mapping (.w is not read by this shader).
ivec4 oextents;
// Per-axis (x, y) factor main() multiplies the output position by when
// mapping it to an input coordinate.
ivec2 iextents;
// Not read by this shader's main(); the field keeps the block layout
// identical to the declaration above.
vec2 scale;
}
uBlock;

/*
* Local Work Group Size
*/
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
* Upsamples uInput to the uOutput with scale according to uBlock params,
* using the equation for bilinear upsampling/interpolation
* along the height and width plane.
* align_true ~ align_corners=True, it means that each of the 4 output
* corner texels are treated in interpolation as if they were squarely
* aligned with the 4 input corner texels, if the two textures were overlaid.
*/
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Valid output texel indices are [0, oextents - 1]. The comparison must be
  // inclusive: an invocation with a component equal to the extent is already
  // outside the image and must neither fetch from uInput nor store to uOutput.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // align_corners=True mapping: pos * iextents / (oextents - 1).
  // The divisor is floored at 1 with max() so that an output dimension of
  // size 1 does not divide by zero. (clamp(x, 1, x) is not usable for this:
  // its result is undefined when minVal > maxVal, i.e. exactly when x == 0.)
  const vec2 divisor = max(vec2(uBlock.oextents.xy - 1), vec2(1, 1));
  const vec2 pos_interp = vec2(pos.xy) * vec2(uBlock.iextents.xy) / divisor;

  // 4 input texels used for bilinear interpolation, naming by PyTorch
  // Tensor coordinate space where the "top" is x = 0 and "left" is y = 0,
  // Vulkan reversed
  const ivec3 in_pos_topleft =
      ivec3(floor(pos_interp.x), floor(pos_interp.y), pos.z);
  const ivec3 in_pos_bottomleft =
      ivec3(floor(pos_interp.x), ceil(pos_interp.y), pos.z);
  const ivec3 in_pos_topright =
      ivec3(ceil(pos_interp.x), floor(pos_interp.y), pos.z);
  const ivec3 in_pos_bottomright =
      ivec3(ceil(pos_interp.x), ceil(pos_interp.y), pos.z);

  // Fractional offset of the sample point from the floor texel, per axis.
  const vec2 alpha = pos_interp - vec2(in_pos_topleft.xy);

  // Interpolate along x for the floor-y and ceil-y rows ...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ... then along y between the two rows.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
85 changes: 85 additions & 0 deletions aten/src/ATen/native/vulkan/ops/Upsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,97 @@ Tensor upsample_nearest2d(
return convert(v_output);
}

/*
 * Vulkan implementation of bilinear 2D upsampling.
 *
 * input_arg     4-dimensional tensor; moved to the Vulkan backend if it is
 *               not already there.
 * output_sizes  two entries, indexed with Layout::Parameter::height / width.
 * align_corners selects the upsample_bilinear2d_align_true kernel when set,
 *               upsample_bilinear2d_align_false otherwise.
 * scales_h/w    optional explicit scales, forwarded to compute_scales_value.
 *
 * Returns a tensor with the batch and channel dims of the input and the
 * requested height and width. Fails a TORCH_CHECK when the input is not 4D
 * or output_sizes does not hold exactly two entries.
 */
Tensor upsample_bilinear2d(
    const Tensor& input_arg,
    const IntArrayRef output_sizes,
    bool align_corners,
    const c10::optional<double> scales_h,
    const c10::optional<double> scales_w) {
  api::Context* const context = api::context();

  TORCH_CHECK(
      (input_arg.sizes().size() == 4) && (output_sizes.size() == 2),
      "Invalid input!");

  const Tensor input = input_arg.is_vulkan() ? input_arg : input_arg.vulkan();
  const vTensor& v_src = convert(input);

  // Destination keeps batch/channel of the source and takes the requested
  // spatial size.
  vTensor v_dst{
      context,
      {
          get_dim<Dim4D::Batch>(v_src),
          get_dim<Dim4D::Channel>(v_src),
          output_sizes[Layout::Parameter::height],
          output_sizes[Layout::Parameter::width],
      },
      input_arg.scalar_type(),
  };

  const api::utils::uvec3 dst_extents = v_dst.extents();

  // Host-side mirror of the shader's uniform Block; the explicit padding
  // word widens oextents to four components.
  const struct Block final {
    uvec3 oextents;
    uint32_t padding;
    ivec2 iextents;
    vec2 scale;
  } shader_params{
      dst_extents, // oextents
      0u, // padding
      {
          safe_downcast<int32_t>(get_dim<Dim4D::Width>(input_arg) - 1),
          safe_downcast<int32_t>(get_dim<Dim4D::Height>(input_arg) - 1),
      }, // iextents: last valid input index along width, height
      {
          compute_scales_value<float>(
              scales_w,
              get_dim<Dim4D::Width>(input_arg),
              get_dim<Dim4D::Width>(v_dst)),
          compute_scales_value<float>(
              scales_h,
              get_dim<Dim4D::Height>(input_arg),
              get_dim<Dim4D::Height>(v_dst)),
      }, // scale: width, height
  };

  api::UniformParamsBuffer params(context, shader_params);
  api::PipelineBarrier pipeline_barrier{};

  // One kernel per align_corners mode.
  api::ShaderInfo kernel = align_corners
      ? VK_KERNEL(upsample_bilinear2d_align_true)
      : VK_KERNEL(upsample_bilinear2d_align_false);

  context->submit_compute_job(
      // shader descriptor
      kernel,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      dst_extents,
      // local work group size
      adaptive_work_group_size(dst_extents),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_dst.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      v_src.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return convert(v_dst);
}

#ifdef USE_VULKAN_API

// Registers this file's upsample functions as the Vulkan implementations of
// the corresponding aten operators.
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
// nearest-neighbour 2D upsampling
m.impl(
TORCH_SELECTIVE_NAME("aten::upsample_nearest2d"),
TORCH_FN(upsample_nearest2d));
// bilinear 2D upsampling (both align_corners modes go through one entry point)
m.impl(
TORCH_SELECTIVE_NAME("aten::upsample_bilinear2d"),
TORCH_FN(upsample_bilinear2d));
}

#endif /* USE_VULKAN_API */
Expand Down
60 changes: 60 additions & 0 deletions aten/src/ATen/test/vulkan_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3310,6 +3310,66 @@ TEST_F(VulkanAPITest, upsample_nearest2d) {
ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_small) {
const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, false);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, false);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_large) {
const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, false);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, false);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_small) {
const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, true);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, true);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_large) {
const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, true);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, true);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

void test_unbind(const at::IntArrayRef input_shape, int64_t dim) {
const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::unbind(in_cpu, dim);
Expand Down

0 comments on commit 4f73c5e

Please sign in to comment.