-
Notifications
You must be signed in to change notification settings - Fork 21.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[PyTorch] Add Vulkan support and tests for at::upsample_bilinear2d (#…
…98022) Summary: Pull Request resolved: #98022 Bilinear upsampling is a [4D tensor upsampling operation](https://pytorch.org/docs/stable/generated/torch.nn.Upsample.html), this adds support for the operation on the Vulkan GPU backend. Test Plan: 1. `buck run --target-platforms ovr_config//platform/macos:arm64-fbsource //xplat/caffe2:pt_vulkan_api_test_binAppleMac\#macosx-arm64 -c pt.vulkan_full_precision=1` on Apple M1 MacBook 2. Confirm all tests pass with no regression, and the added tests `*upsample_bilinear2d*` pass 2a. All tests P669847383 2b. `upsample_bilinear2d` tests P669866631 3. Overview: ``` ... [ RUN ] VulkanAPITest.upsample_bilinear2d_align_false_small [ OK ] VulkanAPITest.upsample_bilinear2d_align_false_small (1 ms) [ RUN ] VulkanAPITest.upsample_bilinear2d_align_false_large [ OK ] VulkanAPITest.upsample_bilinear2d_align_false_large (2 ms) [ RUN ] VulkanAPITest.upsample_bilinear2d_align_true_small [ OK ] VulkanAPITest.upsample_bilinear2d_align_true_small (2 ms) [ RUN ] VulkanAPITest.upsample_bilinear2d_align_true_large [ OK ] VulkanAPITest.upsample_bilinear2d_align_true_large (1 ms) ... [==========] 209 tests from 1 test suite ran. (6317 ms total) [ PASSED ] 201 tests. [ SKIPPED ] 1 test, listed below: [ SKIPPED ] VulkanAPITest.querypool_flushed_shader_log [ FAILED ] 7 tests, listed below: [ FAILED ] VulkanAPITest.cat_dim1_singledepth_success [ FAILED ] VulkanAPITest.gru_success [ FAILED ] VulkanAPITest.gru_mclareninputs_success [ FAILED ] VulkanAPITest.gru_prepack_success [ FAILED ] VulkanAPITest.lstm_success [ FAILED ] VulkanAPITest.lstm_mclareninputs_success [ FAILED ] VulkanAPITest.lstm_prepack_success ``` Reviewed By: SS-JIA Differential Revision: D43142564 fbshipit-source-id: 39931862c2700e69562565042e2f9e92a262f276
- Loading branch information
1 parent
c218309
commit 4f73c5e
Showing
4 changed files
with
294 additions
and
0 deletions.
There are no files selected for viewing
76 changes: 76 additions & 0 deletions
76
aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_false.glsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
 * Output Image
 */
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
 * Input Texture (read with texelFetch, i.e. unfiltered integer coordinates)
 */
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
 * Params Buffer
 *
 * oextents: compared against the invocation position (xyz) to reject
 *           invocations outside the output image; w is not read here.
 *           NOTE(review): treated as the output texel counts - confirm
 *           against the host code that fills this block.
 * iextents: upper bound applied to the interpolated input xy coordinate,
 *           i.e. presumably the largest valid input texel index
 *           (input size - 1) - confirm against the host code.
 * scale:    factor that maps an output texel center to an input coordinate.
 */
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
  ivec4 oextents;
  ivec2 iextents;
  vec2 scale;
}
uBlock;

/*
 * Local Work Group Size
 */
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Upsamples uInput to the uOutput with scale according to uBlock params,
 * using the equation for bilinear upsampling/interpolation
 * along the height and width plane.
 * align_false ~ align_corners=False, it means that each of the 4 output
 * corner texels are treated in interpolation as if they were half texel
 * offset outwards from the 4 input corner texels, if the two textures
 * were overlaid the output texture would be "bigger".
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Valid output positions are [0, oextents - 1]. The comparison has to be
  // >= so that invocations sitting exactly at pos == oextents (padding from
  // rounding the dispatch up to whole work groups) are rejected instead of
  // fetching/storing one texel past the end.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // Continuous input coordinate of this output texel's center. It is clamped
  // to [0, iextents] so that border texels, whose half-texel offset would
  // land outside the input, never produce a negative alpha or a fetch
  // outside the input.
  const vec2 pos_interp = clamp(
      ((vec2(pos.xy) + 0.5) * uBlock.scale) - 0.5,
      vec2(0.0),
      vec2(uBlock.iextents.xy));

  // Integer texel coordinates surrounding pos_interp.
  const ivec2 lo = ivec2(floor(pos_interp));
  const ivec2 hi = ivec2(ceil(pos_interp));

  // 4 input texels used for bilinear interpolation, naming by PyTorch
  // Tensor coordinate space where the "top" is x = 0 and "left" is y = 0,
  // Vulkan reversed
  const ivec3 in_pos_topleft = ivec3(lo.x, lo.y, pos.z);
  const ivec3 in_pos_bottomleft = ivec3(lo.x, hi.y, pos.z);
  const ivec3 in_pos_topright = ivec3(hi.x, lo.y, pos.z);
  const ivec3 in_pos_bottomright = ivec3(hi.x, hi.y, pos.z);

  // Fractional distance from the "lo" texel, per axis, in [0, 1).
  const vec2 alpha = pos_interp - vec2(lo);

  // Interpolate along x first, for both y rows ...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ... then blend the two rows along y.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
73 changes: 73 additions & 0 deletions
73
aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_true.glsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#version 450 core
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
 * Output Image
 */
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
 * Input Texture (read with texelFetch, i.e. unfiltered integer coordinates)
 */
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
 * Params Buffer
 *
 * oextents: output texel counts; (oextents.xy - 1) is the last output index
 *           used as the align-corners denominator, and xyz bounds the
 *           invocation position. w is not read here.
 * iextents: numerator of the align-corners mapping, i.e. presumably the
 *           last input texel index (input size - 1) - confirm against the
 *           host code that fills this block.
 * scale:    not read by this shader; kept so the block layout matches the
 *           align_false variant.
 */
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
  ivec4 oextents;
  ivec2 iextents;
  vec2 scale;
}
uBlock;

/*
 * Local Work Group Size
 */
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Upsamples uInput to the uOutput with scale according to uBlock params,
 * using the equation for bilinear upsampling/interpolation
 * along the height and width plane.
 * align_true ~ align_corners=True, it means that each of the 4 output
 * corner texels are treated in interpolation as if they were squarely
 * aligned with the 4 input corner texels, if the two textures were overlaid.
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Valid output positions are [0, oextents - 1]. The comparison has to be
  // >= so that invocations sitting exactly at pos == oextents (padding from
  // rounding the dispatch up to whole work groups) are rejected instead of
  // fetching/storing one texel past the end.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // Last output index per axis, floored at 1 so a single-texel output axis
  // does not divide by zero. max() is used rather than clamp(x, 1, x):
  // clamp() is undefined when minVal > maxVal, which is exactly the
  // oextents == 1 case this guard exists for.
  const vec2 last_out_idx = vec2(max(uBlock.oextents.xy - 1, ivec2(1)));

  // Map output index [0, oextents - 1] linearly onto [0, iextents].
  const vec2 pos_interp =
      vec2(pos.xy) * vec2(uBlock.iextents.xy) / last_out_idx;

  // Integer texel coordinates surrounding pos_interp.
  const ivec2 lo = ivec2(floor(pos_interp));
  const ivec2 hi = ivec2(ceil(pos_interp));

  // 4 input texels used for bilinear interpolation, naming by PyTorch
  // Tensor coordinate space where the "top" is x = 0 and "left" is y = 0,
  // Vulkan reversed
  const ivec3 in_pos_topleft = ivec3(lo.x, lo.y, pos.z);
  const ivec3 in_pos_bottomleft = ivec3(lo.x, hi.y, pos.z);
  const ivec3 in_pos_topright = ivec3(hi.x, lo.y, pos.z);
  const ivec3 in_pos_bottomright = ivec3(hi.x, hi.y, pos.z);

  // Fractional distance from the "lo" texel, per axis, in [0, 1).
  const vec2 alpha = pos_interp - vec2(lo);

  // Interpolate along x first, for both y rows ...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ... then blend the two rows along y.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters