Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PyTorch] Add Vulkan support and tests for at::upsample_bilinear2d #98022

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#version 450 core
// PRECISION and FORMAT expand to the $precision / $format placeholders.
// NOTE(review): presumably these are substituted by the shader build step
// before compilation - confirm.
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
 * Output Image: write-only 3D image that main() fills via imageStore().
 */
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
 * Input Image: 3D sampler that main() reads texel-by-texel via texelFetch()
 * at mip level 0.
 */
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
 * Params Buffer
 *   oextents - output extents; main() compares the invocation position
 *              against oextents.xyz for its bounds check (w is unused here).
 *   iextents - upper bound used when clamping the interpolated x/y input
 *              coordinate. The host code in Upsample.cpp fills it with
 *              (input width - 1, input height - 1).
 *   scale    - per-axis (x, y) factor that maps an output texel center to a
 *              continuous input coordinate.
 */
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
ivec4 oextents;
ivec2 iextents;
vec2 scale;
}
uBlock;

/*
 * Local Work Group Size: supplied through specialization constants 0, 1, 2.
 */
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Upsamples uInput into uOutput along the width/height plane using bilinear
 * interpolation, with the scale taken from uBlock.
 *
 * align_false ~ align_corners=False: each output texel center (pos + 0.5) is
 * scaled into input space and shifted back by 0.5, so the 4 output corner
 * texels are treated as if they sat half a texel outside the 4 input corner
 * texels. If the two textures were overlaid, the output would be "bigger".
 *
 * The z coordinate is passed through unchanged.
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // oextents holds sizes, so valid coordinates are [0, oextents - 1]. An
  // invocation sitting exactly on the extent is already out of range and
  // must not read or write.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // Continuous input coordinate for this output texel. With
  // align_corners=False the border texels map slightly outside the input, so
  // the coordinate is clamped to [0, iextents]; this keeps alpha non-negative
  // and keeps floor()/ceil() below inside the input.
  const vec2 pos_interp = clamp(
      ((vec2(pos.xy) + 0.5) * uBlock.scale) - 0.5,
      vec2(0, 0),
      vec2(uBlock.iextents));

  // The 4 input texels surrounding pos_interp. Names assume x grows to the
  // right and y grows downward: "left"/"right" are floor/ceil of x and
  // "top"/"bottom" are floor/ceil of y.
  const ivec2 lo = ivec2(floor(pos_interp));
  const ivec2 hi = ivec2(ceil(pos_interp));
  const ivec3 in_pos_topleft = ivec3(lo.x, lo.y, pos.z);
  const ivec3 in_pos_topright = ivec3(hi.x, lo.y, pos.z);
  const ivec3 in_pos_bottomleft = ivec3(lo.x, hi.y, pos.z);
  const ivec3 in_pos_bottomright = ivec3(hi.x, hi.y, pos.z);

  // Fractional distance from the top-left texel, in [0, 1) per axis.
  const vec2 alpha = pos_interp - vec2(lo);

  // Interpolate horizontally along the top and bottom rows...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ...then vertically between the two rows.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#version 450 core
// PRECISION and FORMAT expand to the $precision / $format placeholders.
// NOTE(review): presumably these are substituted by the shader build step
// before compilation - confirm.
#define PRECISION $precision
#define FORMAT $format

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

/*
 * Output Image: write-only 3D image that main() fills via imageStore().
 */
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;

/*
 * Input Image: 3D sampler that main() reads texel-by-texel via texelFetch()
 * at mip level 0.
 */
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;

/*
 * Params Buffer
 *   oextents - output extents; main() compares the invocation position
 *              against oextents.xyz for its bounds check and divides by
 *              (oextents.xy - 1) when mapping to input space.
 *   iextents - multiplier used when mapping an output position to input
 *              space. The host code in Upsample.cpp fills it with
 *              (input width - 1, input height - 1).
 *   scale    - not read by this shader's main().
 */
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
ivec4 oextents;
ivec2 iextents;
vec2 scale;
}
uBlock;

/*
 * Local Work Group Size: supplied through specialization constants 0, 1, 2.
 */
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Upsamples uInput into uOutput along the width/height plane using bilinear
 * interpolation.
 *
 * align_true ~ align_corners=True: the 4 output corner texels are treated as
 * if they were squarely aligned with the 4 input corner texels when the two
 * textures are overlaid, i.e. output position p maps to
 * p * (in - 1) / (out - 1) on each axis.
 *
 * The z coordinate is passed through unchanged.
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // oextents holds sizes, so valid coordinates are [0, oextents - 1]. An
  // invocation sitting exactly on the extent is already out of range and
  // must not read or write.
  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
    return;
  }

  // (out - 1) is 0 when an output dimension is 1; use 1 instead so the
  // division is well defined (pos is 0 on that axis, so the result is 0).
  // max() is used rather than clamp(x, 1, x), whose result is undefined
  // when minVal > maxVal.
  const vec2 denom = max(vec2(uBlock.oextents.xy - 1), vec2(1, 1));

  // Continuous input coordinate for this output texel. The clamp only
  // matters for float rounding at the far edge, where ceil() could otherwise
  // step one texel past the input.
  const vec2 pos_interp = clamp(
      vec2(pos.xy) * vec2(uBlock.iextents) / denom,
      vec2(0, 0),
      vec2(uBlock.iextents));

  // The 4 input texels surrounding pos_interp. Names assume x grows to the
  // right and y grows downward: "left"/"right" are floor/ceil of x and
  // "top"/"bottom" are floor/ceil of y.
  const ivec2 lo = ivec2(floor(pos_interp));
  const ivec2 hi = ivec2(ceil(pos_interp));
  const ivec3 in_pos_topleft = ivec3(lo.x, lo.y, pos.z);
  const ivec3 in_pos_topright = ivec3(hi.x, lo.y, pos.z);
  const ivec3 in_pos_bottomleft = ivec3(lo.x, hi.y, pos.z);
  const ivec3 in_pos_bottomright = ivec3(hi.x, hi.y, pos.z);

  // Fractional distance from the top-left texel, in [0, 1) per axis.
  const vec2 alpha = pos_interp - vec2(lo);

  // Interpolate horizontally along the top and bottom rows...
  const vec4 top_val_interp =
      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
  const vec4 bot_val_interp =
      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);

  // ...then vertically between the two rows.
  imageStore(
      uOutput,
      pos,
      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
}
85 changes: 85 additions & 0 deletions aten/src/ATen/native/vulkan/ops/Upsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,97 @@ Tensor upsample_nearest2d(
return convert(v_output);
}

// Vulkan implementation of aten::upsample_bilinear2d.
//
// input_arg     4-D tensor; moved to Vulkan first if it is not already there.
// output_sizes  two entries, indexed by Layout::Parameter::height / ::width.
// align_corners selects the align_true or align_false compute shader.
// scales_h/w    optional scale overrides forwarded to compute_scales_value.
//
// Returns a new Vulkan tensor with the input's batch and channel sizes and
// the requested height and width. Fails a TORCH_CHECK when the input is not
// 4-D or output_sizes does not hold exactly two entries.
Tensor upsample_bilinear2d(
    const Tensor& input_arg,
    const IntArrayRef output_sizes,
    bool align_corners,
    const c10::optional<double> scales_h,
    const c10::optional<double> scales_w) {
  api::Context* const context = api::context();

  TORCH_CHECK(
      (4 == input_arg.sizes().size()) && (2 == output_sizes.size()),
      "Invalid input!");

  const Tensor input = input_arg.is_vulkan() ? input_arg : input_arg.vulkan();
  const vTensor& v_input = convert(input);

  vTensor v_output{
      context,
      {
          get_dim<Dim4D::Batch>(v_input),
          get_dim<Dim4D::Channel>(v_input),
          output_sizes[Layout::Parameter::height],
          output_sizes[Layout::Parameter::width],
      },
      input_arg.scalar_type(),
  };

  const api::utils::uvec3 output_extents = v_output.extents();

  // Index of the last input texel along each axis (size - 1).
  const int32_t last_in_x =
      safe_downcast<int32_t>(get_dim<Dim4D::Width>(input_arg) - 1);
  const int32_t last_in_y =
      safe_downcast<int32_t>(get_dim<Dim4D::Height>(input_arg) - 1);

  // Per-axis scale derived from the optional override and the in/out sizes.
  const float scale_x = compute_scales_value<float>(
      scales_w,
      get_dim<Dim4D::Width>(input_arg),
      get_dim<Dim4D::Width>(v_output));
  const float scale_y = compute_scales_value<float>(
      scales_h,
      get_dim<Dim4D::Height>(input_arg),
      get_dim<Dim4D::Height>(v_output));

  // Field order and the explicit padding word mirror the shaders' uniform
  // block (ivec4 oextents; ivec2 iextents; vec2 scale).
  const struct Block final {
    uvec3 oextents;
    uint32_t padding;
    ivec2 iextents;
    vec2 scale;
  } shader_params{
      output_extents, // oextents
      0u, // padding
      {last_in_x, last_in_y}, // iextents
      {scale_x, scale_y}, // scale
  };

  api::UniformParamsBuffer params(context, shader_params);
  api::PipelineBarrier pipeline_barrier{};

  const api::ShaderInfo shader_desc = align_corners
      ? VK_KERNEL(upsample_bilinear2d_align_true)
      : VK_KERNEL(upsample_bilinear2d_align_false);

  context->submit_compute_job(
      // shader descriptor
      shader_desc,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      output_extents,
      // local work group size
      adaptive_work_group_size(output_extents),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_output.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      v_input.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return convert(v_output);
}

#ifdef USE_VULKAN_API

// Registers this file's upsample kernels as the Vulkan-backend
// implementations of the corresponding aten operators.
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
// aten::upsample_nearest2d -> upsample_nearest2d
m.impl(
TORCH_SELECTIVE_NAME("aten::upsample_nearest2d"),
TORCH_FN(upsample_nearest2d));
// aten::upsample_bilinear2d -> upsample_bilinear2d
m.impl(
TORCH_SELECTIVE_NAME("aten::upsample_bilinear2d"),
TORCH_FN(upsample_bilinear2d));
}

#endif /* USE_VULKAN_API */
Expand Down
60 changes: 60 additions & 0 deletions aten/src/ATen/test/vulkan_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3310,6 +3310,66 @@ TEST_F(VulkanAPITest, upsample_nearest2d) {
ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_small) {
const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, false);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, false);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_large) {
const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, false);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, false);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_small) {
const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, true);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, true);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_large) {
const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, true);

const auto in_vulkan = in_cpu.vulkan();
const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, true);

const auto check = almostEqual(out_cpu, out_vulkan.cpu());
if (!check) {
showRtol(out_cpu, out_vulkan.cpu());
}

ASSERT_TRUE(check);
}

void test_unbind(const at::IntArrayRef input_shape, int64_t dim) {
const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::unbind(in_cpu, dim);
Expand Down