pytorch · liuk22 · Mar 30, 2023
diff --git a/aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_false.glsl b/aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_false.glsl
@@ -0,0 +1,76 @@
+#version 450 core
+#define PRECISION $precision
+#define FORMAT $format
+
+layout(std430) buffer;
+
+/* Qualifiers: layout - storage - precision - memory */
+
+/*
+ * Output Image: write-only 3D image that main() stores each result texel to
+ */
+layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
+
+/*
+ * Input Image: 3D sampler that main() reads texel-by-texel via texelFetch
+ */
+layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
+
+/*
+ * Params Buffer: filled on the host from the `Block` struct in Upsample.cpp
+ */
+layout(set = 0, binding = 2) uniform PRECISION restrict Block {
+  ivec4 oextents; // output extents; main() reads .xyz only
+  ivec2 iextents; // host passes (input width - 1, input height - 1)
+  vec2 scale; // (width, height) scale factors from compute_scales_value
+}
+uBlock;
+
+/*
+ * Local Work Group Size: taken from specialization constants 0, 1 and 2
+ */
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+/*
+ * Upsamples uInput to the uOutput with scale according to uBlock params,
+ * using the equation for bilinear upsampling/interpolation
+ * along the height and width plane.
+ * align_false ~ align_corners=False: each of the 4 output corner texels
+ * is treated in interpolation as if it were offset half a texel outwards
+ * from the matching input corner texel, so if the two textures were
+ * overlaid, the output texture would be "bigger".
+ */
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+  // valid texels are 0 .. oextents - 1, so pos == oextents is out of bounds
+  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
+    return;
+  }
+  // align_corners=False source coordinate, clamped to [0, iextents] so the
+  // floor/ceil neighbours stay inside the input and alpha is never negative
+  vec2 pos_interp = clamp(
+      ((pos.xy + 0.5) * uBlock.scale) - 0.5, vec2(0, 0), uBlock.iextents.xy);
+
+  // 4 input texels surrounding pos_interp: "left"/"right" are floor/ceil of
+  // x, "top"/"bottom" are floor/ceil of y; z is copied from the output texel
+  ivec3 in_pos_topleft = ivec3(floor(pos_interp.x), floor(pos_interp.y), pos.z);
+  ivec3 in_pos_bottomleft =
+      ivec3(floor(pos_interp.x), ceil(pos_interp.y), pos.z);
+  ivec3 in_pos_topright = ivec3(ceil(pos_interp.x), floor(pos_interp.y), pos.z);
+  ivec3 in_pos_bottomright =
+      ivec3(ceil(pos_interp.x), ceil(pos_interp.y), pos.z);
+  // fractional distance from the top-left texel = blend weight per axis
+  vec2 alpha = pos_interp - in_pos_topleft.xy;
+  // blend along x within each row first, then along y between the two rows
+  const vec4 top_val_interp =
+      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
+      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
+  const vec4 bot_val_interp =
+      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
+      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);
+
+  imageStore(
+      uOutput,
+      pos,
+      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
+}
diff --git a/aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_true.glsl b/aten/src/ATen/native/vulkan/glsl/upsample_bilinear2d_align_true.glsl
@@ -0,0 +1,73 @@
+#version 450 core
+#define PRECISION $precision
+#define FORMAT $format
+
+layout(std430) buffer;
+
+/* Qualifiers: layout - storage - precision - memory */
+
+/*
+ * Output Image: write-only 3D image that main() stores each result texel to
+ */
+layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
+
+/*
+ * Input Image: 3D sampler that main() reads texel-by-texel via texelFetch
+ */
+layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
+
+/*
+ * Params Buffer: filled on the host from the `Block` struct in Upsample.cpp
+ */
+layout(set = 0, binding = 2) uniform PRECISION restrict Block {
+  ivec4 oextents; // output extents; main() reads .xyz only
+  ivec2 iextents; // host passes (input width - 1, input height - 1)
+  vec2 scale; // supplied by the host but not read by this shader's main()
+}
+uBlock;
+
+/*
+ * Local Work Group Size: taken from specialization constants 0, 1 and 2
+ */
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+/*
+ * Upsamples uInput to the uOutput with scale according to uBlock params,
+ * using the equation for bilinear upsampling/interpolation
+ * along the height and width plane.
+ * align_true ~ align_corners=True: each of the 4 output corner texels
+ * is treated in interpolation as if it were squarely aligned with the
+ * matching input corner texel when the two textures are overlaid.
+ */
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+  // valid texels are 0 .. oextents - 1, so pos == oextents is out of bounds
+  if (any(greaterThanEqual(pos, uBlock.oextents.xyz))) {
+    return;
+  }
+  // align_corners=True: pos * (in - 1) / (out - 1); max() guards out == 1
+  vec2 pos_interp = vec2(pos.xy) * vec2(uBlock.iextents.xy) /
+      vec2(max(uBlock.oextents.xy - 1, ivec2(1, 1)));
+  // 4 input texels surrounding pos_interp: "left"/"right" are floor/ceil of
+  // x, "top"/"bottom" are floor/ceil of y; z is copied from the output texel
+  ivec3 in_pos_topleft = ivec3(floor(pos_interp.x), floor(pos_interp.y), pos.z);
+  ivec3 in_pos_bottomleft =
+      ivec3(floor(pos_interp.x), ceil(pos_interp.y), pos.z);
+  ivec3 in_pos_topright = ivec3(ceil(pos_interp.x), floor(pos_interp.y), pos.z);
+  ivec3 in_pos_bottomright =
+      ivec3(ceil(pos_interp.x), ceil(pos_interp.y), pos.z);
+  // fractional distance from the top-left texel = blend weight per axis
+  vec2 alpha = pos_interp - in_pos_topleft.xy;
+  // blend along x within each row first, then along y between the two rows
+  const vec4 top_val_interp =
+      (texelFetch(uInput, in_pos_topleft, 0) * (1 - alpha.x)) +
+      (texelFetch(uInput, in_pos_topright, 0) * alpha.x);
+  const vec4 bot_val_interp =
+      (texelFetch(uInput, in_pos_bottomleft, 0) * (1 - alpha.x)) +
+      (texelFetch(uInput, in_pos_bottomright, 0) * alpha.x);
+
+  imageStore(
+      uOutput,
+      pos,
+      (top_val_interp * (1 - alpha.y)) + (bot_val_interp * alpha.y));
+}
diff --git a/aten/src/ATen/native/vulkan/ops/Upsample.cpp b/aten/src/ATen/native/vulkan/ops/Upsample.cpp
@@ -93,12 +93,97 @@ Tensor upsample_nearest2d(
   return convert(v_output);
 }
 
+Tensor upsample_bilinear2d(
+    const Tensor& input_arg,
+    const IntArrayRef output_sizes,
+    bool align_corners,
+    const c10::optional<double> scales_h,
+    const c10::optional<double> scales_w) {
+  // Vulkan kernel entry point: bilinear resize over height and width.
+  api::Context* const context = api::context();
+
+  TORCH_CHECK(
+      (4 == input_arg.sizes().size()) && (2 == output_sizes.size()),
+      "Invalid input!");
+
+  // Make sure the data lives on the Vulkan backend before wrapping it.
+  const Tensor input = input_arg.is_vulkan() ? input_arg : input_arg.vulkan();
+  const vTensor& v_input = convert(input);
+
+  // Batch and channel carry over; only height and width are resized.
+  vTensor v_output{
+      context,
+      {
+          get_dim<Dim4D::Batch>(v_input),
+          get_dim<Dim4D::Channel>(v_input),
+          output_sizes[Layout::Parameter::height],
+          output_sizes[Layout::Parameter::width],
+      },
+      input_arg.scalar_type(),
+  };
+  const api::utils::uvec3 output_extents = v_output.extents();
+
+  const auto in_width = get_dim<Dim4D::Width>(input_arg);
+  const auto in_height = get_dim<Dim4D::Height>(input_arg);
+  const float scale_w = compute_scales_value<float>(
+      scales_w, in_width, get_dim<Dim4D::Width>(v_output));
+  const float scale_h = compute_scales_value<float>(
+      scales_h, in_height, get_dim<Dim4D::Height>(v_output));
+
+  // Field order and padding mirror the shaders' `Block` uniform.
+  const struct Block final {
+    uvec3 oextents;
+    uint32_t padding;
+    ivec2 iextents;
+    vec2 scale;
+  } block{
+      output_extents,
+      0u,
+      {
+          safe_downcast<int32_t>(in_width - 1),
+          safe_downcast<int32_t>(in_height - 1),
+      },
+      {scale_w, scale_h},
+  };
+  api::UniformParamsBuffer params(context, block);
+
+  api::PipelineBarrier pipeline_barrier{};
+  const api::ShaderInfo shader_desc = align_corners
+      ? VK_KERNEL(upsample_bilinear2d_align_true)
+      : VK_KERNEL(upsample_bilinear2d_align_false);
+
+  context->submit_compute_job(
+      // shader descriptor
+      shader_desc,
+      // pipeline barrier
+      pipeline_barrier,
+      // global work group size
+      output_extents,
+      // local work group size
+      adaptive_work_group_size(output_extents),
+      // fence handle
+      VK_NULL_HANDLE,
+      // shader arguments
+      v_output.image(
+          pipeline_barrier,
+          api::PipelineStage::COMPUTE,
+          api::MemoryAccessType::WRITE),
+      v_input.image(pipeline_barrier, api::PipelineStage::COMPUTE),
+      // params buffer
+      params.buffer());
+
+  return convert(v_output);
+}
+
 #ifdef USE_VULKAN_API
 
 TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
   m.impl(
       TORCH_SELECTIVE_NAME("aten::upsample_nearest2d"),
       TORCH_FN(upsample_nearest2d));
+  m.impl( // bind aten::upsample_bilinear2d to the kernel defined in this file
+      TORCH_SELECTIVE_NAME("aten::upsample_bilinear2d"),
+      TORCH_FN(upsample_bilinear2d));
 }
 
 #endif /* USE_VULKAN_API */

diff --git a/aten/src/ATen/test/vulkan_api_test.cpp b/aten/src/ATen/test/vulkan_api_test.cpp
@@ -3310,6 +3310,66 @@ TEST_F(VulkanAPITest, upsample_nearest2d) {
   ASSERT_TRUE(check);
 }
 
+TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_small) {
+  // CPU reference: 2x3 spatial input resized to 4x6, align_corners=false.
+  const auto cpu_input = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+  const auto expected = at::upsample_bilinear2d(cpu_input, {4, 6}, false);
+  // Same op on the Vulkan copy of the input, brought back to the CPU.
+  const auto actual =
+      at::upsample_bilinear2d(cpu_input.vulkan(), {4, 6}, false).cpu();
+
+  const bool matches = almostEqual(expected, actual);
+  if (!matches) {
+    showRtol(expected, actual);
+  }
+  ASSERT_TRUE(matches);
+}
+
+TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_large) {
+  // CPU reference: 25x25 spatial input resized to 45x45, align_corners=false.
+  const auto cpu_input = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+  const auto expected = at::upsample_bilinear2d(cpu_input, {45, 45}, false);
+  // Same op on the Vulkan copy of the input, brought back to the CPU.
+  const auto actual =
+      at::upsample_bilinear2d(cpu_input.vulkan(), {45, 45}, false).cpu();
+
+  const bool matches = almostEqual(expected, actual);
+  if (!matches) {
+    showRtol(expected, actual);
+  }
+  ASSERT_TRUE(matches);
+}
+
+TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_small) {
+  // CPU reference: 2x3 spatial input resized to 4x6, align_corners=true.
+  const auto cpu_input = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+  const auto expected = at::upsample_bilinear2d(cpu_input, {4, 6}, true);
+  // Same op on the Vulkan copy of the input, brought back to the CPU.
+  const auto actual =
+      at::upsample_bilinear2d(cpu_input.vulkan(), {4, 6}, true).cpu();
+
+  const bool matches = almostEqual(expected, actual);
+  if (!matches) {
+    showRtol(expected, actual);
+  }
+  ASSERT_TRUE(matches);
+}
+
+TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_large) {
+  // CPU reference: 25x25 spatial input resized to 45x45, align_corners=true.
+  const auto cpu_input = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+  const auto expected = at::upsample_bilinear2d(cpu_input, {45, 45}, true);
+  // Same op on the Vulkan copy of the input, brought back to the CPU.
+  const auto actual =
+      at::upsample_bilinear2d(cpu_input.vulkan(), {45, 45}, true).cpu();
+
+  const bool matches = almostEqual(expected, actual);
+  if (!matches) {
+    showRtol(expected, actual);
+  }
+  ASSERT_TRUE(matches);
+}
+
 void test_unbind(const at::IntArrayRef input_shape, int64_t dim) {
   const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
   const auto out_cpu = at::unbind(in_cpu, dim);