
Commit f594c29

Lower UpsampleBilinear/Nearest2DBackward with scale factor on TPU (#4710)
1 parent d86b323 commit f594c29
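
In short: on TPU, the backward passes of upsample_nearest2d and upsample_bilinear2d no longer fall back to the ATen CPU implementation when the caller supplies scale factors instead of an explicit output size. The sketch below is only a rough illustration of the call shape this commit keeps in the XLA lowering; it reuses the helpers that appear in the test diff (ForEachDevice, TestBackward), and the shapes and scale factors are example values taken from the new tests, not a prescribed configuration.

// Sketch only: exercising the newly lowered path, modeled on the tests in
// this commit. The test fixture, includes, and helpers (ForEachDevice,
// TestBackward) are assumed from test_aten_xla_tensor.cpp.
auto testfn = [&](const std::vector<torch::Tensor>& inputs) -> torch::Tensor {
  // Passing c10::nullopt for output_size and only scale factors previously
  // forced a CPU fallback for the backward op, even on TPU.
  return torch::upsample_bilinear2d(inputs[0], c10::nullopt,
                                    /*align_corners=*/false,
                                    at::ArrayRef<double>{2.5, 3.4});
};
ForEachDevice([&](const torch::Device& device) {
  TestBackward(
      {torch::rand({2, 2, 5, 5},
                   torch::TensorOptions(torch::kFloat).requires_grad(true))},
      device, testfn);
});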

2 files changed (+124, -23 lines)

test/cpp/test_aten_xla_tensor.cpp

Lines changed: 93 additions & 17 deletions
@@ -4266,24 +4266,52 @@ TEST_F(AtenXlaTensorTest, TestUpsampleNearest2DWithScale) {
 }
 
 TEST_F(AtenXlaTensorTest, TestUpsampleNearest2DBackwardWithScale) {
-  int batch_size = 2;
-  int h = 5;
-  int w = 5;
-  int chans = 2;
-  double scale_h = 2.5;
-  double scale_w = 3.4;
-  auto testfn = [&](const std::vector<torch::Tensor>& inputs) -> torch::Tensor {
-    return torch::upsample_nearest2d(inputs[0], c10::nullopt,
-                                     at::ArrayRef<double>{scale_h, scale_w});
+  struct ImageInfo {
+    int batch_size;
+    int h;
+    int w;
+    int chans;
+    double scale_h;
+    double scale_w;
   };
-  ForEachDevice([&](const torch::Device& device) {
-    TestBackward(
-        {torch::rand({batch_size, chans, h, w},
-                     torch::TensorOptions(torch::kFloat).requires_grad(true))},
-        device, testfn);
-  });
-  ExpectCounterChanged("xla::upsample_nearest2d_backward",
-                       cpp_test::GetIgnoredCounters());
+
+  /* clang-format off */
+  std::vector<ImageInfo> inputs = {
+    {/*batch_size=*/2, /*h=*/5, /*w=*/5, /*chans=*/2, /*scale_h*/2.5, /*scale_w*/3.4},
+    {/*batch_size=*/2, /*h=*/1335, /*w=*/1335, /*chans=*/3, /*scale_h*/2.5, /*scale_w*/3.4},
+    {/*batch_size=*/2, /*h=*/1335, /*w=*/1335, /*chans=*/3, /*scale_h*/0.5, /*scale_w*/0.5},
+  };
+  /* clang-format on */
+
+  for (const auto& img_info : inputs) {
+    for (bool align_corners : {true, false}) {
+      auto testfn =
+          [&](const std::vector<torch::Tensor>& inputs) -> torch::Tensor {
+        return torch::upsample_nearest2d(
+            inputs[0], c10::nullopt,
+            at::ArrayRef<double>{img_info.scale_h, img_info.scale_w});
+      };
+      ForEachDevice([&](const torch::Device& device) {
+        TestBackward(
+            {torch::rand(
+                 {img_info.batch_size, img_info.chans, img_info.h, img_info.w},
+                 torch::TensorOptions(torch::kFloat).requires_grad(true))},
+            device, testfn);
+        XlaDeviceType device_type = static_cast<XlaDeviceType>(
+            bridge::AtenDeviceToXlaDevice(device).type());
+        if (device_type == XlaDeviceType::TPU) {
+          // Only lowered for TPU, fallback for CPU.
+          ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
+          ExpectCounterChanged("xla::upsample_nearest2d_backward",
+                               cpp_test::GetIgnoredCounters());
+          ResetCounters();
+        } else {
+          ExpectCounterChanged("aten::.*", cpp_test::GetIgnoredCounters());
+          ResetCounters();
+        }
+      });
+    }
+  }
 }
 
 TEST_F(AtenXlaTensorTest, TestUpsampleBilinear2D) {
@@ -4388,6 +4416,54 @@ TEST_F(AtenXlaTensorTest, TestUpsampleBilinear2DBackward) {
   }
 }
 
+TEST_F(AtenXlaTensorTest, TestUpsampleBilinear2DBackwardWithScale) {
+  struct ImageInfo {
+    int batch_size;
+    int h;
+    int w;
+    int chans;
+    double scale_h;
+    double scale_w;
+  };
+
+  /* clang-format off */
+  std::vector<ImageInfo> inputs = {
+    {/*batch_size=*/2, /*h=*/5, /*w=*/5, /*chans=*/2, /*scale_h*/8.0/5, /*scale_w*/8.0/5},
+    {/*batch_size=*/2, /*h=*/1335, /*w=*/1335, /*chans=*/3, /*scale_h*/255.0/1335, /*scale_w*/255.0/1335},
+  };
+  /* clang-format on */
+
+  for (const auto& img_info : inputs) {
+    for (bool align_corners : {true, false}) {
+      auto testfn =
+          [&](const std::vector<torch::Tensor>& inputs) -> torch::Tensor {
+        return torch::upsample_bilinear2d(
+            inputs[0], c10::nullopt, align_corners,
+            at::ArrayRef<double>{img_info.scale_h, img_info.scale_w});
+      };
+      ForEachDevice([&](const torch::Device& device) {
+        TestBackward(
+            {torch::rand(
+                 {img_info.batch_size, img_info.chans, img_info.h, img_info.w},
+                 torch::TensorOptions(torch::kFloat).requires_grad(true))},
+            device, testfn);
+        XlaDeviceType device_type = static_cast<XlaDeviceType>(
+            bridge::AtenDeviceToXlaDevice(device).type());
+        if (device_type == XlaDeviceType::TPU) {
+          // Only lowered for TPU, fallback for CPU.
+          ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
+          ExpectCounterChanged("xla::upsample_bilinear2d_backward",
+                               cpp_test::GetIgnoredCounters());
+          ResetCounters();
+        } else {
+          ExpectCounterChanged("aten::.*", cpp_test::GetIgnoredCounters());
+          ResetCounters();
+        }
+      });
+    }
+  }
+}
+
 TEST_F(AtenXlaTensorTest, TestAddCMul) {
   torch::Tensor a = torch::rand({2, 2}, torch::TensorOptions(torch::kFloat));
   torch::Tensor b = torch::rand({2, 2}, torch::TensorOptions(torch::kFloat));

torch_xla/csrc/aten_xla_type.cpp

Lines changed: 31 additions & 6 deletions
@@ -2951,16 +2951,26 @@ at::Tensor XLANativeFunctions::upsample_bilinear2d_backward(
   // our XLA lowering.
   XlaDeviceType hw_type =
       static_cast<XlaDeviceType>(grad_output_tensor->GetDevice().type());
-  if (hw_type != XlaDeviceType::TPU || (scales_h && *scales_h != 1.0) ||
-      (scales_w && *scales_w != 1.0)) {
+  if (hw_type != XlaDeviceType::TPU) {
     return at::native::call_fallback_fn<
         &xla_cpu_fallback,
         ATEN_OP(upsample_bilinear2d_backward)>::call(grad_output, output_size,
                                                      input_size, align_corners,
                                                      scales_h, scales_w);
   }
+  std::vector<int64_t> scaled_output_size =
+      torch::lazy::ToVector<int64_t>(output_size);
+  if ((scales_h && *scales_h != 1.0) || (scales_w && *scales_w != 1.0)) {
+    scaled_output_size = GetOutputSizeWithScale(input_size, scales_h, scales_w,
+                                                scaled_output_size);
+    if (!output_size.empty()) {
+      XLA_CHECK(scaled_output_size.at(0) == output_size.at(0) &&
+                scaled_output_size.at(1) == output_size.at(1))
+          << "Inferred output size and output_size from upstream are different";
+    }
+  }
   return bridge::AtenFromXlaTensor(tensor_methods::upsample_bilinear2d_backward(
-      grad_output_tensor, torch::lazy::ToVector<int64_t>(output_size),
+      grad_output_tensor, torch::lazy::ToVector<int64_t>(scaled_output_size),
       torch::lazy::ToVector<int64_t>(input_size), align_corners));
 }

@@ -2976,6 +2986,11 @@ at::Tensor XLANativeFunctions::upsample_nearest2d(
   if ((scales_h && *scales_h != 1.0) || (scales_w && *scales_w != 1.0)) {
     scaled_output_size = GetOutputSizeWithScale(input_dims, scales_h, scales_w,
                                                 scaled_output_size);
+    if (!output_size.empty()) {
+      XLA_CHECK(scaled_output_size.at(0) == output_size.at(0) &&
+                scaled_output_size.at(1) == output_size.at(1))
+          << "Inferred output size and output_size from upstream are different";
+    }
   }
   return bridge::AtenFromXlaTensor(
       tensor_methods::upsample_nearest2d(self_tensor, scaled_output_size));
@@ -2991,16 +3006,26 @@ at::Tensor XLANativeFunctions::upsample_nearest2d_backward(
   // our XLA lowering.
   XlaDeviceType hw_type =
       static_cast<XlaDeviceType>(grad_output_tensor->GetDevice().type());
-  if (hw_type != XlaDeviceType::TPU || (scales_h && *scales_h != 1.0) ||
-      (scales_w && *scales_w != 1.0)) {
+  if (hw_type != XlaDeviceType::TPU) {
     return at::native::call_fallback_fn<
         &xla_cpu_fallback,
         ATEN_OP(upsample_nearest2d_backward)>::call(grad_output, output_size,
                                                     input_size, scales_h,
                                                     scales_w);
   }
+  std::vector<int64_t> scaled_output_size =
+      torch::lazy::ToVector<int64_t>(output_size);
+  if ((scales_h && *scales_h != 1.0) || (scales_w && *scales_w != 1.0)) {
+    scaled_output_size = GetOutputSizeWithScale(input_size, scales_h, scales_w,
+                                                scaled_output_size);
+    if (!output_size.empty()) {
+      XLA_CHECK(scaled_output_size.at(0) == output_size.at(0) &&
+                scaled_output_size.at(1) == output_size.at(1))
+          << "Inferred output size and output_size from upstream are different";
+    }
+  }
   return bridge::AtenFromXlaTensor(tensor_methods::upsample_nearest2d_backward(
-      grad_output_tensor, torch::lazy::ToVector<int64_t>(output_size),
+      grad_output_tensor, torch::lazy::ToVector<int64_t>(scaled_output_size),
       torch::lazy::ToVector<int64_t>(input_size)));
 }

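Across the hunks above, the pattern is the same: when only scale factors are supplied, the output size is now inferred via GetOutputSizeWithScale and cross-checked against any output_size passed down from upstream, instead of bailing out to the CPU fallback. GetOutputSizeWithScale itself is not part of this diff, so the helper below is only an assumed sketch of the size inference that the new XLA_CHECK relies on (PyTorch conventionally floors input_size * scale when given a scale factor); it is not the actual implementation, and its name and signature are made up for illustration.

// Assumed sketch (not the real GetOutputSizeWithScale): derive the spatial
// output size from an NCHW input size and optional per-axis scales by
// flooring input * scale, PyTorch's usual convention. Needs <cmath>.
std::vector<int64_t> InferScaledOutputSize(at::IntArrayRef input_size,
                                           c10::optional<double> scales_h,
                                           c10::optional<double> scales_w) {
  int64_t h = input_size[2];  // input_size is {N, C, H, W}
  int64_t w = input_size[3];
  int64_t out_h =
      scales_h ? static_cast<int64_t>(std::floor(h * *scales_h)) : h;
  int64_t out_w =
      scales_w ? static_cast<int64_t>(std::floor(w * *scales_w)) : w;
  // E.g. h = 5 with scale_h = 2.5 (as in the new tests) gives out_h = 12.
  return {out_h, out_w};
}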