
Commit 407d259

wonjoo-wj authored and alanwaketan committed
[Functionalization] Lower masked_fill.Tensor and masked_fill.Scalar ops (#4616)
* Lower masked_fill.Scalar and masked_fill.Tensor to fix related cpp tests
* Remove in-place versions for masked_fill
* Clean up some code
* Update tensor_methods::masked_fill to expand the input tensor if needed
* Add a check to expand only if the rank of the input tensor is less than that of the mask tensor
* Update the tensor-rank comparison condition
* Enable the KlDivBackward cpp test
1 parent 8f026c1 commit 407d259
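
For context, masked_fill replaces the elements of the input where the boolean mask is true with a fill value; the Tensor overload only accepts a 0-dimensional value tensor, which the lowering below checks before forwarding to the Scalar overload via value.item(). A minimal eager-mode sketch of the two overloads being lowered, using only libtorch calls that also appear in the tests (variable names are illustrative):

#include <torch/torch.h>

int main() {
  torch::Tensor input =
      torch::rand({2, 3}, torch::TensorOptions(torch::kFloat));
  torch::Tensor mask =
      torch::randint(0, 2, {2, 3}, torch::TensorOptions(torch::kBool));

  // masked_fill.Scalar: fill the positions where mask is true with 42.
  torch::Tensor out_scalar = torch::masked_fill(input, mask, 42);

  // masked_fill.Tensor: the fill value must be a 0-dimensional tensor.
  torch::Tensor value = torch::scalar_tensor(42.0f);
  torch::Tensor out_tensor = torch::masked_fill(input, mask, value);
  return 0;
}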

File tree

5 files changed, +51 -46 lines changed

test/cpp/test_aten_xla_tensor.cpp

Lines changed: 22 additions & 8 deletions
@@ -8827,7 +8827,6 @@ TEST_F(AtenXlaTensorTest, TestUnsqueezeInPlace) {
 }
 
 TEST_F(AtenXlaTensorTest, TestMaskedFill) {
-  GTEST_SKIP() << "SegFault after functionalization";
   torch::Tensor input =
       torch::rand({2, 3}, torch::TensorOptions(torch::kFloat));
   torch::Tensor mask =
@@ -8842,11 +8841,10 @@ TEST_F(AtenXlaTensorTest, TestMaskedFill) {
   });
 
   ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
-  ExpectCounterChanged("xla::masked_fill_", cpp_test::GetIgnoredCounters());
+  ExpectCounterChanged("xla::masked_fill", cpp_test::GetIgnoredCounters());
 }
 
 TEST_F(AtenXlaTensorTest, TestMaskedFillInPlace) {
-  GTEST_SKIP() << "SegFault after functionalization";
   torch::Scalar value(42);
   torch::Tensor mask =
       torch::randint(0, 2, {2, 3}, torch::TensorOptions(torch::kBool));
@@ -8862,11 +8860,10 @@ TEST_F(AtenXlaTensorTest, TestMaskedFillInPlace) {
   });
 
   ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
-  ExpectCounterChanged("xla::masked_fill_", cpp_test::GetIgnoredCounters());
+  ExpectCounterChanged("xla::masked_fill", cpp_test::GetIgnoredCounters());
 }
 
-TEST_F(AtenXlaTensorTest, TestMaskedFillBroadcast) {
-  GTEST_SKIP() << "SegFault after functionalization";
+TEST_F(AtenXlaTensorTest, TestMaskedFillBroadcast1) {
   torch::Tensor input =
       torch::rand({2, 5, 4, 3}, torch::TensorOptions(torch::kFloat));
   torch::Tensor mask =
@@ -8881,7 +8878,25 @@ TEST_F(AtenXlaTensorTest, TestMaskedFillBroadcast) {
   });
 
   ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
-  ExpectCounterChanged("xla::masked_fill_", cpp_test::GetIgnoredCounters());
+  ExpectCounterChanged("xla::masked_fill", cpp_test::GetIgnoredCounters());
+}
+
+TEST_F(AtenXlaTensorTest, TestMaskedFillBroadcast2) {
+  torch::Tensor input =
+      torch::rand({2, 1}, torch::TensorOptions(torch::kFloat));
+  torch::Tensor mask =
+      torch::randint(0, 2, {2, 3}, torch::TensorOptions(torch::kBool));
+  torch::Scalar value(42);
+  torch::Tensor result = torch::masked_fill(input, mask, value);
+  ForEachDevice([&](const torch::Device& device) {
+    torch::Tensor xla_input = CopyToDevice(input, device);
+    torch::Tensor xla_mask = CopyToDevice(mask, device);
+    torch::Tensor xla_result = torch::masked_fill(xla_input, xla_mask, value);
+    AllClose(result, xla_result);
+  });
+
+  ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters());
+  ExpectCounterChanged("xla::masked_fill", cpp_test::GetIgnoredCounters());
 }
 
 TEST_F(AtenXlaTensorTest, TestFill) {
@@ -11301,7 +11316,6 @@ TEST_F(AtenXlaTensorTest, TestBCEWithLogitsBackward) {
 }
 
 TEST_F(AtenXlaTensorTest, TestKlDivBackward) {
-  GTEST_SKIP() << "SegFault after functionalization";
   torch::Tensor input = torch::rand(
       {4, 3}, torch::TensorOptions(torch::kFloat).requires_grad(true));
   torch::Tensor target = torch::rand(

torch_xla/csrc/aten_xla_type.cpp

Lines changed: 13 additions & 27 deletions
@@ -1643,23 +1643,23 @@ at::Tensor XLANativeFunctions::xlogy(const at::Tensor& self,
       bridge::GetXlaTensor(self), bridge::GetXlaTensor(other)));
 }
 
-at::Tensor& XLANativeFunctions::masked_fill_(at::Tensor& self,
-                                             const at::Tensor& mask,
-                                             const at::Scalar& value) {
-  TORCH_LAZY_FN_COUNTER("xla::");
-  XLATensorPtr self_tensor = bridge::GetXlaTensor(self);
-  tensor_methods::masked_fill_(self_tensor, bridge::GetXlaTensor(mask), value);
-  return self;
-}
-
-at::Tensor& XLANativeFunctions::masked_fill_(at::Tensor& self,
-                                             const at::Tensor& mask,
-                                             const at::Tensor& value) {
+at::Tensor XLANativeFunctions::masked_fill(const at::Tensor& self,
+                                           const at::Tensor& mask,
+                                           const at::Tensor& value) {
   TORCH_LAZY_FN_COUNTER("xla::");
   XLA_CHECK_EQ(value.dim(), 0) << "masked_fill_ only supports a 0-dimensional "
                                << "value tensor, but got tensor "
                                << "with " << value.dim() << " dimension(s).";
-  return masked_fill_(self, mask, value.item());
+  return masked_fill(self, mask, value.item());
+}
+
+at::Tensor XLANativeFunctions::masked_fill(const at::Tensor& self,
+                                           const at::Tensor& mask,
+                                           const at::Scalar& value) {
+  TORCH_LAZY_FN_COUNTER("xla::");
+  XLATensorPtr self_tensor = bridge::GetXlaTensor(self);
+  return bridge::AtenFromXlaTensor(tensor_methods::masked_fill(
+      self_tensor, bridge::GetXlaTensor(mask), value));
 }
 
 at::Tensor XLANativeFunctions::masked_scatter(const at::Tensor& self,
@@ -3392,20 +3392,6 @@ at::Tensor XLANativeFunctions::linalg_pinv(
       linalg_pinv, atol_rtol_tensor)>::call(self, atol, rtol, hermitian);
 }
 
-at::Tensor XLANativeFunctions::masked_fill(const at::Tensor& self,
-                                           const at::Tensor& mask,
-                                           const at::Tensor& value) {
-  return at::functionalization::functionalize_aten_op<ATEN_OP2(
-      masked_fill, Tensor)>::call(self, mask, value);
-}
-
-at::Tensor XLANativeFunctions::masked_fill(const at::Tensor& self,
-                                           const at::Tensor& mask,
-                                           const at::Scalar& value) {
-  return at::functionalization::functionalize_aten_op<ATEN_OP2(
-      masked_fill, Scalar)>::call(self, mask, value);
-}
-
 at::Tensor XLANativeFunctions::mvlgamma(const at::Tensor& self, int64_t p) {
   return at::functionalization::functionalize_aten_op<ATEN_OP(mvlgamma)>::call(
       self, p);

torch_xla/csrc/tensor_methods.cpp

Lines changed: 12 additions & 4 deletions
@@ -1536,11 +1536,19 @@ XLATensorPtr lt(const XLATensorPtr& input, const XLATensorPtr& other) {
   return DispatchComparisonOp(at::aten::lt, input, other);
 }
 
-void masked_fill_(XLATensorPtr& input, const XLATensorPtr& mask,
-                  const at::Scalar& value) {
+XLATensorPtr masked_fill(XLATensorPtr& input, const XLATensorPtr& mask,
+                         const at::Scalar& value) {
   torch::lazy::ScopePusher ir_scope(at::aten::masked_fill.toQualString());
-  input->SetIrValue(torch::lazy::MakeNode<MaskedFill>(
-      input->GetIrValue(), MaybeExpand(mask->GetIrValue(), input->shape()),
+  auto input_value = input->GetIrValue();
+  // Expand input tensor to mask if needed (same as masked_scatter below).
+  // An additional check makes sure to only expand if the rank of input tensor
+  // is less than that of the mask tensor.
+  if (input->shape().get().rank() <= mask->shape().get().rank() &&
+      input->shape().get().dimensions() < mask->shape().get().dimensions()) {
+    input_value = MaybeExpand(input->GetIrValue(), mask->shape());
+  }
+  return input->CreateFrom(torch::lazy::MakeNode<MaskedFill>(
+      input_value, MaybeExpand(mask->GetIrValue(), GetXlaShape(input_value)),
       value));
 }
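
Taken together with the tests above, the lowering now broadcasts in both directions: the mask is still expanded to the input's shape when the input is larger, and the input is now expanded to the mask's shape when its rank does not exceed the mask's and its dimensions compare smaller. A minimal eager-mode sketch of the two cases, with shapes taken from TestMaskedFillBroadcast1 and TestMaskedFillBroadcast2 (on XLA both now hit the single xla::masked_fill lowering):

#include <torch/torch.h>

int main() {
  // Case 1: mask {4, 3} is broadcast against input {2, 5, 4, 3}.
  torch::Tensor a =
      torch::rand({2, 5, 4, 3}, torch::TensorOptions(torch::kFloat));
  torch::Tensor m1 =
      torch::randint(0, 2, {4, 3}, torch::TensorOptions(torch::kBool));
  torch::Tensor r1 = torch::masked_fill(a, m1, 42);  // shape {2, 5, 4, 3}

  // Case 2: input {2, 1} is expanded to mask {2, 3} before the MaskedFill
  // IR node is built; this is the behavior added by this commit.
  torch::Tensor b = torch::rand({2, 1}, torch::TensorOptions(torch::kFloat));
  torch::Tensor m2 =
      torch::randint(0, 2, {2, 3}, torch::TensorOptions(torch::kBool));
  torch::Tensor r2 = torch::masked_fill(b, m2, 42);  // shape {2, 3}
  return 0;
}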

torch_xla/csrc/tensor_methods.h

Lines changed: 2 additions & 3 deletions
@@ -484,9 +484,8 @@ XLATensorPtr lt(const XLATensorPtr& input, const at::Scalar& other);
 
 XLATensorPtr lt(const XLATensorPtr& input, const XLATensorPtr& other);
 
-// In-place version of the method above.
-void masked_fill_(XLATensorPtr& input, const XLATensorPtr& mask,
-                  const at::Scalar& value);
+XLATensorPtr masked_fill(XLATensorPtr& input, const XLATensorPtr& mask,
+                         const at::Scalar& value);
 
 XLATensorPtr masked_scatter(XLATensorPtr& input, const XLATensorPtr& mask,
                             const XLATensorPtr& source);

xla_native_functions.yaml

Lines changed: 2 additions & 4 deletions
@@ -212,8 +212,8 @@ supported:
 - log2
 - log10
 - logsumexp
-- masked_fill_.Scalar
-- masked_fill_.Tensor
+- masked_fill.Scalar
+- masked_fill.Tensor
 - masked_scatter
 - masked_select
 - max
@@ -368,8 +368,6 @@ supported:
 - _trilinear
 - linalg_pinv.atol_rtol_tensor
 - _cdist_forward
-- masked_fill.Scalar
-- masked_fill.Tensor
 - mvlgamma
 - permute
 # The same applies to these ops, but we already have direct lowerings for them
