
Commit 48e12e3

Fix non_zero cast issue (#4243)
* Make the nonzero result reflect the real type
* Make SizeNode's type hardware dependent
* Fix CPU and GPU error
* Fix GPU and TPU C++ test
1 parent d668f2c commit 48e12e3
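
In user-facing terms, the issue fixed here is the one covered by the new test_nonzero_cast test added below. Roughly, the following (a minimal sketch, not part of the commit itself, assuming torch_xla is installed and an XLA device is available) previously failed because the nonzero result did not reflect the backend's real index type:

import torch
import torch_xla.core.xla_model as xm

t1 = torch.ones(5, 2, device=xm.xla_device())
# Before this commit, the nonzero result was unconditionally labeled int64,
# which clashed with the s32 index type used on TPU; casting the result is
# now expected to work on every backend.
t2 = torch.nonzero(t1.int()).float()
xm.mark_step()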

File tree

4 files changed (+17, -4 lines)

test/cpp/test_aten_xla_tensor.cpp

Lines changed: 1 addition & 1 deletion
@@ -5140,7 +5140,7 @@ TEST_F(AtenXlaTensorTest, TestNonzero) {
   ForEachDevice([&](const torch::Device& device) {
     torch::Tensor xla_a = CopyToDevice(a, device);
     torch::Tensor xla_b = torch::nonzero(xla_a);
-    AllClose(b, xla_b);
+    AllClose(b, torch::_cast_Long(xla_b));
 
     if (DebugUtil::ExperimentEnabled("nonzero")) {
       // If the nonzero support is enabled, we must not see any aten:: calls.
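
The explicit cast is needed because torch::nonzero on the reference CPU tensor always yields int64, while the XLA result now carries the backend's index type, which is s32 on TPU. A rough Python analogue of the updated assertion (for illustration only, not part of the commit):

import torch
import torch_xla.core.xla_model as xm

a = torch.zeros(4, 2)
a[0][1] = 1.0
b = torch.nonzero(a)                          # CPU reference is always int64
xla_b = torch.nonzero(a.to(xm.xla_device()))  # backend index type (s32 on TPU)
# Cast to int64 before comparing, mirroring torch::_cast_Long in the C++ test.
assert torch.equal(b, xla_b.cpu().long())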

test/test_operations.py

Lines changed: 8 additions & 0 deletions
@@ -749,6 +749,14 @@ def test_masked_select_shape(self):
         torch.masked_select(x, mask), 0)
     self.assertEqual(x_dim0_shape.item(), 3)
 
+  def test_nonzero_cast(self):
+    t1 = torch.ones(5, 2, device=xm.xla_device())
+    # Result of the nonzero should be the index type. Currently
+    # index type is s64 on cpu and gpu, but s32 on TPU. We should be
+    # able to cast it to any other type without error.
+    t2 = torch.nonzero(t1.int()).float()
+    xm.mark_step()
+
 
 class TestOptimizationBarrier(XlaTestCase):

torch_xla/csrc/ops/dynamic_ir.cpp

Lines changed: 5 additions & 2 deletions
@@ -5,6 +5,7 @@
 #include "torch_xla/csrc/lowering_context.h"
 #include "torch_xla/csrc/ops/infer_output_shape.h"
 #include "torch_xla/csrc/tensor.h"
+#include "torch_xla/csrc/tensor_util.h"
 
 namespace torch_xla {
 
@@ -23,8 +24,10 @@ const std::shared_ptr<torch::lazy::DimensionNode> DimCast(
 
 SizeNode::SizeNode(torch::lazy::Value input, size_t dim)
     : XlaNode(torch::lazy::OpKind{c10::Symbol::fromQualString("aten::size")},
-              {input}, xla::ShapeUtil::MakeShape(xla::S64, {}), 1,
-              torch::lazy::MHash(dim)),
+              {input},
+              xla::ShapeUtil::MakeShape(
+                  GetShapeDimensionType(/*device=*/nullptr), {}),
+              1, torch::lazy::MHash(dim)),
       dim_(dim) {
   // Not all IR has torch::lazy::shape now, use xla::shape to unblock
   // the development.
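
The hard-coded xla::S64 is replaced so that SizeNode's scalar type matches the backend's dimension type. A hedged sketch of where this shows up, assuming the internal torch_xla._XLAC._get_xla_tensor_dimension_size binding used by the neighbouring test_masked_select_shape test:

import torch
import torch_xla
import torch_xla.core.xla_model as xm

x = torch.randn(3, 3, device=xm.xla_device())
mask = x.ge(0.5)
# The size of dimension 0 of the masked_select result is a SizeNode; its
# scalar type now follows the backend's dimension type (s64 on CPU/GPU,
# s32 on TPU) rather than a hard-coded s64.
dim0 = torch_xla._XLAC._get_xla_tensor_dimension_size(
    torch.masked_select(x, mask), 0)
print(dim0.item())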

torch_xla/csrc/tensor_methods.cpp

Lines changed: 3 additions & 1 deletion
@@ -1876,7 +1876,9 @@ std::pair<XLATensorPtr, XLATensorPtr> XLATensor::nms(
 XLATensorPtr XLATensor::nonzero(const XLATensorPtr& input) {
   torch::lazy::NodePtr node =
       torch::lazy::MakeNode<NonZero>(input->GetIrValue());
-  return input->CreateFrom(torch::lazy::Value(node, 0), at::ScalarType::Long);
+  // Nonzero result type should not depend on input type, hence we shouldn't
+  // use input->CreateFrom which will inherit the logical_element_type.
+  return Create(torch::lazy::Value(node, 0), input->GetDevice());
 }
 
 XLATensorPtr XLATensor::norm(const XLATensorPtr& input,

0 commit comments