Fix sparse fake tensors detach (#125679)

As in the title. Fixes a bug reported in #117907 (comment) Pull Request resolved: #125679 Approved by: https://github.com/amjames, https://github.com/lezcano
pytorch · May 9, 2024 · 0241ed9 · 0241ed9
1 parent 7e86a7c
commit 0241ed9
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 34 deletions.
diff --git a/aten/src/ATen/SparseCsrTensorImpl.cpp b/aten/src/ATen/SparseCsrTensorImpl.cpp
@@ -57,6 +57,8 @@ SparseCsrTensorImpl::SparseCsrTensorImpl(
   TORCH_INTERNAL_ASSERT(((key_set.has(DispatchKey::SparseCsrCPU) && device().type() == kCPU)
                          || (key_set.has(DispatchKey::SparseCsrCUDA) && device().type() == kCUDA)
                          || (key_set.has(DispatchKey::SparseCsrMeta) && device().type() == kMeta)
+                         || (key_set.has(DispatchKey::SparseCsrCPU) && device().type() == kMeta)   // fake tensor
+                         || (key_set.has(DispatchKey::SparseCsrCUDA) && device().type() == kMeta)  // fake tensor
                          || (key_set.has(DispatchKey::SparseCsrPrivateUse1) && device().type() == kPrivateUse1)),
                         "Inconsistent key_set (=", key_set, ") and device (=", device(), ")");
 

diff --git a/aten/src/ATen/SparseCsrTensorImpl.h b/aten/src/ATen/SparseCsrTensorImpl.h
@@ -2,6 +2,7 @@
 
 #include <ATen/Tensor.h>
 #include <c10/core/TensorImpl.h>
+#include <c10/core/impl/TorchDispatchModeTLS.h>
 #include <c10/util/Exception.h>
 namespace at {
 
@@ -107,6 +108,39 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl {
     }
   }
 
+  template <typename VariableVersion>
+  c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach_core(
+      VariableVersion&& version_counter,
+      bool allow_tensor_metadata_change) const {
+    const auto mode_stack_len = c10::impl::TorchDispatchModeTLS::stack_len();
+    c10::impl::PyInterpreter&& interpreter = nullptr;
+    if (mode_stack_len > 0 &&
+        !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
+      const auto& cur_torch_dispatch_mode_state =
+          c10::impl::TorchDispatchModeTLS::get_stack_at(mode_stack_len - 1);
+      interpreter = cur_torch_dispatch_mode_state->pyinterpreter();
+    } else if (
+        key_set_.has(DispatchKey::Python) &&
+        !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
+      interpreter = pyobj_slot_.load_pyobj_interpreter();
+    } else {
+      // otherwise just copy the SparseTensorImpl and not the PyObject.
+      auto impl = c10::make_intrusive<SparseCsrTensorImpl>(
+          key_set(), device(), layout_impl(), dtype());
+      copy_tensor_metadata(
+          /*src_sparse_impl=*/this,
+          /*dest_sparse_impl=*/impl.get(),
+          /*version_counter=*/version_counter,
+          /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
+      impl->refresh_numel();
+      return impl;
+    }
+    auto r = interpreter->detach(this);
+    r->set_version_counter(std::forward<VariableVersion>(version_counter));
+    r->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
+    return r;
+  }
+
   /**
    * Return a TensorImpl that is a shallow-copy of this TensorImpl.
    *
@@ -116,15 +150,8 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl {
   c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
       const c10::VariableVersion& version_counter,
       bool allow_tensor_metadata_change) const override {
-    auto impl = c10::make_intrusive<SparseCsrTensorImpl>(
-        key_set(), device(), layout_impl(), dtype());
-    copy_tensor_metadata(
-        /*src_sparse_impl=*/this,
-        /*dest_sparse_impl=*/impl.get(),
-        /*version_counter=*/version_counter,
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-    impl->refresh_numel();
-    return impl;
+    return shallow_copy_and_detach_core(
+        version_counter, allow_tensor_metadata_change);
   }
 
   /**
@@ -136,15 +163,8 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl {
   c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
       c10::VariableVersion&& version_counter,
       bool allow_tensor_metadata_change) const override {
-    auto impl = c10::make_intrusive<SparseCsrTensorImpl>(
-        key_set(), device(), layout_impl(), dtype());
-    copy_tensor_metadata(
-        /*src_sparse_impl=*/this,
-        /*dest_sparse_impl=*/impl.get(),
-        /*version_counter=*/std::move(version_counter),
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-    impl->refresh_numel();
-    return impl;
+    return shallow_copy_and_detach_core(
+        std::move(version_counter), allow_tensor_metadata_change);
   }
 
  private:

diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h
@@ -2,6 +2,7 @@
 
 #include <ATen/Tensor.h>
 #include <c10/core/TensorImpl.h>
+#include <c10/core/impl/TorchDispatchModeTLS.h>
 #include <c10/util/Exception.h>
 #include <c10/util/irange.h>
 
@@ -306,6 +307,38 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
       const Tensor& indices,
       const Tensor& values);
 
+  template <typename VariableVersion>
+  c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach_core(
+      VariableVersion&& version_counter,
+      bool allow_tensor_metadata_change) const {
+    const auto mode_stack_len = c10::impl::TorchDispatchModeTLS::stack_len();
+    c10::impl::PyInterpreter&& interpreter = nullptr;
+    if (mode_stack_len > 0 &&
+        !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
+      const auto& cur_torch_dispatch_mode_state =
+          c10::impl::TorchDispatchModeTLS::get_stack_at(mode_stack_len - 1);
+      interpreter = cur_torch_dispatch_mode_state->pyinterpreter();
+    } else if (
+        key_set_.has(DispatchKey::Python) &&
+        !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
+      interpreter = pyobj_slot_.load_pyobj_interpreter();
+    } else {
+      // otherwise just copy the SparseTensorImpl and not the PyObject.
+      auto impl = c10::make_intrusive<SparseTensorImpl>(key_set(), dtype());
+      copy_tensor_metadata(
+          /*src_sparse_impl=*/this,
+          /*dest_sparse_impl=*/impl.get(),
+          /*version_counter=*/version_counter,
+          /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
+      impl->refresh_numel();
+      return impl;
+    }
+    auto r = interpreter->detach(this);
+    r->set_version_counter(std::forward<VariableVersion>(version_counter));
+    r->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
+    return r;
+  }
+
   /**
    * Return a TensorImpl that is a shallow-copy of this TensorImpl.
    *
@@ -315,14 +348,8 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
   c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
       const c10::VariableVersion& version_counter,
       bool allow_tensor_metadata_change) const override {
-    auto impl = c10::make_intrusive<SparseTensorImpl>(key_set(), dtype());
-    copy_tensor_metadata(
-        /*src_sparse_impl=*/this,
-        /*dest_sparse_impl=*/impl.get(),
-        /*version_counter=*/version_counter,
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-    impl->refresh_numel();
-    return impl;
+    return shallow_copy_and_detach_core(
+        version_counter, allow_tensor_metadata_change);
   }
 
   /**
@@ -334,14 +361,8 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
   c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
       c10::VariableVersion&& version_counter,
       bool allow_tensor_metadata_change) const override {
-    auto impl = c10::make_intrusive<SparseTensorImpl>(key_set(), dtype());
-    copy_tensor_metadata(
-        /*src_sparse_impl=*/this,
-        /*dest_sparse_impl=*/impl.get(),
-        /*version_counter=*/std::move(version_counter),
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-    impl->refresh_numel();
-    return impl;
+    return shallow_copy_and_detach_core(
+        std::move(version_counter), allow_tensor_metadata_change);
   }
 
   /**

diff --git a/test/dynamo_expected_failures/TestAutograd.test_sparse_mm_backward b/test/dynamo_expected_failures/TestAutograd.test_sparse_mm_backward
diff --git a/test/dynamo_expected_failures/TestNN.test_linear_autograd_device_cpu_nobias_weightCOO b/test/dynamo_expected_failures/TestNN.test_linear_autograd_device_cpu_nobias_weightCOO
diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
@@ -1266,6 +1266,7 @@ def fn(a):
         sample[-1] = 1
         self.common(fn, (sample,))
 
+    @skipCPUIf(IS_MACOS, "fails on macos")
     def test_multilayer_var(self):
         def fn(a):
             return torch.var(a)

diff --git a/test/test_sparse.py b/test/test_sparse.py
@@ -4446,6 +4446,10 @@ def test_fake(self, dtype, layout):
             self.assertIsInstance(f, FakeTensor)
             self.assertEqualMeta(f, t, 0)
 
+            d = f.detach()
+            self.assertIsInstance(d, FakeTensor)
+            self.assertEqualMeta(d, t, 0)
+
     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_zeros_like_fake(self, dtype, layout):