pytorch · albanD · Feb 17, 2022 · Feb 18, 2022 · Feb 18, 2022 · zou3519
diff --git a/aten/src/ATen/core/PythonFallbackKernel.cpp b/aten/src/ATen/core/PythonFallbackKernel.cpp
@@ -2,15 +2,27 @@
 #include <ATen/core/dispatch/Dispatcher.h>
 #include <ATen/core/PythonModeTLS.h>
 
+#include <stack>
+
 namespace {
 
 // TLS saving the state of the include/exclude sets on entry to the dispatcher
 // This is set in the pythonTLSSnapshot fallback and used by the Python fallback.
-thread_local c10::optional<c10::impl::LocalDispatchKeySet> tls_on_entry;
+thread_local std::stack<c10::impl::LocalDispatchKeySet> tls_on_entry;
+
+// RAII guard: pushes `key_set` onto tls_on_entry on construction and pops it on
+// destruction, so the entry is removed on every exit path of the enclosing scope.
+struct C10_API StashTLSStateGuard {
+  explicit StashTLSStateGuard(const c10::impl::LocalDispatchKeySet& key_set) { tls_on_entry.push(key_set); }
+  ~StashTLSStateGuard() { tls_on_entry.pop(); }
+  // Non-copyable: a copy would pop an entry it never pushed.
+  StashTLSStateGuard(const StashTLSStateGuard&) = delete;
+  StashTLSStateGuard& operator=(const StashTLSStateGuard&) = delete;
+};
 
 void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
-  TORCH_INTERNAL_ASSERT(tls_on_entry.has_value());
-  c10::impl::ForceDispatchKeyGuard guard(tls_on_entry.value());
+  TORCH_INTERNAL_ASSERT(tls_on_entry.size() > 0);
+  c10::impl::ForceDispatchKeyGuard guard(tls_on_entry.top());
 
   // If Python Mode is active, use its PyInterpreter for dispatch
   const auto& maybe_python_mode_state = at::impl::PythonModeTLS::get_state();
@@ -54,11 +66,9 @@ void pythonTLSSnapshotFallback(const c10::OperatorHandle& op, c10::DispatchKeySe
   // A CompositeImplicitAutograd function may have been called just before this and so the tls here were never cleared
   // This is also why we don't need an RAII to ensure the tls is reset when exceptions happen
 
-  tls_on_entry = c10::impl::tls_local_dispatch_key_set();
+  StashTLSStateGuard guard(c10::impl::tls_local_dispatch_key_set());
 
   op.redispatchBoxed(dispatch_keys & c10::DispatchKeySet(c10::DispatchKeySet::FULL_AFTER, c10::DispatchKey::PythonTLSSnapshot), stack);
-
-  tls_on_entry = c10::nullopt;
 }
 
 

diff --git a/test/test_python_dispatch.py b/test/test_python_dispatch.py
@@ -562,6 +562,46 @@ def test_autograd_in_attr(self):
         self.assertIsNone(t.grad)
         self.assertIsNotNone(t.elem.grad)
 
+    def test_multiple_ops_subclass(self):
+        # This is a Direct Subclass, don't do that!
+        class MySubclass(torch.Tensor):
+            @staticmethod
+            def __new__(cls, elem):
+                r = torch.Tensor._make_subclass(cls, elem)
+                return r
+
+            __torch_function__ = torch._C._disabled_torch_function_impl
+
+            @classmethod
+            def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
+                with no_dispatch():
+                    return func(*args, **kwargs)
+
+        x = MySubclass(torch.rand(2, 2, dtype=torch.complex64))
+        y = x.conj()
+        # Details of the bug that this tests for:
+        # Here, y dispatch keys are: {PythonTLSSnapshot, AutogradCPU, Conjugate, Python, CPU}
+        # There are a few calls to the dispatcher that are going to happen here:
+        #  - call_exp: User calling exp on y
+        #    - PythonTLSSnapshot: records the TLS on entry and redispatch
+        #    - AutogradCPU: no input requires grad, so does nothing and redispatch
+        #    - Conjugate: no special implementation for exp: use the fallback that
+        #                 first clone the Tensor (to materialize the conj) then redispatch
+        #      - call_clone: conjugate fallback calling clone on y
 auto resolved_tensor = at::clone(tensor); 
 auto resolved_tensor = at::clone(tensor); 
+        #        - PythonTLSSnapshot: records the TLS on entry and redispatch
+        #        - (AutogradCPU: skipped as autograd added itself to the exclude set above)
+        #        - Conjugate: special implementation for clone: just skip this key
+        #        - Python: Reset the TLS based on the snapshot above and call the user implementation (this
+        #                  actually calls into the dispatcher again but since we disable both our keys
+        #                  before, not detailed here)
+        #        - exit Python: restore the TLS and exit
+        #        - exit Conjugate: nothing was inplace so just exit
+        #        - exit PythonTLSSnapshot: done with this call, reset the saved TLS to empty
+        #    - Python: Reset the TLS again based on the snapshot. <- this used to fail
+        #    - More steps....
+        y.exp()
+
+
 
 if __name__ == '__main__':
     run_tests()