9 changes: 9 additions & 0 deletions test/test_fx_experimental.py
@@ -27,6 +27,7 @@
from torch.fx.experimental.rewriter import RewritingTracer
from torch.fx.experimental.schema_type_annotation import AnnotateTypesWithSchema
from torch.fx.experimental.meta_tracer import MetaTracer
from torch.fx.experimental.proxy_tensor import make_fx
from torch.fx.graph_module import GraphModule
from torch.fx.node import Node
from torch.fx.operator_schemas import (
@@ -689,6 +690,14 @@ def forward(self, x):
        gm = torch.fx.GraphModule(mttm, graph)
        torch.testing.assert_close(gm(x), mttm(x))

    def test_proxy_tensor(self):
        def f(x):
            val = x.cos().cos().sum()
            return torch.autograd.grad(val, x)

        traced_graph = make_fx(f)(torch.randn(3, requires_grad=True))
        inp = torch.randn(3, requires_grad=True)
        torch.testing.assert_close(traced_graph(inp), f(inp))

    def test_call_to_assert_with_msg(self):
        class M(torch.nn.Module):
184 changes: 184 additions & 0 deletions torch/fx/experimental/proxy_tensor.py
@@ -0,0 +1,184 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import functools
from typing import Any, Dict, Optional, Tuple, Callable, Union
import torch
from torch._C import _disabled_torch_function_impl
import torch.utils._pytree as pytree
from torch.fx import Tracer, GraphModule
import torch.fx as fx
from torch.fx.passes.shape_prop import _extract_tensor_metadata
from contextlib import contextmanager

__all__ = ["ProxyTensor", "PythonKeyTracer", "dispatch_trace", "make_fx"]
aten = torch.ops.aten

CURRENT_DECOMPOSITION_TABLE: Dict[torch._ops.OpOverload, Callable] = {}


@contextmanager
def no_dispatch():

Collaborator:
Use the one from torch.testing._internal.logging_tensor.no_dispatch ?

Contributor:
that's a terrible location for it.

The root of our evils is we don't have a "torch dispatch" module. I propose torch.dispatch. Any objections?

Collaborator:
Well, this function should not exist?
Once the modes are fixed by Sam and users can call super(), there will be no need for this function at all!

I do agree that we can have this namespace if needed, though, if we don't want to put it in torch.overrides (which has an OK name to house both torch_function and torch_dispatch).

Contributor:
Well, we need this function to exist in some form for internal implementation purposes (e.g., actually implementing super())

I'm not very hot on torch.overrides because although the name is generic it really is very torch_function leaning right now.

Collaborator:
> I'm not very hot on torch.overrides because although the name is generic it really is very torch_function leaning right now.

Sure, but we could change that? Also, I plead guilty to adding torch.overrides.enable_reentrant_dispatch() there. But we can move it somewhere else if we prefer a new namespace.

    guard = torch._C._DisableTorchDispatch()  # type: ignore[attr-defined]
    try:
        yield
    finally:
        del guard


@contextmanager
def decompose(decomposition_table):
    global CURRENT_DECOMPOSITION_TABLE
    old_decomposition_table = CURRENT_DECOMPOSITION_TABLE
    CURRENT_DECOMPOSITION_TABLE = decomposition_table
    try:
        yield CURRENT_DECOMPOSITION_TABLE
    finally:
        CURRENT_DECOMPOSITION_TABLE = old_decomposition_table

Contributor:
@Chillee do you think this should be the done and dusted API for decomps in AOTAutograd? I now agree that it should be possible to directly program extra decomps as part of the tracing process, but I still want control of the default set of decompositions (which we're gonna put in PyTorch core) to be based on what the backend declares it supports.

I suppose one could do this compositionally in the current API with something like this: decompose({**get_decomps(dont_decompose), **custom_decomps}). (This inverts the sense in which get_decomps is currently programmed; instead of asking for decomps, you say everything you DON'T want decomposed.) But this seems like a weird way to do the API if you're talking in terms of building an array of decompositions; it seems like maybe this should be abstracted away into a higher-level API.

Collaborator Author:
I think in practice... backends like NVFuser don't actually necessarily want to "try and decompose everything they don't support". For example, something like slice_backward decomposes into a new_zeros and a slice_scatter call. Now, NVFuser supports neither of these new ops, and since we've decomposed them they're now slower.

Essentially, one nice thing about this API is that the "default option" (i.e. decompose nothing) is very easy to reason about. Similarly, if you're only decomposing a few ops, it's very easy to reason about it.

I could be convinced that there could be a better API though.

Contributor:
OK, agreed this is not good enough.

It seems like something better might be if we had metadata saying what a decomposition would decompose to. Then NVFuser could instead define "this is the stuff that I actually understand", and we would only do decompositions that (eventually) produce things we understand (though this is not 100% well specified: if a decomp produces some understandable stuff and some not-understandable stuff, is it still profitable to decompose?).
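
A minimal sketch of the inverted pattern discussed above (all names here are hypothetical, not part of this PR): given a full registry of known decompositions, build a table that decomposes everything a backend does not declare support for.

# Hypothetical helper, not part of this PR. `all_decomps` would be a
# registry mapping OpOverload -> Python decomposition function.
def decomps_excluding(all_decomps, supported_ops):
    # Decompose every op the backend does NOT natively support.
    return {op: fn for op, fn in all_decomps.items() if op not in supported_ops}

As the thread notes, this can still produce decomposed ops the backend does not support either, which is why the PR's default is to decompose nothing.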



class ProxyTensor(torch.Tensor):
    proxy: fx.Proxy

    @staticmethod
    def __new__(cls, elem, proxy):
        # Hack to deal with super().__new__ not working for sparse tensors
        if elem.is_sparse:
            proxy.node.meta['tensor_meta'] = {}
            r = torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
        else:
            r = super().__new__(cls, elem)  # type: ignore[call-arg]
            proxy.node.meta['tensor_meta'] = _extract_tensor_metadata(r)
        r.proxy = proxy  # type: ignore[attr-defined]

        return r

    def __repr__(self):
        with no_dispatch():
            return f"ProxyTensor({self.as_subclass(torch.Tensor)}, proxy={self.proxy})"  # type: ignore[arg-type]

    __torch_function__ = _disabled_torch_function_impl

    @classmethod
    def __torch_dispatch__(cls, func_overload, types, args=(), kwargs=None):
        func = func_overload.overloadpacket
        if func_overload in CURRENT_DECOMPOSITION_TABLE:
            return CURRENT_DECOMPOSITION_TABLE[func_overload](*args, **kwargs)
        if func_overload == aten._local_scalar_dense.default:
            raise RuntimeError("It appears that you're trying to get value out of a tracing tensor - erroring out! "
                               "It's likely that this is caused by data-dependent control flow or similar.")

        def unwrap_proxy(e):
            return e.proxy if isinstance(e, ProxyTensor) else e

        proxy_args = pytree.tree_map(unwrap_proxy, args)
        proxy_kwargs = pytree.tree_map(unwrap_proxy, kwargs)

        proxy_out = func(*proxy_args, **proxy_kwargs)

        # Kind of a hacky way to test if an op is in-place or not
        if func.__name__[-1] == "_" and func.__name__[0] != "_":

Contributor:
A more robust test would be to replicate the parsing logic in model.py

    @staticmethod
    def parse(op: str) -> 'BaseOperatorName':
        assert op != ''
        assert not op.endswith('_out'), \
            "_out suffix is reserved and not permitted for operator names; " \
            "did you mean to specify an out overload name instead?"
        m = re.match(r'^__([^_]+)__$', op)
        if m is not None:
            dunder_method = True
            base = m.group(1)
            if any(base == f'i{n}' for n in AUGMENTED_ASSIGNMENT_NAMES):
                inplace = True
                base = base[1:]
            else:
                inplace = False
                # temporary, this is not intrinsically true but
                # has been historically true for dunder methods
                # we support  (but, if we ever got, say, __int__, this would
                # be wrong!)
                assert base[0] != 'i'
        else:
            dunder_method = False
            base = op
            if base[-1] == '_':
                inplace = True
                base = base[:-1]
            else:
                inplace = False
        r = BaseOperatorName(base=base, inplace=inplace, dunder_method=dunder_method)
        assert str(r) == op, f'{str(r)} != {op}'
        return r

Contributor:
But the best solution would be just to get this metadata on the freaking overloads cc @anjali411
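
For illustration, a hedged sketch (not part of this PR) of a name-based check modeled on the parse() logic quoted above; it keeps the PR's leading-underscore exclusion and adds handling for __iadd__-style dunders:

import re

def _looks_inplace(op_name: str) -> bool:
    # Hypothetical helper: treat __iadd__-style augmented-assignment dunders
    # as in-place; otherwise, a trailing underscore on a name that does not
    # start with an underscore (add_, mul_) marks an in-place op.
    if re.match(r'^__i[^_]+__$', op_name):
        return True
    return not op_name.startswith('_') and op_name.endswith('_')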

            args[0].proxy = proxy_out
            proxy_out.node.meta['tensor_meta'] = _extract_tensor_metadata(args[0])

        with no_dispatch():
            real_out = func_overload(*args, **kwargs)

Contributor:
@Chillee wondering... would proxy tensor be better as a mode? We'd just chuck the proxies as attributes on vanilla tensors, no subclassing involved at all, and then just a mode to record IR and propagate proxies. Would be able to trace factories this way.

cc @samdow

Contributor:
We've looked at having proxy tensors as a mode (hacky version of the change here). That still has subclassing, so @Chillee might know if there's something more fundamental, but I can see what breaks locally if we just use attributes on vanilla tensors.

We would need to wait for #75966 to land for any part of the mode solution to work at all in a non-hacky way because:

  1. The current tracer is saved as an attribute on the proxies. The PR introduces the class PythonMode so that we can save that state on the mode. Without that PR, factory functions need the tracer to be saved as global state because they don't take any tensor inputs (this is how it works in the hacky version).
  2. (minor, not currently in the PR but should be) Right now, __torch_dispatch__ always warns if it's an instance method.


        def wrap_with_proxy(e, proxy):
            if type(e) == torch.Tensor:
                return ProxyTensor(e, proxy)
            else:
                return e

        # Unfortunately, tree_map cannot directly be used here. As the resulting
        # object may be a proxy that represents a tuple, we may need to
        # explicitly unwrap the proxy by simulating the flattening operations.
        if isinstance(real_out, tuple):
            return tuple(wrap_with_proxy(e, proxy_out[idx]) for idx, e in enumerate(real_out))
        elif isinstance(real_out, list):
            return list([wrap_with_proxy(e, proxy_out[idx]) for idx, e in enumerate(real_out)])
        elif isinstance(real_out, torch.Tensor):
            return wrap_with_proxy(real_out, proxy_out)
        else:
            return real_out


class PythonKeyTracer(Tracer):
    def __init__(self):
        super().__init__()

    # In general, we don't want to make modules leaves. In principle, users of
    # this tracer might want to override this in order to turn a couple specific
    # modules into leaves in the traced graph.
    def call_module(
            self, m: torch.nn.Module, forward: Callable[..., Any], args: Tuple[Any, ...], kwargs: Dict[str, Any]
    ) -> Any:
        return forward(*args, **kwargs)

    def create_arg(self, a: Any):
        if isinstance(a, torch.nn.Parameter):
            for n, p in self.root.named_parameters():
                if a is p:
                    return self.create_node('get_attr', n, (), {})
            qualname: Optional[str] = None

            if not qualname:
                i = 0
                while True:
                    qualname = f'_param_constant{i}'
                    if not hasattr(self.root, qualname):
                        break
                    i += 1
                setattr(self.root, qualname, a)

            return self.create_node('get_attr', qualname, (), {})
        return super().create_arg(a)


def dispatch_trace(
        root: Union[torch.nn.Module, Callable], concrete_args: Optional[Tuple[Any, ...]] = None
) -> GraphModule:
    tracer = PythonKeyTracer()
    graph = tracer.trace(root, concrete_args)
    name = root.__class__.__name__ if isinstance(root, torch.nn.Module) else root.__name__
    return GraphModule(tracer.root, graph, name)


def wrap_key(f, inps):
    flat_inps, _ = pytree.tree_flatten(inps)

    @functools.wraps(f)
    def wrapped(*args):
        flat_args, args_spec = pytree.tree_flatten(args)
        assert(len(flat_args) == len(flat_inps))
        for idx, arg in enumerate(flat_args):
            if isinstance(flat_inps[idx], torch.Tensor):
                flat_args[idx] = ProxyTensor(flat_inps[idx], arg)
            else:
                flat_args[idx] = flat_inps[idx]

        tree_args = pytree.tree_unflatten(flat_args, args_spec)
        out = f(*tree_args)
        flat_outs, out_spec = pytree.tree_flatten(out)
        for idx in range(len(flat_outs)):
            if isinstance(flat_outs[idx], torch.Tensor) and isinstance(flat_outs[idx], ProxyTensor):
                flat_outs[idx] = flat_outs[idx].proxy
        return pytree.tree_unflatten(flat_outs, out_spec)

    return wrapped


def make_fx(f, decomposition_table=None):
    if decomposition_table is None:
        decomposition_table = {}

    @functools.wraps(f)
    def wrapped(*args):
        phs = pytree.tree_map(lambda x: fx.PH, args)  # type: ignore[attr-defined]
        with decompose(decomposition_table):
            t = dispatch_trace(wrap_key(f, args), concrete_args=tuple(phs))
        return t

    return wrapped
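
To show how make_fx, decompose, and ProxyTensor fit together, here is a hedged usage sketch (not from this PR). It assumes torch.addmm reaches __torch_dispatch__ as aten.addmm.default, and addmm_decomp is a caller-defined decomposition:

import torch
from torch.fx.experimental.proxy_tensor import make_fx

def addmm_decomp(bias, mat1, mat2, beta=1, alpha=1):
    # Caller-supplied decomposition; the ops it calls re-enter
    # __torch_dispatch__ and are recorded in the traced graph.
    return beta * bias + alpha * (mat1 @ mat2)

def f(bias, mat1, mat2):
    return torch.addmm(bias, mat1, mat2).sum()

traced = make_fx(f, decomposition_table={torch.ops.aten.addmm.default: addmm_decomp})(
    torch.randn(5), torch.randn(3, 4), torch.randn(4, 5))
print(traced.graph)  # the addmm call should appear as the decomposed ops instead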