From 365331997bd7bd8a1adfa3d1c48a70778eacecf8 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Thu, 8 Oct 2020 15:10:42 -0700
Subject: [PATCH] Rewrite implementation of faithful cpp signatures

This rewrite is as per my comments at
https://github.com/pytorch/pytorch/pull/44087#issuecomment-701664506
I did the rewrite by reverting #44087 and then reimplementing it on top.
You may find it easier to review by diffing against master with only
#44087 reverted.

There are two main ideas. First, we now factor cpp argument processing
into two phases operating on three representations of data:

1. `FunctionSchema` - this is the source from native_functions.yaml
2. `Union[Argument, ThisArgument, TensorOptionsArgument]` - this is the
   arguments after doing some basic semantic analysis to group them (for
   TensorOptions) or identify the this argument (if this is a method).
   There is only ever one of these per function.
3. `Union[CppArgument, CppThisArgument, CppTensorOptionsArgument]` - this
   is the arguments after we've elaborated them to C++. There may be
   multiple of these per actual C++ signature.

You can think of (2) as common processing, whereas (3) bakes in specific
assumptions about whether or not you have a faithful or non-faithful
signature.

Second, we now have CppSignature and CppSignatureGroup representing the
*total* public C++ API signature. So those dataclasses are what know how
to render definitions/declarations, and you no longer have to manually
type it out in the Functions/TensorMethods codegen.

Here is an exhaustive accounting of the changes.

tools.codegen.api.types

- CppSignature and CppSignatureGroup got moved to tools.codegen.api.types
- Add new CppThisArgument and CppTensorOptionsArguments (modeled off of
  ThisArgument and TensorOptionsArguments) so that we can retain high
  level semantic structure even after elaborating terms with C++ API
  information.
  Once this is done, we can refine CppArgument.argument to no longer
  contain a ThisArgument (ThisArgument is always translated to
  CppThisArgument. Note that this doesn't apply to TensorOptionsArguments,
  as those may be expanded or not expanded, and so you could get a single
  CppArgument for 'options')
- Add no_default() functional mutator to easily remove default arguments
  from CppArgument and friends
- Add an explicit_arguments() method to CppArgument and friends to
  extract the (flat) argument list that must be explicitly written in the
  signature. This is everything except (Cpp)ThisArgument, and is also
  convenient when you don't care about the extra structure of
  CppTensorOptionsArguments

tools.codegen.api.cpp

- group_arguments is back, and it doesn't send things directly to a
  CppSignatureGroup; instead, it moves us from representation (1) to (2)
  (perhaps it should live in model). Here I changed my mind from my PR
  comment; I discovered it was not necessary to do classification at
  grouping time, and it was simpler and easier to do it later.
- argument got split into argument_not_this/argument/argument_faithful.
  argument and argument_faithful are obvious enough what they do, and I
  needed argument_not_this as a more refined version of argument so that
  I could get the types to work out on TensorOptionsArguments

tools.codegen.api.dispatcher

- Here we start seeing the payoff. The old version of this code had a
  "scatter" mode and a "gather" mode. We don't need that anymore:
  cppargument_exprs is 100% type-directed via the passed in cpp
  arguments. I am able to write the functions without any reference to
  use_c10_dispatcher

tools.codegen.gen

- Instead of having exprs_str and types_str functions, I moved these to
  live directly on CppSignature, since it seemed pretty logical.
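As a rough illustration of the (1) to (2) grouping step and the no_default() functional mutator described above, here is a minimal, self-contained Python sketch. The class and field names are simplified, hypothetical stand-ins for the real codegen dataclasses, not the actual implementation:

```python
from dataclasses import dataclass, replace
from typing import List, Optional, Union

@dataclass(frozen=True)
class Argument:  # representation (1): one parsed schema argument
    name: str
    type: str

@dataclass(frozen=True)
class TensorOptionsArguments:  # representation (2): grouped TensorOptions
    dtype: Argument
    layout: Argument
    device: Argument
    pin_memory: Argument

GroupedArgument = Union[Argument, TensorOptionsArguments]

TENSOR_OPTIONS_NAMES = ('dtype', 'layout', 'device', 'pin_memory')

def group_arguments(args: List[Argument]) -> List[GroupedArgument]:
    """Collapse a consecutive dtype/layout/device/pin_memory run into one group."""
    out: List[GroupedArgument] = []
    i = 0
    while i < len(args):
        window = args[i:i + len(TENSOR_OPTIONS_NAMES)]
        if tuple(a.name for a in window) == TENSOR_OPTIONS_NAMES:
            out.append(TensorOptionsArguments(*window))
            i += len(TENSOR_OPTIONS_NAMES)
        else:
            out.append(args[i])
            i += 1
    return out

@dataclass(frozen=True)
class CppArgument:  # representation (3): an elaborated C++ formal
    type: str
    name: str
    default: Optional[str]

    def no_default(self) -> 'CppArgument':
        # Functional mutator: the dataclass is frozen, so return a copy
        # with the default stripped rather than mutating in place.
        return replace(self, default=None)
```

The frozen-dataclass-plus-copy pattern is why no_default() can be composed freely during signature construction without aliasing surprises.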
- The actual codegen for TensorMethods/Functions is greatly simplified,
  since (1) all of the heavy lifting is now happening in
  CppSignature(Group) construction, and (2) I don't need to proxy one way
  or another; the new dispatcher translation code is able to handle both
  cases no problem. There is a little faffing about with ordering to
  reduce the old and new diff, which could be removed afterwards.

Here are codegen diffs. For use_c10_dispatcher: full:

```
+// aten::_cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, const TensorOptions & options) {
-    return _cudnn_init_dropout_state(dropout, train, dropout_seed, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
+    static auto op = c10::Dispatcher::singleton()
+        .findSchemaOrThrow("aten::_cudnn_init_dropout_state", "")
+        .typed<Tensor (double, bool, int64_t, c10::optional<ScalarType>, c10::optional<Layout>, c10::optional<Device>, c10::optional<bool>)>();
+    return op.call(dropout, train, dropout_seed, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
 }
```

Otherwise:

```
+// aten::empty_meta(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
 Tensor empty_meta(IntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format) {
-    return empty_meta(size, TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory), memory_format);
+    static auto op = c10::Dispatcher::singleton()
+        .findSchemaOrThrow("aten::empty_meta", "")
+        .typed<Tensor (IntArrayRef, const TensorOptions &, c10::optional<MemoryFormat>)>();
+    return op.call(size, TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory), memory_format);
 }
```

Things that I probably did not get right:

- The Union[Argument, TensorOptionsArguments, ThisArgument] and the Cpp
  variants are starting to get a little unwieldy. Not sure if this means
  I should add a supertype (or at the very least an alias); in some cases
  I do purposely omit one of these from the Union
- Code may not necessarily live in the most logical files. There isn't
  very much rhyme or reason to it.
- The fields on CppSignature. They're not very well constrained and it
  will be better if people don't use them directly.
- Disambiguation. We should do this properly in #44087 and we don't need
  special logic for dropping defaults from faithful signatures; there is
  a more general story here.

Signed-off-by: Edward Z.
Yang ghstack-source-id: 5db980cce350fb3f38c75088b7da1b0061b7c717 Pull Request resolved: https://github.com/pytorch/pytorch/pull/45890 --- tools/codegen/api/cpp.py | 108 ++++-------- tools/codegen/api/dispatcher.py | 132 +++++++------- tools/codegen/api/legacy_dispatcher.py | 4 +- tools/codegen/api/types.py | 230 ++++++++++++++++++++++++- tools/codegen/gen.py | 191 ++++++-------------- 5 files changed, 384 insertions(+), 281 deletions(-) diff --git a/tools/codegen/api/cpp.py b/tools/codegen/api/cpp.py index f8fd2fdbde55..7e74019ea616 100644 --- a/tools/codegen/api/cpp.py +++ b/tools/codegen/api/cpp.py @@ -1,9 +1,7 @@ from tools.codegen.model import * -from tools.codegen.api.types import TensorOptionsArguments, CppArgument, ThisArgument +from tools.codegen.api.types import * import tools.codegen.local as local -from typing import Optional, Sequence, Union, Callable, List, Tuple -import copy -from dataclasses import dataclass +from typing import Optional, Sequence, Union, Callable, List # This file describes the translation of JIT schema to the public C++ # API, which is what people use when they call functions like at::add. 
@@ -197,7 +195,10 @@ def default_expr(d: str, t: Type) -> str: return JIT_TO_CPP_DEFAULT.get(d, d) # Convert an argument into its C++ API form -def argument(a: Union[Argument, TensorOptionsArguments, ThisArgument]) -> CppArgument: + +def argument_not_this( + a: Union[Argument, TensorOptionsArguments], +) -> CppArgument: if isinstance(a, Argument): return CppArgument( type=argument_type(a), @@ -205,13 +206,6 @@ def argument(a: Union[Argument, TensorOptionsArguments, ThisArgument]) -> CppArg default=default_expr(a.default, a.type) if a.default is not None else None, argument=a, ) - elif isinstance(a, ThisArgument): - return CppArgument( - type=argument_type(a.argument), - name="const_cast<Tensor&>(*this)", # this is an abuse but it's convenient - default=None, - argument=a, - ) elif isinstance(a, TensorOptionsArguments): default = None if all(x.default == "None" for x in a.all()): @@ -227,51 +221,34 @@ def argument(a: Union[Argument, TensorOptionsArguments, ThisArgument]) -> CppArg else: assert_never(a) -@dataclass(frozen=True) -class CppSignature: - returns: Tuple[Return, ...] - arguments: Tuple[Union[Argument, TensorOptionsArguments, ThisArgument], ...] - - def cpp_arguments(self) -> Sequence[CppArgument]: - return list(map(argument, self.arguments)) - - # Return arguments as a comma separated list, i.e. like they would be in a C++ - # function signature. Include default values for arguments. - def cpp_arguments_str(self, with_defaults: bool) -> str: - args_without_this = [argument(a) for a in self.arguments if not isinstance(a, ThisArgument)] - if with_defaults: - return ', '.join(map(str, args_without_this)) - else: - return ', '.join(map(lambda s: s.str_no_default(), args_without_this)) - - -@dataclass(frozen=True) -class CppSignatureGroup: - # arguments contains the arguments for the C++ signature as it is represented - # in the JIT schema.
- signature: CppSignature - - # gathered_signature is an alternative C++ signature in which TensorOptions are - # gathered into one TensorOptions object instead of being scattered into - # ScalarType, Layout, Device. This is only present for factory operators, - # other operators have this set to None. This can be used to generate a - # convenience API in the C++ frontend so users can call using TensorOptions objects. - gathered_signature: Optional[CppSignature] - - # If it is a factory op, this returns the arguments for the convenience API - # that takes TensorOptions. If it is not a factory op and doesn't have - # a gathered signature, then this returns the regular signature instead. - def signature_prefer_gathered(self) -> CppSignature: - if self.gathered_signature is not None: - return self.gathered_signature - else: - return self.signature +def argument( + a: Union[Argument, TensorOptionsArguments, ThisArgument], +) -> Union[CppSingleArgumentPack, CppThisArgumentPack]: + if isinstance(a, ThisArgument): + return CppThisArgumentPack(argument=a, type=argument_type(a.argument)) + else: + return CppSingleArgumentPack(argument_not_this(a)) +def argument_faithful( + a: Union[Argument, TensorOptionsArguments, ThisArgument], +) -> CppArgumentPack: + if isinstance(a, TensorOptionsArguments): + return CppTensorOptionsArgumentPack( + argument=a, + dtype=argument_not_this(a.dtype), + layout=argument_not_this(a.layout), + device=argument_not_this(a.device), + pin_memory=argument_not_this(a.pin_memory), + ) + else: + return argument(a) -def signature_group( - func: FunctionSchema, *, method: bool = False, -) -> CppSignatureGroup: +# NB: this unconditionally groups arguments +def group_arguments( + func: FunctionSchema, *, method: bool +) -> Sequence[Union[Argument, TensorOptionsArguments, ThisArgument]]: args: List[Union[Argument, ThisArgument, TensorOptionsArguments]] = [] + args.extend(func.out_arguments) if method: @@ -279,9 +256,8 @@ def signature_group( else: 
args.extend(func.arguments) - gathered_args = copy.deepcopy(args) - # group up arguments for tensor options + def pred(name: str, ty: Type) -> Callable[[Argument], bool]: return lambda a: a.name == name and a.type in [ty, OptionalType(ty)] predicates = [ # order matters @@ -291,16 +267,14 @@ def pred(name: str, ty: Type) -> Callable[[Argument], bool]: pred('pin_memory', Type.parse('bool')), ] - has_tensoroptions_argument = False i = 0 while i < len(func.kwarg_only_arguments): # If there is enough space... if i <= len(func.kwarg_only_arguments) - len(predicates): # And the next len(predicates) arguments look like TensorOptions arguments if all(p(a) for p, a in zip(predicates, func.kwarg_only_arguments[i : i + len(predicates)])): - has_tensoroptions_argument = True # Group them together as one argument - gathered_args.append(TensorOptionsArguments( + args.append(TensorOptionsArguments( dtype=func.kwarg_only_arguments[i], layout=func.kwarg_only_arguments[i + 1], device=func.kwarg_only_arguments[i + 2], @@ -308,19 +282,7 @@ def pred(name: str, ty: Type) -> Callable[[Argument], bool]: )) i += len(predicates) continue - gathered_args.append(func.kwarg_only_arguments[i]) + args.append(func.kwarg_only_arguments[i]) i += 1 - args.extend(func.kwarg_only_arguments) - - if has_tensoroptions_argument: - return CppSignatureGroup( - signature=CppSignature(arguments=tuple(args), returns=tuple(func.returns)), - gathered_signature=CppSignature(arguments=tuple(gathered_args), returns=tuple(func.returns)), - ) - else: - assert gathered_args == args - return CppSignatureGroup( - signature=CppSignature(arguments=tuple(args), returns=tuple(func.returns)), - gathered_signature=None, - ) + return args diff --git a/tools/codegen/api/dispatcher.py b/tools/codegen/api/dispatcher.py index 6cb141c22f99..e32510ec1e46 100644 --- a/tools/codegen/api/dispatcher.py +++ b/tools/codegen/api/dispatcher.py @@ -1,11 +1,10 @@ from tools.codegen.model import * -from tools.codegen.api.types import 
CppArgument, DispatcherExpr, TensorOptionsArguments, \ - DispatcherArgument, ThisArgument, LegacyDispatcherArgument -from tools.codegen.api import cpp +from tools.codegen.api.types import * +import tools.codegen.api.cpp as cpp import tools.codegen.api.legacy_dispatcher as legacy_dispatcher import tools.codegen.local as local -from enum import Enum + import itertools from typing import Sequence, Optional @@ -75,73 +74,86 @@ def arguments(func: FunctionSchema) -> Sequence[DispatcherArgument]: for la in legacy_dispatcher.arguments(func) ] -# TODO GATHER is only needed for non-c10-full ops, remove later. -ProcessTensoroptions = Enum('ProcessTensoroptions', ('GATHER', 'SCATTER', 'PASS_THROUGH')) - - # Given a set of CppArguments in scope, return a sequence of dispatcher # expressions that translate the cpp API into dispatcher API -def cppargument_exprs(a: CppArgument, - *, - tensor_options: Optional[CppArgument], - process_tensoroptions: ProcessTensoroptions = ProcessTensoroptions.PASS_THROUGH - ) -> Sequence[DispatcherExpr]: - if isinstance(a.argument, TensorOptionsArguments): - if process_tensoroptions == ProcessTensoroptions.SCATTER: - ta = a.argument +# +# WARNING: This is unsound if you pass it CppArgument when you were +# supposed to pass it CppTensorOptionsArguments, it will directly +# translate device to device, which will give you the wrong signature +# for dispatcher. 
If Argument "knew" that it was part of a +# TensorOptions that would help us dynamically test for this case +def cppargument_exprs( + a: CppArgumentPack, + *, tensor_options: Optional[CppArgument] +) -> Sequence[DispatcherExpr]: + if isinstance(a, CppSingleArgumentPack): + if isinstance(a.this.argument, TensorOptionsArguments): + if local.use_c10_dispatcher() is UseC10Dispatcher.full: + # Scatter + ta = a.this.argument + name = a.this.name + return [ + DispatcherExpr(type=argument_type(ta.dtype), expr=f'optTypeMetaToScalarType({name}.dtype_opt())'), + DispatcherExpr(type=argument_type(ta.layout), expr=f'{name}.layout_opt()'), + DispatcherExpr(type=argument_type(ta.device), expr=f'{name}.device_opt()'), + DispatcherExpr(type=argument_type(ta.pin_memory), expr=f'{name}.pinned_memory_opt()'), # weird discrep + ] + else: + # No-op + return [DispatcherExpr(type='const TensorOptions &', expr=a.this.name)] + elif isinstance(a.this.argument, Argument): + if a.this.name == 'memory_format' and \ + tensor_options is not None and \ + local.use_c10_dispatcher() is UseC10Dispatcher.full: + return [DispatcherExpr( + type=argument_type(a.this.argument), + expr=f'c10::impl::check_tensor_options_and_extract_memory_format({tensor_options.name}, {a.this.name})') + ] + else: + return [DispatcherExpr(type=argument_type(a.this.argument), expr=a.this.name)] + else: + assert_never(a.this.argument) + elif isinstance(a, CppTensorOptionsArgumentPack): + if local.use_c10_dispatcher() is UseC10Dispatcher.full: + # No-op return [ - DispatcherExpr(type=argument_type(ta.dtype), expr=f'optTypeMetaToScalarType({a.name}.dtype_opt())'), - DispatcherExpr(type=argument_type(ta.layout), expr=f'{a.name}.layout_opt()'), - DispatcherExpr(type=argument_type(ta.device), expr=f'{a.name}.device_opt()'), - DispatcherExpr(type=argument_type(ta.pin_memory), expr=f'{a.name}.pinned_memory_opt()'), # weird discrep + expr + for sub_a in a.explicit_arguments() # NB: don't really care about explicitness here + for expr 
in cppargument_exprs(CppSingleArgumentPack(sub_a), tensor_options=tensor_options) ] - elif process_tensoroptions == ProcessTensoroptions.GATHER: - return [ - DispatcherExpr( - type='const TensorOptions &', - expr="TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory)")] else: - assert process_tensoroptions == ProcessTensoroptions.PASS_THROUGH - return [DispatcherExpr(type='const TensorOptions &', expr=a.name)] - elif isinstance(a.argument, ThisArgument): - return [DispatcherExpr(type=argument_type(a.argument.argument), expr=a.name)] - elif isinstance(a.argument, Argument): - if a.name == 'memory_format' and tensor_options is not None and local.use_c10_dispatcher() is UseC10Dispatcher.full: + # Gather return [DispatcherExpr( - type=argument_type(a.argument), - expr=f'c10::impl::check_tensor_options_and_extract_memory_format({tensor_options.name}, {a.name})') - ] - else: - return [DispatcherExpr(type=argument_type(a.argument), expr=a.name)] + type='const TensorOptions &', + expr=f'TensorOptions().dtype({a.dtype.name}).layout({a.layout.name})' + f'.device({a.device.name}).pinned_memory({a.pin_memory.name})', + )] + elif isinstance(a, CppThisArgumentPack): + return [DispatcherExpr( + type=a.type, + expr='const_cast<Tensor&>(*this)', + )] else: - assert_never(a.argument) + assert_never(a) -def cpparguments_exprs(args: Sequence[CppArgument], process_tensoroptions: ProcessTensoroptions) -> Sequence[DispatcherExpr]: - tensor_options = next((a for a in args if isinstance(a.argument, TensorOptionsArguments)), None) - return [r for a in args for r in cppargument_exprs(a, - tensor_options=tensor_options, - process_tensoroptions=process_tensoroptions)] +def cpparguments_exprs(args: Sequence[CppArgumentPack]) -> Sequence[DispatcherExpr]: + tensor_options = next( + (a.this for a in args if isinstance(a, CppSingleArgumentPack) and + isinstance(a.this.argument, TensorOptionsArguments)), + None + ) + return [r for a in args for r in cppargument_exprs(a,
tensor_options=tensor_options)] # I don't think this is entirely sound, but it should be reasonably # close def legacydispatcherarguments_exprs(args: Sequence[LegacyDispatcherArgument]) -> Sequence[DispatcherExpr]: - if local.use_c10_dispatcher() is UseC10Dispatcher.full: - process_tensoroptions = ProcessTensoroptions.SCATTER - else: - process_tensoroptions = ProcessTensoroptions.PASS_THROUGH - return cpparguments_exprs([CppArgument(type=a.type, - name=a.name, - default=None, - argument=a.argument) for a in args], - process_tensoroptions=process_tensoroptions) + return cpparguments_exprs([ + CppSingleArgumentPack(CppArgument(type=a.type, name=a.name, default=None, argument=a.argument)) + for a in args + ]) def exprs(args: Sequence[DispatcherArgument]) -> Sequence[DispatcherExpr]: - if local.use_c10_dispatcher() is UseC10Dispatcher.full: - process_tensoroptions = ProcessTensoroptions.SCATTER - else: - process_tensoroptions = ProcessTensoroptions.PASS_THROUGH - return cpparguments_exprs([CppArgument(type=a.type, - name=a.name, - default=None, - argument=a.argument) for a in args], - process_tensoroptions=process_tensoroptions) + return cpparguments_exprs([ + CppSingleArgumentPack(CppArgument(type=a.type, name=a.name, default=None, argument=a.argument)) + for a in args + ]) diff --git a/tools/codegen/api/legacy_dispatcher.py b/tools/codegen/api/legacy_dispatcher.py index 160d39495951..fd2866b8b744 100644 --- a/tools/codegen/api/legacy_dispatcher.py +++ b/tools/codegen/api/legacy_dispatcher.py @@ -71,6 +71,4 @@ def argument(a: Union[Argument, ThisArgument, TensorOptionsArguments]) -> Legacy assert_never(a) def arguments(func: FunctionSchema) -> Sequence[LegacyDispatcherArgument]: - signature_group = cpp.signature_group(func) - args = signature_group.signature_prefer_gathered().arguments - return list(map(argument, args)) + return list(map(argument, cpp.group_arguments(func, method=False))) diff --git a/tools/codegen/api/types.py b/tools/codegen/api/types.py index 
cb315cfc7525..6f003815ed96 100644 --- a/tools/codegen/api/types.py +++ b/tools/codegen/api/types.py @@ -1,6 +1,14 @@ from tools.codegen.model import * from dataclasses import dataclass -from typing import Optional, Union, Sequence +from typing import Optional, Union, Sequence, Tuple, TypeVar + +_T = TypeVar('_T') + +# ------------------------------------------------------------------- # + +# Grouping arguments + +# ------------------------------------------------------------------- # # Represents the implicit *this argument for method calls in C++ API @dataclass(frozen=True) @@ -18,7 +26,13 @@ class TensorOptionsArguments: def all(self) -> Sequence[Argument]: return [self.dtype, self.layout, self.device, self.pin_memory] -# Describe a argument (e.g., the x in "f(int x)") in the C++ API +# ------------------------------------------------------------------- # + +# cpp types + +# ------------------------------------------------------------------- # + +# Describe a single argument (e.g., the x in "f(int x)") in the C++ API. @dataclass(frozen=True) class CppArgument: # C++ type, e.g., int @@ -29,8 +43,7 @@ class CppArgument: default: Optional[str] # The JIT argument(s) this formal was derived from. May # correspond to multiple arguments if this is TensorOptions! - # May also correspond to the implicit *this argument! 
- argument: Union[Argument, TensorOptionsArguments, ThisArgument] + argument: Union[Argument, TensorOptionsArguments] # Default string representation prints the most elaborated form # of the formal @@ -40,15 +53,221 @@ def __str__(self) -> str: mb_default = f"={self.default}" return f"{self.type} {self.name}{mb_default}" + # List of CppArguments that this structure explicitly represents + def explicit_arguments(self) -> Sequence['CppArgument']: + return [self] + + # Return a copy of CppArgument with defaults removed + def no_default(self) -> 'CppArgument': + return CppArgument( + type=self.type, + name=self.name, + default=None, + argument=self.argument, + ) + # However, you might also find the version with no default useful def str_no_default(self) -> str: return f"{self.type} {self.name}" +# An argument pack groups several CppArguments together into +# a semantically meaningful unit. Don't let the packing +# deceive you: if you look at these arguments in C++, they're +# always packing (in analogy to how parameter packs in C++ +# templates actually turn into separate arguments when you +# unpack them). +@dataclass(frozen=True) +class CppArgumentPackIface: + # Return this argument pack, but with default stripped + def no_default(self: _T) -> _T: + raise NotImplementedError + + # Unpack the pack into a sequence of arguments, discarding + # semantic information, and also discarding the implicit this + # argument that doesn't actually show up in declarations + def explicit_arguments(self) -> Sequence[CppArgument]: + raise NotImplementedError + +# Lifts a single CppArgument into a pack. 
+@dataclass(frozen=True) +class CppSingleArgumentPack(CppArgumentPackIface): + this: CppArgument + + def no_default(self) -> 'CppSingleArgumentPack': + return CppSingleArgumentPack(self.this.no_default()) + + @property + def type(self) -> str: + return self.this.type + + def explicit_arguments(self) -> Sequence[CppArgument]: + return [self.this] + +# Describe an implicit this argument (*this) on methods in the C++ API. +# We don't use CppSingleArgumentPack because these never show up +# in the explicit arguments list +@dataclass(frozen=True) +class CppThisArgumentPack(CppArgumentPackIface): + # The grouped JIT argument this formal was derived from + argument: ThisArgument + + # C++ type, e.g., Tensor& + type: str + + # this arguments are never defaulted + def no_default(self) -> 'CppThisArgumentPack': + return self + + # The this argument is implicit, so it's not included in the + # explicit arguments list. + def explicit_arguments(self) -> Sequence[CppArgument]: + return [] + +# Semantically represents a bundle of CppArguments that collectively +# represent a TensorOptions. If you don't care about TensorOptions +# processing, think of this as just a list of four CppArguments; however +# if you need to bundle these arguments back into a single +# TensorOptions, it will be easiest to operate on this struct as a +# whole. +# +# NOTE: this does NOT represent a 'const TensorOptions&' argument. 
+# If you have one of those, it will be CppSingleArgumentPack +@dataclass(frozen=True) +class CppTensorOptionsArgumentPack(CppArgumentPackIface): + argument: TensorOptionsArguments + dtype: CppArgument + layout: CppArgument + device: CppArgument + pin_memory: CppArgument + + # Remove the defaults from each of the constituent arguments + # representing the TensorOptions + def no_default(self) -> 'CppTensorOptionsArgumentPack': + return CppTensorOptionsArgumentPack( + argument=self.argument, + dtype=self.dtype.no_default(), + layout=self.layout.no_default(), + device=self.device.no_default(), + pin_memory=self.pin_memory.no_default(), + ) + + # Flatten the TensorOptions into individual CppArguments + def explicit_arguments(self) -> Sequence[CppArgument]: + return [self.dtype, self.layout, self.device, self.pin_memory] + +# Use this instead of CppArgumentPackIface, as this is a closed union +CppArgumentPack = Union[ + CppSingleArgumentPack, + CppThisArgumentPack, + CppTensorOptionsArgumentPack, +] + @dataclass(frozen=True) class CppExpr: type: str expr: str +# A CppSignature represents a single overload in the C++ API. For +# any given function schema, there may be multiple CppSignatures +# corresponding to it, based on how we desugar to C++. See also +# CppSignatureGroup. +@dataclass(frozen=True) +class CppSignature: + # The schema this signature is derived from + func: FunctionSchema + + # Enough information about the C++ types to generate a full + # C++ type signature for this signature. I'm not too sure + # if these are the right representations, so for now this + # is intended to be more abstract. + _argument_packs: Tuple[CppArgumentPack, ...] + _returns_type: str + + # Return the unpacked argument structure of this signature, + # discarding information about which arguments are semantically + # related to each other. 
+ def arguments(self) -> Sequence[CppArgument]: + return [sub_a for a in self._argument_packs for sub_a in a.explicit_arguments()] + + # Return the packed argument structure of this signature. This preserves + # high-level structure of the arguments so you may find it easier to do + # translations working with this representation. + def argument_packs(self) -> Sequence[CppArgumentPack]: + return self._argument_packs + + # Render the C++ declaration for this signature + def decl(self) -> str: + cpp_args_str = ', '.join(map(str, self.arguments())) + return f"{self._returns_type} {cpp.name(self.func)}({cpp_args_str})" + + # Render the C++ definition for this signature, not including + # the body (with curly braces) + def defn(self, prefix: str = "") -> str: + cpp_args_str = ', '.join(a.str_no_default() for a in self.arguments()) + return f"{self._returns_type} {prefix}{cpp.name(self.func)}({cpp_args_str})" + + # NB: This constructor knows how to disambiguate defaults when + # faithful is True. Ideally this would live as an external process + # see https://github.com/pytorch/pytorch/pull/45666 + @staticmethod + def _from_grouped_arguments( + func: FunctionSchema, + arguments: Sequence[Union[Argument, TensorOptionsArguments, ThisArgument]], + *, + faithful: bool + ) -> 'CppSignature': + if faithful: + # Faithful signatures will ungroup arguments into argument + # packs. + # + # After this, manually do overload disambiguation, by + # dropping defaults from the faithful signature. In + # principle, we should be able to do this at some later + # point in time with other overload disambiguation + argument_packs = tuple( + cpp.argument_faithful(a).no_default() for a in arguments + ) + else: + argument_packs = tuple( + cpp.argument(a) for a in arguments + ) + return CppSignature( + func=func, + _argument_packs=argument_packs, + _returns_type=cpp.returns_type(func.returns), + ) + +# Represents group of all CppSignatures associated with a +# FunctionSchema. 
Right now, that's the regular, user-visible +# signature, as well as a "faithful" signature which doesn't +# have grouping. +@dataclass(frozen=True) +class CppSignatureGroup: + func: FunctionSchema + signature: CppSignature + faithful_signature: Optional[CppSignature] + + @staticmethod + def from_schema(func: FunctionSchema, *, method: bool) -> 'CppSignatureGroup': + grouped_arguments = cpp.group_arguments(func, method=method) + faithful_signature: Optional[CppSignature] + if any(isinstance(a, TensorOptionsArguments) for a in grouped_arguments): + faithful_signature = CppSignature._from_grouped_arguments(func, grouped_arguments, faithful=True) + else: + faithful_signature = None + signature = CppSignature._from_grouped_arguments(func, grouped_arguments, faithful=False) + return CppSignatureGroup( + func=func, + signature=signature, + faithful_signature=faithful_signature, + ) + +# ------------------------------------------------------------------- # + +# dispatcher/legacy_dispatcher types + +# ------------------------------------------------------------------- # + @dataclass(frozen=True) class DispatcherExpr: type: str @@ -93,3 +312,6 @@ def str_with_default(self) -> str: if self.default is not None: mb_default = f"={self.default}" return f"{self.type} {self.name}{mb_default}" + +# Functions only, no types +import tools.codegen.api.cpp as cpp diff --git a/tools/codegen/gen.py b/tools/codegen/gen.py index 0f386d8520f7..3edb6c818902 100644 --- a/tools/codegen/gen.py +++ b/tools/codegen/gen.py @@ -15,7 +15,6 @@ from tools.codegen.model import * from tools.codegen.api.types import * import tools.codegen.api.cpp as cpp -from tools.codegen.api.cpp import CppSignature import tools.codegen.api.dispatcher as dispatcher import tools.codegen.api.legacy_dispatcher as legacy_dispatcher import tools.codegen.local as local @@ -314,28 +313,6 @@ def func(f: NativeFunction) -> Optional[str]: return func -# Return a string with a comma separated list of expressions that could be used 
-# to call this operator. This can be used to generate code that wraps operators -# and calls back into them. The process_tensoroptions argument determines how -# tensor options should be treated. They can be -# - PASS_THROUGH: Don't do anything, just handle them as regular arguments -# - SCATTER: Expect a `TensorOptions options` in the scope and scatter it into `options.dtype, ...` -# - GATHER: Expect `dtype, ...` in the scope and gather them into a TensorOptions for calling -def exprs_str(signature: CppSignature, - process_tensoroptions: dispatcher.ProcessTensoroptions = dispatcher.ProcessTensoroptions.PASS_THROUGH, - exclude_this: bool = False, - ) -> str: - args = signature.cpp_arguments() - if exclude_this: - args = [a for a in args if not isinstance(a.argument, ThisArgument)] - exprs = dispatcher.cpparguments_exprs(args, process_tensoroptions=process_tensoroptions) - return ', '.join(map(lambda a: a.expr, exprs)) - -def types_str(signature: CppSignature) -> str: - args = signature.cpp_arguments() - exprs = dispatcher.cpparguments_exprs(args, process_tensoroptions=dispatcher.ProcessTensoroptions.PASS_THROUGH) - return ', '.join(map(lambda a: a.type, exprs)) - # Generates Function.cpp and Function.h. These files provide the # functional public C++ API, and the scaffolding to call into # the dispatcher from these functions. See also compute_tensor_method. 
@@ -347,73 +324,43 @@ def go(f: NativeFunction) -> Optional[str]: if Variant.function not in f.variants: return None - cpp_returns_type = cpp.returns_type(f.func.returns) - cpp_name = cpp.name(f.func) - signature_group = cpp.signature_group(f.func, method=False) + name = cpp.name(f.func) + + sig_group = CppSignatureGroup.from_schema(f.func, method=False) if target is Target.DECLARATION: - if signature_group.gathered_signature is None: - # There's no TensorOptions - return f""" -CAFFE2_API {cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=True)}); -""" - else: - # There's TensorOptions in the API. Create 2 APIs - one taking the TensorOptions object ("gathered_signature"), - # and one taking a scattered signature with ScalarType, Layout, Device separately ("signature"). - # The gathered_signature already exists in several older PyTorch versions and had default arguments. - # For backward compatibility, we left it unchanged and added the scattered API on top of it. - # Note that the scattered API cannot have default arguments or calls will be ambigious. 
- return f""" -CAFFE2_API {cpp_returns_type} {cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=True)}); -CAFFE2_API {cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}); -""" + result = f"\nCAFFE2_API {sig_group.signature.decl()};\n" + if sig_group.faithful_signature is not None: + result += f"CAFFE2_API {sig_group.faithful_signature.decl()};\n" + return result assert target is Target.DEFINITION - dispatcher_returns_type = dispatcher.returns_type(f.func.returns) + def generate_defn(sig: CppSignature) -> str: + dispatcher_exprs = dispatcher.cpparguments_exprs(sig.argument_packs()) + dispatcher_returns_type = dispatcher.returns_type(f.func.returns) + dispatcher_types_str = ', '.join(map(lambda a: a.type, dispatcher_exprs)) + dispatcher_exprs_str = ', '.join(map(lambda a: a.expr, dispatcher_exprs)) - if signature_group.gathered_signature is None: - # There's no TensorOptions - return f""" -// aten::{f.func} -{cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) {{ - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.signature)})>(); - return op.call({exprs_str(signature_group.signature)}); -}} -""" - elif local.use_c10_dispatcher() is UseC10Dispatcher.full: - # for c10-full ops, the scattered version is the real op and the gathered version is a proxy - # calling into the scattered version return f""" // aten::{f.func} -{cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) {{ +{sig.defn()} {{ static auto op = c10::Dispatcher::singleton() .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.signature)})>(); - return op.call({exprs_str(signature_group.signature)}); -}} -{cpp_returns_type} 
{cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=False)}) {{ - return {cpp_name}({exprs_str(signature_group.gathered_signature, dispatcher.ProcessTensoroptions.SCATTER)}); -}} -""" - else: - # for non-c10-full ops, the gathered version is the real op and the scattered version is a proxy - # calling into the gathered version - return f""" -// aten::{f.func} -{cpp_returns_type} {cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=False)}) {{ - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.gathered_signature)})>(); - return op.call({exprs_str(signature_group.gathered_signature)}); -}} -{cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) {{ - return {cpp_name}({exprs_str(signature_group.gathered_signature, dispatcher.ProcessTensoroptions.GATHER)}); + .typed<{dispatcher_returns_type} ({dispatcher_types_str})>(); + return op.call({dispatcher_exprs_str}); }} """ + result = generate_defn(sig_group.signature) + if sig_group.faithful_signature is not None: + if local.use_c10_dispatcher() is UseC10Dispatcher.full: + result = f"{generate_defn(sig_group.faithful_signature)}\n{result}" + else: + result = f"{result}\n{generate_defn(sig_group.faithful_signature)}" + + return result + return go # Generates TensorBody.h (sic) and TensorMethods.cpp. 
These files provide the @@ -429,81 +376,43 @@ def go(f: NativeFunction) -> Optional[str]: assert len(f.func.arguments) > 0 assert sum(a.name == 'self' for a in f.func.arguments) == 1 - cpp_name = cpp.name(f.func) - cpp_returns_type = cpp.returns_type(f.func.returns) - signature_group = cpp.signature_group(f.func, method=True) + name = cpp.name(f.func) + + sig_group = CppSignatureGroup.from_schema(f.func, method=True) if target is Target.DECLARATION: - if signature_group.gathered_signature is None: - # There's no TensorOptions. Just create the API without concern for TensorOptions. - return f"{cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=True)}) const;" - else: - # There's TensorOptions in the API. Create 2 APIs - one taking the TensorOptions object ("gathered_signature"), - # and one taking a scattered signature with ScalarType, Layout, Device separately ("signature"). - # The gathered_signature already exists in several older PyTorch versions and had default arguments. - # For backward compatibility, we left it unchanged and added the scattered API on top of it. - # Note that the scattered API cannot have default arguments or calls will be ambigious. 
- return f""" -{cpp_returns_type} {cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=True)}) const; -{cpp_returns_type} {cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) const; -""" + result = f"{sig_group.signature.decl()} const;\n" + if sig_group.faithful_signature is not None: + result += f"{sig_group.faithful_signature.decl()} const;\n" + return result assert target is Target.DEFINITION - dispatcher_returns_type = dispatcher.returns_type(f.func.returns) - - result = f""" -// aten::{f.func} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) const {{ - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.signature)})>(); - return op.call({exprs_str(signature_group.signature)}); -}} -""" + def generate_defn(sig: CppSignature) -> str: + dispatcher_exprs = dispatcher.cpparguments_exprs(sig.argument_packs()) + dispatcher_returns_type = dispatcher.returns_type(f.func.returns) + dispatcher_types_str = ', '.join(map(lambda a: a.type, dispatcher_exprs)) + dispatcher_exprs_str = ', '.join(map(lambda a: a.expr, dispatcher_exprs)) - if signature_group.gathered_signature is None: - # There's no TensorOptions return f""" // aten::{f.func} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) const {{ +{sig.defn("Tensor::")} const {{ static auto op = c10::Dispatcher::singleton() .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.signature)})>(); - return op.call({exprs_str(signature_group.signature)}); -}} -""" - elif local.use_c10_dispatcher() is UseC10Dispatcher.full: - # for c10-full ops, the scattered version is the real op and the gathered version is a proxy - # calling into the 
scattered version - return f""" -// aten::{f.func} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) const {{ - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.signature)})>(); - return op.call({exprs_str(signature_group.signature)}); -}} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=False)}) const {{ - return {cpp_name}({exprs_str(signature_group.gathered_signature, dispatcher.ProcessTensoroptions.SCATTER, exclude_this=True)}); -}} -""" - else: - # for non-c10-full ops, the gathered version is the real op and the scattered version is a proxy - # calling into the gathered version - return f""" -// aten::{f.func} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.gathered_signature.cpp_arguments_str(with_defaults=False)}) const {{ - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}") - .typed<{dispatcher_returns_type} ({types_str(signature_group.gathered_signature)})>(); - return op.call({exprs_str(signature_group.gathered_signature)}); -}} -{cpp_returns_type} Tensor::{cpp_name}({signature_group.signature.cpp_arguments_str(with_defaults=False)}) const {{ - return {cpp_name}({exprs_str(signature_group.gathered_signature, dispatcher.ProcessTensoroptions.GATHER, exclude_this=True)}); + .typed<{dispatcher_returns_type} ({dispatcher_types_str})>(); + return op.call({dispatcher_exprs_str}); }} """ + result = f"{generate_defn(sig_group.signature)}" + if sig_group.faithful_signature is not None: + if local.use_c10_dispatcher() is UseC10Dispatcher.full: + result = f"{generate_defn(sig_group.faithful_signature)}\n{result}" + else: + result = f"{result}\n{generate_defn(sig_group.faithful_signature)}" + + return result + return go # Generates 
ATenOpList.cpp, a runtime accessible list of all aten @@ -823,8 +732,8 @@ def compute_declaration_yaml(f: NativeFunction) -> object: kwarg_only_set = set(a.name for a in f.func.kwarg_only_arguments) out_arg_set = set(a.name for a in f.func.out_arguments) - signature_group = cpp.signature_group(f.func) - cpp_args = signature_group.signature_prefer_gathered().cpp_arguments() + sig_group = CppSignatureGroup.from_schema(f.func, method=False) + cpp_args = sig_group.signature.arguments() arguments = [ compute_cpp_argument_yaml( cpp_a, schema_order=False,
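
A closing reviewer's note on the emission order used by the new `generate_defn` helpers in both Functions and TensorMethods codegen: when a faithful signature exists, the faithful definition is emitted first for c10-full ops and last otherwise. That ordering logic can be sketched as follows; `generate_defn` here is a stand-in marker function, not the real renderer.

```python
from enum import Enum

# Simplified stand-in for the real UseC10Dispatcher enum in tools.codegen.model.
class UseC10Dispatcher(Enum):
    full = 0
    with_codegenerated_unboxing_wrapper = 1

def generate_defn(sig_name: str) -> str:
    # Stand-in for the real generate_defn, which renders a full C++ wrapper
    # body; here we just return a marker string for the chosen signature.
    return f"defn({sig_name})"

def emit(has_faithful: bool, dispatcher: UseC10Dispatcher) -> str:
    # Mirrors the ordering in gen.py: the non-faithful definition is always
    # produced; a faithful definition, when present, is prepended for
    # c10-full ops and appended otherwise.
    result = generate_defn("signature")
    if has_faithful:
        if dispatcher is UseC10Dispatcher.full:
            result = f"{generate_defn('faithful')}\n{result}"
        else:
            result = f"{result}\n{generate_defn('faithful')}"
    return result
```

Both call sites in the patch (free functions and Tensor methods) follow this same pattern, which is what lets the rewrite drop the separate gather/scatter branches.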