From b4651cb260b9b185d9298f3e1523ca87b57c4227 Mon Sep 17 00:00:00 2001
From: Sebastian Messmer <messmer@fb.com>
Date: Thu, 24 Sep 2020 08:18:40 -0700
Subject: [PATCH] VariableKernel calls into scattered C++ api

Pull Request resolved: https://github.com/pytorch/pytorch/pull/44158

Previously, the C++ API only supported calling ops with a gathered TensorOptions object. So even if the VariableKernel took scattered arguments,
it had to re-gather them to call into the C++ API. But a diff stacked below this one introduced a scattered API for the C++ frontend.

This reaps the benefits and makes sure that if the Variable kernel gets scattered arguments (i.e. it's a c10-full op), then it passes those on without regathering
ghstack-source-id: 112810178

Differential Revision: [D23512538](https://our.internmc.facebook.com/intern/diff/D23512538/)
---
 tools/autograd/gen_autograd.py      |  2 ++
 tools/autograd/gen_variable_type.py | 18 +++++++++---------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py
index 2aaba65b9a79..c12e9b2003d8 100644
--- a/tools/autograd/gen_autograd.py
+++ b/tools/autograd/gen_autograd.py
@@ -128,6 +128,8 @@ def load_aten_declarations(path):
 
         for arg in declaration['arguments']:
             arg['simple_type'] = get_simple_type(arg)
+        for arg in declaration['schema_order_arguments']:
+            arg['simple_type'] = get_simple_type(arg)
         for ret in declaration['returns']:
             ret['simple_type'] = get_simple_type(ret)
 
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 406b65079838..283f0153faae 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -245,9 +245,6 @@
 UNPACK_TENSOR = CodeTemplate("""\
 auto${ref} ${arg_name}_ = unpack${suffix}(${arg_name}, "${arg_name}", ${arg_pos});""")
 
-UNPACK_OPTIONS = CodeTemplate("""\
-auto ${arg_name}_ = TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory);""")
-
 LEGACY_UNPACK_OPTIONS = CodeTemplate("""\
 auto ${arg_name}_ = TensorOptions(${arg_name});""")
 
@@ -1236,7 +1233,12 @@ def requires_unpack(arg):
     body = []
     unpacked_args = []
     unpacked_args_simple_type = {}
-    for i, arg in enumerate(declaration['arguments']):
+    if declaration['use_c10_dispatcher'] == 'full':
+        arguments = declaration['schema_order_arguments']
+    else:
+        assert declaration['use_c10_dispatcher'] == 'with_codegenerated_unboxing_wrapper'
+        arguments = declaration['arguments']
+    for i, arg in enumerate(arguments):
         if not requires_unpack(arg):
             unpacked_args.append(arg['name'])
             unpacked_args_simple_type[arg['name']] = arg['simple_type']
@@ -1258,11 +1260,9 @@ def requires_unpack(arg):
             # Okay, we are abusing the definition of 'unpack' here a bit,
             # although it's still getting the non-variable from the variable
             # (in this case via TensorOptions rather than Variable/Tensor).
-            if declaration['use_c10_dispatcher'] == 'full':
-                body.append(UNPACK_OPTIONS.substitute(arg_name=arg['name']))
-            else:
-                assert declaration['use_c10_dispatcher'] == 'with_codegenerated_unboxing_wrapper'
-                body.append(LEGACY_UNPACK_OPTIONS.substitute(arg_name=arg['name']))
+            assert declaration['use_c10_dispatcher'] == 'with_codegenerated_unboxing_wrapper', \
+                "VariableKernel shouldn't take TensorOptions if the op is c10-full"
+            body.append(LEGACY_UNPACK_OPTIONS.substitute(arg_name=arg['name']))
 
         unpacked_args.append(arg['name'] + '_')
         unpacked_args_simple_type[arg['name'] + '_'] = arg['simple_type']