Update on "Support device map for distributed autograd while using TensorPipe."

TensorPipe's `set_device_map` option was applied during the forward pass. However, if we ran the backward pass for the graph, we would not automatically pick up the reverse device mapping. As a result, users had to specify both the forward and backward device mappings, which is very tedious to do. In this PR, I've added this functionality such that TensorPipe automatically picks up the reverse device mapping during the backward pass. This is done by storing the appropriate device mapping in the "recv" autograd function for distributed autograd. #Closes: #44170 Differential Revision: [D23751975](https://our.internmc.facebook.com/intern/diff/D23751975/) **NOTE FOR REVIEWERS**: This PR has internal Facebook specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D23751975/)! [ghstack-poisoned]
pytorch · Dec 24, 2020 · 6dbfacc · 6dbfacc
2 parents 59fd8eb + 55b431b
commit 6dbfacc
Show file tree

Hide file tree

Showing 3,228 changed files with 233,874 additions and 79,064 deletions.
diff --git a/.circleci/cimodel/data/binary_build_data.py b/.circleci/cimodel/data/binary_build_data.py
@@ -30,12 +30,12 @@ def get_processor_arch_name(gpu_version):
         "cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
     )
 
-
 LINUX_PACKAGE_VARIANTS = OrderedDict(
     manywheel=[
         "3.6m",
         "3.7m",
         "3.8m",
+        "3.9m"
     ],
     conda=dimensions.STANDARD_PYTHON_VERSIONS,
     libtorch=[
@@ -54,7 +54,7 @@ def get_processor_arch_name(gpu_version):
     )),
     # Skip CUDA-9.2 builds on Windows
     windows=(
-        [v for v in dimensions.GPU_VERSIONS if v not in ['cuda92', "rocm3.7"]],
+        [v for v in dimensions.GPU_VERSIONS if v not in ['cuda92'] + dimensions.ROCM_VERSION_LABELS],
         OrderedDict(
             wheel=dimensions.STANDARD_PYTHON_VERSIONS,
             conda=dimensions.STANDARD_PYTHON_VERSIONS,
@@ -142,11 +142,11 @@ def get_children(self):
 
         # XXX disabling conda rocm build since docker images are not there
         if self.find_prop("package_format") == 'conda':
-            gpu_versions = filter(lambda x: x != "rocm3.7", gpu_versions)
+            gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
 
         # XXX libtorch rocm build  is temporarily disabled
         if self.find_prop("package_format") == 'libtorch':
-            gpu_versions = filter(lambda x: x != "rocm3.7", gpu_versions)
+            gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
 
         return [ArchConfigNode(self, v) for v in gpu_versions]
 

diff --git a/.circleci/cimodel/data/dimensions.py b/.circleci/cimodel/data/dimensions.py
@@ -8,13 +8,17 @@
 ]
 
 ROCM_VERSIONS = [
-    "3.7",
+    "3.10",
+    "4.0",
 ]
 
-GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ["rocm" + v for v in ROCM_VERSIONS]
+ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
+
+GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ROCM_VERSION_LABELS
 
 STANDARD_PYTHON_VERSIONS = [
     "3.6",
     "3.7",
     "3.8",
+    "3.9"
 ]
diff --git a/.circleci/cimodel/data/pytorch_build_data.py b/.circleci/cimodel/data/pytorch_build_data.py
@@ -18,7 +18,11 @@
         ("clang", [
             ("5", [
                 ("3.6", [
-                    ("asan", [XImportant(True)]),
+                    ("asan", [
+                        (True, [
+                            ("shard_test", [XImportant(True)]),
+                        ]),
+                    ]),
                 ]),
             ]),
             ("7", [
@@ -45,14 +49,22 @@
             ]),
             ("10.2", [
                 ("3.6", [
-                    ("important", [X(True)]),
-                    ("libtorch", [X(True)]),
+                    ("shard_test", [XImportant(True)]),
+                    ("libtorch", [
+                        (True, [
+                            ('build_only', [X(True)]),
+                        ]),
+                    ]),
                 ]),
             ]),
-            ("11.0", [
+            ("11.1", [
                 ("3.8", [
                     X(True),
-                    ("libtorch", [XImportant(True)])
+                    ("libtorch", [
+                        (True, [
+                            ('build_only', [XImportant(True)]),
+                        ]),
+                    ]),
                 ]),
             ]),
         ]),
@@ -72,12 +84,16 @@
         ("gcc", [
             ("9", [
                 ("3.8", [
-                    ("coverage", [XImportant(True)]),
+                    ("coverage", [
+                        (True, [
+                            ("shard_test", [XImportant(True)]),
+                        ]),
+                    ]),
                 ]),
             ]),
         ]),
         ("rocm", [
-            ("3.7", [
+            ("3.9", [
                 ("3.6", [
                     ('build_only', [XImportant(True)]),
                 ]),
@@ -158,6 +174,7 @@ def child_constructor(self):
             "libtorch": LibTorchConfigNode,
             "important": ImportantConfigNode,
             "build_only": BuildOnlyConfigNode,
+            "shard_test": ShardTestConfigNode,
             "cuda_gcc_override": CudaGccOverrideConfigNode,
             "coverage": CoverageConfigNode,
             "pure_torch": PureTorchConfigNode,
@@ -195,7 +212,7 @@ def init2(self, node_name):
         self.props["is_asan"] = node_name
 
     def child_constructor(self):
-        return ImportantConfigNode
+        return ExperimentalFeatureConfigNode
 
 
 class ONNXConfigNode(TreeConfigNode):
@@ -250,7 +267,7 @@ def init2(self, node_name):
         self.props["is_libtorch"] = node_name
 
     def child_constructor(self):
-        return ImportantConfigNode
+        return ExperimentalFeatureConfigNode
 
 
 class CudaGccOverrideConfigNode(TreeConfigNode):
@@ -260,17 +277,24 @@ def init2(self, node_name):
     def child_constructor(self):
         return ExperimentalFeatureConfigNode
 
-class BuildOnlyConfigNode(TreeConfigNode):
 
+class BuildOnlyConfigNode(TreeConfigNode):
     def init2(self, node_name):
         self.props["build_only"] = node_name
 
     def child_constructor(self):
         return ExperimentalFeatureConfigNode
 
 
-class CoverageConfigNode(TreeConfigNode):
+class ShardTestConfigNode(TreeConfigNode):
+    def init2(self, node_name):
+        self.props["shard_test"] = node_name
+
+    def child_constructor(self):
+        return ImportantConfigNode
+
 
+class CoverageConfigNode(TreeConfigNode):
     def init2(self, node_name):
         self.props["is_coverage"] = node_name
 
@@ -290,7 +314,6 @@ def get_children(self):
 
 
 class XenialCompilerConfigNode(TreeConfigNode):
-
     def modify_label(self, label):
         return label or "<unspecified>"
 
@@ -304,7 +327,6 @@ def child_constructor(self):
 
 
 class BionicCompilerConfigNode(TreeConfigNode):
-
     def modify_label(self, label):
         return label or "<unspecified>"
 

diff --git a/.circleci/cimodel/data/pytorch_build_definitions.py b/.circleci/cimodel/data/pytorch_build_definitions.py
@@ -6,7 +6,7 @@
 import cimodel.lib.conf_tree as conf_tree
 import cimodel.lib.miniutils as miniutils
 from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
-from cimodel.data.simple.util.branch_filters import gen_filter_dict
+from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
 from cimodel.data.simple.util.docker_constants import gen_docker_image
 
 
@@ -110,6 +110,8 @@ def gen_workflow_params(self, phase):
             parameters["resource_class"] = resource_class
         if phase == "build" and self.rocm_version is not None:
             parameters["resource_class"] = "xlarge"
+        if hasattr(self, 'filters'):
+            parameters['filters'] = self.filters
         return parameters
 
     def gen_workflow_job(self, phase):
@@ -139,14 +141,16 @@ def gen_workflow_job(self, phase):
 
 # TODO This is a hack to special case some configs just for the workflow list
 class HiddenConf(object):
-    def __init__(self, name, parent_build=None):
+    def __init__(self, name, parent_build=None, filters=None):
         self.name = name
         self.parent_build = parent_build
+        self.filters = filters
 
     def gen_workflow_job(self, phase):
         return {
             self.gen_build_name(phase): {
-                "requires": [self.parent_build.gen_build_name("build")]
+                "requires": [self.parent_build.gen_build_name("build")],
+                "filters": self.filters,
             }
         }
 
@@ -166,7 +170,8 @@ def gen_workflow_job(self, phase):
                 "branch": self.branch,
                 "requires": [self.parent_build],
                 "context": "org-member",
-                "filters": gen_filter_dict(branches_list=["nightly"])
+                "filters": gen_filter_dict(branches_list=["nightly"],
+                                           tags_list=RC_PATTERN)
             }
         }
 
@@ -205,7 +210,9 @@ def gen_docs_configs(xenial_parent_config):
     configs.append(
         HiddenConf(
             "pytorch_python_doc_build",
-            parent_build=xenial_parent_config
+            parent_build=xenial_parent_config,
+            filters=gen_filter_dict(branches_list=r"/.*/",
+                                    tags_list=RC_PATTERN),
         )
     )
     configs.append(
@@ -219,7 +226,9 @@ def gen_docs_configs(xenial_parent_config):
     configs.append(
         HiddenConf(
             "pytorch_cpp_doc_build",
-            parent_build=xenial_parent_config
+            parent_build=xenial_parent_config,
+            filters=gen_filter_dict(branches_list=r"/.*/",
+                                    tags_list=RC_PATTERN),
         )
     )
     configs.append(
@@ -263,6 +272,7 @@ def instantiate_configs():
         compiler_version = fc.find_prop("compiler_version")
         is_xla = fc.find_prop("is_xla") or False
         is_asan = fc.find_prop("is_asan") or False
+        is_coverage = fc.find_prop("is_coverage") or False
         is_onnx = fc.find_prop("is_onnx") or False
         is_pure_torch = fc.find_prop("is_pure_torch") or False
         is_vulkan = fc.find_prop("is_vulkan") or False
@@ -301,7 +311,10 @@ def instantiate_configs():
             parms_list.append("asan")
             python_version = fc.find_prop("pyver")
             parms_list[0] = fc.find_prop("abbreviated_pyver")
-            restrict_phases = ["build", "test1", "test2"]
+
+        if is_coverage:
+            parms_list_ignored_for_docker_image.append("coverage")
+            python_version = fc.find_prop("pyver")
 
         if is_onnx:
             parms_list.append("onnx")
@@ -317,13 +330,13 @@ def instantiate_configs():
         is_important = fc.find_prop("is_important") or False
         parallel_backend = fc.find_prop("parallel_backend") or None
         build_only = fc.find_prop("build_only") or False
-        is_coverage = fc.find_prop("is_coverage") or False
+        shard_test = fc.find_prop("shard_test") or False
         # TODO: fix pure_torch python test packaging issue.
+        if shard_test:
+            restrict_phases = ["build"] if restrict_phases is None else restrict_phases
+            restrict_phases.extend(["test1", "test2"])
         if build_only or is_pure_torch:
             restrict_phases = ["build"]
-        if is_coverage and restrict_phases is None:
-            restrict_phases = ["build", "coverage_test"]
-
 
         gpu_resource = None
         if cuda_version and cuda_version != "10":
@@ -348,6 +361,8 @@ def instantiate_configs():
 
         # run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
         # should run on a CPU-only build that runs on all PRs.
+        # XXX should this be updated to a more modern build? Projects are
+        #     beginning to drop python3.6
         if (
             distro_name == "xenial"
             and fc.find_prop("pyver") == "3.6"
@@ -358,6 +373,8 @@ def instantiate_configs():
             and compiler_name == "gcc"
             and fc.find_prop("compiler_version") == "5.4"
         ):
+            c.filters = gen_filter_dict(branches_list=r"/.*/",
+                                        tags_list=RC_PATTERN)
             c.dependent_tests = gen_docs_configs(c)
 
         if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:

diff --git a/.circleci/cimodel/data/simple/docker_definitions.py b/.circleci/cimodel/data/simple/docker_definitions.py
@@ -1,48 +1,55 @@
 from collections import OrderedDict
 
 from cimodel.lib.miniutils import quote
+from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
 
 
 # TODO: make this generated from a matrix rather than just a static list
 IMAGE_NAMES = [
+    "pytorch-linux-bionic-cuda11.1-cudnn8-py3.6-gcc9",
+    "pytorch-linux-bionic-cuda11.1-cudnn8-py3.8-gcc9",
     "pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9",
     "pytorch-linux-bionic-cuda11.0-cudnn8-py3.8-gcc9",
     "pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9",
     "pytorch-linux-bionic-py3.6-clang9",
     "pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
     "pytorch-linux-bionic-py3.8-gcc9",
-    "pytorch-linux-bionic-rocm3.5.1-py3.6",
     "pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7",
     "pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7",
     "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
     "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
     "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc5.4",
     "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7",
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
     "pytorch-linux-xenial-py3-clang5-asan",
     "pytorch-linux-xenial-py3-clang7-onnx",
     "pytorch-linux-xenial-py3.8",
     "pytorch-linux-xenial-py3.6-clang7",
-    "pytorch-linux-xenial-py3.6-gcc4.8",
-    "pytorch-linux-xenial-py3.6-gcc5.4",
+    "pytorch-linux-xenial-py3.6-gcc5.4",  # this one is used in doc builds
     "pytorch-linux-xenial-py3.6-gcc7.2",
     "pytorch-linux-xenial-py3.6-gcc7",
-    "pytorch-linux-bionic-rocm3.7-py3.6",
+    "pytorch-linux-bionic-rocm3.9-py3.6",
+    "pytorch-linux-bionic-rocm3.10-py3.6",
 ]
 
 
 def get_workflow_jobs():
     """Generates a list of docker image build definitions"""
-    return [
-        OrderedDict(
+    ret = []
+    for image_name in IMAGE_NAMES:
+        parameters = OrderedDict({
+            "name": quote(f"docker-{image_name}"),
+            "image_name": quote(image_name),
+        })
+        if image_name == "pytorch-linux-xenial-py3.6-gcc5.4":
+            # pushing documentation on tags requires CircleCI to also
+            # build all the dependencies on tags, including this docker image
+            parameters['filters'] = gen_filter_dict(branches_list=r"/.*/",
+                                                    tags_list=RC_PATTERN)
+        ret.append(OrderedDict(
             {
-                "docker_build_job": OrderedDict(
-                    {
-                        "name": quote(f"docker-{image_name}"),
-                        "image_name": quote(image_name),
-                    }
-                )
+                "docker_build_job": parameters
             }
-        )
-        for image_name in IMAGE_NAMES
-    ]
+        ))
+    return ret