Commit

Merge branch 'master' into log1p

zasdfgbnm authored Aug 26, 2020
2 parents 449ac32 + 51861cc commit 7822617
Showing 2,677 changed files with 186,784 additions and 69,573 deletions.
9 changes: 2 additions & 7 deletions .circleci/README.md
@@ -178,8 +178,7 @@ CircleCI creates a final yaml file by inlining every <<* segment, so if we were
So, CircleCI has several executor types; macos, machine, and docker are the ones we use. The 'machine' executor gives you two cores on a Linux VM. The 'docker' executor gives you considerably more cores (nproc was 32 instead of 2 back when I tried in February). Since the docker executors are faster, we try to run everything we can in them. Thus

* linux build jobs use the docker executor. Running them on the docker executor was at least 2x faster than running them on the machine executor
* linux test jobs use the machine executor and spin up their own docker. Why this nonsense? Because we run nvidia-docker for our GPU tests; any code that calls into the CUDA runtime needs to run under nvidia-docker. To run nvidia-docker you need to install some NVIDIA packages on the host machine and then call docker with the `--runtime=nvidia` argument. CircleCI doesn't support this, so we have to do it ourselves.
* This is not a mere inconvenience. **This blocks all of our linux tests from using more than 2 cores.** There is nothing we can do about it but wait for a fix on CircleCI's side. Right now we only run some smoke tests (some simple imports) on the binaries, but this also affects non-binary test jobs.
* linux test jobs use the machine executor in order for them to properly interface with GPUs since docker executors cannot execute with attached GPUs
* linux upload jobs use the machine executor. The upload jobs are so short that it doesn't really matter what they use
* linux smoke test jobs use the machine executor for the same reason as the linux test jobs
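The nvidia-docker invocation the test jobs perform can be sketched in Python (matching the rest of this page's tooling code). This is a hypothetical illustration: the image name and the helper are placeholders, not the actual CI scripts.

```python
import subprocess

def nvidia_docker_cmd(image, script="nproc"):
    # CircleCI's machine executor lets us install the nvidia runtime on the
    # host ourselves and pass it to docker explicitly via --runtime=nvidia.
    return ["docker", "run", "--runtime=nvidia", "--rm", image,
            "bash", "-c", script]

cmd = nvidia_docker_cmd("pytorch/manylinux-cuda102")  # placeholder image name
print(" ".join(cmd))
# To actually run the container (requires the nvidia runtime on the host):
# subprocess.run(cmd, check=True)
```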

@@ -419,8 +418,6 @@ You can easily build Linux binaries locally using docker.
# in the docker container then you will see path/to/foo/baz on your local
# machine. You could also clone the pytorch and builder repos in the docker.
#
# If you're building a CUDA binary then use `nvidia-docker run` instead, see below.
#
# If you know how, add ccache as a volume too and speed up everything
docker run \
-v your/pytorch/repo:/pytorch \
@@ -444,9 +441,7 @@ export DESIRED_CUDA=cpu

**Building CUDA binaries on docker**

To build a CUDA binary you need to use `nvidia-docker run` instead of just `docker run` (or you can manually pass `--runtime=nvidia`). This adds some needed libraries and things to build CUDA stuff.

You can build CUDA binaries on CPU only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary on a docker on your laptop if you so choose (though it’s gonna take a loong time).
You can build CUDA binaries on CPU only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary on a docker on your laptop if you so choose (though it’s gonna take a long time).

For Facebook employees, ask about beefy machines that have docker support and use those instead of your laptop; it will be 5x as fast.

21 changes: 11 additions & 10 deletions .circleci/cimodel/data/binary_build_data.py
@@ -50,18 +50,19 @@ def get_processor_arch_name(cuda_version):
"3.7",
],
)),
windows=(dimensions.CUDA_VERSIONS, OrderedDict(
wheel=dimensions.STANDARD_PYTHON_VERSIONS,
conda=dimensions.STANDARD_PYTHON_VERSIONS,
libtorch=[
"3.7",
],
)),
# Skip CUDA-9.2 builds on Windows
windows=(
[v for v in dimensions.CUDA_VERSIONS if v not in ['92', '110']],
OrderedDict(
wheel=dimensions.STANDARD_PYTHON_VERSIONS,
conda=dimensions.STANDARD_PYTHON_VERSIONS,
libtorch=[
"3.7",
],
)
),
)

CONFIG_TREE_DATA_NO_WINDOWS = CONFIG_TREE_DATA.copy()
CONFIG_TREE_DATA_NO_WINDOWS.pop("windows")
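The Windows entry above filters the CUDA version list with a comprehension. A standalone sketch of that filtering, with `CUDA_VERSIONS` mirroring the values from the dimensions.py hunk in this commit (the real list may also carry a CPU entry):

```python
# CUDA_VERSIONS mirrors .circleci/cimodel/data/dimensions.py in this commit.
CUDA_VERSIONS = ["92", "101", "102", "110"]

# Skip CUDA 9.2 and 11.0 builds on Windows; everything else is kept.
windows_cuda_versions = [v for v in CUDA_VERSIONS if v not in ("92", "110")]

print(windows_cuda_versions)  # ['101', '102']
```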

# GCC config variants:
#
# All the nightlies (except libtorch with new gcc ABI) are built with devtoolset7,
102 changes: 69 additions & 33 deletions .circleci/cimodel/data/binary_build_definitions.py
@@ -1,10 +1,10 @@
from collections import OrderedDict

import cimodel.data.simple.util.branch_filters as branch_filters
import cimodel.data.binary_build_data as binary_build_data
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils


class Conf(object):
def __init__(self, os, cuda_version, pydistro, parms, smoke, libtorch_variant, gcc_config_variant, libtorch_config_variant):

@@ -64,28 +64,24 @@ def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
job_def = OrderedDict()
job_def["name"] = self.gen_build_name(phase, nightly)
job_def["build_environment"] = miniutils.quote(" ".join(self.gen_build_env_parms()))
job_def["requires"] = ["setup"]
if self.smoke:
job_def["requires"].append("update_s3_htmls_for_nightlies")
job_def["requires"].append("update_s3_htmls_for_nightlies_devtoolset7")
job_def["filters"] = {"branches": {"only": "postnightly"}}
job_def["requires"] = [
"update_s3_htmls",
]
job_def["filters"] = branch_filters.gen_filter_dict(
branches_list=["postnightly"],
)
else:
job_def["filters"] = {
"branches": {
"only": "nightly"
},
# Will run on tags like v1.5.0-rc1, etc.
"tags": {
# Using a raw string here to avoid having to escape
# anything
"only": r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"
}
}
filter_branch = r"/.*/"
job_def["filters"] = branch_filters.gen_filter_dict(
branches_list=[filter_branch],
tags_list=[branch_filters.RC_PATTERN],
)
if self.libtorch_variant:
job_def["libtorch_variant"] = miniutils.quote(self.libtorch_variant)
if phase == "test":
if not self.smoke:
job_def["requires"].append(self.gen_build_name("build", nightly))
job_def["requires"] = [self.gen_build_name("build", nightly)]
if not (self.smoke and self.os == "macos") and self.os != "windows":
job_def["docker_image"] = self.gen_docker_image()

@@ -101,28 +97,55 @@ def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
job_def["executor"] = "windows-with-nvidia-gpu"
else:
job_def["resource_class"] = "gpu.medium"
if phase == "upload":
job_def["context"] = "org-member"
job_def["requires"] = ["setup", self.gen_build_name(upload_phase_dependency, nightly)]

os_name = miniutils.override(self.os, {"macos": "mac"})
job_name = "_".join([self.get_name_prefix(), os_name, phase])
return {job_name : job_def}

def gen_upload_job(self, phase, requires_dependency):
"""Generate binary_upload job for configuration
Output looks similar to:
- binary_upload:
name: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_upload
context: org-member
requires: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_test
filters:
branches:
only:
- nightly
tags:
only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
package_type: manywheel
upload_subfolder: cu92
"""
return {
"binary_upload": OrderedDict({
"name": self.gen_build_name(phase, nightly=True),
"context": "org-member",
"requires": [self.gen_build_name(
requires_dependency,
nightly=True
)],
"filters": branch_filters.gen_filter_dict(
branches_list=["nightly"],
tags_list=[branch_filters.RC_PATTERN],
),
"package_type": self.pydistro,
"upload_subfolder": binary_build_data.get_processor_arch_name(
self.cuda_version
),
})
}
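The `tags_list=[branch_filters.RC_PATTERN]` filter restricts these jobs to release-candidate tags. A sketch of what that pattern matches, assuming `RC_PATTERN` is the regex shown in the docstring above (CircleCI wraps regex filters in slashes, which plain `re` does not use):

```python
import re

# Assumed value of branch_filters.RC_PATTERN, taken from the docstring above.
RC_PATTERN = r"v[0-9]+(\.[0-9]+)*-rc[0-9]+"

def is_rc_tag(tag):
    """Return True if the tag would pass the CircleCI tag filter."""
    return re.fullmatch(RC_PATTERN, tag) is not None

print(is_rc_tag("v1.5.0-rc1"))  # True
print(is_rc_tag("v1.5.0"))      # False
print(is_rc_tag("nightly"))     # False
```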

def get_root(smoke, name):

if smoke:
return binary_build_data.TopLevelNode(
name,
binary_build_data.CONFIG_TREE_DATA_NO_WINDOWS,
smoke,
)
else:
return binary_build_data.TopLevelNode(
name,
binary_build_data.CONFIG_TREE_DATA,
smoke,
)
return binary_build_data.TopLevelNode(
name,
binary_build_data.CONFIG_TREE_DATA,
smoke,
)


def gen_build_env_list(smoke):
@@ -154,10 +177,23 @@ def get_nightly_uploads():
mylist = []
for conf in configs:
phase_dependency = "test" if predicate_exclude_macos(conf) else "build"
mylist.append(conf.gen_workflow_job("upload", phase_dependency, nightly=True))
mylist.append(conf.gen_upload_job("upload", phase_dependency))

return mylist

def get_post_upload_jobs():
return [
{
"update_s3_htmls": {
"name": "update_s3_htmls",
"context": "org-member",
"filters": branch_filters.gen_filter_dict(
branches_list=["postnightly"],
),
},
},
]
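This refactor replaces hand-written filter literals with `branch_filters.gen_filter_dict`. A hypothetical re-implementation of that helper, inferred from the literal filter dicts this commit removes and from the `binary_upload` docstring; the real helper may differ:

```python
# Hypothetical sketch of branch_filters.gen_filter_dict, inferred from the
# {"branches": {"only": ...}, "tags": {"only": ...}} literals it replaces.
def gen_filter_dict(branches_list=None, tags_list=None):
    filters = {}
    if branches_list is not None:
        filters["branches"] = {"only": branches_list}
    if tags_list is not None:
        # Will run on tags like v1.5.0-rc1, etc.
        filters["tags"] = {"only": tags_list}
    return filters

print(gen_filter_dict(branches_list=["nightly"],
                      tags_list=[r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"]))
```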

def get_nightly_tests():

configs = gen_build_env_list(False)
7 changes: 3 additions & 4 deletions .circleci/cimodel/data/caffe2_build_definitions.py
@@ -5,7 +5,7 @@
from cimodel.lib.conf_tree import Ver
import cimodel.lib.miniutils as miniutils
from cimodel.data.caffe2_build_data import CONFIG_TREE_DATA, TopLevelNode
from cimodel.data.simple.util.branch_filters import gen_branches_only_filter_dict
from cimodel.data.simple.util.branch_filters import gen_filter_dict

from dataclasses import dataclass

@@ -118,16 +118,15 @@ def gen_workflow_params(self, phase):
def gen_workflow_job(self, phase):
job_def = OrderedDict()
job_def["name"] = self.construct_phase_name(phase)
job_def["requires"] = ["setup"]

if phase == "test":
job_def["requires"].append(self.construct_phase_name("build"))
job_def["requires"] = [self.construct_phase_name("build")]
job_name = "caffe2_" + self.get_platform() + "_test"
else:
job_name = "caffe2_" + self.get_platform() + "_build"

if not self.is_important:
job_def["filters"] = gen_branches_only_filter_dict()
job_def["filters"] = gen_filter_dict()
job_def.update(self.gen_workflow_params(phase))
return {job_name : job_def}

1 change: 1 addition & 0 deletions .circleci/cimodel/data/dimensions.py
@@ -5,6 +5,7 @@
"92",
"101",
"102",
"110"
]

STANDARD_PYTHON_VERSIONS = [
89 changes: 69 additions & 20 deletions .circleci/cimodel/data/pytorch_build_data.py
@@ -3,8 +3,10 @@

CONFIG_TREE_DATA = [
("xenial", [
(None, [
X("nightly"),
("rocm", [
("3.5.1", [
X("3.6"),
]),
]),
("gcc", [
("5.4", [ # All this subtree rebases to master and then build
@@ -19,27 +21,38 @@
]),
("clang", [
("5", [
XImportant("3.6"), # This is actually the ASAN build
("3.6", [
("asan", [XImportant(True)]),
]),
]),
]),
("cuda", [
("9.2", [X("3.6")]),
("10.1", [X("3.6")]),
("10.2", [
XImportant("3.6"),
("9.2", [
("3.6", [
("libtorch", [XImportant(True)])
X(True),
("cuda_gcc_override", [
("gcc5.4", [
('build_only', [XImportant(True)]),
]),
]),
])
]),
("10.1", [
("3.6", [
('build_only', [X(True)]),
]),
]),
]),
("android", [
("r19c", [
("10.2", [
("3.6", [
("android_abi", [XImportant("x86_32")]),
("android_abi", [X("x86_64")]),
("android_abi", [X("arm-v7a")]),
("android_abi", [X("arm-v8a")]),
])
("important", [X(True)]),
("libtorch", [X(True)]),
]),
]),
("11.0", [
("3.8", [
X(True),
("libtorch", [XImportant(True)])
]),
]),
]),
]),
Expand All @@ -51,9 +64,13 @@
("9", [
("3.6", [
("xla", [XImportant(True)]),
("vulkan", [XImportant(True)]),
]),
]),
]),
("gcc", [
("9", [XImportant("3.8")]),
]),
]),
]
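`CONFIG_TREE_DATA` encodes build configurations as nested `(label, children)` tuples. A minimal sketch of how walking such a tree expands into flat configurations, using toy data and plain strings as leaves rather than the real `X`/`XImportant` helpers:

```python
# Toy tree in the same nested (label, children) shape as CONFIG_TREE_DATA.
TOY_TREE = [
    ("cuda", [
        ("10.2", ["3.6"]),
        ("11.0", ["3.8"]),
    ]),
    ("gcc", [
        ("9", ["3.8"]),
    ]),
]

def flatten(tree, prefix=()):
    """Walk the tuple tree, collecting one flat path per leaf."""
    configs = []
    for node in tree:
        if isinstance(node, tuple):
            label, children = node
            configs.extend(flatten(children, prefix + (label,)))
        else:  # leaf: a Python version string in this toy example
            configs.append(prefix + (node,))
    return configs

print(flatten(TOY_TREE))
# [('cuda', '10.2', '3.6'), ('cuda', '11.0', '3.8'), ('gcc', '9', '3.8')]
```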

@@ -120,12 +137,15 @@ def child_constructor(self):
experimental_feature = self.find_prop("experimental_feature")

next_nodes = {
"asan": AsanConfigNode,
"xla": XlaConfigNode,
"vulkan": VulkanConfigNode,
"parallel_tbb": ParallelTBBConfigNode,
"parallel_native": ParallelNativeConfigNode,
"libtorch": LibTorchConfigNode,
"important": ImportantConfigNode,
"android_abi": AndroidAbiConfigNode,
"build_only": BuildOnlyConfigNode,
"cuda_gcc_override": CudaGccOverrideConfigNode
}
return next_nodes[experimental_feature]

@@ -141,6 +161,28 @@ def child_constructor(self):
return ImportantConfigNode


class AsanConfigNode(TreeConfigNode):
def modify_label(self, label):
return "Asan=" + str(label)

def init2(self, node_name):
self.props["is_asan"] = node_name

def child_constructor(self):
return ImportantConfigNode


class VulkanConfigNode(TreeConfigNode):
def modify_label(self, label):
return "Vulkan=" + str(label)

def init2(self, node_name):
self.props["is_vulkan"] = node_name

def child_constructor(self):
return ImportantConfigNode


class ParallelTBBConfigNode(TreeConfigNode):
def modify_label(self, label):
return "PARALLELTBB=" + str(label)
@@ -174,13 +216,20 @@ def child_constructor(self):
return ImportantConfigNode


class AndroidAbiConfigNode(TreeConfigNode):
class CudaGccOverrideConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["cuda_gcc_override"] = node_name

def child_constructor(self):
return ExperimentalFeatureConfigNode

class BuildOnlyConfigNode(TreeConfigNode):

def init2(self, node_name):
self.props["android_abi"] = node_name
self.props["build_only"] = node_name

def child_constructor(self):
return ImportantConfigNode
return ExperimentalFeatureConfigNode


class ImportantConfigNode(TreeConfigNode):
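Each config-node class above sets one prop and names the class that parses the next tree level via `child_constructor`; the new nodes return `ExperimentalFeatureConfigNode` so further feature keys (e.g. `important` under `build_only`) can chain. A minimal sketch of that dispatch pattern, with simplified stand-in classes rather than the real `TreeConfigNode` machinery:

```python
# Simplified stand-ins for the dispatch pattern in pytorch_build_data.py.
class Node:
    def __init__(self, name):
        self.name = name
        self.props = {}

class ImportantNode(Node): pass
class BuildOnlyNode(Node): pass

NEXT_NODES = {
    "important": ImportantNode,
    "build_only": BuildOnlyNode,
}

def child_constructor(feature):
    # Look up the class that should parse the next level of the config tree.
    return NEXT_NODES[feature]

node = child_constructor("build_only")("gcc5.4")
print(type(node).__name__)  # BuildOnlyNode
```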
