update master

pytorch · Oct 9, 2020 · ff801b4 · ff801b4
2 parents 5715ae3 + a5c0dbc
commit ff801b4
Show file tree

Hide file tree

Showing 3,269 changed files with 272,275 additions and 72,530 deletions.
diff --git a/.circleci/README.md b/.circleci/README.md
@@ -178,8 +178,7 @@ CircleCI creates a  final yaml file by inlining every <<* segment, so if we were
 So, CircleCI has several executor types: macos, machine, and docker are the ones we use. The 'machine' executor gives you two cores on some linux vm. The 'docker' executor gives you considerably more cores (nproc was 32 instead of 2 back when I tried in February). Since the dockers are faster, we try to run everything that we can in dockers. Thus
 
 * linux build jobs use the docker executor. Running them on the docker executor was at least 2x faster than running them on the machine executor
-* linux test jobs use the machine executor and spin up their own docker. Why this nonsense? It's cause we run nvidia-docker for our GPU tests; any code that calls into the CUDA runtime needs to be run on nvidia-docker. To run a nvidia-docker you need to install some nvidia packages on the host machine and then call docker with the '—runtime nvidia' argument. CircleCI doesn't support this, so we have to do it ourself.
-    * This is not just a mere inconvenience. **This blocks all of our linux tests from using more than 2 cores.** But there is nothing that we can do about it, but wait for a fix on circleci's side. Right now, we only run some smoke tests (some simple imports) on the binaries, but this also affects non-binary test jobs.
+* linux test jobs use the machine executor so that they can interface with GPUs properly, since docker executors cannot run with attached GPUs
 * linux upload jobs use the machine executor. The upload jobs are so short that it doesn't really matter what they use
 * linux smoke test jobs use the machine executor for the same reason as the linux test jobs
 
@@ -419,8 +418,6 @@ You can build Linux binaries locally easily using docker.
 #    in the docker container then you will see path/to/foo/baz on your local
 #    machine. You could also clone the pytorch and builder repos in the docker.
 #
-# If you're building a CUDA binary then use `nvidia-docker run` instead, see below.
-#
 # If you know how, add ccache as a volume too and speed up everything
 docker run \
     -v your/pytorch/repo:/pytorch \
@@ -444,9 +441,7 @@ export DESIRED_CUDA=cpu
 
 **Building CUDA binaries on docker**
 
-To build a CUDA binary you need to use `nvidia-docker run` instead of just `docker run` (or you can manually pass `--runtime=nvidia`). This adds some needed libraries and things to build CUDA stuff.
-
-You can build CUDA binaries on CPU only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary on a docker on your laptop if you so choose (though it’s gonna take a loong time).
+You can build CUDA binaries on CPU only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary on a docker on your laptop if you so choose (though it’s gonna take a long time).
 
 For Facebook employees, ask about beefy machines that have docker support and use those instead of your laptop; it will be 5x as fast.
 

diff --git a/.circleci/cimodel/data/binary_build_data.py b/.circleci/cimodel/data/binary_build_data.py
@@ -25,8 +25,10 @@
 ]
 
 
-def get_processor_arch_name(cuda_version):
-    return "cpu" if not cuda_version else "cu" + cuda_version
+def get_processor_arch_name(gpu_version):
+    return "cpu" if not gpu_version else (
+        "cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
+    )
 
 
 LINUX_PACKAGE_VARIANTS = OrderedDict(
@@ -42,21 +44,25 @@ def get_processor_arch_name(cuda_version):
 )
 
 CONFIG_TREE_DATA = OrderedDict(
-    linux=(dimensions.CUDA_VERSIONS, LINUX_PACKAGE_VARIANTS),
+    linux=(dimensions.GPU_VERSIONS, LINUX_PACKAGE_VARIANTS),
     macos=([None], OrderedDict(
         wheel=dimensions.STANDARD_PYTHON_VERSIONS,
         conda=dimensions.STANDARD_PYTHON_VERSIONS,
         libtorch=[
             "3.7",
         ],
     )),
-    windows=(dimensions.CUDA_VERSIONS, OrderedDict(
-        wheel=dimensions.STANDARD_PYTHON_VERSIONS,
-        conda=dimensions.STANDARD_PYTHON_VERSIONS,
-        libtorch=[
-            "3.7",
-        ],
-    )),
+    # Skip CUDA-9.2 and ROCm builds on Windows
+    windows=(
+        [v for v in dimensions.GPU_VERSIONS if v not in ['cuda92'] + dimensions.ROCM_VERSION_LABELS],
+        OrderedDict(
+            wheel=dimensions.STANDARD_PYTHON_VERSIONS,
+            conda=dimensions.STANDARD_PYTHON_VERSIONS,
+            libtorch=[
+                "3.7",
+            ],
+        )
+    ),
 )
 
 # GCC config variants:
@@ -93,12 +99,12 @@ def get_children(self):
 
 
 class OSConfigNode(ConfigNode):
-    def __init__(self, parent, os_name, cuda_versions, py_tree):
+    def __init__(self, parent, os_name, gpu_versions, py_tree):
         super(OSConfigNode, self).__init__(parent, os_name)
 
         self.py_tree = py_tree
         self.props["os_name"] = os_name
-        self.props["cuda_versions"] = cuda_versions
+        self.props["gpu_versions"] = gpu_versions
 
     def get_children(self):
         return [PackageFormatConfigNode(self, k, v) for k, v in self.py_tree.items()]
@@ -117,7 +123,7 @@ def get_children(self):
         elif self.find_prop("os_name") == "windows" and self.find_prop("package_format") == "libtorch":
             return [WindowsLibtorchConfigNode(self, v) for v in WINDOWS_LIBTORCH_CONFIG_VARIANTS]
         else:
-            return [ArchConfigNode(self, v) for v in self.find_prop("cuda_versions")]
+            return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
 
 
 class LinuxGccConfigNode(ConfigNode):
@@ -127,14 +133,22 @@ def __init__(self, parent, gcc_config_variant):
         self.props["gcc_config_variant"] = gcc_config_variant
 
     def get_children(self):
-        cuda_versions = self.find_prop("cuda_versions")
+        gpu_versions = self.find_prop("gpu_versions")
 
         # XXX devtoolset7 on CUDA 9.0 is temporarily disabled
         # see https://github.com/pytorch/pytorch/issues/20066
         if self.find_prop("gcc_config_variant") == 'devtoolset7':
-            cuda_versions = filter(lambda x: x != "90", cuda_versions)
+            gpu_versions = filter(lambda x: x != "cuda_90", gpu_versions)
+
+        # XXX disabling conda rocm build since docker images are not there
+        if self.find_prop("package_format") == 'conda':
+            gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
+
+        # XXX libtorch rocm build is temporarily disabled
+        if self.find_prop("package_format") == 'libtorch':
+            gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
 
-        return [ArchConfigNode(self, v) for v in cuda_versions]
+        return [ArchConfigNode(self, v) for v in gpu_versions]
 
 
 class WindowsLibtorchConfigNode(ConfigNode):
@@ -144,14 +158,14 @@ def __init__(self, parent, libtorch_config_variant):
         self.props["libtorch_config_variant"] = libtorch_config_variant
 
     def get_children(self):
-        return [ArchConfigNode(self, v) for v in self.find_prop("cuda_versions")]
+        return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
 
 
 class ArchConfigNode(ConfigNode):
-    def __init__(self, parent, cu):
-        super(ArchConfigNode, self).__init__(parent, get_processor_arch_name(cu))
+    def __init__(self, parent, gpu):
+        super(ArchConfigNode, self).__init__(parent, get_processor_arch_name(gpu))
 
-        self.props["cu"] = cu
+        self.props["gpu"] = gpu
 
     def get_children(self):
         return [PyVersionConfigNode(self, v) for v in self.find_prop("python_versions")]

diff --git a/.circleci/cimodel/data/binary_build_definitions.py b/.circleci/cimodel/data/binary_build_definitions.py
@@ -6,10 +6,10 @@
 import cimodel.lib.miniutils as miniutils
 
 class Conf(object):
-    def __init__(self, os, cuda_version, pydistro, parms, smoke, libtorch_variant, gcc_config_variant, libtorch_config_variant):
+    def __init__(self, os, gpu_version, pydistro, parms, smoke, libtorch_variant, gcc_config_variant, libtorch_config_variant):
 
         self.os = os
-        self.cuda_version = cuda_version
+        self.gpu_version = gpu_version
         self.pydistro = pydistro
         self.parms = parms
         self.smoke = smoke
@@ -18,7 +18,7 @@ def __init__(self, os, cuda_version, pydistro, parms, smoke, libtorch_variant, g
         self.libtorch_config_variant = libtorch_config_variant
 
     def gen_build_env_parms(self):
-        elems = [self.pydistro] + self.parms + [binary_build_data.get_processor_arch_name(self.cuda_version)]
+        elems = [self.pydistro] + self.parms + [binary_build_data.get_processor_arch_name(self.gpu_version)]
         if self.gcc_config_variant is not None:
             elems.append(str(self.gcc_config_variant))
         if self.libtorch_config_variant is not None:
@@ -37,9 +37,12 @@ def gen_docker_image(self):
         docker_distro_prefix = miniutils.override(self.pydistro, docker_word_substitution)
 
         # The cpu nightlies are built on the pytorch/manylinux-cuda102 docker image
-        alt_docker_suffix = self.cuda_version or "102"
-        docker_distro_suffix = "" if self.pydistro == "conda" else alt_docker_suffix
-        return miniutils.quote("pytorch/" + docker_distro_prefix + "-cuda" + docker_distro_suffix)
+        # TODO cuda images should consolidate into tag-base images similar to rocm
+        alt_docker_suffix = "cuda102" if not self.gpu_version else (
+            "rocm:" + self.gpu_version.strip("rocm") if self.gpu_version.startswith("rocm") else self.gpu_version)
+        docker_distro_suffix = alt_docker_suffix if self.pydistro != "conda" else (
+            "cuda" if alt_docker_suffix.startswith("cuda") else "rocm")
+        return miniutils.quote("pytorch/" + docker_distro_prefix + "-" + docker_distro_suffix)
 
     def get_name_prefix(self):
         return "smoke" if self.smoke else "binary"
@@ -69,14 +72,10 @@ def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
                 "update_s3_htmls",
             ]
             job_def["filters"] = branch_filters.gen_filter_dict(
-                branches_list=["nightly"],
-                tags_list=[branch_filters.RC_PATTERN],
+                branches_list=["postnightly"],
             )
         else:
-            if phase in ["upload"]:
-                filter_branch = "nightly"
-            else:
-                filter_branch = r"/.*/"
+            filter_branch = r"/.*/"
             job_def["filters"] = branch_filters.gen_filter_dict(
                 branches_list=[filter_branch],
                 tags_list=[branch_filters.RC_PATTERN],
@@ -89,28 +88,61 @@ def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
             if not (self.smoke and self.os == "macos") and self.os != "windows":
                 job_def["docker_image"] = self.gen_docker_image()
 
-            if self.os != "windows" and self.cuda_version:
+            # FIXME: use_cuda_docker_runtime only works for CUDA, not ROCm, but it is set for any gpu_version
+            if self.os != "windows" and self.gpu_version:
                 job_def["use_cuda_docker_runtime"] = miniutils.quote("1")
         else:
             if self.os == "linux" and phase != "upload":
                 job_def["docker_image"] = self.gen_docker_image()
 
         if phase == "test":
-            if self.cuda_version:
+            if self.gpu_version:
                 if self.os == "windows":
                     job_def["executor"] = "windows-with-nvidia-gpu"
                 else:
                     job_def["resource_class"] = "gpu.medium"
-        if phase == "upload":
-            job_def["context"] = "org-member"
-            job_def["requires"] = [
-                self.gen_build_name(upload_phase_dependency, nightly)
-            ]
 
         os_name = miniutils.override(self.os, {"macos": "mac"})
         job_name = "_".join([self.get_name_prefix(), os_name, phase])
         return {job_name : job_def}
 
+    def gen_upload_job(self, phase, requires_dependency):
+        """Generate binary_upload job for configuration
+
+        Output looks similar to:
+
+      - binary_upload:
+          name: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_upload
+          context: org-member
+          requires: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_test
+          filters:
+            branches:
+              only:
+                - nightly
+            tags:
+              only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
+          package_type: manywheel
+          upload_subfolder: cu92
+        """
+        return {
+            "binary_upload": OrderedDict({
+                "name": self.gen_build_name(phase, nightly=True),
+                "context": "org-member",
+                "requires": [self.gen_build_name(
+                    requires_dependency,
+                    nightly=True
+                )],
+                "filters": branch_filters.gen_filter_dict(
+                    branches_list=["nightly"],
+                    tags_list=[branch_filters.RC_PATTERN],
+                ),
+                "package_type": self.pydistro,
+                "upload_subfolder": binary_build_data.get_processor_arch_name(
+                    self.gpu_version,
+                ),
+            })
+        }
+
 def get_root(smoke, name):
 
     return binary_build_data.TopLevelNode(
@@ -129,7 +161,7 @@ def gen_build_env_list(smoke):
     for c in config_list:
         conf = Conf(
             c.find_prop("os_name"),
-            c.find_prop("cu"),
+            c.find_prop("gpu"),
             c.find_prop("package_format"),
             [c.find_prop("pyver")],
             c.find_prop("smoke"),
@@ -149,32 +181,19 @@ def get_nightly_uploads():
     mylist = []
     for conf in configs:
         phase_dependency = "test" if predicate_exclude_macos(conf) else "build"
-        mylist.append(conf.gen_workflow_job("upload", phase_dependency, nightly=True))
+        mylist.append(conf.gen_upload_job("upload", phase_dependency))
 
     return mylist
 
 def get_post_upload_jobs():
-    """Generate jobs to update HTML indices and report binary sizes"""
-    configs = gen_build_env_list(False)
-    common_job_def = {
-        "context": "org-member",
-        "filters": branch_filters.gen_filter_dict(
-            branches_list=["nightly"],
-            tags_list=[branch_filters.RC_PATTERN],
-        ),
-        "requires": [],
-    }
-    for conf in configs:
-        upload_job_name = conf.gen_build_name(
-            build_or_test="upload",
-            nightly=True
-        )
-        common_job_def["requires"].append(upload_job_name)
     return [
         {
             "update_s3_htmls": {
                 "name": "update_s3_htmls",
-                **common_job_def,
+                "context": "org-member",
+                "filters": branch_filters.gen_filter_dict(
+                    branches_list=["postnightly"],
+                ),
             },
         },
     ]