Update on "Convert generator in Sampler back to lazy construction"

Fixes #63609

- Revert #63026
- Sampler is expected to be re-seeded if the user specifies a seed before each epoch
- Cannot attach the generator to self with `__iter__`, because multiple iterators would ruin that use case
- Add tests to prevent the same case for different Samplers

Differential Revision: [D30451774](https://our.internmc.facebook.com/intern/diff/D30451774)

[ghstack-poisoned]
pytorch · Sep 29, 2021 · 8aee201 · 8aee201
2 parents 4c639d5 + 58f7f3a
commit 8aee201
Show file tree

Hide file tree

Showing 1,894 changed files with 140,415 additions and 60,206 deletions.
diff --git a/.bazelrc b/.bazelrc
@@ -3,7 +3,11 @@ build --copt=-I.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
 
 # Configuration to disable tty features for environments like CI
-
 build:no-tty --curses no
 build:no-tty --progress_report_interval 10
 build:no-tty --show_progress_rate_limit 10
+
+# Configuration to build with GPU support
+build:gpu --define=cuda=true
+# define a separate build folder for faster switching between configs
+build:gpu --platform_suffix=-gpu
diff --git a/.bazelversion b/.bazelversion
@@ -1 +1 @@
-3.1.0
+4.2.1
diff --git a/.circleci/README.md b/.circleci/README.md
@@ -343,7 +343,6 @@ All linux builds occur in docker images. The docker images are
     * Has ALL CUDA versions installed. The script pytorch/builder/conda/switch_cuda_version.sh sets /usr/local/cuda to a symlink to e.g. /usr/local/cuda-10.0 to enable different CUDA builds
     * Also used for cpu builds
 * pytorch/manylinux-cuda90
-* pytorch/manylinux-cuda92
 * pytorch/manylinux-cuda100
     * Also used for cpu builds
 

diff --git a/.circleci/cimodel/data/binary_build_definitions.py b/.circleci/cimodel/data/binary_build_definitions.py
@@ -124,17 +124,17 @@ def gen_upload_job(self, phase, requires_dependency):
         Output looks similar to:
 
       - binary_upload:
-          name: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_upload
+          name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
           context: org-member
-          requires: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_test
+          requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
           filters:
             branches:
               only:
                 - nightly
             tags:
               only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
           package_type: manywheel
-          upload_subfolder: cu92
+          upload_subfolder: cu113
         """
         return {
             "binary_upload": OrderedDict({

diff --git a/.circleci/cimodel/data/dimensions.py b/.circleci/cimodel/data/dimensions.py
@@ -7,9 +7,9 @@
 ]
 
 ROCM_VERSIONS = [
-    "4.0.1",
     "4.1",
     "4.2",
+    "4.3.1",
 ]
 
 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]

diff --git a/.circleci/cimodel/data/pytorch_build_data.py b/.circleci/cimodel/data/pytorch_build_data.py
@@ -7,9 +7,6 @@
             ("5.4", [  # All this subtree rebases to master and then build
                 ("3.6", [
                     ("important", [X(True)]),
-                    ("parallel_tbb", [X(True)]),
-                    ("parallel_native", [X(True)]),
-                    ("pure_torch", [X(True)]),
                 ]),
             ]),
             # TODO: bring back libtorch test
@@ -30,7 +27,8 @@
         ("cuda", [
             ("10.2", [
                 ("3.6", [
-                    ("shard_test", [X(True)]),
+                    # Build are needed for slow_gradcheck
+                    ('build_only', [X(True)]),
                     ("slow_gradcheck", [
                         # If you update this slow gradcheck, you should
                         # also update docker_definitions.py to make sure
@@ -47,47 +45,25 @@
                     # ]),
                 ]),
             ]),
-            ("11.1", [
-                ("3.8", [
-                    ("shard_test", [XImportant(True)]),
-                    # UNCOMMENT THE BELOW TO REENABLE LIBTORCH
-                    # ("libtorch", [
-                    #     (True, [
-                    #         ('build_only', [X(True)]),
-                    #     ]),
-                    # ]),
-                ]),
-            ]),
         ]),
     ]),
     ("bionic", [
         ("clang", [
-            ("9", [
-                ("3.6", [
-                    ("noarch", [XImportant(True)]),
-                ]),
-            ]),
             ("9", [
                 ("3.6", [
                     ("xla", [XImportant(True)]),
                     ("vulkan", [XImportant(True)]),
                 ]),
             ]),
         ]),
-        ("cuda", [
-            ("10.2", [
-                ("3.9", [
-                    ("shard_test", [XImportant(True)]),
-                ]),
-            ]),
-        ]),
-        ("rocm", [
-            ("3.9", [
-                ("3.6", [
-                    ('build_only', [XImportant(True)]),
-                ]),
-            ]),
-        ]),
+        # @jithunnair-amd believes Jenkins builds are sufficient
+        # ("rocm", [
+        #     ("3.9", [
+        #         ("3.6", [
+        #             ('build_only', [XImportant(True)]),
+        #         ]),
+        #     ]),
+        # ]),
     ]),
 ]
 
@@ -169,7 +145,6 @@ def child_constructor(self):
             "build_only": BuildOnlyConfigNode,
             "shard_test": ShardTestConfigNode,
             "cuda_gcc_override": CudaGccOverrideConfigNode,
-            "coverage": CoverageConfigNode,
             "pure_torch": PureTorchConfigNode,
             "slow_gradcheck": SlowGradcheckConfigNode,
         }
@@ -313,14 +288,6 @@ def child_constructor(self):
         return ImportantConfigNode
 
 
-class CoverageConfigNode(TreeConfigNode):
-    def init2(self, node_name):
-        self.props["is_coverage"] = node_name
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
 class ImportantConfigNode(TreeConfigNode):
     def modify_label(self, label):
         return "IMPORTANT=" + str(label)

diff --git a/.circleci/cimodel/data/pytorch_build_definitions.py b/.circleci/cimodel/data/pytorch_build_definitions.py
@@ -178,43 +178,14 @@ def gen_workflow_job(self, phase):
             }
         }
 
-# TODO Convert these to graph nodes
-def gen_dependent_configs(xenial_parent_config):
-
-    extra_parms = [
-        (["multigpu"], "large"),
-        (["nogpu", "NO_AVX2"], None),
-        (["nogpu", "NO_AVX"], None),
-        (["slow"], "medium"),
-    ]
-
-    configs = []
-    for parms, gpu in extra_parms:
-
-        c = Conf(
-            xenial_parent_config.distro,
-            ["py3"] + parms,
-            pyver=xenial_parent_config.pyver,
-            cuda_version=xenial_parent_config.cuda_version,
-            restrict_phases=["test"],
-            gpu_resource=gpu,
-            parent_build=xenial_parent_config,
-            is_important=False,
-        )
-
-        configs.append(c)
-
-    return configs
-
-
 def gen_docs_configs(xenial_parent_config):
     configs = []
 
     configs.append(
         HiddenConf(
             "pytorch_python_doc_build",
             parent_build=xenial_parent_config,
-            filters=gen_filter_dict(branches_list=r"/.*/",
+            filters=gen_filter_dict(branches_list=["master", "nightly"],
                                     tags_list=RC_PATTERN),
         )
     )
@@ -230,7 +201,7 @@ def gen_docs_configs(xenial_parent_config):
         HiddenConf(
             "pytorch_cpp_doc_build",
             parent_build=xenial_parent_config,
-            filters=gen_filter_dict(branches_list=r"/.*/",
+            filters=gen_filter_dict(branches_list=["master", "nightly"],
                                     tags_list=RC_PATTERN),
         )
     )
@@ -241,13 +212,6 @@ def gen_docs_configs(xenial_parent_config):
             branch="master",
         )
     )
-
-    configs.append(
-        HiddenConf(
-            "pytorch_doc_test",
-            parent_build=xenial_parent_config
-        )
-    )
     return configs
 
 
@@ -275,7 +239,6 @@ def instantiate_configs(only_slow_gradcheck):
         compiler_version = fc.find_prop("compiler_version")
         is_xla = fc.find_prop("is_xla") or False
         is_asan = fc.find_prop("is_asan") or False
-        is_coverage = fc.find_prop("is_coverage") or False
         is_noarch = fc.find_prop("is_noarch") or False
         is_onnx = fc.find_prop("is_onnx") or False
         is_pure_torch = fc.find_prop("is_pure_torch") or False
@@ -320,10 +283,6 @@ def instantiate_configs(only_slow_gradcheck):
             python_version = fc.find_prop("pyver")
             parms_list[0] = fc.find_prop("abbreviated_pyver")
 
-        if is_coverage:
-            parms_list_ignored_for_docker_image.append("coverage")
-            python_version = fc.find_prop("pyver")
-
         if is_noarch:
             parms_list_ignored_for_docker_image.append("noarch")
 
@@ -393,27 +352,27 @@ def instantiate_configs(only_slow_gradcheck):
                                         tags_list=RC_PATTERN)
             c.dependent_tests = gen_docs_configs(c)
 
-        if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch and not is_slow_gradcheck:
-            c.dependent_tests = gen_dependent_configs(c)
-
         if (
-            compiler_name == "gcc"
-            and compiler_version == "5.4"
+            compiler_name != "clang"
+            and not rocm_version
             and not is_libtorch
             and not is_vulkan
             and not is_pure_torch
-            and parallel_backend is None
+            and not is_noarch
+            and not is_slow_gradcheck
+            and not only_slow_gradcheck
+            and not build_only
         ):
-            bc_breaking_check = Conf(
-                "backward-compatibility-check",
+            distributed_test = Conf(
+                c.gen_build_name("") + "distributed",
                 [],
                 is_xla=False,
                 restrict_phases=["test"],
                 is_libtorch=False,
                 is_important=True,
                 parent_build=c,
             )
-            c.dependent_tests.append(bc_breaking_check)
+            c.dependent_tests.append(distributed_test)
 
         config_list.append(c)
 

diff --git a/.circleci/cimodel/data/simple/docker_definitions.py b/.circleci/cimodel/data/simple/docker_definitions.py
@@ -6,37 +6,22 @@
 
 # TODO: make this generated from a matrix rather than just a static list
 IMAGE_NAMES = [
-    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
-    "pytorch-linux-bionic-py3.6-clang9",
-    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
-    "pytorch-linux-bionic-py3.8-gcc9",
-    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
-    "pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
-    "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
-    "pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
-    "pytorch-linux-xenial-py3-clang5-asan",
-    "pytorch-linux-xenial-py3-clang7-asan",
-    "pytorch-linux-xenial-py3-clang7-onnx",
-    "pytorch-linux-xenial-py3.8",
-    "pytorch-linux-xenial-py3.6-clang7",
-    "pytorch-linux-xenial-py3.6-gcc5.4",  # this one is used in doc builds
-    "pytorch-linux-xenial-py3.6-gcc7.2",
-    "pytorch-linux-xenial-py3.6-gcc7",
-    "pytorch-linux-bionic-rocm3.9-py3.6",
-    "pytorch-linux-bionic-rocm4.0.1-py3.6",
     "pytorch-linux-bionic-rocm4.1-py3.6",
     "pytorch-linux-bionic-rocm4.2-py3.6",
+    "pytorch-linux-bionic-rocm4.3.1-py3.6",
 ]
 
 # This entry should be an element from the list above
 # This should contain the image matching the "slow_gradcheck" entry in
 # pytorch_build_data.py
 SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
 
-def get_workflow_jobs(only_slow_gradcheck=False):
+def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
     """Generates a list of docker image build definitions"""
     ret = []
-    for image_name in IMAGE_NAMES:
+    for image_name in images:
+        if image_name.startswith('docker-'):
+            image_name = image_name.lstrip('docker-')
         if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
             continue