Commit

Update

[ghstack-poisoned]
jansel committed Jun 22, 2024
2 parents f1180c7 + fdcff74 commit 798cc2e
Showing 373 changed files with 7,382 additions and 7,207 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
-45fff310c891f5a92d55445adf8cc9d29df5841e
+dedb7bdf339a3546896d4820366ca562c586bfa0
32 changes: 25 additions & 7 deletions .ci/pytorch/test.sh
@@ -350,17 +350,31 @@ test_inductor_distributed() {
   assert_git_not_dirty
 }

-test_inductor() {
+test_inductor_shard() {
+  if [[ -z "$NUM_TEST_SHARDS" ]]; then
+    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
+    exit 1
+  fi
+
   python tools/dynamo/verify_dynamo.py
-  python test/run_test.py --inductor --include test_modules test_ops test_ops_gradients test_torch --verbose
+  python test/run_test.py --inductor \
+    --include test_modules test_ops test_ops_gradients test_torch \
+    --shard "$1" "$NUM_TEST_SHARDS" \
+    --verbose

   # Do not add --inductor for the following inductor unit tests, otherwise we will fail because of nested dynamo state
-  python test/run_test.py --include inductor/test_torchinductor inductor/test_torchinductor_opinfo inductor/test_aot_inductor --verbose
+  python test/run_test.py \
+    --include inductor/test_torchinductor inductor/test_torchinductor_opinfo inductor/test_aot_inductor \
+    --shard "$1" "$NUM_TEST_SHARDS" \
+    --verbose
+}
+
+test_inductor_aoti() {
   # docker build uses bdist_wheel which does not work with test_aot_inductor
   # TODO: need a faster way to build
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
-    BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
+    BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
+    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
   fi
 }

@@ -1299,10 +1313,14 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
 elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
   install_torchvision
   test_inductor_cpp_wrapper_abi_compatible
-elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
+elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
   install_torchvision
-  test_inductor
+  test_inductor_shard 1
+  test_inductor_aoti
   test_inductor_distributed
+elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
+  install_torchvision
+  test_inductor_shard "${SHARD_NUMBER}"
 elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
   install_torchvision
   test_dynamo_shard 1
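The new --shard "$1" "$NUM_TEST_SHARDS" arguments split one test config across several CI machines. As a rough illustration, a minimal sketch of that selection, assuming simple round-robin assignment (run_test.py's actual scheduler may instead balance shards by recorded test times):

# Hedged sketch of "--shard i n" semantics; not run_test.py's real implementation.
from typing import List


def select_shard(tests: List[str], which_shard: int, num_shards: int) -> List[str]:
    """Return the subset of `tests` that 1-indexed shard `which_shard` runs."""
    assert 1 <= which_shard <= num_shards, "shard index out of range"
    return [t for i, t in enumerate(tests) if i % num_shards == which_shard - 1]


tests = ["test_modules", "test_ops", "test_ops_gradients", "test_torch"]
print(select_shard(tests, 1, 2))  # ['test_modules', 'test_ops_gradients']
print(select_shard(tests, 2, 2))  # ['test_ops', 'test_torch']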
124 changes: 118 additions & 6 deletions .github/workflows/_runner-determinator.yml
@@ -34,11 +34,123 @@ jobs:
       ISSUE_NUMBER: ${{ inputs.issue_number }}
       USERNAME: ${{ inputs.user_name }}
     steps:
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-        with:
-          fetch-depth: 1
-          submodules: true
+      # - name: Checkout PyTorch
+      #   uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+      #   with:
+      #     fetch-depth: 1
+      #     submodules: true

+      # TODO: Remove the hardcoded step below
+      # Hardcoding below is temporary for testing ALI runners
+      # This file below should match the script found in .github/scripts/get_workflow_type.py
+      - name: Hardcode runner-determinator script
+        run: |
+          cat <<EOF > get_workflow_type.py
+          import json
+          from argparse import ArgumentParser
+          from typing import Any, Tuple
+
+          from github import Auth, Github
+          from github.Issue import Issue
+
+          WORKFLOW_LABEL_META = ""  # use meta runners
+          WORKFLOW_LABEL_LF = "lf."  # use runners from the linux foundation
+
+          LABEL_TYPE_KEY = "label_type"
+          MESSAGE_KEY = "message"
+          MESSAGE = ""  # Debug message to return to the caller
+
+          def parse_args() -> Any:
+              parser = ArgumentParser("Get dynamic rollout settings")
+              parser.add_argument("--github-token", type=str, required=True, help="GitHub token")
+              parser.add_argument(
+                  "--github-repo",
+                  type=str,
+                  required=False,
+                  default="pytorch/test-infra",
+                  help="GitHub repo to get the issue",
+              )
+              parser.add_argument(
+                  "--github-issue", type=int, required=True, help="GitHub issue number"
+              )
+              parser.add_argument(
+                  "--github-user", type=str, required=True, help="GitHub username"
+              )
+              parser.add_argument(
+                  "--github-branch", type=str, required=True, help="Current GitHub branch"
+              )
+              return parser.parse_args()
+
+          def get_gh_client(github_token: str) -> Github:
+              auth = Auth.Token(github_token)
+              return Github(auth=auth)
+
+          def get_issue(gh: Github, repo: str, issue_num: int) -> Issue:
+              repo = gh.get_repo(repo)
+              return repo.get_issue(number=issue_num)
+
+          def is_exception_branch(branch: str) -> bool:
+              return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
+
+          def get_workflow_type(issue: Issue, username: str) -> Tuple[str, str]:
+              try:
+                  user_list = issue.get_comments()[0].body.split()
+
+                  if user_list[0] == "!":
+                      MESSAGE = "LF Workflows are disabled for everyone. Using meta runners."
+                      return WORKFLOW_LABEL_META, MESSAGE
+                  elif user_list[0] == "*":
+                      MESSAGE = "LF Workflows are enabled for everyone. Using LF runners."
+                      return WORKFLOW_LABEL_LF, MESSAGE
+                  elif username in user_list:
+                      MESSAGE = f"LF Workflows are enabled for {username}. Using LF runners."
+                      return WORKFLOW_LABEL_LF, MESSAGE
+                  else:
+                      MESSAGE = f"LF Workflows are disabled for {username}. Using meta runners."
+                      return WORKFLOW_LABEL_META, MESSAGE
+              except Exception as e:
+                  MESSAGE = f"Failed to determine workflow type. Falling back to meta runners. Exception: {e}"
+                  return WORKFLOW_LABEL_META, MESSAGE
+
+          def main() -> None:
+              args = parse_args()
+
+              if is_exception_branch(args.github_branch):
+                  output = {
+                      LABEL_TYPE_KEY: WORKFLOW_LABEL_META,
+                      MESSAGE_KEY: f"Exception branch: '{args.github_branch}', using meta runners",
+                  }
+              else:
+                  try:
+                      gh = get_gh_client(args.github_token)
+                      # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
+                      issue = get_issue(gh, args.github_repo, args.github_issue)
+                      label_type, message = get_workflow_type(issue, args.github_user)
+                      output = {
+                          LABEL_TYPE_KEY: label_type,
+                          MESSAGE_KEY: message,
+                      }
+                  except Exception as e:
+                      output = {
+                          LABEL_TYPE_KEY: WORKFLOW_LABEL_META,
+                          MESSAGE_KEY: f"Failed to get issue. Falling back to meta runners. Exception: {e}",
+                      }
+
+              json_output = json.dumps(output)
+              print(json_output)
+
+          if __name__ == "__main__":
+              main()
+          EOF
+          cat get_workflow_type.py
       - name: Install dependencies
         run: python3 -m pip install urllib3==1.26.18 PyGithub==2.3.0
@@ -49,7 +161,7 @@ jobs:
           curr_branch="${{ inputs.curr_branch }}"
           echo "Current branch is '$curr_branch'"

-          output="$(python3 .github/scripts/get_workflow_type.py \
+          output="$(python3 get_workflow_type.py \
            --github-token "$GITHUB_TOKEN" \
            --github-issue "$ISSUE_NUMBER" \
            --github-branch "$curr_branch" \
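For reference, the hardcoded script's contract can be exercised locally. A hedged sketch of invoking it and parsing the JSON it prints; the token and username below are placeholders, and passing --github-branch main makes the script short-circuit on its exception-branch check, so no GitHub API call is made:

import json
import subprocess

proc = subprocess.run(
    [
        "python3", "get_workflow_type.py",
        "--github-token", "placeholder-token",  # placeholder, unused on exception branches
        "--github-issue", "5132",
        "--github-branch", "main",              # exception branch: no API call
        "--github-user", "octocat",             # placeholder username
    ],
    capture_output=True, text=True, check=True,
)
result = json.loads(proc.stdout)
print(result["label_type"])  # "" -> meta runners, "lf." -> Linux Foundation runners
print(result["message"])     # human-readable reason for the routing decision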
26 changes: 26 additions & 0 deletions .github/workflows/pull.yml
@@ -515,3 +515,29 @@ jobs:
     build-environment: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
     docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
     test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
+
+  linux-focal-py3_12-clang10-experimental-split-build:
+    name: linux-focal-py3.12-clang10-experimental-split-build
+    uses: ./.github/workflows/_linux-build-label.yml
+    with:
+      use_split_build: True
+      build-environment: linux-focal-py3.12-clang10
+      docker-image-name: pytorch-linux-focal-py3.12-clang10
+      test-matrix: |
+        { include: [
+          { config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
+          { config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
+          { config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
+        ]}
+  linux-focal-py3_12-clang10-experimental-split-build-test:
+    name: linux-focal-py3.12-clang10-experimental-split-build
+    uses: ./.github/workflows/_linux-test.yml
+    needs: linux-focal-py3_12-clang10-experimental-split-build
+    with:
+      build-environment: linux-focal-py3.12-clang10-experimental-split-build
+      docker-image: ${{ needs.linux-focal-py3_12-clang10-experimental-split-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-py3_12-clang10-experimental-split-build.outputs.test-matrix }}
+      timeout-minutes: 600
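The test-matrix block is a JSON-ish string that the reusable workflow fans out into one job per entry. A hedged sketch of that expansion (keys are quoted here for strict JSON; the exact parsing done by _linux-test.yml is not shown in this diff):

import json

test_matrix = """
{ "include": [
    { "config": "default", "shard": 1, "num_shards": 3, "runner": "linux.2xlarge" },
    { "config": "dynamo",  "shard": 1, "num_shards": 3, "runner": "linux.2xlarge" }
]}
"""

for entry in json.loads(test_matrix)["include"]:
    # Each entry becomes an independent CI job.
    print(f"run config={entry['config']} "
          f"shard {entry['shard']}/{entry['num_shards']} on {entry['runner']}")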
57 changes: 54 additions & 3 deletions .lintrunner.toml
@@ -224,16 +224,13 @@ exclude_patterns = [
     'c10/util/win32-headers.h',
     'c10/util/*inl.h',
     'c10/test/**/*.h',
-    'aten/src/ATen/core/TensorImpl_test.cpp',
     'third_party/**/*',
     'torch/csrc/api/**',
     'torch/csrc/autograd/generated/**',
     'torch/csrc/distributed/**/*',
     'torch/csrc/dynamo/eval_frame.h',
     'torch/csrc/inductor/**/*',
     'torch/csrc/jit/**/*',
-    'torch/csrc/jit/serialization/import_legacy.cpp',
-    'torch/csrc/jit/serialization/export.cpp',
     'torch/csrc/lazy/**/*',
 ]
 init_command = [
@@ -1390,6 +1387,33 @@ exclude_patterns = [
     'torch/contrib/_tensorboard_vis.py',
     "torch/cuda/_gpu_trace.py",
     'torch/cuda/_memory_viz.py', # mypy: Value of type "object" is not indexable
+    'torch/distributed/__init__.py',
+    'torch/distributed/_composable_state.py',
+    'torch/distributed/_sharded_tensor/__init__.py',
+    'torch/distributed/_sharding_spec/__init__.py',
+    'torch/distributed/_tools/__init__.py',
+    'torch/distributed/_tools/memory_tracker.py',
+    'torch/distributed/argparse_util.py',
+    'torch/distributed/c10d_logger.py',
+    'torch/distributed/collective_utils.py',
+    'torch/distributed/constants.py',
+    'torch/distributed/distributed_c10d.py',
+    'torch/distributed/examples/memory_tracker_example.py',
+    'torch/distributed/launch.py',
+    'torch/distributed/launcher/__init__.py',
+    'torch/distributed/launcher/api.py',
+    'torch/distributed/logging_handlers.py',
+    'torch/distributed/nn/__init__.py',
+    'torch/distributed/nn/api/__init__.py',
+    'torch/distributed/nn/api/remote_module.py',
+    'torch/distributed/nn/functional.py',
+    'torch/distributed/nn/jit/__init__.py',
+    'torch/distributed/nn/jit/instantiator.py',
+    'torch/distributed/nn/jit/templates/__init__.py',
+    'torch/distributed/nn/jit/templates/remote_module_template.py',
+    'torch/distributed/remote_device.py',
+    'torch/distributed/rendezvous.py',
+    'torch/distributed/run.py',
     'torch/fft/__init__.py',
     'torch/func/__init__.py',
     'torch/futures/__init__.py',
@@ -1484,6 +1508,7 @@ exclude_patterns = [
     'torch/nn/_reduction.py',
     'torch/nn/common_types.py',
     'torch/nn/cpp.py',
+    'torch/nn/functional.py',
     'torch/nn/grad.py',
     'torch/nn/init.py',
     'torch/nn/intrinsic/__init__.py',
@@ -1502,6 +1527,32 @@ exclude_patterns = [
     'torch/nn/intrinsic/quantized/modules/bn_relu.py',
     'torch/nn/intrinsic/quantized/modules/conv_relu.py',
     'torch/nn/intrinsic/quantized/modules/linear_relu.py',
+    'torch/nn/modules/__init__.py',
+    'torch/nn/modules/_functions.py',
+    'torch/nn/modules/activation.py',
+    'torch/nn/modules/adaptive.py',
+    'torch/nn/modules/batchnorm.py',
+    'torch/nn/modules/channelshuffle.py',
+    'torch/nn/modules/container.py',
+    'torch/nn/modules/conv.py',
+    'torch/nn/modules/distance.py',
+    'torch/nn/modules/dropout.py',
+    'torch/nn/modules/flatten.py',
+    'torch/nn/modules/fold.py',
+    'torch/nn/modules/instancenorm.py',
+    'torch/nn/modules/lazy.py',
+    'torch/nn/modules/linear.py',
+    'torch/nn/modules/loss.py',
+    'torch/nn/modules/module.py',
+    'torch/nn/modules/normalization.py',
+    'torch/nn/modules/padding.py',
+    'torch/nn/modules/pixelshuffle.py',
+    'torch/nn/modules/pooling.py',
+    'torch/nn/modules/rnn.py',
+    'torch/nn/modules/sparse.py',
+    'torch/nn/modules/transformer.py',
+    'torch/nn/modules/upsampling.py',
+    'torch/nn/modules/utils.py',
     'torch/nn/parameter.py',
     'torch/nn/qat/__init__.py',
     'torch/nn/qat/dynamic/__init__.py',
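These exclude_patterns entries are glob patterns matched against repo-relative paths. A hedged approximation of the matching using fnmatch (lintrunner's actual matcher may treat ** and directory separators more strictly than fnmatch does):

from fnmatch import fnmatch

exclude_patterns = [
    "torch/csrc/jit/**/*",
    "torch/nn/modules/conv.py",
]

def is_excluded(path: str) -> bool:
    # fnmatch's "*" also crosses "/", which is close enough for illustration.
    return any(fnmatch(path, pattern) for pattern in exclude_patterns)

print(is_excluded("torch/nn/modules/conv.py"))            # True
print(is_excluded("torch/csrc/jit/frontend/parser.cpp"))  # True
print(is_excluded("torch/nn/functional.py"))              # False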
5 changes: 0 additions & 5 deletions aten/src/ATen/CMakeLists.txt
@@ -53,11 +53,6 @@ if(NOT BUILD_LITE_INTERPRETER)
   file(GLOB_RECURSE ATen_CORE_TEST_SRCS "core/*_test.cpp")
 endif()
 EXCLUDE(ATen_CORE_SRCS "${ATen_CORE_SRCS}" ${ATen_CORE_TEST_SRCS})
-# Exclude TensorImpl_test.cpp if compiling without Caffe2
-if(NOT BUILD_LITE_INTERPRETER)
-  file(GLOB_RECURSE ATen_CORE_EXCLUDED_TEST_SRCS "core/TensorImpl_test.cpp")
-  EXCLUDE(ATen_CORE_TEST_SRCS "${ATen_CORE_TEST_SRCS}" ${ATen_CORE_EXCLUDED_TEST_SRCS})
-endif()

 file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/vec256/zarch/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
 file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp")
8 changes: 8 additions & 0 deletions aten/src/ATen/Context.cpp
@@ -56,6 +56,14 @@ void Context::setDeterministicCuDNN(bool b) {
   deterministic_cudnn = b;
 }

+bool Context::deterministicMkldnn() const {
+  return deterministic_mkldnn;
+}
+
+void Context::setDeterministicMkldnn(bool b) {
+  deterministic_mkldnn = b;
+}
+
 bool Context::deterministicAlgorithms() const {
   return _deterministic_algorithms;
 }
3 changes: 3 additions & 0 deletions aten/src/ATen/Context.h
@@ -188,6 +188,8 @@ class TORCH_API Context {
   void setBenchmarkLimitCuDNN(int);
   bool deterministicCuDNN() const;
   void setDeterministicCuDNN(bool);
+  bool deterministicMkldnn() const;
+  void setDeterministicMkldnn(bool);
   bool userEnabledNNPACK() const;
   void setUserEnabledNNPACK(bool e);

@@ -358,6 +360,7 @@ class TORCH_API Context {
   c10::once_flag thp_init;
   bool enabled_cudnn = true;
   bool deterministic_cudnn = false;
+  bool deterministic_mkldnn = false;
   bool _deterministic_algorithms = false;
   bool _deterministic_algorithms_warn_only = false;
   bool _deterministic_fill_uninitialized_memory = true;
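Context gains a deterministic_mkldnn flag with a getter/setter pair that mirrors the existing cuDNN determinism toggle. Assuming it is surfaced to Python the same way the cuDNN flag is, usage would look like the sketch below; the attribute name is an assumption based on the naming pattern, not something this diff shows:

import torch

# Hypothetical binding of Context::setDeterministicMkldnn(true); the
# torch.backends.mkldnn.deterministic name is assumed, not confirmed here.
torch.backends.mkldnn.deterministic = True
print(torch.backends.mkldnn.deterministic)  # hypothetical deterministicMkldnn()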
9 changes: 7 additions & 2 deletions aten/src/ATen/FunctionalInverses.cpp
@@ -303,7 +303,7 @@ Tensor FunctionalInverses::_nested_view_from_buffer_inverse(const Tensor& base,
   return Tensor();
 }

-Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx) {
+Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx, const c10::optional<Tensor>& min_seqlen, const c10::optional<Tensor>& max_seqlen) {
   auto values = at::_nested_get_values(mutated_view);
   if (inverse_return_mode != InverseReturnMode::NeverView) {
     return values;
@@ -317,7 +317,12 @@ Tensor FunctionalInverses::_nested_get_values_inverse(const Tensor& base, const
   auto lengths = at::_nested_get_lengths(base);
   auto ragged_idx = at::_nested_get_ragged_idx(base);
   auto dummy = at::_nested_get_jagged_dummy(base);
-  auto nt = at::_nested_view_from_jagged(mutated_view, offsets, dummy, lengths, ragged_idx);
+  auto min_seqlen = at::_nested_get_min_seqlen(base);
+  auto max_seqlen = at::_nested_get_max_seqlen(base);
+  auto nt = at::_nested_view_from_jagged(
+      mutated_view, offsets, dummy, lengths, ragged_idx,
+      (min_seqlen.defined() ? c10::optional<Tensor>(min_seqlen) : c10::nullopt),
+      (max_seqlen.defined() ? c10::optional<Tensor>(max_seqlen) : c10::nullopt));

   if (inverse_return_mode != InverseReturnMode::NeverView) {
     return nt;
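The inverse now threads cached min/max sequence lengths through the rebuilt jagged view so they need not be recomputed. For intuition, those bounds derive from consecutive offset differences; an illustrative computation (not the ATen implementation):

import torch

offsets = torch.tensor([0, 3, 3, 7])   # 3 sequences packed into 7 rows
lengths = offsets[1:] - offsets[:-1]   # per-sequence lengths: tensor([3, 0, 4])
print(lengths.min().item(), lengths.max().item())  # 0 4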
2 changes: 1 addition & 1 deletion aten/src/ATen/core/MetaFallbackKernel.cpp
@@ -17,7 +17,7 @@ static void metaFallback(
       "while using an operator with PT2 compilation APIs (torch.compile/torch.export); "
       "in order to use this operator with those APIs you'll need to add a fake impl. "
       "Please see the following for next steps: "
-      "https://pytorch.org/docs/main/notes/custom_operators.html");
+      "https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html");
 }

 TORCH_LIBRARY_IMPL(_, Meta, m) {
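The updated URL points at the custom-ops landing page, which covers registering a "fake impl" so custom operators work under torch.compile/torch.export. A hedged minimal example using the torch.library API (decorator names as in recent PyTorch releases; older versions used impl_abstract):

import torch

@torch.library.custom_op("mylib::double", mutates_args=())
def double(x: torch.Tensor) -> torch.Tensor:
    return x * 2

@double.register_fake
def _(x):
    # Fake tensors carry only shape/dtype/device, which is all PT2 tracing needs.
    return torch.empty_like(x)

print(torch.compile(double)(torch.ones(3)))  # tensor([2., 2., 2.])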
(Diff truncated here; the remaining changed files from this commit are not shown.)