Update on "Support randn_like() for NT"
To satisfy an internal ask.

[ghstack-poisoned]
jbschlosser committed Mar 13, 2023 · 2 parents bf54268 + 3c0f0b0 · commit 2b4d3c2
Showing 400 changed files with 19,507 additions and 14,236 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
-b8b470bc597c1c5bd03682c09fe3e6b7c53787fd
+2c32f4399986045ff25cae201ed3b16d922a9d3b
6 changes: 3 additions & 3 deletions .ci/docker/requirements-ci.txt
@@ -244,9 +244,9 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
 #Pinned versions:
 #test that import:

-lintrunner==0.9.2
-#Description: all about linters
-#Pinned versions: 0.9.2
+lintrunner==0.10.7
+#Description: all about linters!
+#Pinned versions: 0.10.7
 #test that import:

 rockset==1.0.3
1 change: 1 addition & 0 deletions .ci/docker/triton_version.txt
@@ -0,0 +1 @@
+2.1.0
3 changes: 2 additions & 1 deletion .ci/docker/ubuntu-cuda/Dockerfile
@@ -91,8 +91,9 @@ ARG TRITON
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/triton.txt triton.txt
+COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt
+RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
5 changes: 2 additions & 3 deletions .ci/onnx/test.sh
@@ -61,10 +61,9 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
   pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
   pip install -q --user transformers==4.25.1
   pip install -q --user ninja flatbuffers==2.0 numpy==1.22.4 onnxruntime==1.14.0 beartype==0.10.4
-  # TODO: change this when onnx 1.13.1 is released.
-  pip install --no-use-pep517 'onnx @ git+https://github.com/onnx/onnx@e192ba01e438d22ca2dedd7956e28e3551626c91'
+  pip install -q --user onnx==1.13.1
   # TODO: change this when onnx-script is on testPypi
-  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@0298154caf6b46fc4e30abba034095c1290c26e3'
+  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@29241e15f5182be1384f1cf6ba203d7e2e125196'
   # numba requires numpy <= 1.20, onnxruntime requires numpy >= 1.21.
   # We don't actually need it for our tests, but it's imported if it's present, so uninstall.
   pip uninstall -q --yes numba
3 changes: 0 additions & 3 deletions .ci/pytorch/multigpu-test.sh
@@ -27,7 +27,6 @@ time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
 time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
 time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
 time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
-time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
@@ -36,12 +35,10 @@ time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
-time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
 time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
-time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
 # Other tests
 time python test/run_test.py --verbose -i test_cuda_primary_ctx
 time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
25 changes: 17 additions & 8 deletions .ci/pytorch/test.sh
@@ -298,13 +298,19 @@ test_single_dynamo_benchmark() {

   local partition_flags=()
   if [[ -n "$NUM_TEST_SHARDS" && -n "$shard_id" ]]; then
-    partition_flags=( --total-partitions 2 --partition-id "$shard_id" )
+    partition_flags=( --total-partitions "$NUM_TEST_SHARDS" --partition-id "$shard_id" )
   fi

-  if [[ "${TEST_CONFIG}" == *perf* ]]; then
+  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
+    python "benchmarks/dynamo/$suite.py" \
+      --ci --performance --disable-cudagraphs \
+      "${DYNAMO_BENCHMARK_FLAGS[@]}" \
+      "$@" "${partition_flags[@]}" \
+      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
+  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
     # MKL_THREADING_LAYER=GNU to mitigate https://github.com/pytorch/pytorch/issues/37377
     MKL_THREADING_LAYER=GNU python benchmarks/dynamo/runner.py --suites="$suite" \
-      --base-sha="$BASE_SHA" --output-dir="$TEST_REPORTS_DIR" "${partition_flags[@]}" \
+      --base-sha="$BASE_SHA" "${partition_flags[@]}" \
       --no-graphs --no-update-archive --no-gh-comment "$@"
   else
     python "benchmarks/dynamo/$suite.py" \
@@ -319,21 +325,24 @@

 test_dynamo_benchmark() {
   # Usage: test_dynamo_benchmark huggingface 0
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports

   local suite="$1"
   shift
   local shard_id="$1"
   shift

-  if [[ "${TEST_CONFIG}" == *perf* ]]; then
+  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
+    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --amp "$@"
+  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
     # Performance test training only, for float32 and amp
-    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp "$@"
-    test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 "$@"
+    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp --output-dir="$TEST_REPORTS_DIR"/amp "$@"
+    test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 --output-dir="$TEST_REPORTS_DIR"/float32 "$@"
   else
     # Check inference with --float32
     test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --float32 "$@"

-    if [[ "${TEST_CONFIG}" != *cpu_accuracy* && "${TEST_CONFIG}" != *dynamic* ]]; then
+    if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
       # Check training with --amp
       test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
     fi
@@ -639,7 +648,7 @@ build_xla() {
   apply_patches
   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
   # These functions are defined in .circleci/common.sh in pytorch/xla repo
-  install_deps_pytorch_xla $XLA_DIR $USE_CACHE
+  retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
   CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
   assert_git_not_dirty
 }
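Note on the sharding fix above: the old code hard-coded --total-partitions 2, so any NUM_TEST_SHARDS other than 2 would silently run the wrong slice of the suite. A minimal Python sketch of the contract the --total-partitions/--partition-id flags imply (the helper and model names are illustrative, not taken from the benchmark scripts):

from typing import List

def select_partition(models: List[str], total_partitions: int, partition_id: int) -> List[str]:
    """Return the slice of `models` that shard `partition_id` should run."""
    if not 0 <= partition_id < total_partitions:
        raise ValueError("partition_id must be in [0, total_partitions)")
    # Round-robin assignment keeps shard sizes within one model of each other.
    return [m for i, m in enumerate(models) if i % total_partitions == partition_id]

# With 2 shards, shard 1 runs every other model; with 3 shards the split changes,
# which is why the flag must track NUM_TEST_SHARDS instead of a constant.
print(select_partition(["bert", "gpt2", "resnet50", "t5"], 2, 1))  # ['gpt2', 't5']
print(select_partition(["bert", "gpt2", "resnet50", "t5"], 3, 1))  # ['gpt2']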
10 changes: 7 additions & 3 deletions .clang-format
@@ -60,9 +60,6 @@ MacroBlockBegin: ''
 MacroBlockEnd: ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
 PenaltyBreakBeforeFirstCallParameter: 1
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
@@ -85,4 +82,11 @@ SpacesInSquareBrackets: false
 Standard: Cpp11
 TabWidth: 8
 UseTab: Never
+---
+Language: ObjC
+ColumnLimit: 120
+AlignAfterOpenBracket: Align
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
 ...
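Note: the --- and ... lines are YAML document markers; clang-format treats each document in the file as a separate per-language configuration selected by its Language: key. The ObjC options deleted from the shared section above are therefore not lost; they move into an ObjC-only document with its own 120-column limit.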
8 changes: 7 additions & 1 deletion .github/actions/setup-linux/action.yml
@@ -13,7 +13,13 @@ runs:
   # Pulled from instance metadata endpoint for EC2
   # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
   category=$1
-  curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+  # If it is GCP runner (runner name contains gcp), do not run this
+  runner_name_str=${{ runner.name }}
+  if [[ $runner_name_str != *"gcp"* ]]; then
+    curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+  else
+    echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
+  fi
 }
 echo "ami-id: $(get_ec2_metadata ami-id)"
 echo "instance-id: $(get_ec2_metadata instance-id)"
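For anyone reproducing the guard outside of CI, a rough Python equivalent follows (assumptions: GitHub Actions mirrors runner.name into the RUNNER_NAME environment variable, and 169.254.169.254 is the standard EC2 instance-metadata endpoint):

import os
import urllib.request

def get_ec2_metadata(category: str) -> str:
    """Fetch an EC2 metadata field, skipping the lookup on GCP-hosted runners."""
    if "gcp" in os.environ.get("RUNNER_NAME", ""):
        return "Runner is from Google Cloud Platform, no EC2 metadata"
    url = f"http://169.254.169.254/latest/meta-data/{category}"
    with urllib.request.urlopen(url, timeout=2) as resp:  # link-local address, fail fast
        return resp.read().decode()

print("ami-id:", get_ec2_metadata("ami-id"))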
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-beb4bb706b5e13009cb5d5586505c6d2896d184a
+7d2acaa7d7fc600fa08fca18e9230f8651147025
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -6,6 +6,7 @@ ciflow_push_tags:
 - ciflow/binaries_libtorch
 - ciflow/binaries_wheel
 - ciflow/inductor
+- ciflow/inductor-perf-compare
 - ciflow/inductor-perf-test-nightly
 - ciflow/mps
 - ciflow/nightly
2 changes: 1 addition & 1 deletion .github/requirements-gha-cache.txt
@@ -7,7 +7,7 @@
 # .ci/docker/requirements-ci.txt
 boto3==1.19.12
 jinja2==3.0.1
-lintrunner==0.9.2
+lintrunner==0.10.7
 ninja==1.10.0.post1
 nvidia-ml-py==11.525.84
 pyyaml==6.0
37 changes: 29 additions & 8 deletions .github/scripts/build_triton_wheel.py
@@ -6,9 +6,15 @@
 import sys
 import shutil
 SCRIPT_DIR = Path(__file__).parent
+REPO_DIR = SCRIPT_DIR.parent.parent

 def read_triton_pin() -> str:
-    with open(SCRIPT_DIR.parent / "ci_commit_pins" / "triton.txt") as f:
+    with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / "triton.txt") as f:
         return f.read().strip()


+def read_triton_version() -> str:
+    with open(REPO_DIR / ".ci" / "docker" / "triton_version.txt") as f:
+        return f.read().strip()


@@ -19,26 +25,35 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
     return inp.replace(src, dst)


-def patch_setup_py(path: Path, *, version: str = "2.0.0", name: str = "triton") -> None:
+def patch_setup_py(path: Path, *, version: str, name: str = "triton") -> None:
     with open(path) as f:
         orig = f.read()
     # Replace name
     orig = check_and_replace(orig, "name=\"triton\",", f"name=\"{name}\",")
     # Replace version
-    orig = check_and_replace(orig, "version=\"2.0.0\",", f"version=\"{version}\",")
+    orig = check_and_replace(orig, f"version=\"{read_triton_version()}\",", f"version=\"{version}\",")
     with open(path, "w") as f:
         f.write(orig)


+def patch_init_py(path: Path, *, version: str) -> None:
+    with open(path) as f:
+        orig = f.read()
+    # Replace version
+    orig = check_and_replace(orig, "__version__ = '2.0.0'", f"__version__ = \"{version}\"")
+    with open(path, "w") as f:
+        f.write(orig)


-def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
+def build_triton(*, version: str, commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
     with TemporaryDirectory() as tmpdir:
         triton_basedir = Path(tmpdir) / "triton"
         triton_pythondir = triton_basedir / "python"
         check_call(["git", "clone", "https://github.com/openai/triton"], cwd=tmpdir)
         check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
         if build_conda:
             with open(triton_basedir / "meta.yaml", "w") as meta:
-                print(f"package:\n  name: torchtriton\n  version: 2.0.0+{commit_hash[:10]}\n", file=meta)
+                print(f"package:\n  name: torchtriton\n  version: {version}+{commit_hash[:10]}\n", file=meta)
                 print("source:\n  path: .\n", file=meta)
                 print("build:\n  string: py{{py}}\n  number: 1\n  script: cd python; "
                       "python setup.py install --single-version-externally-managed --record=record.txt\n", file=meta)
@@ -47,6 +62,7 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
                 print("about:\n  home: https://github.com/openai/triton\n  license: MIT\n  summary:"
                       " 'A language and compiler for custom Deep Learning operation'", file=meta)

+            patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
             if py_version is None:
                 py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
             check_call(["conda", "build", "--python", py_version,
@@ -55,7 +71,8 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
             shutil.copy(conda_path, Path.cwd())
             return Path.cwd() / conda_path.name

-        patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"2.0.0+{commit_hash[:10]}")
+        patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"{version}+{commit_hash[:10]}")
+        patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
         check_call([sys.executable, "setup.py", "bdist_wheel"], cwd=triton_pythondir)
         whl_path = list((triton_pythondir / "dist").glob("*.whl"))[0]
         shutil.copy(whl_path, Path.cwd())
@@ -67,9 +84,13 @@ def main() -> None:
     parser = ArgumentParser("Build Triton binaries")
     parser.add_argument("--build-conda", action="store_true")
     parser.add_argument("--py-version", type=str)
+    parser.add_argument("--commit-hash", type=str, default=read_triton_pin())
+    parser.add_argument("--triton-version", type=str, default=read_triton_version())
     args = parser.parse_args()
-    pin = read_triton_pin()
-    build_triton(pin, build_conda=args.build_conda, py_version=args.py_version)
+    build_triton(commit_hash=args.commit_hash,
+                 version=args.triton_version,
+                 build_conda=args.build_conda,
+                 py_version=args.py_version)


 if __name__ == "__main__":
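The version stamping in this script hinges on check_and_replace finding an exact pinned string, so if upstream triton ever changes its hard-coded version the build fails loudly rather than shipping a mislabeled wheel. A simplified, self-contained stand-in for the helper showing what patch_init_py effectively does (the raise message is assumed; the replaced strings come from the diff above):

def check_and_replace(inp: str, src: str, dst: str) -> str:
    """Replace src with dst, failing loudly if src is absent (simplified stand-in)."""
    if src not in inp:
        raise RuntimeError(f"Can't find {src} in the input")  # message assumed
    return inp.replace(src, dst)

# What patch_init_py effectively does to triton/__init__.py, using the new
# pinned version (2.1.0) and the first 10 characters of the new commit pin:
orig = "__version__ = '2.0.0'\n"
patched = check_and_replace(orig, "__version__ = '2.0.0'",
                            '__version__ = "2.1.0+2c32f43999"')
print(patched)  # __version__ = "2.1.0+2c32f43999"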
12 changes: 0 additions & 12 deletions .github/scripts/generate_ci_workflows.py
@@ -271,18 +271,6 @@ class OperatingSystem:
             isolated_workflow=True,
         ),
     ),
-    BinaryBuildWorkflow(
-        os=OperatingSystem.MACOS,
-        package_type="libtorch",
-        abi_version=generate_binary_build_matrix.PRE_CXX11_ABI,
-        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
-            OperatingSystem.MACOS, generate_binary_build_matrix.PRE_CXX11_ABI
-        ),
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
-            isolated_workflow=True,
-        ),
-    ),
     BinaryBuildWorkflow(
         os=OperatingSystem.MACOS_ARM64,
         package_type="wheel",
35 changes: 3 additions & 32 deletions .github/scripts/test_trymerge.py
@@ -13,8 +13,6 @@

 from trymerge import (
     find_matching_merge_rule,
-    get_land_checkrun_conclusions,
-    validate_land_time_checks,
     gh_graphql,
     gh_get_team_members,
     read_merge_rules,
@@ -25,7 +23,6 @@
     PostCommentError,
     FlakyRule,
     categorize_checks,
-    get_combined_checks_from_pr_and_land_validation,
     get_rockset_results,
     main as trymerge_main,
     get_classifications,
@@ -111,9 +108,6 @@ def __init__(self) -> None:
         self.pr_num = 76123
         self.dry_run = True
         self.comment_id = 0
-        self.on_mandatory = False
-        self.on_green = False
-        self.land_checks = False
         self.reason = 'this is for testing'

     return Object()
@@ -128,9 +122,6 @@ def mock_merge(pr_num: int, repo: GitRepo,
                dry_run: bool = False,
                skip_mandatory_checks: bool = False,
                comment_id: Optional[int] = None,
-               mandatory_only: bool = False,
-               on_green: bool = False,
-               land_checks: bool = False,
                timeout_minutes: int = 400,
                stale_pr_days: int = 3) -> None:
     pass
@@ -346,20 +337,6 @@ def test_cancelled_gets_ignored(self, *args: Any) -> None:
         self.assertTrue(len(lint_checks) > 0)
         self.assertTrue(all([conclusions[name].status == "SUCCESS" for name in lint_checks]))

-    def test_get_many_land_checks(self, *args: Any) -> None:
-        """ Tests that all checkruns can be fetched for a commit
-        """
-        conclusions = get_land_checkrun_conclusions('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684')
-        self.assertEqual(len(conclusions), 98)
-        self.assertTrue("pull / linux-docs / build-docs (cpp)" in conclusions.keys())
-
-    def test_failed_land_checks(self, *args: Any) -> None:
-        """ Tests that PR with Land Checks fail with a RunTime error
-        """
-        self.assertRaisesRegex(RuntimeError,
-                               ".*Failed to merge; some land checks failed.*",
-                               lambda: validate_land_time_checks('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684'))
-
     @mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
     @mock.patch('trymerge.parse_args', return_value=mock_parse_args(True, False))
     @mock.patch('trymerge.try_revert', side_effect=mock_revert)
@@ -376,10 +353,7 @@ def test_main_force(self, mock_merge: Any, mock_parse_args: Any, *args: Any) ->
             mock.ANY,
             dry_run=mock.ANY,
             skip_mandatory_checks=True,
-            comment_id=mock.ANY,
-            on_green=False,
-            land_checks=False,
-            mandatory_only=False)
+            comment_id=mock.ANY)

     @mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
     @mock.patch('trymerge.parse_args', return_value=mock_parse_args(False, False))
@@ -390,10 +364,7 @@ def test_main_merge(self, mock_merge: Any, *args: Any) -> None:
             mock.ANY,
             dry_run=mock.ANY,
             skip_mandatory_checks=False,
-            comment_id=mock.ANY,
-            on_green=False,
-            land_checks=False,
-            mandatory_only=False)
+            comment_id=mock.ANY)

     @mock.patch('trymerge.read_merge_rules', side_effect=mocked_read_merge_rules)
     def test_revert_rules(self, *args: Any) -> None:
@@ -453,7 +424,7 @@ class TestBypassFailures(TestCase):
     def test_get_classifications(self, *args: Any) -> None:
         flaky_rules = [FlakyRule("distributed", ["##[error]The operation was canceled."])]
         pr = GitHubPR("pytorch", "pytorch", 92863)
-        checks = get_combined_checks_from_pr_and_land_validation(pr, None)
+        checks = pr.get_checkrun_conclusions()
         checks = get_classifications(pr.last_commit()['oid'], pr.get_merge_base(), checks, flaky_rules)
         self.assertTrue(
             checks[
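The rewritten test builds FlakyRule("distributed", ["##[error]The operation was canceled."]) and hands it to get_classifications. One plausible reading of that shape is a job-name substring plus a list of log captures, any of which marks a matching failure as flaky; the sketch below is self-contained (the real class lives in trymerge.py, and this reimplementation, its matches method, and the sample job name are assumptions for illustration):

from dataclasses import dataclass
from typing import List

@dataclass
class FlakyRule:
    name: str            # substring matched against the failing job's name
    captures: List[str]  # log snippets; any hit classifies the failure as flaky

    def matches(self, job_name: str, failure_log: str) -> bool:
        return self.name in job_name and any(c in failure_log for c in self.captures)

rule = FlakyRule("distributed", ["##[error]The operation was canceled."])
print(rule.matches("pull / linux-focal-py3 / test (distributed)",
                   "##[error]The operation was canceled."))  # True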
