Update base for Update on "Add tensor to fake clone snapshot for immutable source of truth"


There's a long-standing, well-known mutability bug in dynamo, #93610 (there are more issues, but this is the one I had at hand).

Ops that mutate tensors in place also mutate their corresponding FakeTensors.

So, for example, if you call `t_` on a tensor, you reverse its strides. This, in turn, means that the FakeTensor's strides are now also reversed, say, if you are trying to torch.compile:

```python
class F(torch.nn.Module):
    def forward(self, x, y):
        x = x.t_()
        y = y.t_()
        return (x + y,)
```
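The effect of the in-place transpose can be modeled with a small pure-Python sketch (the `FakeMeta` class below is an invented stand-in for illustration, not the real FakeTensor class):

```python
# Invented, simplified stand-in for a fake tensor's metadata;
# not the real FakeTensor implementation.
class FakeMeta:
    def __init__(self, size, stride):
        self.size, self.stride = tuple(size), tuple(stride)

    def t_(self):
        # In-place transpose: both size and stride are reversed.
        self.size, self.stride = self.size[::-1], self.stride[::-1]

x = FakeMeta((3, 3), (3, 1))
x.t_()
print(x.stride)  # (1, 3): the original (3, 1) is no longer recoverable
```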

However, we recently introduced accessing the fake_tensor memo/cache to get the symbolic shape values for sizes and strides at guard installation time.

This means that a tensor captured with a given size and stride, say, for `x` above, size (3, 3) and stride (3, 1), will get its memo updated to size (3, 3), stride (1, 3). Now, whenever you access this value for anything, it reflects the current state of the tracing, as opposed to the state at which we initially started tracing.

This causes us to produce guards that are never valid; for the example above, `x.stride()[0] == 3`.

The solution is to not allow mutation to affect the fake tensors we use as the source of truth here. We can do this by forcing a clone of the fake tensor at builder time and storing that clone as the source of truth for our dynamic sizes and strides during guard installation.
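A minimal sketch of that idea (all class and attribute names below are invented for illustration, not the real dynamo/FakeTensor API): clone the fake tensor's metadata when the builder first sees it, and have guard installation read only the clone.

```python
import copy

# Invented, simplified stand-ins; not the real dynamo/FakeTensor classes.
class FakeTensorMeta:
    def __init__(self, size, stride):
        self.size, self.stride = tuple(size), tuple(stride)

    def t_(self):
        # In-place transpose mutates the live metadata.
        self.size, self.stride = self.size[::-1], self.stride[::-1]

class Builder:
    def __init__(self, fake):
        self.fake = fake                 # live object, mutates during tracing
        self.snapshot = copy.copy(fake)  # immutable source of truth for guards

fake = FakeTensorMeta((3, 3), (3, 1))
b = Builder(fake)
fake.t_()                 # tracing hits an in-place op

print(fake.stride)        # (1, 3): mutated by tracing
print(b.snapshot.stride)  # (3, 1): guards still see the original stride
```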



cc soumith penguinwu anijain2305 EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng Xia-Weiwen wenzhe-nrv jiayisunx desertfire

[ghstack-poisoned]
voznesenskym committed Apr 27, 2023
2 parents 5b4a523 + 151d76c commit 0fb3be5
Showing 310 changed files with 5,608 additions and 1,780 deletions.
4 changes: 0 additions & 4 deletions .bazelrc
@@ -69,10 +69,6 @@ build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
# The following warnings come from -Wall. We downgrade them from error
# to warnings here.
#
# sign-compare has a tremendous amount of violations in the
# codebase. It will be a lot of work to fix them, just disable it for
# now.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-sign-compare
# We intentionally use #pragma unroll, which is compiler specific.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas

3 changes: 2 additions & 1 deletion .ci/pytorch/multigpu-test.sh
@@ -35,7 +35,8 @@ time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim

# DTensor/Random Ops tests
# DTensor tests
time python test/run_test.py --verbose -i distributed/_tensor/test_device_mesh.py
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops.py

# DTensor/TP tests
4 changes: 2 additions & 2 deletions .ci/pytorch/test.sh
@@ -427,7 +427,7 @@ test_inductor_torchbench_smoketest_perf() {
python benchmarks/dynamo/check_hf_bert_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"

# Check memory compression ratio for a few models
for test in hf_Albert timm_efficientdet; do
for test in hf_Albert timm_vision_transformer; do
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
--disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
--only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
@@ -955,7 +955,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
install_torchvision
id=$((SHARD_NUMBER-1))
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
checkout_install_torchbench hf_Bert hf_Albert timm_efficientdet
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
else
checkout_install_torchbench
3 changes: 1 addition & 2 deletions .github/actionlint.yaml
@@ -11,9 +11,8 @@ self-hosted-runner:
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
- linux.g5.4xlarge.nvidia.gpu
- windows.4xlarge.nonephemeral
- windows.4xlarge
- windows.8xlarge.nvidia.gpu
- windows.8xlarge.nvidia.gpu.nonephemeral
- windows.g5.4xlarge.nvidia.gpu
- bm-runner
- linux.rocm.gpu
2 changes: 1 addition & 1 deletion .github/merge_rules.yaml
@@ -287,7 +287,7 @@
- aten/src/ATen/native/quantized/cpu/**
- aten/src/ATen/native/Convolution*.cpp
- aten/src/ATen/native/mkldnn/**
- test/test_mkldnn.py
- test/test_mkl*.py
approved_by:
- mingfeima
- XiaobingSuper
14 changes: 7 additions & 7 deletions .github/scripts/rockset_mocks.json
@@ -1133,7 +1133,7 @@
{
"workflow_name": "pull",
"id": 10836206561,
"name": "win-vs2019-cpu-py3 / test (default, 1, 2, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (default, 1, 2, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-24T01:11:42Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3991169410/jobs/6846294540",
@@ -1407,7 +1407,7 @@
{
"workflow_name": "pull",
"id": 10793106674,
"name": "win-vs2019-cpu-py3 / test (functorch, 1, 1, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (functorch, 1, 1, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-21T05:06:35Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3972873201/jobs/6811541260",
@@ -1549,7 +1549,7 @@
{
"workflow_name": "trunk",
"id": 10793229653,
"name": "win-vs2019-cuda11.6-py3 / test (force_on_cpu, 1, 1, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cuda11.6-py3 / test (force_on_cpu, 1, 1, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-21T05:16:16Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3972873205/jobs/6811642435",
@@ -1705,7 +1705,7 @@
{
"workflow_name": "pull",
"id": 10836206839,
"name": "win-vs2019-cpu-py3 / test (functorch, 1, 1, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (functorch, 1, 1, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-24T00:50:32Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3991169410/jobs/6846294751",
@@ -1786,7 +1786,7 @@
{
"workflow_name": "pull",
"id": 10836206711,
"name": "win-vs2019-cpu-py3 / test (default, 2, 2, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (default, 2, 2, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-24T01:24:31Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3991169410/jobs/6846294653",
@@ -2012,7 +2012,7 @@
{
"workflow_name": "pull",
"id": 10793106598,
"name": "win-vs2019-cpu-py3 / test (default, 1, 2, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (default, 1, 2, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-21T05:35:26Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3972873201/jobs/6811541202",
@@ -3052,7 +3052,7 @@
{
"workflow_name": "pull",
"id": 10793106643,
"name": "win-vs2019-cpu-py3 / test (default, 2, 2, windows.4xlarge.nonephemeral)",
"name": "win-vs2019-cpu-py3 / test (default, 2, 2, windows.4xlarge)",
"conclusion": "success",
"completed_at": "2023-01-21T05:33:42Z",
"html_url": "https://github.com/pytorch/pytorch/actions/runs/3972873201/jobs/6811541238",
4 changes: 2 additions & 2 deletions .github/templates/windows_binary_build_workflow.yml.j2
@@ -56,7 +56,7 @@ jobs:
{%- for config in build_configs %}
!{{ config["build_name"] }}-build:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: windows.4xlarge.nonephemeral
runs-on: windows.4xlarge
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
steps:
@@ -86,7 +86,7 @@ jobs:
{%- if config["gpu_arch_type"] == "cuda" %}
runs-on: windows.8xlarge.nvidia.gpu
{%- else %}
runs-on: windows.4xlarge.nonephemeral
runs-on: windows.4xlarge
{%- endif %}
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
2 changes: 1 addition & 1 deletion .github/workflows/_win-build.yml
@@ -43,7 +43,7 @@ jobs:
build:
# Don't run on forked repos.
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, windows.4xlarge.nonephemeral]
runs-on: [self-hosted, windows.4xlarge]
timeout-minutes: 240
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
32 changes: 16 additions & 16 deletions .github/workflows/generated-windows-binary-conda-nightly.yml

Some generated files are not rendered by default.
