Update on "[FakeTensor] Workaround FFT ops with incorrect meta strides"

Currently, some FFT operators raise `UnsupportedOperatorException` because their meta implementations sometimes give incorrect strides. This change works around the problem for static shapes by falling back to eager, though calls with dynamic shapes are still not supported. [ghstack-poisoned]
pytorch · Aug 7, 2023 · 2bde2b4 · 2bde2b4
2 parents 8d14698 + 230b0a7
commit 2bde2b4
Show file tree

Hide file tree

Showing 352 changed files with 9,005 additions and 3,787 deletions.
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
@@ -174,15 +174,6 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
-  pytorch-linux-focal-py3-clang7-asan)
-    ANACONDA_PYTHON_VERSION=3.9
-    CLANG_VERSION=7
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    CONDA_CMAKE=yes
-    TRITON=yes
-    ;;
   pytorch-linux-focal-py3-clang10-onnx)
     ANACONDA_PYTHON_VERSION=3.8
     CLANG_VERSION=10
@@ -288,6 +279,15 @@ case "$image" in
     VISION=yes
     TRITON=yes
     ;;
+  pytorch-linux-jammy-py3-clang12-asan)
+    ANACONDA_PYTHON_VERSION=3.9
+    CLANG_VERSION=12
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    CONDA_CMAKE=yes
+    TRITON=yes
+    ;;
   pytorch-linux-focal-linter)
     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
     # We will need to update mypy version eventually, but that's for another day. The task

diff --git a/.ci/docker/ci_commit_pins/triton-rocm.txt b/.ci/docker/ci_commit_pins/triton-rocm.txt
@@ -1 +1 @@
-9dc100afb538d39da17621e0f8ad233f2078e6ff
+34887ff8ca7a264c2c75972f5421a1ed3b7d8f6c
diff --git a/.ci/docker/common/install_conda.sh b/.ci/docker/common/install_conda.sh
@@ -109,7 +109,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
   # gcc-9 for ubuntu-18.04 from http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu
   # Pulls llibstdc++6 13.1.0-8ubuntu1~18.04 which is too new for conda
   # So remove libstdc++6.so.3.29 installed by https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0
-  if grep 18.04.6 /etc/issue >/dev/null; then
+  # Same is true for gcc-12 from Ubuntu-22.04
+  if grep -e [12][82].04.[62] /etc/issue >/dev/null; then
     rm /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/libstdc++.so.6
   fi
 

diff --git a/.ci/docker/common/install_onnx.sh b/.ci/docker/common/install_onnx.sh
@@ -28,7 +28,7 @@ pip_install \
   transformers==4.25.1
 
 # TODO: change this when onnx-script is on testPypi
-pip_install onnxscript-preview==0.1.0.dev20230724 --no-deps
+pip_install onnxscript-preview==0.1.0.dev20230801 --no-deps
 
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/

diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
@@ -25,10 +25,10 @@ coremltools==5.0b5
 #Pinned versions:
 #test that import:
 
-expecttest==0.1.3
+expecttest==0.1.6
 #Description: method for writing tests where test framework auto populates
 # the expected output based on previous runs
-#Pinned versions: 0.1.3
+#Pinned versions: 0.1.6
 #test that import:
 
 flatbuffers==2.0

diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
@@ -143,9 +143,19 @@ function install_torchtext() {
 }
 
 function install_torchvision() {
+  local orig_preload
   local commit
   commit=$(get_pinned_commit vision)
+  orig_preload=${LD_PRELOAD}
+  if [ -n "${LD_PRELOAD}" ]; then
+    # Silence dlerror to work-around glibc ASAN bug, see https://sourceware.org/bugzilla/show_bug.cgi?id=27653#c9
+    echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c -
+    LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
+  fi
   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
+  if [ -n "${LD_PRELOAD}" ]; then
+    LD_PRELOAD=${orig_preload}
+  fi
 }
 
 function install_numpy_pytorch_interop() {

diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
@@ -148,7 +148,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
     export PYTORCH_TEST_WITH_ASAN=1
     export PYTORCH_TEST_WITH_UBSAN=1
     # TODO: Figure out how to avoid hard-coding these paths
-    export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-7/bin/llvm-symbolizer
+    export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer
     export TORCH_USE_RTLD_GLOBAL=1
     # NB: We load libtorch.so with RTLD_GLOBAL for UBSAN, unlike our
     # default behavior.
@@ -182,7 +182,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
     # have, and it applies to child processes.
 
     # TODO: get rid of the hardcoded path
-    export LD_PRELOAD=/usr/lib/llvm-7/lib/clang/7.0.1/lib/linux/libclang_rt.asan-x86_64.so
+    export LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.1/lib/linux/libclang_rt.asan-x86_64.so
     # Disable valgrind for asan
     export VALGRIND=OFF
     # Increase stack size, because ASAN red zones use more stack

diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
-745644f391b4d11da107b2c82fe2d7a3eacf561d
+770d5cf793c283bdc5e55a313fc068bd2fc8c109
diff --git a/.github/ci_commit_pins/vision.txt b/.github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-bdf16222f7e734f81af0f6ea6a5f413c7b353237
+84db2ac4572dd23b67d93d08660426e44f97ba75
diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-ca5eab87a71f80cd3168630511d02549cc7d2516
+56a6a02a706367290ce54a1b2602a74af52fa34f
diff --git a/.github/merge_rules.yaml b/.github/merge_rules.yaml
@@ -21,6 +21,7 @@
   - third_party/onnx
   - caffe2/python/onnx/**
   - benchmarks/dynamo/_onnx/**
+  - torch/_logging/**
   approved_by:
   - BowenBao
   - abock

diff --git a/.github/requirements/conda-env-macOS-ARM64 b/.github/requirements/conda-env-macOS-ARM64
@@ -8,6 +8,10 @@ pip=22.2.2
 pillow=9.2.0
 pkg-config=0.29.2
 wheel=0.37.1
+# NB: This is intentionally held back because anaconda main doesn't
+# have updated expecttest, but you don't /need/ the updated version
+# to run the tests.  In the meantime I need to figure out how to
+# cajole anaconda into updating, or get the package from pypi instead...
 expecttest=0.1.3
 
 # Not pinning certifi so that we can always get the latest certificates

diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt
@@ -1,6 +1,6 @@
 boto3==1.19.12
 hypothesis==6.56.4
-expecttest==0.1.3
+expecttest==0.1.6
 librosa>=0.6.2
 mpmath==1.3.0
 networkx==2.8.7

diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
@@ -47,6 +47,12 @@ on:
           An option JSON description of what test configs to run later on. This
           is moved here from the Linux test workflow so that we can apply filter
           logic using test-config labels earlier and skip unnecessary builds
+    secrets:
+      HUGGING_FACE_HUB_TOKEN:
+        required: false
+        description: |
+          HF Auth token to avoid rate limits when downloading models or datasets from hub
+
 
     outputs:
       docker-image:
@@ -130,6 +136,7 @@ jobs:
           XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
           DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
           OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
         run: |
           # detached container should get cleaned up by teardown_ec2_linux
           container_name=$(docker run \
@@ -147,6 +154,7 @@ jobs:
             -e TORCH_CUDA_ARCH_LIST \
             -e PR_LABELS \
             -e OUR_GITHUB_JOB_ID \
+            -e HUGGING_FACE_HUB_TOKEN \
             --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
             --security-opt seccomp=unconfined \
             --cap-add=SYS_PTRACE \

diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml
@@ -37,6 +37,11 @@ on:
         required: false
         type: string
         default: ""
+    secrets:
+      HUGGING_FACE_HUB_TOKEN:
+        required: false
+        description: |
+          HF Auth token to avoid rate limits when downloading models or datasets from hub
 
 env:
   GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
@@ -163,6 +168,8 @@ jobs:
           PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
           PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
           DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+
         run: |
           set -x
 
@@ -210,6 +217,7 @@ jobs:
             -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
             -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
             -e SKIP_SCCACHE_INITIALIZATION=1 \
+            -e HUGGING_FACE_HUB_TOKEN \
             -e DASHBOARD_TAG \
             --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
             --ulimit stack=10485760:83886080 \

diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
@@ -47,7 +47,7 @@ jobs:
           - docker-image-name: pytorch-linux-focal-py3-clang7-android-ndk-r19c
           - docker-image-name: pytorch-linux-focal-py3.8-gcc7
           - docker-image-name: pytorch-linux-focal-py3.8-gcc7-inductor-benchmarks
-          - docker-image-name: pytorch-linux-focal-py3-clang7-asan
+          - docker-image-name: pytorch-linux-jammy-py3-clang12-asan
           - docker-image-name: pytorch-linux-focal-py3-clang10-onnx
           - docker-image-name: pytorch-linux-focal-linter
     env:

diff --git a/.github/workflows/inductor-perf-compare.yml b/.github/workflows/inductor-perf-compare.yml
@@ -25,6 +25,8 @@ jobs:
           { config: "inductor_timm_perf_compare", shard: 2, num_shards: 2, runner: "linux.gcp.a100" },
           { config: "inductor_torchbench_perf_compare", shard: 1, num_shards: 1, runner: "linux.gcp.a100" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-test:
     name: cuda11.8-py3.10-gcc7-sm80
@@ -35,3 +37,5 @@ jobs:
       docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
@@ -78,6 +78,8 @@ jobs:
           { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.gcp.a100.large" },
           { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.gcp.a100.large" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-test-nightly:
     name: cuda11.8-py3.10-gcc7-sm80
@@ -91,6 +93,8 @@ jobs:
       test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 720
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-test:
     name: cuda11.8-py3.10-gcc7-sm80
@@ -104,3 +108,5 @@ jobs:
       test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 720
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml
@@ -37,6 +37,8 @@ jobs:
           { config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-test:
     name: cuda12.1-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
@@ -46,3 +48,5 @@ jobs:
       build-environment: linux-bionic-cuda12.1-py3.10-gcc9-sm86
       docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml
@@ -33,6 +33,8 @@ jobs:
           { config: "inductor_torchbench_dynamic", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-test:
     name: cuda11.8-py3.10-gcc7-sm86
@@ -42,6 +44,8 @@ jobs:
       build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm86
       docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-build-gcp:
     name: cuda11.8-py3.10-gcc7-sm80
@@ -54,6 +58,8 @@ jobs:
         { include: [
           { config: "inductor_torchbench_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.gcp.a100" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-bionic-cuda11_8-py3_10-gcc7-inductor-test-gcp:
     name: cuda11.8-py3.10-gcc7-sm80
@@ -64,6 +70,8 @@ jobs:
       docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build-gcp.outputs.docker-image }}
       test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build-gcp.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-focal-cpu-py3_8-gcc7-inductor-build:
     name: linux-focal-cpu-py3.8-gcc7-inductor
@@ -82,6 +90,8 @@ jobs:
           { config: "inductor_timm_dynamic_cpu_accuracy", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
           { config: "inductor_torchbench_dynamic_cpu_accuracy", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
         ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
   linux-focal-cpu-py3_8-gcc7-inductor-test:
     name: linux-focal-cpu-py3.8-gcc7-inductor
@@ -91,3 +101,5 @@ jobs:
       build-environment: linux-focal-py3_8-gcc7-build
       docker-image: ${{ needs.linux-focal-cpu-py3_8-gcc7-inductor-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-focal-cpu-py3_8-gcc7-inductor-build.outputs.test-matrix }}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -75,12 +75,12 @@ jobs:
           { config: "default", shard: 1, num_shards: 1 },
         ]}
 
-  linux-focal-py3_9-clang7-asan-build:
-    name: linux-focal-py3.9-clang7-asan
+  linux-jammy-py3_9-clang12-asan-build:
+    name: linux-jammy-py3.9-clang12-asan
     uses: ./.github/workflows/_linux-build.yml
     with:
-      build-environment: linux-focal-py3.9-clang7-asan
-      docker-image-name: pytorch-linux-focal-py3-clang7-asan
+      build-environment: linux-jammy-py3.9-clang12-asan
+      docker-image-name: pytorch-linux-jammy-py3-clang12-asan
       test-matrix: |
         { include: [
           { config: "default", shard: 1, num_shards: 6, runner: "linux.4xlarge" },
@@ -92,14 +92,14 @@ jobs:
         ]}
       sync-tag: asan-build
 
-  linux-focal-py3_9-clang7-asan-test:
-    name: linux-focal-py3.9-clang7-asan
+  linux-jammy-py3_9-clang12-asan-test:
+    name: linux-jammy-py3.9-clang12-asan
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-py3_9-clang7-asan-build
+    needs: linux-jammy-py3_9-clang12-asan-build
     with:
-      build-environment: linux-focal-py3.9-clang7-asan
-      docker-image: ${{ needs.linux-focal-py3_9-clang7-asan-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-py3_9-clang7-asan-build.outputs.test-matrix }}
+      build-environment: linux-jammy-py3.9-clang12-asan
+      docker-image: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.test-matrix }}
       sync-tag: asan-test
 
   linux-focal-py3_8-clang10-onnx-build:
@@ -224,12 +224,12 @@ jobs:
       docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
 
-  linux-focal-py3-clang7-mobile-build:
-    name: linux-focal-py3-clang7-mobile-build
+  linux-jammy-py3-clang12-mobile-build:
+    name: linux-jammy-py3-clang12-mobile-build
     uses: ./.github/workflows/_linux-build.yml
     with:
-      build-environment: linux-focal-py3-clang7-mobile-build
-      docker-image-name: pytorch-linux-focal-py3-clang7-asan
+      build-environment: linux-jammy-py3-clang12-mobile-build
+      docker-image-name: pytorch-linux-jammy-py3-clang12-asan
       build-generates-artifacts: false
       test-matrix: |
         { include: [