Update on "NJT <-> padded dense conversions"
cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx chenyang78 kadeng chauhang

[ghstack-poisoned]
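For context on the title: below is a minimal sketch of the NJT <-> padded dense round trip using the public torch.nested API. It is not taken from this stack's changes (this merge-update diff only pulls in unrelated main-branch commits), and any conversion helpers the stack itself adds or renames are not reproduced here.

```python
import torch

# Two variable-length sequences packed into a jagged nested tensor (NJT).
a = torch.randn(3, 8)
b = torch.randn(5, 8)
nt = torch.nested.nested_tensor([a, b], layout=torch.jagged)

# NJT -> padded dense: shorter components are padded out to the max length.
padded = torch.nested.to_padded_tensor(nt, padding=0.0)
print(padded.shape)  # torch.Size([2, 5, 8])

# The reverse direction (padded dense -> NJT, dropping the padding again)
# is the other half of the conversion pair the stack's title refers to.
```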
jbschlosser committed Jun 18, 2024 (2 parents: 484fb1b + 6461239; commit: 6033447)
Showing 655 changed files with 22,760 additions and 15,090 deletions.
8 changes: 8 additions & 0 deletions .ci/docker/build.sh
@@ -373,6 +373,13 @@ case "$image" in
CONDA_CMAKE=yes
EXECUTORCH=yes
;;
pytorch-linux-jammy-py3.12-halide)
CUDA_VERSION=12.4
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
HALIDE=yes
;;
pytorch-linux-focal-linter)
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
# We will need to update mypy version eventually, but that's for another day. The task
@@ -490,6 +497,7 @@ docker build \
--build-arg "DOCS=${DOCS}" \
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
--build-arg "EXECUTORCH=${EXECUTORCH}" \
--build-arg "HALIDE=${HALIDE}" \
--build-arg "XPU_VERSION=${XPU_VERSION}" \
--build-arg "ACL=${ACL:-}" \
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
1 change: 1 addition & 0 deletions .ci/docker/ci_commit_pins/halide.txt
@@ -0,0 +1 @@
340136fec6d3ebc73e7a19eba1663e9b0ba8ab2d
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton-xpu.txt
@@ -1 +1 @@
-b8c64f64c18d8cac598b3adb355c21e7439c21de
+aac14a3b93f11d781d1d5ebc5400b15ae8df5185
46 changes: 46 additions & 0 deletions .ci/docker/common/install_halide.sh
@@ -0,0 +1,46 @@
#!/bin/bash
set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

COMMIT=$(get_pinned_commit halide)
test -n "$COMMIT"

# activate conda to populate CONDA_PREFIX
test -n "$ANACONDA_PYTHON_VERSION"
eval "$(conda shell.bash hook)"
conda activate py_$ANACONDA_PYTHON_VERSION

if [ -n "${UBUNTU_VERSION}" ];then
apt update
apt-get install -y lld liblld-15-dev libpng-dev libjpeg-dev libgl-dev \
libopenblas-dev libeigen3-dev libatlas-base-dev libzstd-dev
fi

conda_install numpy scipy imageio cmake ninja

git clone --depth 1 --branch release/16.x --recursive https://github.com/llvm/llvm-project.git
cmake -DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_PROJECTS="clang" \
-DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
-DLLVM_ENABLE_TERMINFO=OFF -DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_RTTI=ON -DLLVM_BUILD_32_BITS=OFF \
-S llvm-project/llvm -B llvm-build -G Ninja
cmake --build llvm-build
cmake --install llvm-build --prefix llvm-install
export LLVM_ROOT=`pwd`/llvm-install
export LLVM_CONFIG=$LLVM_ROOT/bin/llvm-config

git clone https://github.com/halide/Halide.git
pushd Halide
git checkout ${COMMIT} && git submodule update --init --recursive
pip_install -r requirements.txt
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build
cmake --build build
test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3
cmake --install build --prefix ${CONDA_PREFIX}
chown -R jenkins ${CONDA_PREFIX}
popd
rm -rf Halide llvm-build llvm-project llvm-install

python -c "import halide" # check for errors
4 changes: 3 additions & 1 deletion .ci/docker/common/install_onnx.sh
@@ -33,7 +33,9 @@ pip_install coloredlogs packaging
pip_install onnxruntime==1.18
pip_install onnx==1.16.0
# pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@3e869ef8ccf19b5ebd21c10d3e9c267c9a9fa729" --no-deps
-pip_install onnxscript==0.1.0.dev20240523 --no-deps
+pip_install onnxscript==0.1.0.dev20240613 --no-deps
+# required by onnxscript
+pip_install ml_dtypes

# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
4 changes: 2 additions & 2 deletions .ci/docker/requirements-ci.txt
@@ -85,10 +85,10 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:

-mypy==1.9.0
+mypy==1.10.0
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
-#Pinned versions: 1.9.0
+#Pinned versions: 1.10.0
#test that import: test_typing.py, test_type_hints.py

networkx==2.8.8
8 changes: 8 additions & 0 deletions .ci/docker/ubuntu-cuda/Dockerfile
@@ -103,6 +103,14 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

ARG HALIDE
# Build and install halide
COPY ./common/install_halide.sh install_halide.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/halide.txt halide.txt
RUN if [ -n "${HALIDE}" ]; then bash ./install_halide.sh; fi
RUN rm install_halide.sh common_utils.sh halide.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
8 changes: 8 additions & 0 deletions .ci/docker/ubuntu/Dockerfile
@@ -155,6 +155,14 @@ COPY ci_commit_pins/executorch.txt executorch.txt
RUN if [ -n "${EXECUTORCH}" ]; then bash ./install_executorch.sh; fi
RUN rm install_executorch.sh common_utils.sh executorch.txt

ARG HALIDE
# Build and install halide
COPY ./common/install_halide.sh install_halide.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/halide.txt halide.txt
RUN if [ -n "${HALIDE}" ]; then bash ./install_halide.sh; fi
RUN rm install_halide.sh common_utils.sh halide.txt

ARG ONNX
# Install ONNX dependencies
COPY ./common/install_onnx.sh ./common/common_utils.sh ./
22 changes: 0 additions & 22 deletions .ci/pytorch/common_utils.sh
@@ -188,28 +188,6 @@ function clone_pytorch_xla() {
fi
}

function checkout_install_torchdeploy() {
local commit
commit=$(get_pinned_commit multipy)
pushd ..
git clone --recurse-submodules https://github.com/pytorch/multipy.git
pushd multipy
git checkout "${commit}"
python multipy/runtime/example/generate_examples.py
BUILD_CUDA_TESTS=1 pip install -e .
popd
popd
}

function test_torch_deploy(){
pushd ..
pushd multipy
./multipy/runtime/build/test_deploy
./multipy/runtime/build/test_deploy_gpu
popd
popd
}

function checkout_install_torchbench() {
local commit
commit=$(get_pinned_commit torchbench)
10 changes: 7 additions & 3 deletions .ci/pytorch/test.sh
@@ -550,6 +550,11 @@ test_inductor_micro_benchmark() {
python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
}

test_inductor_halide() {
python test/run_test.py --include inductor/test_halide.py --verbose
assert_git_not_dirty
}

test_dynamo_benchmark() {
# Usage: test_dynamo_benchmark huggingface 0
TEST_REPORTS_DIR=$(pwd)/test/test-reports
@@ -1237,11 +1242,10 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_rpc
fi
elif [[ "$TEST_CONFIG" == deploy ]]; then
checkout_install_torchdeploy
test_torch_deploy
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
6 changes: 4 additions & 2 deletions .github/actions/diskspace-cleanup/action.yml
@@ -14,12 +14,14 @@ runs:
- name: Cleans up diskspace
shell: bash
run: |
+set -ex
diskspace_cutoff=${{ inputs.diskspace-cutoff }}
-diskspace=$(df -H / --output=pcent | sed -n 2p | sed 's/%//' | sed 's/ //')
+docker_root_dir=$(docker info -f '{{.DockerRootDir}}')
+diskspace=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
msg="Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
if [[ "$diskspace" -ge "$diskspace_cutoff" ]] ; then
docker system prune -af
-diskspace_new=$(df -H / --output=pcent | sed -n 2p | sed 's/%//' | sed 's/ //')
+diskspace_new=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
if [[ "$diskspace_new" -gt "$diskspace_cutoff" ]] ; then
echo "Error: Available diskspace is less than $diskspace_cutoff percent. Not enough diskspace."
echo "$msg"
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
-d6015d42d9a1834bc7595c4bd6852562fb80b30b
+0dab1dd97709096e8129f8a08115ee83f64f2194
2 changes: 0 additions & 2 deletions .github/merge_rules.yaml
@@ -27,11 +27,9 @@
- third_party/onnx
- caffe2/python/onnx/**
approved_by:
- BowenBao
- justinchuby
- liqunfu
- shubhambhokare1
- thiagocrepaldi
- titaiwangms
- wschin
- xadupre
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -26,3 +26,4 @@ retryable_workflows:
- windows-binary
labeler_config: labeler.yml
label_to_label_config: label_to_label.yml
mergebot: True
9 changes: 9 additions & 0 deletions .github/scripts/get_workflow_type.py
@@ -53,21 +53,29 @@ def get_workflow_type(issue: Issue, username: str) -> str:
user_list = issue.get_comments()[0].body.split()

if user_list[0] == "!":
print("LF Workflows are disabled for everyone. Using meta runners.")
return WORKFLOW_LABEL_META
elif user_list[0] == "*":
print("LF Workflows are enabled for everyone. Using LF runners.")
return WORKFLOW_LABEL_LF
elif username in user_list:
print(f"LF Workflows are enabled for {username}. Using LF runners.")
return WORKFLOW_LABEL_LF
else:
print(f"LF Workflows are disabled for {username}. Using meta runners.")
return WORKFLOW_LABEL_META
except Exception as e:
print(
f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
)
return WORKFLOW_LABEL_META


def main() -> None:
args = parse_args()

if is_exception_branch(args.github_branch):
print(f"Exception branch: '{args.github_branch}', using meta runners")
output = {LABEL_TYPE_KEY: WORKFLOW_LABEL_META}
else:
try:
@@ -77,6 +85,7 @@ def main() -> None:

output = {LABEL_TYPE_KEY: get_workflow_type(issue, args.github_user)}
except Exception as e:
print(f"Failed to get issue. Falling back to meta runners. Exception: {e}")
output = {LABEL_TYPE_KEY: WORKFLOW_LABEL_META}

json_output = json.dumps(output)
3 changes: 3 additions & 0 deletions .github/scripts/test_trymerge.py
@@ -180,6 +180,9 @@ def mock_gh_get_info() -> Any:
return {
"closed": False,
"isCrossRepository": False,
"headRefName": "foo",
"baseRefName": "bar",
"baseRepository": {"defaultBranchRef": {"name": "bar"}},
"files": {"nodes": [], "pageInfo": {"hasNextPage": False}},
"changedFiles": 0,
}
9 changes: 9 additions & 0 deletions .github/scripts/trymerge.py
@@ -2330,6 +2330,15 @@ def handle_exception(e: Exception, title: str = "Merge failed") -> None:
dry_run=args.dry_run,
)
return
if not pr.is_ghstack_pr() and pr.base_ref() != pr.default_branch():
gh_post_pr_comment(
org,
project,
args.pr_num,
f"PR targets {pr.base_ref()} rather than {pr.default_branch()}, refusing merge request",
dry_run=args.dry_run,
)
return

if args.check_mergeability:
if pr.is_ghstack_pr():
3 changes: 3 additions & 0 deletions .github/workflows/_runner-determinator.yml
@@ -15,6 +15,9 @@ on:
required: false
type: string
default: "5132"
description: |
Fetch's GitHub Issue from pytorch/test-infra
Example: https://github.com/pytorch/test-infra/issues/5132
outputs:
label-type:
4 changes: 4 additions & 0 deletions .github/workflows/_win-build.yml
@@ -47,6 +47,9 @@ jobs:
timeout-minutes: 240
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
defaults:
run:
shell: bash
steps:
# Duplicated in win-test because this MUST go before a checkout
- name: Enable git symlinks on Windows and disable fsmonitor daemon
@@ -89,6 +92,7 @@

- name: Parse ref
id: parse-ref
shell: bash
run: python3 .github/scripts/parse_ref.py

- name: Get workflow job id
4 changes: 4 additions & 0 deletions .github/workflows/_win-test.yml
@@ -41,6 +41,9 @@ jobs:
fail-fast: false
runs-on: ${{ matrix.runner }}
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
defaults:
run:
shell: bash
steps:
# Duplicated in win-build because this MUST go before a checkout
- name: Enable git symlinks on Windows and disable fsmonitor daemon
@@ -224,6 +227,7 @@

- name: Parse ref
id: parse-ref
shell: bash
run: python3 .github/scripts/parse_ref.py

- name: Uninstall PyTorch
1 change: 1 addition & 0 deletions .github/workflows/docker-builds.yml
@@ -54,6 +54,7 @@ jobs:
pytorch-linux-focal-py3-clang9-android-ndk-r21e,
pytorch-linux-jammy-py3.8-gcc11,
pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks,
pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2024.0-py3,
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-focal-py3-clang10-onnx,
26 changes: 26 additions & 0 deletions .github/workflows/inductor-periodic.yml
@@ -56,3 +56,29 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp:
name: cuda12.1-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [
{ config: "inductor_torchbench_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.gcp.a100" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

linux-focal-cuda12_1-py3_10-gcc9-inductor-test-gcp:
name: cuda12.1-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
Some files were not shown because too many files have changed in this diff.
