Update on "add Half support for sigmoid on CPU"
cc jgong5 XiaobingSuper sanchitintel ashokei jingxu10

[ghstack-poisoned]
mingfeima committed Apr 2, 2023
2 parents e67e848 + ac9a474 commit 38e0fab
Showing 445 changed files with 13,642 additions and 9,179 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
e650d3708be4dca12cc3491a2f8ab18ded47c368
46672772b46b103db7341c9e10fbad7f643557d4
6 changes: 3 additions & 3 deletions .ci/docker/common/install_conda.sh
@@ -57,12 +57,12 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
elif [ "$ANACONDA_PYTHON_VERSION" = "3.10" ]; then
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
elif [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS}
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS}
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
else
# Install `typing-extensions` for 3.7
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} typing-extensions
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} typing-extensions
fi

# This is only supported in 3.8 upward
9 changes: 7 additions & 2 deletions .ci/docker/common/install_onnx.sh
@@ -12,8 +12,13 @@ pip_install \
mock==5.0.1 \
ninja==1.10.2 \
networkx==2.0 \
numpy==1.22.4 \
onnx==1.13.1 \
numpy==1.22.4

# TODO: use official onnx package once it's released
# for now, use the commit from 1.13.1-protobuf4.21 branch
pip_install "onnx@git+https://github.com/onnx/onnx@389b6bcb05b9479d149d29b2461fbffe8472ed14"

pip_install \
onnxruntime==1.14.0 \
parameterized==0.8.1 \
pytest-cov==4.0.0 \
12 changes: 8 additions & 4 deletions .ci/pytorch/test.sh
@@ -301,17 +301,23 @@ test_perf_for_dashboard() {
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_accuracy.csv"
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_accuracy.csv"

# Run performance test
# Skip dynamo-eager and aot-eager for performance test
# Run performance test for inductor with different configs
# TODO: add more configs here, e.g. dynamic-shapes, max-autotune, etc.
# TODO: add more configs here, e.g. max-autotune, etc.
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_performance.csv"
done
}

@@ -587,9 +593,7 @@ test_distributed() {
"$TORCH_BIN_DIR"/TCPStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/TCPStoreTest.xml

MPIEXEC=$(command -v mpiexec)
# TODO: this is disabled on GitHub Actions until this issue is resolved
# https://github.com/pytorch/pytorch/issues/60756
if [[ -n "$MPIEXEC" ]] && [[ -z "$GITHUB_ACTIONS" ]]; then
if [[ -n "$MPIEXEC" ]]; then
MPICMD="${MPIEXEC} -np 2 $TORCH_BIN_DIR/ProcessGroupMPITest"
eval "$MPICMD"
fi
14 changes: 10 additions & 4 deletions .flake8
@@ -1,20 +1,26 @@
[flake8]
enable-extensions = G
select = B,C,E,F,G,P,T4,W,B9
select = B,C,E,F,G,P,SIM1,T4,W,B9
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
ignore =
E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
# fix these lints in the future
E275,
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,
B007,B008,B017,B019,B020,B023,B024,B026,B027,B028,B903,B904,B905,B906,B907
# these ignores are from flake8-comprehensions; please fix!
C407,C417
C407
# these ignores are from flake8-logging-format; please fix!
G001,G002,G003,G004,G100,G101,G200,G201,G202
G004,G100,G101,G200,G201,G202
# these ignores are from flake8-simplify. please fix or ignore with commented reason
SIM105,SIM108,SIM109,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
# flake8-simplify code styles
SIM102,SIM103,SIM106,SIM112,
per-file-ignores =
__init__.py: F401
torch/utils/cpp_extension.py: B950
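
For context: the SIM1 entry added to `select` enables the flake8-simplify plugin, while the new SIM entries under `ignore` defer its findings for now. A minimal sketch (not part of the commit, assuming the standard flake8-simplify rule numbering) of two of the deferred checks and the rewrites they ask for:

    import contextlib
    import os

    # SIM105: a try/except/pass used only to swallow an error ...
    try:
        os.remove("scratch.txt")
    except FileNotFoundError:
        pass

    # ... is flagged in favor of contextlib.suppress
    with contextlib.suppress(FileNotFoundError):
        os.remove("scratch.txt")

    # SIM108: an if/else block that only assigns ...
    if os.environ.get("CI"):
        mode = "ci"
    else:
        mode = "local"

    # ... is flagged in favor of a conditional expression
    mode = "ci" if os.environ.get("CI") else "local"
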
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
0f02ca657f791d874c390af5eaab489b426336d3
159e58f0b36ee22e2b89d74bd7dc8a79376de01d
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
18a2e8eb5c6e30e2bc22416379b10f5dfaccc4d4
78c271974f94585f45cd696f66d08dae538a9207
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/xla.txt
@@ -1 +1 @@
015ebcba441dbd5dd21dc02ef12af2c29791a7f0
5444e06e5b851211af8a83e024c6703acfc095eb
2 changes: 1 addition & 1 deletion .github/requirements/conda-env-macOS-X64
@@ -1,6 +1,6 @@
mkl=2021.2.0
mkl-include=2021.2.0
numpy=1.18.5
numpy=1.21.2
pyyaml=5.3
setuptools=46.0.0
cmake=3.22.*
3 changes: 0 additions & 3 deletions .github/scripts/generate_binary_build_matrix.py
@@ -209,9 +209,6 @@ def generate_wheels_matrix(
if arch_version == "cpu" or arch_version == "cpu-cxx11-abi"
else arch_version
)
# Skip rocm 3.11 binaries for now as the docker image are not correct
if python_version == "3.11" and gpu_arch_type == "rocm":
continue

# special 11.7 wheels package without dependencies
# dependency downloaded via pip install
12 changes: 6 additions & 6 deletions .github/scripts/run_torchbench.py
@@ -119,7 +119,7 @@ def is_valid_ub_dir(ub_path: str) -> bool:
[os.path.join(ub_path, ubdir) for ubdir in os.listdir(ub_path)],
)
)
valid_ubs = list(map(lambda x: os.path.basename(x), ubs))
valid_ubs = [os.path.basename(x) for x in ubs]
return valid_ubs


@@ -130,13 +130,13 @@ def extract_models_from_pr(
userbenchmark_list = []
pr_list = []
with open(prbody_file, "r") as pf:
lines = map(lambda x: x.strip(), pf.read().splitlines())
lines = (x.strip() for x in pf.read().splitlines())
magic_lines = list(filter(lambda x: x.startswith(MAGIC_PREFIX), lines))
if magic_lines:
# Only the first magic line will be recognized.
pr_list = list(
map(lambda x: x.strip(), magic_lines[0][len(MAGIC_PREFIX) :].split(","))
)
pr_list = [
x.strip() for x in magic_lines[0][len(MAGIC_PREFIX) :].split(",")
]
valid_models = get_valid_models(torchbench_path)
valid_ubs = get_valid_userbenchmarks(torchbench_path)
for pr_bm in pr_list:
@@ -158,7 +158,7 @@ def extract_models_from_pr(
def find_torchbench_branch(prbody_file: str) -> str:
branch_name: str = ""
with open(prbody_file, "r") as pf:
lines = map(lambda x: x.strip(), pf.read().splitlines())
lines = (x.strip() for x in pf.read().splitlines())
magic_lines = list(
filter(lambda x: x.startswith(MAGIC_TORCHBENCH_PREFIX), lines)
)
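
The rewrites in this file replace map/lambda calls with list and generator comprehensions, which lines up with C417 (unnecessary use of map) being dropped from the flake8 ignore list earlier in this commit. A minimal sketch of the equivalence, using hypothetical data that is not part of the commit:

    names = ["  alpha ", "beta", " gamma"]  # hypothetical input, for illustration only

    stripped_old = list(map(lambda x: x.strip(), names))  # old style, flagged by C417
    stripped_new = [x.strip() for x in names]             # style used by this commit

    assert stripped_old == stripped_new == ["alpha", "beta", "gamma"]
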
24 changes: 24 additions & 0 deletions .github/scripts/stop_runner_service.sh
@@ -0,0 +1,24 @@
#!/bin/bash

set +e
set -x

# Get the service name
RUNNER_SERVICE=$(cat "${RUNNER_WORKSPACE}/../../.service")
echo "GitHub self-hosted runner service: ${RUNNER_SERVICE}"

if [[ -n "${RUNNER_SERVICE}" ]]; then
echo "The self-hosted runner has encountered an unrecoverable error and will be shutdown"

pushd "${RUNNER_WORKSPACE}/../../"
# Stop it to prevent the runner from receiving new jobs
sudo ./svc.sh stop
# then uninstall the service
sudo ./svc.sh uninstall
# Finally, shutting down the runner completely
sudo shutdown -P now
# NB: In my test, cleaning up and shutting down the runner this way would already
# remove the runner from the list of registered runners. Calling config.sh remove
# seems redundant as it would require an org token to use, which I don't want to
# add as yet another secret to the CI if there is no need
fi
5 changes: 3 additions & 2 deletions .github/scripts/trymerge.py
@@ -1802,8 +1802,9 @@ def merge(
elif (datetime.utcnow() - cast(datetime, pr.last_pushed_at())).days > stale_pr_days:
raise RuntimeError(
f"This PR is too stale; the last push date was more than {stale_pr_days} days ago. "
"Please rebase and try again. You can rebase by leaving the following comment on this PR:\n"
"`@pytorchbot rebase`"
"Please rebase and try again. You can rebase and merge by leaving the following comment on this PR:\n"
"`@pytorchbot merge -r`\n"
"Or just rebase by leaving `@pytorchbot rebase` comment"
)

start_time = time.time()
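
For reference, the staleness gate shown in the context above compares the time since the last push against `stale_pr_days`; a minimal sketch of that check with hypothetical values (not part of the commit):

    from datetime import datetime, timedelta

    stale_pr_days = 3
    last_pushed_at = datetime.utcnow() - timedelta(days=5)  # hypothetical last push time

    if (datetime.utcnow() - last_pushed_at).days > stale_pr_days:
        # The updated message now suggests `@pytorchbot merge -r` (rebase and merge)
        # or a plain `@pytorchbot rebase`.
        print("This PR is too stale; please rebase and try again.")
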
2 changes: 1 addition & 1 deletion .github/workflows/_bazel-build-test.yml
@@ -210,7 +210,7 @@ jobs:
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
35 changes: 35 additions & 0 deletions .github/workflows/_docs.yml
@@ -178,6 +178,41 @@ jobs:
if-no-files-found: error
path: functorch_ghpages/nightly/
s3-prefix: pytorch/${{ github.event.pull_request.number }}/functorchdocs

# The three upload steps below duplicate the upload from above, but to a different path. This is needed since we
# are in the process of changing the path, but want to keep the disruption to a minimum.
# See https://github.com/pytorch/test-infra/issues/3894
# After a grace period the s3-prefix should start with pytorch/pytorch/
- name: Upload Python Docs Preview (forward compatibility)
uses: seemethere/upload-artifact-s3@v5
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' && steps.build-docs.outcome == 'success' }}
with:
retention-days: 14
s3-bucket: doc-previews
if-no-files-found: error
path: pytorch.github.io/docs/master/
s3-prefix: pytorch/pytorch/pytorch/${{ github.event.pull_request.number }}

- name: Upload C++ Docs Preview (forward compatibility)
uses: seemethere/upload-artifact-s3@v5
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'cpp' && steps.build-docs.outcome == 'success' }}
with:
retention-days: 14
if-no-files-found: error
s3-bucket: doc-previews
path: cppdocs/
s3-prefix: pytorch/pytorch/pytorch/${{ github.event.pull_request.number }}/cppdocs

- name: Upload functorch Docs Preview (forward compatibility)
uses: seemethere/upload-artifact-s3@v5
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'functorch' && steps.build-docs.outcome == 'success' }}
with:
retention-days: 14
s3-bucket: doc-previews
if-no-files-found: error
path: functorch_ghpages/nightly/
s3-prefix: pytorch/pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs

- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
36 changes: 33 additions & 3 deletions .github/workflows/_linux-test.yml
@@ -90,10 +90,9 @@ jobs:
docker-image: ${{ inputs.docker-image }}

- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu')
with:
driver-version: "525.85.05"

- name: Lock NVIDIA A100 40GB Frequency
run: |
@@ -227,7 +226,7 @@
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
@@ -272,3 +271,34 @@ jobs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()

# NB: We are currently having an intermittent GPU-related issue on G5 runners with
# A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
# not seem to help. Here are some symptoms:
# * Calling nvidia-smi times out after 60 seconds
# * Running nvidia-smi fails with an "unable to determine the device handle for GPU:
#   unknown error" message
# * Tests fail with a missing CUDA GPU error when initializing CUDA in PyTorch
# * Running docker --gpus all fails with an error response from the daemon
#
# As both the root cause and recovery path are unclear, let's take the runner out of
# service so that it doesn't get any more jobs
- name: Check NVIDIA driver installation step
if:
failure() &&
((steps.install-nvidia-driver.conclusion && steps.install-nvidia-driver.conclusion == 'failure') || (contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu')))
shell: bash
env:
RUNNER_WORKSPACE: ${{ runner.workspace }}
run: |
set +e
set -x
nvidia-smi
NVIDIA_SMI_STATUS=$?
# These are acceptable return codes from nvidia-smi, as copied from the setup-nvidia GitHub action
if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
echo "NVIDIA driver installation has failed, shutting down the runner..."
.github/scripts/stop_runner_service.sh
fi
2 changes: 1 addition & 1 deletion .github/workflows/_mac-test-mps.yml
@@ -126,7 +126,7 @@ jobs:
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
2 changes: 1 addition & 1 deletion .github/workflows/_mac-test.yml
@@ -185,7 +185,7 @@ jobs:
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
2 changes: 1 addition & 1 deletion .github/workflows/_rocm-test.yml
@@ -214,7 +214,7 @@ jobs:
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
2 changes: 1 addition & 1 deletion .github/workflows/_win-test.yml
@@ -201,7 +201,7 @@ jobs:
- name: Print remaining test logs
shell: bash
if: always()
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
