Update base for Update on "Use cascade-summation to improve nansum accuracy"

Fixes #59415

This implements nansum as a new `LoadPolicy` for the existing sum functions,
so it now uses the more accurate cascade-sum algorithm.
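
To illustrate the idea, here is a toy Python sketch (the real change lives in the C++ sum kernels; these helper names are made up for illustration):

```python
import math

def cascade_sum(vals, chunk=8):
    # Pairwise/cascade summation: rounding error grows roughly like O(log n)
    # instead of the O(n) of a straight left-to-right accumulation.
    if len(vals) <= chunk:
        return sum(vals)
    mid = len(vals) // 2
    return cascade_sum(vals[:mid], chunk) + cascade_sum(vals[mid:], chunk)

def toy_nansum(vals):
    # The "load policy" part: NaN becomes 0 as each element is loaded, and the
    # ordinary cascade sum then runs unchanged.
    return cascade_sum([0.0 if math.isnan(v) else v for v in vals])

print(toy_nansum([1.0, float("nan"), 2.0]))  # 3.0
```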

I've also expanded `test_nansum` to cover the four special cases of the sum
algorithm (inner/outer reduction; vectorized or scalar).
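
Roughly, the added coverage looks like this (a sketch only; the sizes, NaN density, and tolerances here are assumptions, not the values used by the real test):

```python
import torch

# Smaller inner sizes are meant to hit the scalar path and larger ones the
# vectorized path (an assumption about the thresholds); dim=1 reduces over the
# inner (contiguous) dimension, dim=0 over the outer one.
for rows, cols in [(3, 5), (64, 512)]:
    x = torch.randn(rows, cols)
    x[torch.rand_like(x) < 0.2] = float("nan")   # sprinkle in some NaNs
    ref = torch.where(torch.isnan(x), torch.zeros_like(x), x).double()
    for dim in (0, 1):
        assert torch.allclose(torch.nansum(x, dim=dim).double(),
                              ref.sum(dim=dim), rtol=1e-4, atol=1e-4)
```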

Nansum performance comparison
-----------------------------
For float sums, contiguous reductions are as much as 10x faster and discontiguous sums are ~1.8x faster (more so for small shapes, due to TensorIterator overheads).

|        Shape | Dim | Master Contiguous (us) | This PR Contiguous (us) | Master Discontiguous (us) | This PR Discontiguous (us) |
|-------------:|-----|:----------------------:|:-----------------------:|:-------------------------:|:--------------------------:|
|     10, 1000 | 0   |          74.9          |           2.02          |            75.6           |            6.41            |
|              | 1   |          8.24          |           1.8           |            8.28           |            5.24            |
|    100, 1000 | 0   |           134          |           7.55          |            130            |            43.2            |
|              | 1   |          70.5          |           7.01          |            71.5           |            40.6            |
|   1000, 1000 | 0   |           726          |           69.2          |            737            |             403            |
|              | 1   |           702          |           51.0          |            709            |             404            |
|  10000, 1000 | 0   |         15,300         |          2,470          |           18,200          |           10,400           |
|              | 1   |          7,200         |          1,160          |           7,470           |            4,440           |
| 100000, 1000 | 0   |         163,000        |          28,000         |          199,000          |           131,000          |
|              | 1   |         70,700         |          13,500         |           75,700          |           44,200           |
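
Timings like these can be collected with `torch.utils.benchmark`; a minimal sketch follows (the shapes, dtype, and timing settings are assumptions, not the exact script used for the tables above):

```python
import torch
from torch.utils.benchmark import Timer

# Time nansum over contiguous and discontiguous (transposed) float32 inputs,
# reducing over each dimension, roughly mirroring the tables above.
for shape in [(10, 1000), (100, 1000), (1000, 1000)]:
    contiguous = torch.randn(shape)
    discontiguous = torch.randn(shape[::-1]).t()
    for name, t in [("contiguous", contiguous), ("discontiguous", discontiguous)]:
        for dim in (0, 1):
            m = Timer("t.nansum(dim=dim)", globals={"t": t, "dim": dim}).blocked_autorange()
            print(f"{shape} {name} dim={dim}: {m.median * 1e6:8.1f} us")
```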

Sum performance comparison
--------------------------

For float sums, performance is unchanged to within measurement precision:

|        Shape | Dim | Master Contiguous (us) | This PR Contiguous (us) | Master Discontiguous (us) | This PR Discontiguous (us) |
|-------------:|-----|:----------------------:|:-----------------------:|:-------------------------:|:--------------------------:|
|     10, 1000 | 0   |          1.92          |           2.01          |            4.2            |            4.49            |
|              | 1   |          1.68          |           1.68          |            2.79           |            2.75            |
|    100, 1000 | 0   |          6.52          |           7.07          |            26.9           |            27.3            |
|              | 1   |          5.91          |           5.66          |            16.8           |            16.9            |
|   1000, 1000 | 0   |          55.6          |           58.6          |            256            |             254            |
|              | 1   |          41.0          |           41.2          |            150            |             147            |
|  10000, 1000 | 0   |          1,370         |          1,650          |           8,070           |            8,020           |
|              | 1   |           908          |           845           |           3,100           |            2,980           |
| 100000, 1000 | 0   |         24,700         |          24,700         |           90,900          |           91,000           |
|              | 1   |         12,500         |          12,100         |           31,500          |           31,800           |

[ghstack-poisoned]
peterbell10 committed Jul 9, 2021
2 parents 76fe800 + d52ebf2 commit 36b0c6b
Showing 479 changed files with 12,513 additions and 5,421 deletions.
@@ -44,7 +44,7 @@ jobs:
is_official_build: ${{ parameters.is_official_build}}

# Sync and update PyTorch submodules
- bash: git submodule update --init --recursive
- bash: git submodule update --init --recursive --jobs 0
displayName: Update PyTorch submodules

# Build PyTorch and run unit tests - no packaging

@@ -47,7 +47,7 @@ jobs:
is_official_build: ${{ parameters.is_official_build}}

# Sync and update PyTorch submodules
- script: git submodule update --init --recursive
- script: git submodule update --init --recursive --jobs 0
displayName: Update PyTorch submodules

# Build PyTorch and run unit tests - no packaging
8 changes: 4 additions & 4 deletions .circleci/config.yml
@@ -496,7 +496,7 @@ jobs:
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
@@ -1677,7 +1677,7 @@ jobs:
echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
@@ -1746,7 +1746,7 @@ jobs:
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive --depth 1
git submodule update --init --recursive --depth 1 --jobs 0
# export
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
@@ -1839,7 +1839,7 @@ jobs:
echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
2 changes: 1 addition & 1 deletion .circleci/scripts/binary_checkout.sh
@@ -55,7 +55,7 @@ else
echo "Can't tell what to checkout"
exit 1
fi
retry git submodule update --init --recursive
retry git submodule update --init --recursive --jobs 0
echo "Using Pytorch from "
git --no-pager log --max-count 1
popd
2 changes: 1 addition & 1 deletion .circleci/scripts/binary_ios_build.sh
@@ -22,7 +22,7 @@ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive
git submodule update --init --recursive --jobs 0

# run build script
chmod a+x ${PROJ_ROOT}/scripts/build_ios.sh
6 changes: 3 additions & 3 deletions .circleci/scripts/vs_install.ps1
@@ -1,8 +1,8 @@
# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479
# https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers

# 16.8.5 BuildTools
$VS_DOWNLOAD_LINK = "https://download.visualstudio.microsoft.com/download/pr/20130c62-1bc8-43d6-b4f0-c20bb7c79113/145a319d79a83376915d8f855605e152ef5f6fa2b2f1d2dca411fb03722eea72/vs_BuildTools.exe"
# 16.8.6 BuildTools
$VS_DOWNLOAD_LINK = "https://s3.amazonaws.com/ossci-windows/vs16.8.6_BuildTools.exe"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.Component.MSBuild",
@@ -20,7 +20,7 @@ if (${env:INSTALL_WINDOWS_SDK} -eq "1") {

curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2019 Version 16.8.5 installer failed"
echo "Download of the VS 2019 Version 16.8.6 installer failed"
exit 1
}

6 changes: 3 additions & 3 deletions .circleci/verbatim-sources/job-specs/job-specs-custom.yml
@@ -431,7 +431,7 @@
echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
@@ -500,7 +500,7 @@
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive --depth 1
git submodule update --init --recursive --depth 1 --jobs 0
# export
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
@@ -593,7 +593,7 @@
echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -30,7 +30,7 @@ jobs:
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
git submodule sync && git submodule update -q --init --recursive --depth 1
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
12 changes: 11 additions & 1 deletion .github/scripts/generate_ci_workflows.py
@@ -246,14 +246,24 @@ def generate_workflow_file(
# ),
]


BAZEL_WORKFLOWS = [
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-py3.6-gcc7-bazel-test",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
test_runner_type=LINUX_CPU_TEST_RUNNER,
),
]

if __name__ == "__main__":
jinja_env = jinja2.Environment(
variable_start_string="!{{",
loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
)
template_and_workflows = [
(jinja_env.get_template("linux_ci_workflow.yml.j2"), LINUX_WORKFLOWS),
(jinja_env.get_template("windows_ci_workflow.yml.j2"), WINDOWS_WORKFLOWS)
(jinja_env.get_template("windows_ci_workflow.yml.j2"), WINDOWS_WORKFLOWS),
(jinja_env.get_template("bazel_ci_workflow.yml.j2"), BAZEL_WORKFLOWS),
]
for template, workflows in template_and_workflows:
for workflow in workflows:
199 changes: 199 additions & 0 deletions .github/templates/bazel_ci_workflow.yml.j2
@@ -0,0 +1,199 @@
{%- extends "linux_ci_workflow.yml.j2" -%}
{%- set exclude_test = true -%}
{% block name -%}
# Template is at: .github/templates/bazel_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: Bazel Linux CI (!{{ build_environment }})
{%- endblock %}
{% block build +%}
# building and testing in a single job since bazel runs only small subset of tests
build-and-test:
runs-on: !{{ test_runner_type }}
needs:
- calculate-docker-image
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-build-and-test
NUM_TEST_SHARDS: !{{ num_test_shards }}
steps:
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build PyTorch
run: |
docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
- name: Test PyTorch
run: |
export SHARD_NUMBER=0
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Zip test reports for upload
if: always()
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-1.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
{%- endblock %}
{% block render_test_results +%}
# this is a separate step from test because the log files from test are too
# long: basically, GitHub tries to render all of the log files when you click
# through an action causing extreme slowdown on actions that contain too many
# logs (like test); we can always move it back to the other one, but it
# doesn't create the best experience
render_test_results:
if: always()
needs:
- build-and-test
runs-on: ubuntu-18.04
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
# deep clone, to allow tools/stats/print_test_stats.py to use Git commands
fetch-depth: 0
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
path: .
- name: Unzip test reports
run: |
# Should preserve paths so reports should still be in test/test-reports
unzip -o 'test-reports-*.zip'
- uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
# boto3 version copied from .circleci/docker/common/install_conda.sh
run: |
pip install -r requirements.txt
pip install boto3==1.16.34 junitparser rich
- name: Output Test Results (Click Me)
run: |
python tools/render_junit.py test
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: pytorch-linux-xenial-py3.6-gcc7-bazel-test-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
export PYTHONPATH=$PWD
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
{%- endblock %}
