Update on "[wip] quantization: store safe_on_fbgemm flag on quantized conv"


Summary:

This is a first step toward fixing the problems surfaced in #46749.
This particular PR fixes only a small part of the issue:
1. If a conv module is unsafe to run on fbgemm, we now persist this
information with a `safe_for_fbgemm` boolean flag stored on `ConvPackedParams{n}d`.
2. If we are in an fbgemm kernel and detect that the current conv
packed params are tagged as unsafe, we throw an error (see the sketch below).
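
Roughly, the intended behavior looks like the following sketch. This is an
illustration only, not the PR's actual code: `safe_for_fbgemm` is the flag
added here, while the class layout, function names, and error message are
assumptions.

```python
# Hedged sketch of the guard described above; everything except the
# `safe_for_fbgemm` name is illustrative.

class ConvPackedParamsSketch:
    """Stand-in for ConvPackedParams{n}d carrying the new boolean flag."""

    def __init__(self, packed_weight, safe_for_fbgemm: bool):
        self.packed_weight = packed_weight
        # Decided at prepack time: False if the weight/activation ranges
        # could saturate fbgemm's 16-bit accumulation.
        self.safe_for_fbgemm = safe_for_fbgemm


def fbgemm_conv(params: ConvPackedParamsSketch, qx):
    # The check this PR adds on the fbgemm kernel path.
    if not params.safe_for_fbgemm:
        raise RuntimeError(
            "This packed conv was marked unsafe for fbgemm (possible "
            "saturation); re-prepack with reduce_range or use qnnpack."
        )
    ...  # run the actual fbgemm convolution on qx here
```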

For now, this PR is a WIP to get early feedback on whether this is the
right direction, since the iteration cost here is high. In particular,
the following pieces are still missing:
* testing serialization: saving the v3 format and loading it back (see the
  sketch after the Test Plan)
* covering all the conv call sites (currently only the module path and
  conv2d are handled)

Note: some potential improvements were discussed around dynamically
dispatching to qnnpack when it is available and the flag is set. This PR
does not attempt to solve that issue; it can be addressed in future PRs.

Test Plan:

```
# test that the error is thrown when running an operation that could
# saturate, and is not thrown otherwise
python test/test_quantization.py TestQuantizedOps.test_conv_reduce_range

# test that loading older versions of conv packed params works as expected
# TODO(before land): extend these tests with the v3 files
python test/test_quantization.py TestSerialization
```
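
For the v3 serialization coverage listed as missing above, a round-trip check
along these lines could eventually be added. This is a hedged sketch, not the
existing TestSerialization code: the test name and structure are assumptions,
and only standard `torch` APIs are used.

```python
import io

import torch
import torch.nn.quantized as nnq

# Hedged sketch of the missing v3 round-trip coverage: build a quantized conv,
# save/load it through TorchScript (which goes through the packed params'
# versioned serialization), and check the numerics survive the trip. The real
# TestSerialization tests additionally load pre-generated files from older
# format versions, which this sketch does not cover.
def check_conv_packed_params_roundtrip():
    m = nnq.Conv2d(3, 3, kernel_size=3)
    w = torch.quantize_per_tensor(
        torch.randn(3, 3, 3, 3), scale=0.1, zero_point=0, dtype=torch.qint8)
    m.set_weight_bias(w, torch.randn(3))

    x = torch.quantize_per_tensor(
        torch.randn(1, 3, 8, 8), scale=0.1, zero_point=0, dtype=torch.quint8)
    y_ref = m(x)

    buf = io.BytesIO()
    torch.jit.save(torch.jit.script(m), buf)
    buf.seek(0)
    m2 = torch.jit.load(buf)

    assert torch.allclose(m2(x).dequantize(), y_ref.dequantize())
```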

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: [D29175285](https://our.internmc.facebook.com/intern/diff/D29175285)

[ghstack-poisoned]
vkuzo committed Jun 29, 2021
2 parents 6286989 + b474cb0 commit c5c47af
Showing 797 changed files with 23,968 additions and 11,366 deletions.
4 changes: 4 additions & 0 deletions .circleci/cimodel/data/pytorch_build_definitions.py
@@ -31,6 +31,7 @@ class Conf:
is_libtorch: bool = False
is_important: bool = False
parallel_backend: Optional[str] = None
build_only: bool = False

@staticmethod
def is_test_phase(phase):
@@ -112,6 +113,8 @@ def gen_workflow_params(self, phase):
parameters["resource_class"] = "xlarge"
if hasattr(self, 'filters'):
parameters['filters'] = self.filters
if self.build_only:
parameters['build_only'] = miniutils.quote(str(int(True)))
return parameters

def gen_workflow_job(self, phase):
@@ -369,6 +372,7 @@ def instantiate_configs(only_slow_gradcheck):
is_libtorch=is_libtorch,
is_important=is_important,
parallel_backend=parallel_backend,
build_only=build_only,
)

# run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
21 changes: 15 additions & 6 deletions .circleci/config.yml
@@ -283,11 +283,15 @@ pytorch_params: &pytorch_params
build_only:
type: string
default: ""
ci_master:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
DOCKER_IMAGE: << parameters.docker_image >>
USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
BUILD_ONLY: << parameters.build_only >>
CI_MASTER: << pipeline.parameters.run_master_build >>
resource_class: << parameters.resource_class >>

pytorch_android_params: &pytorch_android_params
@@ -496,7 +500,7 @@ jobs:
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "sudo chown -R jenkins workspace && export CIRCLE_JOB="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && export JOB_BASE_NAME="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
@@ -635,7 +639,7 @@ jobs:
# =================== The following code will be executed inside Docker container ===================
set -ex
export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
export CIRCLE_JOB="$CIRCLE_JOB"
export JOB_BASE_NAME="$CIRCLE_JOB"
${PARALLEL_FLAGS}
cd workspace
EOL
@@ -682,11 +686,11 @@ jobs:
export CIRCLE_SHA1="$CIRCLE_SHA1"
export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
export CIRCLE_BRANCH="$CIRCLE_BRANCH"
export CIRCLE_JOB="$CIRCLE_JOB"
export JOB_BASE_NAME="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -840,7 +844,7 @@ jobs:
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
@@ -1455,7 +1459,7 @@ jobs:
# Using the same IAM user to write stats to our OSS bucket
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
@@ -7063,6 +7067,7 @@ workflows:
- /release\/.*/
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
build_only: "1"
- pytorch_linux_build:
name: pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_build
requires:
@@ -7097,6 +7102,7 @@
- /release\/.*/
build_environment: "pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7"
build_only: "1"
- pytorch_linux_build:
name: pytorch_linux_bionic_py3_6_clang9_noarch_build
requires:
@@ -7185,6 +7191,7 @@
build_environment: "pytorch-linux-bionic-rocm3.9-py3.6-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm3.9-py3.6"
resource_class: xlarge
build_only: "1"
- pytorch_macos_10_15_py3_build:
name: pytorch_macos_10_15_py3_build
- pytorch_macos_10_13_py3_build:
@@ -9078,12 +9085,14 @@ workflows:
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
build_only: "1"
- pytorch_linux_build:
name: pytorch_libtorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_build
requires:
- "docker-pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7"
build_only: "1"
- pytorch_linux_build:
build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
2 changes: 1 addition & 1 deletion .circleci/docker/build.sh
@@ -320,7 +320,7 @@ if [ -n "${JENKINS:-}" ]; then
JENKINS_GID=$(id -g jenkins)
fi

tmp_tag="tmp-$(cat /dev/urandom | tr -dc 'a-z' | head -c 32)"
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

# Build image
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
1 change: 1 addition & 0 deletions .circleci/scripts/setup_ci_environment.sh
@@ -67,6 +67,7 @@ add_to_env_file() {
}

add_to_env_file IN_CI 1
add_to_env_file CI_MASTER "${CI_MASTER:-}"
add_to_env_file COMMIT_SOURCE "${CIRCLE_BRANCH:-}"
add_to_env_file BUILD_ENVIRONMENT "${BUILD_ENVIRONMENT}"
add_to_env_file CIRCLE_PULL_REQUEST "${CIRCLE_PULL_REQUEST}"
@@ -15,11 +15,15 @@ pytorch_params: &pytorch_params
build_only:
type: string
default: ""
ci_master:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
DOCKER_IMAGE: << parameters.docker_image >>
USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
BUILD_ONLY: << parameters.build_only >>
CI_MASTER: << pipeline.parameters.run_master_build >>
resource_class: << parameters.resource_class >>

pytorch_android_params: &pytorch_android_params
2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/job-specs/job-specs-custom.yml
@@ -213,7 +213,7 @@
# Using the same IAM user to write stats to our OSS bucket
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
10 changes: 5 additions & 5 deletions .circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -34,7 +34,7 @@ jobs:
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "sudo chown -R jenkins workspace && export CIRCLE_JOB="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && export JOB_BASE_NAME="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
@@ -173,7 +173,7 @@ jobs:
# =================== The following code will be executed inside Docker container ===================
set -ex
export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
export CIRCLE_JOB="$CIRCLE_JOB"
export JOB_BASE_NAME="$CIRCLE_JOB"
${PARALLEL_FLAGS}
cd workspace
EOL
@@ -220,11 +220,11 @@ jobs:
export CIRCLE_SHA1="$CIRCLE_SHA1"
export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
export CIRCLE_BRANCH="$CIRCLE_BRANCH"
export CIRCLE_JOB="$CIRCLE_JOB"
export JOB_BASE_NAME="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -378,7 +378,7 @@ jobs:
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
14 changes: 8 additions & 6 deletions .github/scripts/generate_ci_workflows.py
@@ -81,18 +81,20 @@ def generate_workflow_file(
cuda_version="cpu",
test_runner_type=WINDOWS_CPU_TEST_RUNNER,
on_pull_request=True,
num_test_shards=2,
),
PyTorchWindowsWorkflow(
build_environment="pytorch-win-vs2019-cuda10-cudnn7-py3",
cuda_version="10.1",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
on_pull_request=True,
only_build_on_pull_request=True
num_test_shards=2,
),
PyTorchWindowsWorkflow(
build_environment="pytorch-win-vs2019-cuda11-cudnn8-py3",
cuda_version="11.1",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
num_test_shards=2,
)
]

@@ -140,11 +142,11 @@ def generate_workflow_file(
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# test_runner_type=LINUX_CUDA_TEST_RUNNER,
# ),
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
43 changes: 34 additions & 9 deletions .github/templates/linux_ci_workflow.yml.j2
@@ -145,14 +145,14 @@ jobs:
sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/print_test_stats.py to natively support GitHub Actions
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
@@ -306,6 +306,12 @@ jobs:
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Zip test reports for upload
if: always()
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${TEST_CONFIG}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store PyTorch Test Reports
if: always()
@@ -314,7 +320,7 @@
retention-days: 14
if-no-files-found: error
path:
test/**/*.xml
test-reports-*.zip
- name: Clean up docker images
if: always()
run: |
@@ -337,13 +343,17 @@
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
# deep clone, to allow tools/print_test_stats.py to use Git commands
# deep clone, to allow tools/stats/print_test_stats.py to use Git commands
fetch-depth: 0
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
path: test/test-reports
path: .
- name: Unzip test reports
run: |
# Should preserve paths so reports should still be in test/test-reports
unzip -o 'test-reports-*.zip'
- uses: actions/setup-python@v2
with:
python-version: 3.9
@@ -360,20 +370,20 @@
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/print_test_stats.py to natively support GitHub Actions
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_JOB: !{{ build_environment }}
JOB_BASE_NAME: !{{ build_environment }}-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
export PYTHONPATH=$PWD
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/stats/print_test_stats.py --upload-to-s3 --compare-with-s3 test
{%- if enable_doc_jobs %}

pytorch_python_doc_build:
@@ -438,6 +448,20 @@ jobs:
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: driazati/upload-artifact-s3@21c31d0a7bcb056ca50bd6ce197ba6507c26a1be
if: github.event_name == 'pull_request'
name: Upload Docs Preview
with:
name: deploy
retention-days: 14
if-no-files-found: error
path: pytorch.github.io/docs/merge
- name: Show Docs Preview URL (Click Me)
if: github.event_name == 'pull_request'
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
echo "See rendered docs at https://d28slxzaq48q8t.cloudfront.net/$PR_NUMBER/"
- name: Archive artifacts into zip
run: |
zip -r pytorch_github_io.zip "${GITHUB_WORKSPACE}/pytorch.github.io"
@@ -452,4 +476,5 @@ jobs:
run: |
# Prune all of the docker images
docker system prune -af

{%- endif -%}
