Update on "[wip] quantization: store input_qrange_le_128 flag on quantized conv"


Summary:

This is the start of fixing the problems surfaced in #46749.
This particular PR fixes only a small part of them:
1. If a conv module is unsafe to run in fbgemm, we now persist this
information with an `input_qrange_le_128` boolean flag stored on `ConvPackedParams{n}d`, set to False.
2. If we are in an fbgemm kernel and we detect that the current conv
packed params are tagged as unsafe, we throw an error.

For now, this PR is a WIP to get early feedback on whether this is the
right direction, since the iteration cost on this is high. In particular,
the missing pieces are:
* testing serialization: saving v3 and loading it back
* covering all the conv call sites (currently only module + conv2d are handled)

Note: some potential improvements were discussed around dynamically
dispatching to qnnpack when it is available and the flag is set. This PR
does not attempt to solve that; it can be addressed in future PRs.
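
For context on why a conv can be unsafe in fbgemm: the x86 kernels multiply
uint8 activations by int8 weights and accumulate adjacent pairs into a
saturating int16 (`vpmaddubsw`). With the full 8-bit activation range the
intermediate sum can exceed int16 and silently saturate; restricting
activations to 128 quantization levels (7 bits) keeps it in range, which is
what `input_qrange_le_128` records. A minimal numpy sketch of the failure
mode (an illustration of the arithmetic, not fbgemm's actual code):

```
# Model vpmaddubsw: a0*b0 + a1*b1 with saturating int16 accumulation.
import numpy as np

def pairwise_madd_int16(acts_u8, weights_s8):
    prod = acts_u8.astype(np.int32) * weights_s8.astype(np.int32)
    pair_sums = prod[0::2] + prod[1::2]
    # the hardware saturates instead of wrapping:
    return np.clip(pair_sums, -32768, 32767).astype(np.int16)

w = np.array([127, 127], dtype=np.int8)

# full 8-bit activation range: 255*127 + 255*127 = 64770 > 32767 -> saturates
print(pairwise_madd_int16(np.array([255, 255], dtype=np.uint8), w))  # [32767]

# reduced range (qrange <= 128): 127*127 * 2 = 32258 <= 32767 -> exact
print(pairwise_madd_int16(np.array([127, 127], dtype=np.uint8), w))  # [32258]
```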

Test Plan:

```
# test that the error is thrown when we try to run an operation which could
# saturate, and is not thrown otherwise
python test/test_quantization.py TestQuantizedOps.test_conv_reduce_range

# test that loading older versions of conv packed params works as expected
# TODO(before land): extend these tests with the v3 files
python test/test_quantization.py TestSerialization
```

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: [D29175285](https://our.internmc.facebook.com/intern/diff/D29175285)

[ghstack-poisoned]
vkuzo committed Jul 1, 2021
2 parents d267788 + bdb41ff commit a2fd226
Showing 172 changed files with 3,735 additions and 1,498 deletions.
11 changes: 0 additions & 11 deletions .circleci/cimodel/data/pytorch_build_data.py
@@ -80,17 +80,6 @@
]),
]),
]),
("gcc", [
("9", [
("3.8", [
("coverage", [
(True, [
("shard_test", [XImportant(True)]),
]),
]),
]),
]),
]),
("rocm", [
("3.9", [
("3.6", [
61 changes: 3 additions & 58 deletions .circleci/config.yml
@@ -212,7 +212,7 @@ commands:
cd ~/project
export ANDROID_BUILD_TYPE="<< parameters.build_type >>"
export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 .circleci/scripts/upload_binary_size_to_scuba.py android
python3 tools/stats/upload_binary_size_to_scuba.py android
##############################################################################
# Binary build (nightlies nightly build) defaults
@@ -547,7 +547,7 @@ jobs:
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
python3 tools/stats/upload_binary_size_to_scuba.py || exit 0
- store_artifacts:
path: /home/circleci/project/dist

@@ -881,7 +881,7 @@ jobs:
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
python3 /pytorch/tools/stats/upload_binary_size_to_scuba.py || exit 0
- persist_to_workspace:
root: /
paths: final_pkgs
@@ -7164,26 +7164,6 @@ workflows:
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_build:
name: pytorch_linux_bionic_py3_8_gcc9_coverage_build
requires:
- "docker-pytorch-linux-bionic-py3.8-gcc9"
build_environment: "pytorch-linux-bionic-py3.8-gcc9-coverage-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.8-gcc9"
- pytorch_linux_test:
name: pytorch_linux_bionic_py3_8_gcc9_coverage_test1
requires:
- pytorch_linux_bionic_py3_8_gcc9_coverage_build
build_environment: "pytorch-linux-bionic-py3.8-gcc9-coverage-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.8-gcc9"
resource_class: large
- pytorch_linux_test:
name: pytorch_linux_bionic_py3_8_gcc9_coverage_test2
requires:
- pytorch_linux_bionic_py3_8_gcc9_coverage_build
build_environment: "pytorch-linux-bionic-py3.8-gcc9-coverage-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.8-gcc9"
resource_class: large
- pytorch_linux_build:
name: pytorch_linux_bionic_rocm3_9_py3_6_build
requires:
@@ -9273,41 +9253,6 @@ workflows:
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
name: periodic_pytorch_windows_cuda11.3_build
python_version: "3.8"
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test1
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test2
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"

# The following allows these jobs to run on ci-all and release branches
debuggable-scheduled-ci:
2 changes: 1 addition & 1 deletion .circleci/docker/common/install_rocm.sh
@@ -6,7 +6,7 @@ install_magma() {
# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma
git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
git checkout aed4e285084763113ce5757393d4008e27b5194b
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
6 changes: 3 additions & 3 deletions .circleci/scripts/binary_windows_build.sh
@@ -20,8 +20,8 @@ if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then

echo "Free Space for CUDA DEBUG BUILD"
if [[ "$CIRCLECI" == 'true' ]]; then
if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Commnuity" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Commnuity"
if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community"
fi

if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0" ]]; then
@@ -67,7 +67,7 @@ if [[ "$CIRCLECI" == 'true' && -d "C:\\ProgramData\\Microsoft\\VisualStudio\\Pac
fi

if [[ "$CIRCLECI" == 'true' && -d "C:\\Microsoft" ]]; then
# don't use quota here
# don't use quotes here
rm -rf /c/Microsoft/AndroidNDK*
fi

2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/commands.yml
@@ -171,4 +171,4 @@ commands:
cd ~/project
export ANDROID_BUILD_TYPE="<< parameters.build_type >>"
export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 .circleci/scripts/upload_binary_size_to_scuba.py android
python3 tools/stats/upload_binary_size_to_scuba.py android
2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/job-specs/binary-job-specs.yml
@@ -29,7 +29,7 @@
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
python3 /pytorch/tools/stats/upload_binary_size_to_scuba.py || exit 0
- persist_to_workspace:
root: /
paths: final_pkgs
2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -81,7 +81,7 @@ jobs:
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
python3 tools/stats/upload_binary_size_to_scuba.py || exit 0
- store_artifacts:
path: /home/circleci/project/dist

35 changes: 0 additions & 35 deletions .circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
@@ -31,41 +31,6 @@
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
name: periodic_pytorch_windows_cuda11.3_build
python_version: "3.8"
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test1
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test2
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"

# The following allows these jobs to run on ci-all and release branches
debuggable-scheduled-ci:
35 changes: 27 additions & 8 deletions .github/scripts/generate_ci_workflows.py
@@ -1,20 +1,23 @@
#!/usr/bin/env python3

from pathlib import Path
from typing import Any, Dict
from typing import Any, Dict, Optional

import jinja2
from typing_extensions import Literal

DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

GITHUB_DIR = Path(__file__).parent.parent
GITHUB_DIR = Path(__file__).resolve().parent.parent


# it would be nice to statically specify that build_environment must be
# present, but currently Python has no easy way to do that
# https://github.com/python/mypy/issues/4617
PyTorchWorkflow = Dict[str, Any]

YamlShellBool = Literal["''", 1]

WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge"
WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu"

@@ -27,13 +30,15 @@ def PyTorchWindowsWorkflow(
on_pull_request: bool = False,
only_build_on_pull_request: bool = False,
num_test_shards: int = 1,
is_scheduled: Optional[str] = None,
) -> PyTorchWorkflow:
return {
"build_environment": build_environment,
"test_runner_type": test_runner_type,
"cuda_version": cuda_version,
"on_pull_request": on_pull_request,
"only_build_on_pull_request": only_build_on_pull_request and on_pull_request,
"is_scheduled": is_scheduled,
"num_test_shards": num_test_shards,
}

Expand All @@ -49,14 +54,18 @@ def PyTorchLinuxWorkflow(
test_runner_type: str,
on_pull_request: bool = False,
enable_doc_jobs: bool = False,
enable_multigpu_test: YamlShellBool = "''",
num_test_shards: int = 1,
is_scheduled: Optional[str] = None,
) -> PyTorchWorkflow:
return {
"build_environment": build_environment,
"docker_image_base": docker_image_base,
"test_runner_type": test_runner_type,
"on_pull_request": on_pull_request,
"is_scheduled": is_scheduled,
"enable_doc_jobs": enable_doc_jobs,
"enable_multigpu_test": enable_multigpu_test,
"num_test_shards": num_test_shards,
}

@@ -95,7 +104,14 @@ def generate_workflow_file(
cuda_version="11.1",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
num_test_shards=2,
)
),
PyTorchWindowsWorkflow(
build_environment="periodic-pytorch-win-vs2019-cuda11-cudnn8-py3",
cuda_version="11.3",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
num_test_shards=2,
is_scheduled="45 0,4,8,12,16,20 * * *",
),
]

LINUX_WORKFLOWS = [
@@ -147,6 +163,7 @@ def generate_workflow_file(
build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
enable_multigpu_test=1,
num_test_shards=2,
),
PyTorchLinuxWorkflow(
@@ -175,11 +192,13 @@
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
test_runner_type=LINUX_CPU_TEST_RUNNER,
on_pull_request=True,
num_test_shards=2,
),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
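
The `is_scheduled` field added above appears to carry a cron expression
(e.g. `45 0,4,8,12,16,20 * * *`) that the Jinja2 templates can turn into a
GitHub Actions `on: schedule:` trigger, taking over the periodic Windows
CUDA 11.3 jobs deleted from the CircleCI config earlier in this diff. A
hedged sketch of how a template might consume the field (the template text
below is hypothetical, not one of the repository's actual template files):

```
# Hypothetical rendering of a workflow dict; the real templates live in
# the repository, not in this snippet.
import jinja2

TEMPLATE = """\
name: {{ build_environment }}
on:
  push:
    branches: [master]
{%- if is_scheduled %}
  schedule:
    - cron: "{{ is_scheduled }}"
{%- endif %}
"""

workflow = {
    "build_environment": "periodic-pytorch-win-vs2019-cuda11-cudnn8-py3",
    "is_scheduled": "45 0,4,8,12,16,20 * * *",
}
print(jinja2.Template(TEMPLATE).render(**workflow))
```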
45 changes: 35 additions & 10 deletions .github/scripts/generate_pytorch_test_matrix.py
@@ -9,22 +9,47 @@

import json
import os
from typing import List
from typing import Dict

from typing_extensions import TypedDict

NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))

def generate_sharding_list() -> List[int]:
return list(range(1, NUM_TEST_SHARDS + 1))
class Config(TypedDict):
num_shards: int
runner: str


def main() -> None:
print(json.dumps(
{
'test_config': generate_sharding_list()
},
sort_keys=True,
))
TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
configs: Dict[str, Config] = {}
if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
matrix = {
'include': [
{
'config': 'default',
'shard': shard,
'num_shards': NUM_TEST_SHARDS,
'runner': TEST_RUNNER_TYPE,
}
for shard in range(1, NUM_TEST_SHARDS + 1)
] + [
{
'config': name,
'shard': shard,
'num_shards': config['num_shards'],
'runner': config['runner'],
}
for name, config in configs.items()
for shard in range(1, config['num_shards'] + 1)
]
}
render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}
print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
print(f'::set-output name=matrix::{json.dumps(matrix)}')
print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')


if __name__ == "__main__":
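
To make the new output shape concrete, here is a hedged sketch of invoking
the rewritten matrix generator the way a workflow step would (the runner
names and shard counts are illustrative, not taken from the actual CI
config):

```
# Run the matrix generator with illustrative env values; requires a
# pytorch checkout as the working directory.
import os
import subprocess

env = dict(
    os.environ,
    TEST_RUNNER_TYPE="linux.2xlarge",
    NUM_TEST_SHARDS="2",
    MULTIGPU_RUNNER_TYPE="linux.16xlarge.nvidia.gpu",
    ENABLE_MULTIGPU_TEST="1",
)
result = subprocess.run(
    ["python3", ".github/scripts/generate_pytorch_test_matrix.py"],
    env=env, capture_output=True, text=True, check=True,
)
print(result.stdout)
# Per the logic above, 'matrix.include' should contain:
#   {"config": "default",  "shard": 1, "num_shards": 2, "runner": "linux.2xlarge"}
#   {"config": "default",  "shard": 2, "num_shards": 2, "runner": "linux.2xlarge"}
#   {"config": "multigpu", "shard": 1, "num_shards": 1, "runner": "linux.16xlarge.nvidia.gpu"}
```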
