Update on "memory efficient per-channel fq: use it everywhere, delete…

… old version" Summary: This PR is the cleanup after #51159. High level, we make the new definition of fake_quant per channel be the definition used by autograd, but keep the old function around as a thin wrapper to keep the user facing API the same. In detail: 1. point fake_quantize_per_channel_affine's implementation to be fake_quantize_per_channel_affine_cachemask 2. delete the fake_quantize_per_channel_affine backward, autograd will automatically use the cachemask backward 3. delete all the fake_quantize_per_channel_affine kernels, since they are no longer used by anything Test Plan: ``` python test/test_quantization.py TestFakeQuantize ``` Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned]
pytorch · Jan 28, 2021 · 2830016 · 2830016
2 parents 3b53e9b + e42c44a
commit 2830016
Show file tree

Hide file tree

Showing 101 changed files with 2,279 additions and 1,525 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -574,7 +574,7 @@ jobs:
             hostname
             export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           else
-            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           fi
           echo "id=${id}" >> "${BASH_ENV}"
 

diff --git a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -133,7 +133,7 @@ jobs:
             hostname
             export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           else
-            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           fi
           echo "id=${id}" >> "${BASH_ENV}"
 

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+"""Generates a matrix to be utilized through github actions
+
+Will output a condensed version of the matrix if on a pull request that only
+includes the latest version of python we support built on three different
+architectures:
+    * CPU
+    * Latest CUDA
+    * Latest ROCM
+"""
+
+import json
+import os
+import itertools
+
+# CUDA versions to build for; the last entry is the one used for the
+# condensed pull request matrix.
+CUDA_ARCHES = ["10.1", "10.2", "11.0"]
+
+# ROCm versions to build for; the last entry is the one used for the
+# condensed pull request matrix.
+ROCM_ARCHES = ["3.10", "4.0"]
+
+# Every build target: the CPU-only build followed by all GPU versions.
+FULL_ARCHES = ["cpu"] + CUDA_ARCHES + ROCM_ARCHES
+
+# Maps every entry of FULL_ARCHES to the container image it is built in.
+# TODO: Re-do manylinux CUDA image tagging scheme to be similar to
+#       ROCM so we don't have to do this replacement
+CONTAINER_IMAGES = {
+    gpu_arch: "pytorch/manylinux-cuda" + gpu_arch.replace(".", "")
+    for gpu_arch in CUDA_ARCHES
+}
+CONTAINER_IMAGES.update(
+    (gpu_arch, "pytorch/manylinux-rocm:" + gpu_arch)
+    for gpu_arch in ROCM_ARCHES
+)
+CONTAINER_IMAGES["cpu"] = "pytorch/manylinux-cpu"
+
+# Python versions to build for; the last entry is the one used for the
+# condensed pull request matrix.
+FULL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
+
+
+def is_pull_request():
+    """Return True when the GITHUB_HEAD_REF environment variable is non-empty.
+
+    NOTE(review): relies on GitHub Actions populating GITHUB_HEAD_REF only
+    for pull request runs -- confirm against the Actions documentation.
+    """
+    # Coerce to bool so the predicate never leaks the branch name or None;
+    # an unset or empty variable both mean "not a pull request".
+    return bool(os.environ.get("GITHUB_HEAD_REF"))
+
+def generate_matrix():
+    """Return the build matrix as a JSON string of the form {"include": [...]}.
+
+    Each entry describes one (python version, architecture) combination.
+    On pull requests only the last python version is built, on cpu plus
+    the last CUDA and last ROCM versions.
+    """
+    if is_pull_request():
+        selected_pythons = [FULL_PYTHON_VERSIONS[-1]]
+        selected_arches = ["cpu", CUDA_ARCHES[-1], ROCM_ARCHES[-1]]
+    else:
+        selected_pythons = FULL_PYTHON_VERSIONS
+        selected_arches = FULL_ARCHES
+
+    include = []
+    # Python version is the outer loop so entries are grouped by version.
+    for python_version in selected_pythons:
+        for arch_version in selected_arches:
+            image = CONTAINER_IMAGES[arch_version]
+            # The arch type is inferred from the image name; anything that
+            # is neither a rocm nor a cpu image is treated as cuda.
+            if "rocm" in image:
+                gpu_arch_type = "rocm"
+            elif "cpu" in image:
+                gpu_arch_type = "cpu"
+            else:
+                gpu_arch_type = "cuda"
+            include.append({
+                "python_version": python_version,
+                "gpu_arch_type": gpu_arch_type,
+                "gpu_arch_version": arch_version,
+                "container_image": image
+            })
+    return json.dumps({"include": include})
+
+def main():
+    """Print the generated build matrix (a JSON string) to stdout."""
+    print(generate_matrix())
+
+# Only run when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/generate_pytorch_version.py b/.github/scripts/generate_pytorch_version.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import subprocess
+import re
+
+from datetime import datetime
+from distutils.util import strtobool
+from pathlib import Path
+
+# Matches the "v" prefix of a tag name, ie: the "v" in v1.7.1
+LEADING_V_PATTERN = re.compile(r"^v")
+# Matches a release candidate suffix at the end, ie: the "-rc1" in 1.7.1-rc1
+TRAILING_RC_PATTERN = re.compile(r"-rc[0-9]*$")
+# Matches the "a0" suffix at the end of the contents of version.txt
+LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile(r"a0$")
+
+class NoGitTagException(Exception):
+    """Raised when a release version is requested while not on a git tag."""
+
+def get_pytorch_root():
+    """Return the top-level directory of the enclosing git checkout as a Path.
+
+    The location is whatever `git rev-parse --show-toplevel` reports for the
+    current working directory.
+    """
+    raw_output = subprocess.check_output(
+        ['git', 'rev-parse', '--show-toplevel']
+    )
+    toplevel = raw_output.decode('ascii').strip()
+    return Path(toplevel)
+
+def get_tag():
+    """Return the cleaned-up git tag HEAD points at, or "" when not on a tag.
+
+    The leading "v" and any trailing "-rc<N>" are stripped from the tag name,
+    ie: v1.7.1-rc1 -> 1.7.1
+    """
+    root = get_pytorch_root()
+    try:
+        # Detect and read the tag with a single call so both use the same
+        # flags; reading it back with a plain `git describe` would not
+        # consider lightweight tags and could describe a different tag.
+        dirty_tag = subprocess.check_output(
+            ['git', 'describe', '--tags', '--exact-match'],
+            cwd=root,
+            stderr=subprocess.DEVNULL
+        ).decode('ascii').strip()
+    except subprocess.CalledProcessError:
+        # git exits non-zero when HEAD is not exactly on a tag
+        return ""
+    # Strip leading v that we typically do when we tag branches
+    # ie: v1.7.1 -> 1.7.1
+    tag = LEADING_V_PATTERN.sub("", dirty_tag)
+    # Strip trailing rc pattern
+    # ie: 1.7.1-rc1 -> 1.7.1
+    return TRAILING_RC_PATTERN.sub("", tag)
+
+def get_base_version():
+    """Return the contents of version.txt without the trailing "a0" suffix.
+
+    version.txt is read from the root of the git checkout.
+    """
+    root = get_pytorch_root()
+    # Use a context manager so the file handle is closed deterministically
+    with open(root / 'version.txt', 'r') as version_file:
+        dirty_version = version_file.read().strip()
+    # Strips trailing a0 from version.txt, not too sure why it's there in the
+    # first place
+    return re.sub(LEGACY_BASE_VERSION_SUFFIX_PATTERN, "", dirty_version)
+
+class PytorchVersion:
+    """Builds version strings for one binary build configuration.
+
+    Args:
+        gpu_arch_type: architecture family, typically "cpu", "cuda" or "rocm"
+        gpu_arch_version: architecture version such as "10.2" or "4.0";
+            empty for cpu
+        no_build_suffix: when truthy, never append a "+<arch>" build suffix
+    """
+
+    def __init__(self, gpu_arch_type, gpu_arch_version, no_build_suffix):
+        self.gpu_arch_type = gpu_arch_type
+        self.gpu_arch_version = gpu_arch_version
+        self.no_build_suffix = no_build_suffix
+
+    def get_post_build_suffix(self):
+        """Return the "+<arch>" suffix for this build, or "" when none applies.
+
+        CUDA versions are rendered without the dot (ie: "+cu110"); every
+        other arch type is rendered as "+<type><version>" (ie: "+rocm4.0",
+        "+cpu").
+        """
+        # CUDA 10.2 is the version to be uploaded to PyPI so it doesn't have a
+        # version suffix
+        if ((self.gpu_arch_type == "cuda" and self.gpu_arch_version == "10.2")
+                or self.no_build_suffix):
+            return ""
+        if self.gpu_arch_type == "cuda":
+            return f"+cu{self.gpu_arch_version.replace('.', '')}"
+        return f"+{self.gpu_arch_type}{self.gpu_arch_version}"
+
+    def get_release_version(self):
+        """Return "<tag><build suffix>" for the git tag HEAD is on.
+
+        Raises:
+            NoGitTagException: when HEAD is not on a git tag.
+        """
+        # Look the tag up once: every get_tag() call shells out to git, and
+        # reusing the value keeps the check and the result consistent.
+        tag = get_tag()
+        if not tag:
+            raise NoGitTagException(
+                "Not on a git tag, are you sure you want a release version?"
+            )
+        return f"{tag}{self.get_post_build_suffix()}"
+
+    def get_nightly_version(self):
+        """Return "<base version>.dev<YYYYMMDD><build suffix>" for today."""
+        date_str = datetime.today().strftime('%Y%m%d')
+        build_suffix = self.get_post_build_suffix()
+        return f"{get_base_version()}.dev{date_str}{build_suffix}"
+
+def main():
+    """Parse the command line and print the version string for this build.
+
+    Prints the release version when on a git tag and falls back to the
+    nightly version otherwise. Every option defaults to the value of an
+    environment variable (NO_BUILD_SUFFIX, GPU_ARCH_TYPE, GPU_ARCH_VERSION).
+    """
+    arg_parser = argparse.ArgumentParser(
+        description="Generate pytorch version for binary builds"
+    )
+    arg_parser.add_argument(
+        "--no-build-suffix",
+        default=os.environ.get("NO_BUILD_SUFFIX", False),
+        type=strtobool,
+        help="Whether or not to add a build suffix typically (+cpu)"
+    )
+    arg_parser.add_argument(
+        "--gpu-arch-type",
+        default=os.environ.get("GPU_ARCH_TYPE", "cpu"),
+        type=str,
+        help="GPU arch you are building for, typically (cpu, cuda, rocm)"
+    )
+    arg_parser.add_argument(
+        "--gpu-arch-version",
+        default=os.environ.get("GPU_ARCH_VERSION", ""),
+        type=str,
+        help="GPU arch version, typically (10.2, 4.0), leave blank for CPU"
+    )
+    options = arg_parser.parse_args()
+    version = PytorchVersion(
+        options.gpu_arch_type,
+        options.gpu_arch_version,
+        options.no_build_suffix
+    )
+    try:
+        version_string = version.get_release_version()
+    except NoGitTagException:
+        # Not on a tag, so this is a nightly build
+        version_string = version.get_nightly_version()
+    print(version_string)
+
+# Only run when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/build_linux_binaries.yml b/.github/workflows/build_linux_binaries.yml
@@ -0,0 +1,86 @@
+name: Build Linux Wheels
+
+on:
+  # TODO: This workflow can currently only be triggered manually through
+  #       workflow_dispatch; we eventually need to add a cron schedule
+  # TODO: Add an on_release trigger to build on tags
+  workflow_dispatch:
+
+jobs:
+  # Runs generate_binary_build_matrix.py and exposes its JSON output as the
+  # job output `matrix`.
+  generate-build-matrix:
+    # Only run when the repository owner is 'pytorch'
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: ubuntu-latest
+    outputs:
+      # Forward the step output of `set-matrix` as a job output
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    container:
+      image: python:3.9
+    steps:
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python .github/scripts/generate_binary_build_matrix.py
+          MATRIX=$(python .github/scripts/generate_binary_build_matrix.py)
+          echo "::set-output name=matrix::${MATRIX}"
+  # Builds one wheel per entry of the matrix produced by
+  # generate-build-matrix and uploads it as a workflow artifact.
+  build-wheel:
+    # Only run when the repository owner is 'pytorch'
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: generate-build-matrix
+    runs-on: linux.2xlarge
+    strategy:
+      matrix:
+        ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }}
+    container:
+      image: ${{ matrix.container_image }}
+    env:
+      DESIRED_PYTHON: ${{ matrix.python_version }}
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: ${{ matrix.gpu_arch_version }}
+      # Matrix keys are emitted in lowercase by generate_binary_build_matrix.py,
+      # so reference them in lowercase consistently
+      GPU_ARCH_VERSION: ${{ matrix.gpu_arch_version }}
+      GPU_ARCH_TYPE: ${{ matrix.gpu_arch_type }}
+      PYTORCH_BUILD_NUMBER: 1
+      SKIP_ALL_TESTS: 1
+    steps:
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Generate version string
+        working-directory: pytorch/
+        run: |
+          version=$(.github/scripts/generate_pytorch_version.py)
+          echo "Generated version: ${version}"
+          echo "PYTORCH_BUILD_VERSION=${version}" >> $GITHUB_ENV
+      # TODO: Remove this once we remove the need for the directories to be
+      #       in specific locations
+      - name: Symlink repositories to root directory (for legacy scripts purposes)
+        run: |
+          ln -s $(pwd)/pytorch /pytorch
+          ln -s $(pwd)/builder /builder
+      # TODO: Bundle the correct build script in the base container image so
+      #       that we don't have to do this type of specification
+      # Exactly one of the three build steps below runs, selected by
+      # matrix.gpu_arch_type
+      - name: Build PyTorch binary (CUDA specific)
+        if: ${{ matrix.gpu_arch_type == 'cuda' }}
+        run: |
+          /builder/manywheel/build.sh
+      - name: Build PyTorch binary (ROCM specific)
+        if: ${{ matrix.gpu_arch_type == 'rocm' }}
+        run: |
+          /builder/manywheel/build_rocm.sh
+      - name: Build PyTorch binary (CPU specific)
+        if: ${{ matrix.gpu_arch_type == 'cpu' }}
+        run: |
+          /builder/manywheel/build_cpu.sh
+      - uses: actions/upload-artifact@v2
+        with:
+          name: pytorch-wheel-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
+          path: /remote/**/*.whl
+      # TODO: Add a step here for uploading binaries
diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh
@@ -160,7 +160,7 @@ pip install --user pytest-sugar
 if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
   # Check out torch/vision at Jun 11 2020 commit
   # This hash must match one in .jenkins/pytorch/test.sh
-  pip install -q --user git+https://github.com/pytorch/vision.git@e70c91a9ff9b8a20e05c133aec6ec3ed538c32fb
+  pip install -q --user git+https://github.com/pytorch/vision.git@ae0d80b3c52dc98b3a9763bdb974c3ef7b6eb83d
   pip install -q --user ninja
   # JIT C++ extensions require ninja, so put it into PATH.
   export PATH="/var/lib/jenkins/.local/bin:$PATH"

diff --git a/.jenkins/pytorch/common_utils.sh b/.jenkins/pytorch/common_utils.sh
@@ -66,7 +66,7 @@ function get_bazel() {
   chmod +x tools/bazel
 }
 
-TORCHVISION_COMMIT=e70c91a9ff9b8a20e05c133aec6ec3ed538c32fb
+TORCHVISION_COMMIT=ae0d80b3c52dc98b3a9763bdb974c3ef7b6eb83d
 
 function install_torchvision() {
   # Check out torch/vision at Jun 11 2020 commit

diff --git a/aten/src/ATen/LegacyTHFunctionsCUDA.h b/aten/src/ATen/LegacyTHFunctionsCUDA.h
@@ -20,8 +20,6 @@ namespace cuda {
 
 Tensor & _th_masked_fill_(Tensor & self, const Tensor & mask, Scalar value);
 Tensor & _th_masked_fill_bool_(Tensor & self, const Tensor & mask, Scalar value);
-Tensor & _th_masked_scatter_(Tensor & self, const Tensor & mask, const Tensor & source);
-Tensor & _th_masked_scatter_bool_(Tensor & self, const Tensor & mask, const Tensor & source);
 Tensor & _th_index_copy_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source);
 Tensor & _th_take_out(Tensor & result, const Tensor & self, const Tensor & index);
 Tensor _th_take(const Tensor & self, const Tensor & index);