Update on "[quant][graphmode][fx] Support quantization for standalone module"

Summary:
Sometimes users need to quantize a submodule as one unit, for example because that
submodule will be lowered to a different backend such as an accelerator.

The submodule is quantized with the same FX-based graph mode quantization functions
and is connected with the rest of the model automatically.

APIs:
```python
class StandaloneModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(1, 1, 1)

    def forward(self, x):
        return self.conv(x)

class CustomTracer(Tracer):
    def is_leaf_module(self, m, module_qualified_name):
        return (m.__module__.startswith('torch.nn') and
                not isinstance(m, torch.nn.Sequential)) or \
            isinstance(m, StandaloneModule)

class ModelThatUsesStandaloneModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.standalone = StandaloneModule()

    def forward(self, x):
        return self.standalone(x)

m = ModelThatUsesStandaloneModule()
qconfig_dict = {"": qconfig, "standalone_module_name": ["standalone"]}
m = prepare_fx(m, qconfig_dict)
calibrate(m, data)
m = convert_fx(m)

m.standalone = lower_to_accelerator(m.standalone)
```
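
In the snippet above, `qconfig`, `prepare_fx`, `convert_fx`, and `data` come from the usual FX graph mode quantization workflow, while `calibrate` and `lower_to_accelerator` are placeholders. As a minimal sketch of what the calibration step might look like, assuming `data` is an iterable of example input batches:

```python
def calibrate(model, data):
    # Feed representative inputs through the prepared model so the
    # observers inserted by prepare_fx can record activation statistics.
    model.eval()
    with torch.no_grad():
        for batch in data:
            model(batch)
```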

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: [D23580642](https://our.internmc.facebook.com/intern/diff/D23580642)

[ghstack-poisoned]
jerryzh168 committed Sep 29, 2020
2 parents 6587a4b + 29670ca commit 7dd90f4
Showing 51 changed files with 713 additions and 505 deletions.
25 changes: 19 additions & 6 deletions .circleci/cimodel/data/pytorch_build_definitions.py
@@ -6,7 +6,7 @@
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils
from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
from cimodel.data.simple.util.branch_filters import gen_filter_dict
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
from cimodel.data.simple.util.docker_constants import gen_docker_image


@@ -110,6 +110,8 @@ def gen_workflow_params(self, phase):
parameters["resource_class"] = resource_class
if phase == "build" and self.rocm_version is not None:
parameters["resource_class"] = "xlarge"
if hasattr(self, 'filters'):
parameters['filters'] = self.filters
return parameters

def gen_workflow_job(self, phase):
@@ -139,14 +141,16 @@ def gen_workflow_job(self, phase):

# TODO This is a hack to special case some configs just for the workflow list
class HiddenConf(object):
def __init__(self, name, parent_build=None):
def __init__(self, name, parent_build=None, filters=None):
self.name = name
self.parent_build = parent_build
self.filters = filters

def gen_workflow_job(self, phase):
return {
self.gen_build_name(phase): {
"requires": [self.parent_build.gen_build_name("build")]
"requires": [self.parent_build.gen_build_name("build")],
"filters": self.filters,
}
}

@@ -166,7 +170,8 @@ def gen_workflow_job(self, phase):
"branch": self.branch,
"requires": [self.parent_build],
"context": "org-member",
"filters": gen_filter_dict(branches_list=["nightly"])
"filters": gen_filter_dict(branches_list=["nightly"],
tags_list=RC_PATTERN)
}
}

@@ -205,7 +210,9 @@ def gen_docs_configs(xenial_parent_config):
configs.append(
HiddenConf(
"pytorch_python_doc_build",
parent_build=xenial_parent_config
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
configs.append(
@@ -219,7 +226,9 @@ def gen_docs_configs(xenial_parent_config):
configs.append(
HiddenConf(
"pytorch_cpp_doc_build",
parent_build=xenial_parent_config
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
configs.append(
@@ -348,6 +357,8 @@ def instantiate_configs():

# run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
# should run on a CPU-only build that runs on all PRs.
# XXX should this be updated to a more modern build? Projects are
# beginning to drop python3.6
if (
distro_name == "xenial"
and fc.find_prop("pyver") == "3.6"
@@ -358,6 +369,8 @@
and compiler_name == "gcc"
and fc.find_prop("compiler_version") == "5.4"
):
c.filters = gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN)
c.dependent_tests = gen_docs_configs(c)

if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
29 changes: 17 additions & 12 deletions .circleci/cimodel/data/simple/docker_definitions.py
@@ -1,6 +1,7 @@
from collections import OrderedDict

from cimodel.lib.miniutils import quote
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


# TODO: make this generated from a matrix rather than just a static list
@@ -24,7 +25,7 @@
"pytorch-linux-xenial-py3.8",
"pytorch-linux-xenial-py3.6-clang7",
"pytorch-linux-xenial-py3.6-gcc4.8",
"pytorch-linux-xenial-py3.6-gcc5.4",
"pytorch-linux-xenial-py3.6-gcc5.4", # this one is used in doc builds
"pytorch-linux-xenial-py3.6-gcc7.2",
"pytorch-linux-xenial-py3.6-gcc7",
"pytorch-linux-bionic-rocm3.7-py3.6",
@@ -34,16 +35,20 @@

def get_workflow_jobs():
"""Generates a list of docker image build definitions"""
return [
OrderedDict(
ret = []
for image_name in IMAGE_NAMES:
parameters = OrderedDict({
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),
})
if image_name == "pytorch-linux-xenial-py3.6-gcc5.4":
# pushing documentation on tags requires CircleCI to also
# build all the dependencies on tags, including this docker image
parameters['filters'] = gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN)
ret.append(OrderedDict(
{
"docker_build_job": OrderedDict(
{
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),
}
)
"docker_build_job": parameters
}
)
for image_name in IMAGE_NAMES
]
))
return ret
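
The `filters` mapping built here flows into the generated `config.yml` shown further below. A rough sketch of what `gen_filter_dict(branches_list=r"/.*/", tags_list=RC_PATTERN)` plausibly returns (the real helper lives in `cimodel/data/simple/util/branch_filters.py`, so this reconstruction is inferred from the YAML it produces):

```python
# Hypothetical reconstruction of the helper used above; the actual
# implementation lives in cimodel/data/simple/util/branch_filters.py.
RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"

def gen_filter_dict(branches_list, tags_list=None):
    # CircleCI "filters" mapping: run on matching branches, and
    # optionally also on matching tags (needed for RC doc pushes).
    filters = {"branches": {"only": branches_list}}
    if tags_list is not None:
        filters["tags"] = {"only": tags_list}
    return filters
```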
39 changes: 37 additions & 2 deletions .circleci/config.yml
@@ -1188,10 +1188,13 @@ jobs:
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/python_doc_push_script.sh docs/master master site") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/python_doc_push_script.sh docs/'$target' master site") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
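
The new `tag=${CIRCLE_TAG:1:5}` / `target=${tag:-master}` lines choose where the docs are pushed: on a release-candidate tag the leading `v` is stripped and the first five characters of the version become the target directory, otherwise the docs still go to `master`. A small sketch of the same selection logic in Python (the `v1.7.0-rc1` value is purely illustrative):

```python
def docs_target(circle_tag):
    # Mirrors the shell logic: tag=${CIRCLE_TAG:1:5}; target=${tag:-master}
    if circle_tag:
        return circle_tag[1:6]  # e.g. "v1.7.0-rc1" -> "1.7.0"
    return "master"

assert docs_target("v1.7.0-rc1") == "1.7.0"
assert docs_target("") == "master"
```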
@@ -1230,10 +1233,13 @@ jobs:
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/cpp_doc_push_script.sh docs/master master") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/cpp_doc_push_script.sh docs/"$target" master") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
@@ -6432,6 +6438,11 @@ workflows:
- docker_build_job:
name: "docker-pytorch-linux-xenial-py3.6-gcc5.4"
image_name: "pytorch-linux-xenial-py3.6-gcc5.4"
filters:
branches:
only: /.*/
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- docker_build_job:
name: "docker-pytorch-linux-xenial-py3.6-gcc7.2"
image_name: "pytorch-linux-xenial-py3.6-gcc7.2"
@@ -6450,14 +6461,29 @@
- "docker-pytorch-linux-xenial-py3.6-gcc5.4"
build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
filters:
branches:
only: /.*/
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- pytorch_linux_test:
name: pytorch_linux_xenial_py3_6_gcc5_4_test
requires:
- pytorch_linux_xenial_py3_6_gcc5_4_build
build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
resource_class: large
filters:
branches:
only: /.*/
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- pytorch_python_doc_build:
filters:
branches:
only: /.*/
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
requires:
- pytorch_linux_xenial_py3_6_gcc5_4_build
- pytorch_doc_push:
@@ -6467,10 +6493,17 @@
branches:
only:
- nightly
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
name: pytorch_python_doc_push
requires:
- pytorch_python_doc_build
- pytorch_cpp_doc_build:
filters:
branches:
only: /.*/
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
requires:
- pytorch_linux_xenial_py3_6_gcc5_4_build
- pytorch_doc_push:
@@ -6480,6 +6513,8 @@
branches:
only:
- nightly
tags:
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
name: pytorch_cpp_doc_push
requires:
- pytorch_cpp_doc_build
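
Every job on the doc-push path now also runs for release-candidate tags, admitted by the `/v[0-9]+(\.[0-9]+)*-rc[0-9]+/` pattern. A quick, illustrative check of which tag names that regex accepts (CircleCI supplies the surrounding slashes, so the bare pattern is used here):

```python
import re

rc_regex = re.compile(r"v[0-9]+(\.[0-9]+)*-rc[0-9]+")

assert rc_regex.fullmatch("v1.7.0-rc1")      # release candidate: runs the job
assert not rc_regex.fullmatch("v1.7.0")      # final release tag: filtered out
assert not rc_regex.fullmatch("nightly")     # not a tag pattern: filtered out
```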
10 changes: 8 additions & 2 deletions .circleci/verbatim-sources/job-specs/job-specs-custom.yml
@@ -43,10 +43,13 @@
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/python_doc_push_script.sh docs/master master site") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/python_doc_push_script.sh docs/'$target' master site") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
@@ -85,10 +88,13 @@
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/cpp_doc_push_script.sh docs/master master") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && . ./.circleci/scripts/cpp_doc_push_script.sh docs/"$target" master") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
2 changes: 2 additions & 0 deletions BUILD.bazel
@@ -55,6 +55,7 @@ cc_library(
"c10/cuda/*.h",
"c10/cuda/impl/*.h",
"c10/macros/*.h",
"c10/mobile/*.h",
"c10/util/*.h",
"c10/util/*.hpp",
]),
@@ -71,6 +72,7 @@ cc_library(
srcs = glob([
"c10/core/*.cpp",
"c10/core/impl/*.cpp",
"c10/mobile/*.cpp",
"c10/util/*.cpp",
]) + if_cuda(
glob([
5 changes: 5 additions & 0 deletions aten/src/ATen/Context.cpp
@@ -64,6 +64,11 @@ bool Context::deterministic() const {
}

void Context::setDeterministic(bool b) {
if (b) {
TORCH_WARN_ONCE("torch.set_deterministic is in beta, and its design and "
" functionality may change in the future.");
}

_deterministic = b;
}
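
The warning fires once, the first time deterministic mode is enabled. From Python the corresponding call is `torch.set_deterministic`, so usage looks roughly like:

```python
import torch

# The first call with True emits the one-time beta warning added above;
# later calls stay silent because TORCH_WARN_ONCE only reports once.
torch.set_deterministic(True)
```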

12 changes: 10 additions & 2 deletions aten/src/ATen/cpu/vec256/vec256_int.h
@@ -104,6 +104,8 @@ class Vec256<int64_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr does not need to be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int64_t tmp_values[size()];
@@ -228,6 +230,8 @@ class Vec256<int32_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr does not need to be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int32_t tmp_values[size()];
@@ -449,6 +453,8 @@ class Vec256<int16_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr does not need to be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int16_t tmp_values[size()];
@@ -699,6 +705,8 @@ class Vec256<int8_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr does not need to be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int8_t tmp_values[size()];
@@ -879,8 +887,8 @@ Vec256<int16_t> inline operator*(const Vec256<int16_t>& a, const Vec256<int16_t>

template <typename T, typename Op>
Vec256<T> inline int_elementwise_binary_256(const Vec256<T>& a, const Vec256<T>& b, Op op) {
__at_align32__ T values_a[Vec256<T>::size()];
__at_align32__ T values_b[Vec256<T>::size()];
T values_a[Vec256<T>::size()];
T values_b[Vec256<T>::size()];
a.store(values_a);
b.store(values_b);
for (int i = 0; i != Vec256<T>::size(); i++) {
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cpu/UnaryOpsKernel.cpp
@@ -238,9 +238,9 @@ static void logical_not_kernel(TensorIterator& iter) {
// NOTE: this implementation differs from the CUDA implementation which only does single dispatch
// (to avoid expensive compilation) because CPU kernels don't handle dynamic_casting
// (see needs_dynamic_casting).
AT_DISPATCH_ALL_TYPES_AND2(kBool, kHalf, iter.dtype(1), "logical_not_cpu", [&]() {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(1), "logical_not_cpu", [&]() {
using self_t = scalar_t;
AT_DISPATCH_ALL_TYPES_AND2(kBool, kHalf, iter.dtype(0), "logical_not_cpu", [&]() {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(0), "logical_not_cpu", [&]() {
cpu_kernel(iter, [](self_t a) -> scalar_t { return static_cast<scalar_t>(!a); });
});
});
6 changes: 3 additions & 3 deletions aten/src/ATen/native/cuda/UnarySignKernels.cu
@@ -11,11 +11,11 @@
namespace at { namespace native {

void logical_not_kernel_cuda(TensorIterator& iter) {
// error check -- this is just ensuring we don't dispatch on types that aren't in ALL_TYPES_AND2(...)
// error check -- this is just ensuring we don't dispatch on types that aren't in ALL_TYPES_AND_COMPLEX_AND3(...)
// so we don't have to maintain a separate list or to do double dispatch.
AT_DISPATCH_ALL_TYPES_AND2(kBool, kHalf, iter.dtype(0), "logical_not_cuda", [&]() {});
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(0), "logical_not_cuda", [&]() {});

AT_DISPATCH_ALL_TYPES_AND2(kBool, kHalf, iter.dtype(1), "logical_not_cuda", [&]() {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(1), "logical_not_cuda", [&]() {
gpu_kernel(iter, []GPU_LAMBDA(scalar_t a) -> bool { return !a; });
});
}
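
With the dispatch widened to `AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, ...)`, `logical_not` now covers bfloat16 and complex inputs on both CPU and CUDA. An illustrative check of the newly covered dtypes:

```python
import torch

# bfloat16 input: nonzero values map to False, zero maps to True.
print(torch.logical_not(torch.tensor([0., 1., 2.], dtype=torch.bfloat16)))  # -> [True, False, False]

# complex input is now dispatched as well.
print(torch.logical_not(torch.tensor([0 + 0j, 1 + 1j])))  # -> [True, False]
```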
