Update on "Remove hacky_wrapper from VariableType and TraceType"
Previously, VariableType and TraceType kernels were still written in the legacy way: they took a single TensorOptions argument instead of scattered dtype, layout, device, and pin_memory arguments, and they relied on hacky_wrapper to be callable.

With this PR, variable and tracing kernels are written in the new way, and hacky_wrapper is no longer needed for them.

This only affects ops with `use_c10_dispatcher: full`.
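
For illustration, a rough sketch of the two calling conventions; the op name and exact parameter list are hypothetical, not taken from this PR:

```cpp
#include <ATen/ATen.h>

// Legacy-style kernel: takes a gathered TensorOptions argument and needed
// hacky_wrapper to adapt it to the dispatcher's scattered calling convention.
at::Tensor my_op_legacy(const at::Tensor& self, const at::TensorOptions& options);

// New-style kernel: TensorOptions is scattered into its components, so the
// kernel can be registered and called directly, without hacky_wrapper.
at::Tensor my_op(
    const at::Tensor& self,
    c10::optional<at::ScalarType> dtype,
    c10::optional<at::Layout> layout,
    c10::optional<at::Device> device,
    c10::optional<bool> pin_memory);
```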

Differential Revision: [D23466042](https://our.internmc.facebook.com/intern/diff/D23466042/)

[ghstack-poisoned]
smessmer committed Sep 24, 2020
2 parents 66186ce + dc67b47 commit b68b323
Showing 802 changed files with 9,957 additions and 4,743 deletions.
8 changes: 4 additions & 4 deletions .circleci/config.yml
@@ -924,7 +924,7 @@ jobs:
smoke_mac_test:
<<: *binary_linux_test_upload_params
macos:
xcode: "9.4.1"
xcode: "11.2.1"
steps:
- checkout
- run:
@@ -949,7 +949,7 @@ jobs:
binary_mac_build:
<<: *binary_mac_params
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
# See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
- checkout
@@ -1253,7 +1253,7 @@ jobs:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-build
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
- checkout
- run_brew_for_macos_build
@@ -1287,7 +1287,7 @@ jobs:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
- checkout
- attach_workspace:
4 changes: 2 additions & 2 deletions .circleci/verbatim-sources/job-specs/binary-job-specs.yml
@@ -135,7 +135,7 @@
smoke_mac_test:
<<: *binary_linux_test_upload_params
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
- checkout
- run:
@@ -160,7 +160,7 @@
binary_mac_build:
<<: *binary_mac_params
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
# See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
- checkout
4 changes: 2 additions & 2 deletions .circleci/verbatim-sources/job-specs/job-specs-custom.yml
@@ -109,7 +109,7 @@
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-build
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
- checkout
- run_brew_for_macos_build
@@ -143,7 +143,7 @@
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
macos:
-xcode: "9.4.1"
+xcode: "11.2.1"
steps:
- checkout
- attach_workspace:
2 changes: 1 addition & 1 deletion .jenkins/caffe2/test.sh
@@ -171,7 +171,7 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
# default pip version is too old(9.0.2), unable to support tag `manylinux2010`.
# Fix the pip error: Couldn't find a version that satisfies the requirement
pip install --upgrade pip
-pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.4.0.dev202008122
+pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.5.0.dev202009182
fi
"$ROOT_DIR/scripts/onnx/test.sh"
fi
5 changes: 3 additions & 2 deletions .jenkins/pytorch/print_sccache_log.py
@@ -6,6 +6,7 @@
lines = f.readlines()

for line in lines:
-# Ignore errors from CPU instruction set testing
-if 'src.c' not in line:
+# Ignore errors from CPU instruction set or symbol existing testing
+keywords = ['src.c', 'CheckSymbolExists.c']
+if all([keyword not in line for keyword in keywords]):
print(line)
4 changes: 2 additions & 2 deletions Dockerfile
@@ -44,13 +44,13 @@ WORKDIR /opt/pytorch
COPY --from=conda /opt/conda /opt/conda
COPY --from=submodule-update /opt/pytorch /opt/pytorch
RUN --mount=type=cache,target=/opt/ccache \
-TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
+TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
python setup.py install

FROM conda as conda-installs
ARG INSTALL_CHANNEL=pytorch-nightly
-RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y pytorch torchvision cudatoolkit=10.1 && \
+RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y pytorch torchvision cudatoolkit=11.0.221 && \
/opt/conda/bin/conda clean -ya

FROM ${BASE_IMAGE} as official
3 changes: 2 additions & 1 deletion aten/src/ATen/CMakeLists.txt
@@ -78,6 +78,7 @@ file(GLOB native_cuda_cu "native/cuda/*.cu")
exclude(native_cuda_cu "${native_cuda_cu}" ${native_cuda_cu_sp})
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh")
file(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
@@ -372,7 +373,7 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"

set(INSTALL_HEADERS ${base_h} ${ATen_CORE_HEADERS})
if(NOT INTERN_BUILD_MOBILE)
-list(APPEND INSTALL_HEADERS ${native_h} ${native_cpu_h} ${native_quantized_h} ${cuda_h} ${native_cuda_h} ${cudnn_h} ${hip_h} ${miopen_h})
+list(APPEND INSTALL_HEADERS ${native_h} ${native_cpu_h} ${native_quantized_h} ${cuda_h} ${native_cuda_h} ${native_hip_h} ${cudnn_h} ${hip_h} ${miopen_h})
endif()

# https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake
14 changes: 11 additions & 3 deletions aten/src/ATen/WrapDimUtils.h
@@ -30,14 +30,15 @@ static inline int64_t maybe_wrap_dim(int64_t dim, const std::vector<std::vector<
return maybe_wrap_dim(dim, tensor_sizes[0].size());
}

-// wrap each of dims basing on dim_post_expr
-static inline void maybe_wrap_dims(std::vector<int64_t>& dims, int64_t dim_post_expr) {
+// wrap each dim in the dims array, taking dim_post_expr as the true number of dimensions
+static inline void maybe_wrap_dims_n(int64_t* dims, int64_t ndims, int64_t dim_post_expr) {
if (dim_post_expr <= 0) {
dim_post_expr = 1; // this will make range [-1, 0]
}
int64_t min = -dim_post_expr;
int64_t max = dim_post_expr - 1;
-for (auto& dim : dims) {
+for (int64_t i = 0; i < ndims; ++i) {
+  auto &dim = dims[i];
if (dim < min || dim > max) {
TORCH_CHECK_INDEX(false,
"Dimension out of range (expected to be in range of [",
@@ -47,6 +48,13 @@ static inline void maybe_wrap_dims(std::vector<int64_t>& dims, int64_t dim_post_
}
}

// Wrap each dim in a contiguous container, taking dim_post_expr as the true number of dimensions
// E.g. could also be std::array or c10::SmallVector
template <typename Container>
inline void maybe_wrap_dims(Container& dims, int64_t dim_post_expr) {
return maybe_wrap_dims_n(dims.data(), dims.size(), dim_post_expr);
}

// previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible
// to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors
// to be "skipped" (both for wrap dimension behavior and dimension size checking).
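
For context, a minimal usage sketch of the templated `maybe_wrap_dims` overload added above (not part of the diff; the dimension values are made up):

```cpp
#include <ATen/WrapDimUtils.h>
#include <vector>

void wrap_dims_example() {
  // Wrap negative dims for a hypothetical 4-dimensional tensor.
  std::vector<int64_t> dims = {-1, 0, -3};
  at::maybe_wrap_dims(dims, /*dim_post_expr=*/4);
  // dims is now {3, 0, 1}; an out-of-range dim would trigger TORCH_CHECK_INDEX.
}
```
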
1 change: 1 addition & 0 deletions aten/src/ATen/core/aten_interned_strings.h
@@ -611,6 +611,7 @@ _(aten, sigmoid) \
_(aten, sign) \
_(aten, signbit) \
_(aten, silu) \
_(aten, sgn) \
_(aten, sin) \
_(aten, sinh) \
_(aten, size) \
7 changes: 6 additions & 1 deletion aten/src/ATen/core/jit_type.h
@@ -263,7 +263,12 @@ struct SingleElementType : public Type {
}

protected:
-SingleElementType(TypePtr elem) : Type(Kind), elem(std::move(elem)) {}
+SingleElementType(TypePtr elem) : Type(Kind), elem(std::move(elem)) {
+  if (!this->elem) {
+    throw std::runtime_error(c10::str(
+        "Can not create ", typeKindToString(Kind), " with None type"));
+  }
+}

private:
TypePtr elem;
3 changes: 3 additions & 0 deletions aten/src/ATen/core/type.cpp
@@ -716,6 +716,9 @@ TupleType::TupleType(
schema_(std::move(schema)) {
has_free_variables_ =
std::any_of(elements_.begin(), elements_.end(), [](TypePtr v) {
if (!v) {
throw std::runtime_error("Can not create tuple with None type");
}
return v->hasFreeVariables();
});
if (schema_) {
7 changes: 7 additions & 0 deletions aten/src/ATen/cpu/vec256/vec256_base.h
@@ -239,6 +239,13 @@ struct Vec256 {
// Specifically map() does not perform the type conversion needed by abs.
return map([](T x) { return static_cast<T>(std::abs(x)); });
}

template <typename other_t_sgn = T,
typename std::enable_if<c10::is_complex<other_t_sgn>::value, int>::type = 0>
Vec256<T> sgn() const {
return map(at::native::sgn_impl);
}

template <typename other_t_angle = T,
typename std::enable_if<!c10::is_complex<other_t_angle>::value, int>::type = 0>
Vec256<T> angle() const {
10 changes: 10 additions & 0 deletions aten/src/ATen/cpu/vec256/vec256_complex_double.h
@@ -134,6 +134,16 @@ template <> class Vec256<c10::complex<double>> {
auto angle = _mm256_permute_pd(angle_(), 0x05); // angle 90-angle
return _mm256_and_pd(angle, real_mask); // angle 0
}
Vec256<c10::complex<double>> sgn() const {
auto abs = abs_();
auto zero = _mm256_setzero_pd();
auto mask = _mm256_cmp_pd(abs, zero, _CMP_EQ_OQ);
auto abs_val = Vec256(abs);

auto div = values / abs_val.values; // x / abs(x)

return blendv(div, zero, mask);
}
__m256d real_() const {
const __m256d real_mask = _mm256_castsi256_pd(_mm256_setr_epi64x(0xFFFFFFFFFFFFFFFF, 0x0000000000000000,
0xFFFFFFFFFFFFFFFF, 0x0000000000000000));
10 changes: 10 additions & 0 deletions aten/src/ATen/cpu/vec256/vec256_complex_float.h
@@ -171,6 +171,16 @@ template <> class Vec256<c10::complex<float>> {
auto angle = _mm256_permute_ps(angle_(), 0xB1); // angle 90-angle
return _mm256_and_ps(angle, real_mask); // angle 0
}
Vec256<c10::complex<float>> sgn() const {
auto abs = abs_();
auto zero = _mm256_setzero_ps();
auto mask = _mm256_cmp_ps(abs, zero, _CMP_EQ_OQ);
auto abs_val = Vec256(abs);

auto div = values / abs_val.values; // x / abs(x)

return _mm256_blendv_ps(div, zero, mask);
}
__m256 real_() const {
const __m256 real_mask = _mm256_castsi256_ps(_mm256_setr_epi32(0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000,
0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000));
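
For reference, a scalar sketch of the behaviour the vectorized `sgn` kernels above implement: `x / |x|` for non-zero complex inputs and `0` at zero (an illustration, not code from this commit):

```cpp
#include <complex>

// Hypothetical scalar reference for complex sgn: 0 when x == 0, x / |x| otherwise.
template <typename T>
std::complex<T> sgn_reference(std::complex<T> x) {
  T a = std::abs(x);
  return a == T(0) ? std::complex<T>(0) : x / a;
}
```
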
10 changes: 6 additions & 4 deletions aten/src/ATen/native/BinaryOps.cpp
@@ -175,7 +175,7 @@ Tensor& divide_(Tensor& self, Scalar other) {

// true_divide, an alias for div
Tensor& true_divide_out(Tensor& result, const Tensor& self, const Tensor& divisor) {
-return native::div_out(result, self, divisor);
+return at::div_out(result, self, divisor);
}

Tensor true_divide(const Tensor& self, const Tensor& divisor) {
@@ -390,14 +390,16 @@ Tensor rsub(const Tensor& self, const Tensor& other, Scalar alpha) {
}

Tensor& atan2_out(Tensor& result, const Tensor& self, const Tensor& other) {
-auto iter = TensorIterator::binary_op(result, self, other);
+auto iter = TensorIterator::binary_float_op(result, self, other);
atan2_stub(iter.device_type(), iter);
return result;
}

Tensor atan2(const Tensor& self, const Tensor& other) {
-Tensor result = at::empty({0}, self.options());
-return native::atan2_out(result, self, other);
+Tensor result;
+auto iter = TensorIterator::binary_float_op(result, self, other);
+atan2_stub(iter.device_type(), iter);
+return iter.output();
}

Tensor& atan2_(Tensor& self, const Tensor& other) {
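
The switch from `binary_op` to `binary_float_op` concerns type promotion: with a float-op iterator, integral inputs are expected to be promoted so that the result is floating point. A usage sketch under that assumption (not code from this commit):

```cpp
#include <ATen/ATen.h>

void atan2_promotion_example() {
  auto a = at::arange(1, 5);   // integral (int64) tensor
  auto b = at::arange(5, 9);   // integral (int64) tensor
  // With the float-op iterator, atan2 on integral inputs should produce a
  // floating-point result rather than an integral one.
  auto r = at::atan2(a, b);
}
```
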
12 changes: 12 additions & 0 deletions aten/src/ATen/native/README.md
@@ -277,6 +277,18 @@ them the same thing!)
If two backends have the same dispatch function, you can write `CPU, CUDA: func`
to reuse the same function name in both cases.

Available backend options can be found at
https://github.com/pytorch/pytorch/blob/master/tools/codegen/gen.py#L970.
In addition to the backends above, we also support the keyword `Math`, an alias
that maps to all backend and autograd backend keys. In other words, a function registered
to the `Math` key should be a plain mathematical composition of other `at::` functions and
work for any backend.

If you add a `dispatch` section to an API that didn't have one before, you **have to** move
the old implementation to the `Math` field so that it's still available for other backends to use.

This work is currently WIP; you can find the design proposal in
https://github.com/pytorch/pytorch/issues/44680.
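
For illustration (hypothetical, not part of this diff): a kernel suitable for the `Math` key is just a composition of other `at::` functions, so a single definition can serve every backend.

```cpp
#include <ATen/ATen.h>

// Hypothetical composite op: because it only calls other at:: functions,
// one registration under the Math key could cover all backends.
at::Tensor my_composite_op(const at::Tensor& self, const at::Tensor& other) {
  return at::mul(self, other) + at::sin(self);
}
```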

### `device_guard`

```
