Update base for Update on "Construct CppSignatureGroup from NativeFunction"

This will make it easier to implement the POC in
peterbell10@d534f7d
see also #45666

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Differential Revision: [D25594005](https://our.internmc.facebook.com/intern/diff/D25594005)

[ghstack-poisoned]
ezyang committed Jan 4, 2021
2 parents 7a43b27 + e44b2b7 commit 53d4160
Showing 503 changed files with 11,952 additions and 4,597 deletions.
2 changes: 1 addition & 1 deletion .circleci/cimodel/data/dimensions.py
@@ -8,8 +8,8 @@
]

ROCM_VERSIONS = [
"3.9",
"3.10",
"4.0",
]

ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
208 changes: 104 additions & 104 deletions .circleci/config.yml

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion BUILD.bazel
@@ -373,7 +373,6 @@ filegroup(
filegroup(
name = "thc_srcs_cu",
srcs = [
"aten/src/THC/THCBlas.cu.cc",
"aten/src/THC/THCReduceApplyUtils.cu.cc",
"aten/src/THC/THCSleep.cu.cc",
"aten/src/THC/THCSortUtils.cu.cc",
10 changes: 10 additions & 0 deletions CONTRIBUTING.md
@@ -903,6 +903,16 @@ You'll need to install an appropriately configured flake8; see
[Lint as you type](https://github.com/pytorch/pytorch/wiki/Lint-as-you-type)
for documentation on how to do this.

If you haven't set up the pre-commit hook and have already committed files and
CI reports `flake8` errors, you can run the check locally in your PR branch with:
```bash
flake8 $(git diff --name-only $(git merge-base --fork-point master))
```
Fix the code so that no errors are reported when you re-run the check, then
commit the fix.

## Building PyTorch with ASAN
[ASAN](https://github.com/google/sanitizers/wiki/AddressSanitizer) is very
1 change: 0 additions & 1 deletion android/gradle/android_tasks.gradle
@@ -1,4 +1,3 @@

import java.nio.file.Files
import java.nio.file.Paths
import java.io.FileOutputStream
1 change: 0 additions & 1 deletion android/pytorch_android/host/build.gradle
@@ -38,4 +38,3 @@ dependencies {
}

apply from: rootProject.file('gradle/release.gradle')

1 change: 0 additions & 1 deletion android/settings.gradle
@@ -4,4 +4,3 @@ project(':pytorch_android_torchvision').projectDir = file('pytorch_android_torch

project(':pytorch_host').projectDir = file('pytorch_android/host')
project(':test_app').projectDir = file('test_app/app')

1 change: 0 additions & 1 deletion aten/src/ATen/LegacyTHFunctionsCUDA.h
@@ -75,7 +75,6 @@ Tensor & _thnn_log_sigmoid_backward_out(Tensor & grad_input, const Tensor & grad
Tensor _thnn_log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer);
Tensor & _thnn_rrelu_with_noise_forward_out(Tensor & output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
Tensor _thnn_rrelu_with_noise_forward(const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
Tensor & _thnn_rrelu_with_noise_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
Tensor _thnn_rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
Tensor & _thnn_rrelu_with_noise_forward_(Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
std::tuple<Tensor &,Tensor &,Tensor &> _thnn_conv2d_forward_out(Tensor & output, Tensor & columns, Tensor & ones, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const Tensor & bias, IntArrayRef stride, IntArrayRef padding);
3 changes: 3 additions & 0 deletions aten/src/ATen/MemoryOverlap.cpp
@@ -48,6 +48,9 @@ MemOverlapStatus get_overlap_status(TensorImpl* a, TensorImpl* b) {
if (!a->is_contiguous() || !b->is_contiguous()) {
return MemOverlapStatus::TOO_HARD;
}
if (!a->has_storage() || !b->has_storage()) {
return MemOverlapStatus::NO;
}
if (a->storage().data() == b->storage().data()) {
const auto a_begin = static_cast<char*>(a->data());
const auto a_end = a_begin + a->numel() * a->itemsize();
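For readers skimming the hunk above: the newly added check says that tensors without storage can never overlap. Below is a rough Python sketch of the classification, using an illustrative `TensorInfo` record rather than any ATen type; the FULL/PARTIAL interval test is spelled out here only because the rest of the function is collapsed in this view.

```python
from collections import namedtuple
from enum import Enum

# Illustrative stand-ins, not ATen types.
TensorInfo = namedtuple("TensorInfo", "contiguous has_storage base_ptr offset_bytes nbytes")
MemOverlapStatus = Enum("MemOverlapStatus", "FULL PARTIAL NO TOO_HARD")

def overlap_status(a, b):
    if not a.contiguous or not b.contiguous:
        return MemOverlapStatus.TOO_HARD
    # The check added in this hunk: storage-less tensors cannot alias anything.
    if not a.has_storage or not b.has_storage:
        return MemOverlapStatus.NO
    if a.base_ptr == b.base_ptr:
        a_begin, a_end = a.offset_bytes, a.offset_bytes + a.nbytes
        b_begin, b_end = b.offset_bytes, b.offset_bytes + b.nbytes
        if (a_begin, a_end) == (b_begin, b_end):
            return MemOverlapStatus.FULL
        if a_begin < b_end and b_begin < a_end:
            return MemOverlapStatus.PARTIAL
    return MemOverlapStatus.NO

print(overlap_status(TensorInfo(True, False, 0, 0, 0),
                     TensorInfo(True, True, 0x1000, 0, 16)))  # MemOverlapStatus.NO
```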
113 changes: 113 additions & 0 deletions aten/src/ATen/SparseTensorUtils.cpp
@@ -0,0 +1,113 @@
#include <ATen/SparseTensorUtils.h>

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
// input = [[2, 4, 0],
// [3, 1, 10]]
// full_size = [2, 12]
// output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
//
// In other words, assuming that each `indices[i, :]` is a valid index into a
// tensor `t` of shape `full_size`, this returns the corresponding indices into
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result is forced to be a clone of self.
Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone /*= false*/) {
int64_t sparse_dim = indices.size(0);
if (sparse_dim == 1) {
if (force_clone) {
return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
} else {
return indices.squeeze(0);
}
} else {
std::vector<int64_t> indices_mult_cpu_vec;
indices_mult_cpu_vec.reserve(sparse_dim);
int64_t mult = 1;
for (int64_t i = sparse_dim - 1; i >= 0; i--) {
indices_mult_cpu_vec[i] = mult;
mult *= full_size[i];
}
auto indices_mult_cpu = at::from_blob(
indices_mult_cpu_vec.data(),
/*size=*/{sparse_dim, 1},
indices.options().device(kCPU));
// NB: must be blocking because this blob may be freed after this closure,
// and non_blocking copy will see garbage.
auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
// Ideally we want matmul but matmul is slow on CPU Long and not implemented
// on CUDA Long. So mul is faster.
return indices.mul(indices_mult).sum(0);
}
}
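To make NOTE [ Flatten Sparse Indices ] concrete, here is a small reference sketch in Python/PyTorch that reproduces the worked example above; it is only an editorial illustration, not the ATen helper itself.

```python
import torch

def flatten_indices_ref(indices, full_size):
    # indices: (sparse_dim, nnz) integer tensor; full_size: sizes of the tensor being indexed.
    sparse_dim = indices.size(0)
    if sparse_dim == 1:
        return indices.squeeze(0)
    # Row-major multipliers over the first sparse_dim sizes, e.g. [12, 1] for full_size = [2, 12].
    mult = torch.ones(sparse_dim, 1, dtype=indices.dtype)
    for i in range(sparse_dim - 2, -1, -1):
        mult[i, 0] = mult[i + 1, 0] * full_size[i + 1]
    return (indices * mult).sum(0)

indices = torch.tensor([[2, 4, 0],
                        [3, 1, 10]])
print(flatten_indices_ref(indices, [2, 12]))  # tensor([27, 49, 10])
```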

// Flatten sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flatten: only flatten on specified dims. Note that
// the flattened indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
// Also, if the input indices are already coalesced, the flattened indices will be sorted.
//
// args:
// indices: sparse tensor indices
// sizes: sparse tensor sizes
// dims_to_flatten: a list of dim index to flatten
//
// Ex1:
// indices = [[2, 4, 0],
// [3, 1, 3]]
// sizes = [2, 12]
// dims_to_flatten = [0, 1]
// new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
//
// Ex2:
// dims_to_flatten = [1]
// new_indices = [ 3, 1, 3 ] # uncoalesced
Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten){
Tensor new_indices = at::zeros({indices.size(1)}, indices.options());
for (auto d : dims_to_flatten) {
new_indices.mul_(sizes[d]);
new_indices.add_(indices.select(0, d));
}
return new_indices;
}
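The partial flattening in Ex1/Ex2 can be checked the same way; again an illustrative sketch, not the ATen implementation.

```python
import torch

def flatten_indices_by_dims_ref(indices, sizes, dims_to_flatten):
    new_indices = torch.zeros(indices.size(1), dtype=indices.dtype)
    for d in dims_to_flatten:
        new_indices = new_indices * sizes[d] + indices[d]
    return new_indices

indices = torch.tensor([[2, 4, 0],
                        [3, 1, 3]])
sizes = [2, 12]
print(flatten_indices_by_dims_ref(indices, sizes, [0, 1]))  # Ex1: tensor([27, 49,  3])
print(flatten_indices_by_dims_ref(indices, sizes, [1]))     # Ex2: tensor([3, 1, 3])
```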

Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
/*
Find the CSR representation for a row `indices` from the COO format
Inputs:
`indices` is the row pointer from COO indices
`dim` is the row dimensionality
`nnz` is the number of non-zeros
Output:
`csr` is a compressed row array in a CSR format
*/
Tensor csr = at::zeros({dim + 1}, kLong);

// TODO: eliminate this conditional when zero-size dims supported correctly
if (nnz > 0) {
auto csr_accessor = csr.accessor<int64_t, 1>();
// Convert the sparse matrix to CSR format
at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
int64_t h, hp0, hp1;
for (auto i = start; i < end; i++) {
hp0 = indices[i];
hp1 = (i+1 == nnz) ? dim : indices[i+1];
if (hp0 != hp1) {
for (h = hp0; h < hp1; h++) {
csr_accessor[h+1] = i+1;
}
}
}
});
}
return csr;
}
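A sequential Python sketch of the same COO-row-to-CSR conversion is handy for checking the pointer layout; it mirrors the loop body above, which the real code runs via at::parallel_for. The sample input is illustrative.

```python
import torch

def coo_to_csr_ref(row_indices, dim):
    # row_indices: sorted COO row indices, one per nonzero; dim: number of rows.
    nnz = len(row_indices)
    csr = torch.zeros(dim + 1, dtype=torch.int64)
    for i in range(nnz):
        hp0 = row_indices[i]
        hp1 = dim if i + 1 == nnz else row_indices[i + 1]
        for h in range(hp0, hp1):
            csr[h + 1] = i + 1
    return csr

# Nonzeros in rows 0, 0, 1, 3 of a 4-row matrix:
print(coo_to_csr_ref([0, 0, 1, 3], dim=4))  # tensor([0, 2, 3, 3, 4])
```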

}} // namespace at::sparse
53 changes: 11 additions & 42 deletions aten/src/ATen/SparseTensorUtils.h
@@ -2,36 +2,36 @@

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// Just for documentary purposes
using SparseTensor = Tensor;
using LongTensor = Tensor;
using IntTensor = Tensor;
using SparseType = Type;


// This is an internal utility function for getting at the SparseTensorImpl,
// so that we can write sparse tensor specific accessors for special fields
// in SparseTensor. You should only use this for writing low level
// setters/getters for SparseTensorImpl fields; otherwise, you should use
// the low level setters/getters that were implemented using this.
//
// This may be called repeatedly, so make sure it's pretty cheap.
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
}

// Takes indices and values and directly puts them into the sparse tensor, no
// copy. This used to be called THSTensor_(_move)
inline void alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) {
inline void alias_into_sparse(const SparseTensor& self, const Tensor& indices, const Tensor& values) {
get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values);
}

// Take indices and values and makes a (data) copy of them to put into the sparse
// indices/values. This used to be called THSTensor_(_set)
inline void copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) {
inline void copy_into_sparse(const SparseTensor& self, const Tensor& indices, const Tensor& values, bool non_blocking) {
alias_into_sparse(
self,
indices.to(self._indices().options(), non_blocking, /*copy=*/true),
@@ -58,7 +58,7 @@ inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
}

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a LongTensor) into a 1D
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
// input = [[2, 4, 0],
// [3, 1, 10]]
@@ -70,34 +70,7 @@ inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result is forced to be a clone of self.
inline LongTensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone = false) {
int64_t sparse_dim = indices.size(0);
if (sparse_dim == 1) {
if (force_clone) {
return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
} else {
return indices.squeeze(0);
}
} else {
std::vector<int64_t> indices_mult_cpu_vec;
indices_mult_cpu_vec.reserve(sparse_dim);
int64_t mult = 1;
for (int64_t i = sparse_dim - 1; i >= 0; i--) {
indices_mult_cpu_vec[i] = mult;
mult *= full_size[i];
}
auto indices_mult_cpu = at::from_blob(
indices_mult_cpu_vec.data(),
/*size=*/{sparse_dim, 1},
indices.options().device(kCPU));
// NB: must be blocking because this blob may be freed after this closure,
// and non_blocking copy will see garbage.
auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
// Ideally we want matmul but matmul is slow on CPU Long and not implemented
// on CUDA Long. So mul is faster.
return indices.mul(indices_mult).sum(0);
}
}
TORCH_API Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone = false);

// Flatten sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flatten: only flatten on specified dims. Note that
@@ -119,13 +92,9 @@ inline LongTensor flatten_indices(const Tensor& indices, IntArrayRef full_size,
// Ex2:
// dims_to_flatten = [1]
// new_indices = [ 3, 1, 3 ] # uncoalesced
inline LongTensor flatten_indices_by_dims(const LongTensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten){
LongTensor new_indices = at::zeros({indices.size(1)}, indices.options());
for (auto d : dims_to_flatten) {
new_indices.mul_(sizes[d]);
new_indices.add_(indices.select(0, d));
}
return new_indices;
}
TORCH_API Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten);

// Find the CSR representation for a row `indices` from the COO format
TORCH_API Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz);

}} // namespace at::sparse
19 changes: 19 additions & 0 deletions aten/src/ATen/TensorUtils.cpp
@@ -19,6 +19,25 @@ std::ostream& operator<<(std::ostream & out, TensorGeometryArg t) {
return out;
}

void checkDim(
CheckedFrom c,
const Tensor& tensor,
const char* name,
int pos, // 1-indexed
int64_t dim) {
TORCH_CHECK(
tensor.dim() == dim,
"Expected ",
dim,
"-dimensional tensor, but got ",
tensor.dim(),
"-dimensional tensor for ",
TensorGeometryArg(TensorArg({tensor, name, pos})),
" (while checking arguments for ",
c,
")");
}

void checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim) {
TORCH_CHECK(t->dim() == dim,
"Expected ", dim, "-dimensional tensor, but got ", t->dim(),
6 changes: 6 additions & 0 deletions aten/src/ATen/TensorUtils.h
@@ -50,6 +50,12 @@ using CheckedFrom = const char*;
// conversion will blow up if you have undefined tensors.

TORCH_API std::ostream& operator<<(std::ostream& out, TensorGeometryArg t);
TORCH_API void checkDim(
CheckedFrom c,
const Tensor& tensor,
const char* name,
int pos, // 1-indexed
int64_t dim);
TORCH_API void checkDim(
CheckedFrom c,
const TensorGeometryArg& t,
5 changes: 5 additions & 0 deletions aten/src/ATen/core/Formatting.cpp
@@ -292,6 +292,11 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
stream << ", axis: " << tensor_.q_per_channel_axis();
}
}

auto& fw_grad = tensor.fw_grad(/* level */ 0);
if (fw_grad.defined()) {
stream << ", tangent:" << std::endl << fw_grad;
}
stream << " ]";
}
return stream;
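The effect of this hunk shows up once a tensor carries a forward-mode gradient. A sketch from the Python side, assuming the torch.autograd.forward_ad API is available in the build:

```python
import torch
import torch.autograd.forward_ad as fwAD

with fwAD.dual_level():
    primal = torch.tensor([1.0, 2.0, 3.0])
    tangent = torch.tensor([0.1, 0.2, 0.3])
    dual = fwAD.make_dual(primal, tangent)
    # The hunk above teaches the C++ tensor printer to append this tangent
    # (", tangent: ...") when a level-0 forward gradient is set.
    p, t = fwAD.unpack_dual(dual)
    print(p, t)
```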
2 changes: 2 additions & 0 deletions aten/src/ATen/core/NamedRegistrations.cpp
@@ -462,6 +462,7 @@ TORCH_LIBRARY_IMPL(aten, Named, m) {
m.impl("tanh_", CppFunction::makeFallthrough());
m.impl("tensor_split.indices", CppFunction::makeFallthrough());
m.impl("tensor_split.sections", CppFunction::makeFallthrough());
m.impl("tensor_split.tensor_indices_or_sections", CppFunction::makeFallthrough());
m.impl("threshold", CppFunction::makeFallthrough());
m.impl("threshold.out", CppFunction::makeFallthrough());
m.impl("threshold_", CppFunction::makeFallthrough());
@@ -509,4 +510,5 @@ TORCH_LIBRARY_IMPL(aten, Named, m) {
m.impl("_version", CppFunction::makeFallthrough());
m.impl("requires_grad_", CppFunction::makeFallthrough());
m.impl("retain_grad", CppFunction::makeFallthrough());
m.impl("_fw_primal", CppFunction::makeFallthrough());
}
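The new `tensor_split.tensor_indices_or_sections` fallthrough corresponds to the overload that takes the split points as a tensor rather than a Python list; a quick hedged illustration of that call from Python:

```python
import torch

x = torch.arange(10)
# Split points given as a tensor: cut before indices 2 and 6.
chunks = torch.tensor_split(x, torch.tensor([2, 6]))
print([c.tolist() for c in chunks])  # [[0, 1], [2, 3, 4, 5], [6, 7, 8, 9]]
```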
3 changes: 3 additions & 0 deletions aten/src/ATen/core/aten_interned_strings.h
@@ -220,6 +220,7 @@ _(aten, blackman_window) \
_(aten, block_diag) \
_(aten, bmm) \
_(aten, broadcast_tensors) \
_(aten, broadcast_to) \
_(aten, cartesian_prod) \
_(aten, cat) \
_(aten, cauchy) \
@@ -435,6 +436,7 @@ _(aten, logdet) \
_(aten, logit) \
_(aten, logspace) \
_(aten, logsumexp) \
_(aten, xlogy) \
_(aten, lstm) \
_(aten, lstm_cell) \
_(aten, lstsq) \
@@ -551,6 +553,7 @@ _(aten, permute) \
_(aten, pin_memory) \
_(aten, pinverse) \
_(aten, pixel_shuffle) \
_(aten, pixel_unshuffle) \
_(aten, poisson) \
_(aten, polygamma) \
_(aten, pow) \
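The new interned symbols track operators landing around this time (broadcast_to, xlogy, pixel_unshuffle). A brief illustration of the Python-level counterparts, assuming a build that already ships them:

```python
import torch

x = torch.tensor([1.0, 2.0, 3.0])
print(torch.broadcast_to(x, (2, 3)).shape)                    # torch.Size([2, 3])

# xlogy computes x * log(y), with the convention that the result is 0 wherever x == 0.
print(torch.xlogy(torch.tensor([0.0, 2.0]), torch.tensor([0.0, 3.0])))

img = torch.randn(1, 4, 2, 2)                                 # (N, C*r*r, H, W) with r = 2
shuffled = torch.pixel_shuffle(img, 2)                        # -> (1, 1, 4, 4)
print(torch.pixel_unshuffle(shuffled, 2).shape)               # back to torch.Size([1, 4, 2, 2])
```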
2 changes: 1 addition & 1 deletion aten/src/ATen/core/function_schema.h
@@ -107,7 +107,7 @@ struct Argument {
c10::optional<int32_t> N_;

c10::optional<IValue> default_value_;
// is this only specifyable as a keyword argument?
// is this only specifiable as a keyword argument?
bool kwarg_only_;
c10::optional<AliasInfo> alias_info_;
};