Update on "fx quant: hook up ConvTranspose{n}d"
Summary:

Quantization of `ConvTranspose{n}d` is supported in Eager mode. This PR
adds support for FX graph mode.

Note: this currently only works with the `qnnpack` backend, because per-channel
weights are not supported by quantized conv transpose. Until this is fixed, a
future PR should throw an error when someone tries to quantize a ConvTranspose
model with per-channel weight observers.
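
For illustration, a minimal sketch of the FX graph mode flow this enables, assuming the prototype `prepare_fx`/`convert_fx` API with a `qconfig_dict` and the `qnnpack` backend; the module and shapes are made up for the example:

```
# Hypothetical usage sketch (not part of this PR): quantize a ConvTranspose2d
# module with FX graph mode, using the qnnpack backend so that the weight
# observer is per-tensor (see the note above about per-channel weights).
import torch
import torch.nn as nn
from torch.quantization import get_default_qconfig
from torch.quantization.quantize_fx import prepare_fx, convert_fx

torch.backends.quantized.engine = "qnnpack"

model = nn.Sequential(nn.ConvTranspose2d(4, 4, kernel_size=2)).eval()
qconfig_dict = {"": get_default_qconfig("qnnpack")}

prepared = prepare_fx(model, qconfig_dict)   # insert observers
prepared(torch.randn(1, 4, 8, 8))            # calibrate on representative data
quantized = convert_fx(prepared)             # lower to quantized modules
print(quantized)                             # expect a quantized ConvTranspose2d
```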

Test Plan:

```
python test/test_quantization.py TestQuantizeFxOps.test_conv_transpose_1d
python test/test_quantization.py TestQuantizeFxOps.test_conv_transpose_2d
```

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: [D25674636](https://our.internmc.facebook.com/intern/diff/D25674636)

[ghstack-poisoned]
vkuzo committed Dec 22, 2020
2 parents 36e0f69 + 8630901 commit 1b60e51
Showing 124 changed files with 4,756 additions and 2,013 deletions.
113 changes: 113 additions & 0 deletions aten/src/ATen/SparseTensorUtils.cpp
@@ -0,0 +1,113 @@
#include <ATen/SparseTensorUtils.h>

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
// input = [[2, 4, 0],
// [3, 1, 10]]
// full_size = [2, 12]
// output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
//
// In other words, assuming that each `indices[i, :]` is a valid index into a
// tensor `t` of shape `full_size`, this returns the corresponding indices into
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result will be forced to be a clone of self.
Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone /*= false*/) {
int64_t sparse_dim = indices.size(0);
if (sparse_dim == 1) {
if (force_clone) {
return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
} else {
return indices.squeeze(0);
}
} else {
// Size the vector up front: the loop below writes through operator[], which
// requires the elements to exist (reserve() alone would not be enough).
std::vector<int64_t> indices_mult_cpu_vec(sparse_dim);
int64_t mult = 1;
for (int64_t i = sparse_dim - 1; i >= 0; i--) {
indices_mult_cpu_vec[i] = mult;
mult *= full_size[i];
}
auto indices_mult_cpu = at::from_blob(
indices_mult_cpu_vec.data(),
/*size=*/{sparse_dim, 1},
indices.options().device(kCPU));
// NB: must be blocking because this blob may be freed after this closure,
// and non_blocking copy will see garbage.
auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
// Ideally we would use matmul, but matmul is slow for Long tensors on CPU and
// not implemented for Long on CUDA, so an elementwise mul + sum is used instead.
return indices.mul(indices_mult).sum(0);
}
}

// Flatten a sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flattening: only the specified dims are flattened. Note that
// the flattened indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
// Also, if the input indices are already coalesced, the flattened indices will be sorted.
//
// args:
// indices: sparse tensor indices
// sizes: sparse tensor sizes
// dims_to_flatten: a list of dim indices to flatten
//
// Ex1:
// indices = [[2, 4, 0],
// [3, 1, 3]]
// sizes = [2, 12]
// dims_to_flatten = [0, 1]
// new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
//
// Ex2:
// dims_to_flatten = [1]
// new_indices = [ 3, 1, 3 ] # uncoalesced
Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten){
Tensor new_indices = at::zeros({indices.size(1)}, indices.options());
for (auto d : dims_to_flatten) {
new_indices.mul_(sizes[d]);
new_indices.add_(indices.select(0, d));
}
return new_indices;
}

Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
/*
Compute the CSR row-pointer array for the row `indices` of a COO matrix.
Inputs:
`indices` is the array of row indices from the COO format (assumed sorted)
`dim` is the number of rows
`nnz` is the number of non-zeros
Output:
`csr` is the row-pointer array of the CSR format
*/
Tensor csr = at::zeros({dim + 1}, kLong);

// TODO: eliminate this conditional when zero-size dims supported correctly
if (nnz > 0) {
auto csr_accessor = csr.accessor<int64_t, 1>();
// Convert the sparse matrix to CSR format
at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
int64_t h, hp0, hp1;
for (auto i = start; i < end; i++) {
hp0 = indices[i];
hp1 = (i+1 == nnz) ? dim : indices[i+1];
if (hp0 != hp1) {
for (h = hp0; h < hp1; h++) {
csr_accessor[h+1] = i+1;
}
}
}
});
}
return csr;
}

}} // namespace at::sparse
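
As a cross-check of the index arithmetic described in the comments above, the sketch below (illustration only, not part of the diff; the values are made up) mirrors what `flatten_indices` and `coo_to_csr` compute using plain tensor ops:

```
# Illustration only: reproduce the arithmetic of flatten_indices and
# coo_to_csr with ordinary torch ops, using made-up values.
import torch

# flatten_indices: scale each sparse dim by its stride in the flattened
# view, then sum over the sparse dims.
indices = torch.tensor([[0, 1, 1],
                        [3, 1, 10]])     # shape (sparse_dim, nnz)
full_size = [2, 12]
mult = torch.tensor([[12], [1]])         # strides: [full_size[1], 1]
flat = indices.mul(mult).sum(0)          # tensor([ 3, 13, 22])

# coo_to_csr: turn sorted COO row indices into CSR row offsets.
row = torch.tensor([0, 0, 1, 1, 1, 3])   # sorted row indices, nnz = 6
dim = 4                                  # number of rows
csr = torch.zeros(dim + 1, dtype=torch.long)
csr[1:] = torch.bincount(row, minlength=dim).cumsum(0)
# csr == tensor([0, 2, 5, 5, 6]), matching the loop in coo_to_csr above
```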
53 changes: 11 additions & 42 deletions aten/src/ATen/SparseTensorUtils.h
@@ -2,36 +2,36 @@

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// Just for documentary purposes
using SparseTensor = Tensor;
using LongTensor = Tensor;
using IntTensor = Tensor;
using SparseType = Type;


// This is an internal utility function for getting at the SparseTensorImpl,
// so that we can write sparse tensor specific accessors for special fields
// in SparseTensor. You should only use this for writing low level
// setters/getters for SparseTensorImpl fields; otherwise, you should use
// the low level setters/getters that were implemented using this.
//
// This may be called repeatedly, so make sure it's pretty cheap.
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
}

// Takes indices and values and directly puts them into the sparse tensor, no
// copy. This used to be called THSTensor_(_move)
inline void alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) {
inline void alias_into_sparse(const SparseTensor& self, const Tensor& indices, const Tensor& values) {
get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values);
}

// Take indices and values and makes a (data) copy of them to put into the sparse
// indices/values. This used to be called THSTensor_(_set)
inline void copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) {
inline void copy_into_sparse(const SparseTensor& self, const Tensor& indices, const Tensor& values, bool non_blocking) {
alias_into_sparse(
self,
indices.to(self._indices().options(), non_blocking, /*copy=*/true),
@@ -58,7 +58,7 @@ inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
}

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a LongTensor) into a 1D
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
// input = [[2, 4, 0],
// [3, 1, 10]]
@@ -70,34 +70,7 @@ inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result will be forced to be a clone of self.
inline LongTensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone = false) {
int64_t sparse_dim = indices.size(0);
if (sparse_dim == 1) {
if (force_clone) {
return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
} else {
return indices.squeeze(0);
}
} else {
std::vector<int64_t> indices_mult_cpu_vec;
indices_mult_cpu_vec.reserve(sparse_dim);
int64_t mult = 1;
for (int64_t i = sparse_dim - 1; i >= 0; i--) {
indices_mult_cpu_vec[i] = mult;
mult *= full_size[i];
}
auto indices_mult_cpu = at::from_blob(
indices_mult_cpu_vec.data(),
/*size=*/{sparse_dim, 1},
indices.options().device(kCPU));
// NB: must be blocking because this blob may be freed after this closure,
// and non_blocking copy will see garbage.
auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
// Ideally we want matmul but matmul is slow on CPU Long and not implemented
// on CUDA Long. So mul is faster.
return indices.mul(indices_mult).sum(0);
}
}
TORCH_API Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone = false);

// Flatten a sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flattening: only the specified dims are flattened. Note that
@@ -119,13 +92,9 @@ inline LongTensor flatten_indices(const Tensor& indices, IntArrayRef full_size,
// Ex2:
// dims_to_flatten = [1]
// new_indices = [ 3, 1, 3 ] # uncoalesced
inline LongTensor flatten_indices_by_dims(const LongTensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten){
LongTensor new_indices = at::zeros({indices.size(1)}, indices.options());
for (auto d : dims_to_flatten) {
new_indices.mul_(sizes[d]);
new_indices.add_(indices.select(0, d));
}
return new_indices;
}
TORCH_API Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten);

// Find the CSR representation for a row `indices` from the COO format
TORCH_API Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz);

}} // namespace at::sparse
19 changes: 19 additions & 0 deletions aten/src/ATen/TensorUtils.cpp
@@ -19,6 +19,25 @@ std::ostream& operator<<(std::ostream & out, TensorGeometryArg t) {
return out;
}

void checkDim(
CheckedFrom c,
const Tensor& tensor,
const char* name,
int pos, // 1-indexed
int64_t dim) {
TORCH_CHECK(
tensor.dim() == dim,
"Expected ",
dim,
"-dimensional tensor, but got ",
tensor.dim(),
"-dimensional tensor for ",
TensorGeometryArg(TensorArg({tensor, name, pos})),
" (while checking arguments for ",
c,
")");
}

void checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim) {
TORCH_CHECK(t->dim() == dim,
"Expected ", dim, "-dimensional tensor, but got ", t->dim(),
6 changes: 6 additions & 0 deletions aten/src/ATen/TensorUtils.h
@@ -50,6 +50,12 @@ using CheckedFrom = const char*;
// conversion will blow up if you have undefined tensors.

TORCH_API std::ostream& operator<<(std::ostream& out, TensorGeometryArg t);
TORCH_API void checkDim(
CheckedFrom c,
const Tensor& tensor,
const char* name,
int pos, // 1-indexed
int64_t dim);
TORCH_API void checkDim(
CheckedFrom c,
const TensorGeometryArg& t,
1 change: 1 addition & 0 deletions aten/src/ATen/core/aten_interned_strings.h
@@ -220,6 +220,7 @@ _(aten, blackman_window) \
_(aten, block_diag) \
_(aten, bmm) \
_(aten, broadcast_tensors) \
_(aten, broadcast_to) \
_(aten, cartesian_prod) \
_(aten, cat) \
_(aten, cauchy) \
