torch.empty_like and torch.zeros_like raise error if any memory format is provided with sparse input (#43699) (#44058)

Summary:
Fixes #43699

- Changed the order of the `TORCH_CHECK` and the `if (options.layout() == kSparse && self.is_sparse())` branch inside the `empty_like` method.

- [x] Added tests
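
For illustration, a minimal sketch of the user-facing behavior after this change; the example tensor and its values are made up, and the error message mirrors the tests added below:

```python
import torch

# Hypothetical input: a small coalesced sparse COO tensor.
sparse = torch.sparse_coo_tensor(
    indices=torch.tensor([[0, 1, 2]]),
    values=torch.tensor([3.0, -4.0, 5.0]),
    size=(3,),
).coalesce()

# After this change, both factory functions reject an explicit memory format
# when the input is sparse.
for factory in (torch.empty_like, torch.zeros_like):
    try:
        factory(sparse, memory_format=torch.preserve_format)
    except RuntimeError as e:
        print(e)  # "memory format option is only supported by strided tensors"
```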

EDIT:

More details on this, and on why we cannot take the `zeros_like` approach.
The Python code
```python
res = torch.zeros_like(input_coalesced, memory_format=torch.preserve_format)
```
is routed to
```c++
// TensorFactories.cpp
Tensor zeros_like(
    const Tensor& self,
    const TensorOptions& options,
    c10::optional<c10::MemoryFormat> optional_memory_format) {
  if (options.layout() == kSparse && self.is_sparse()) {
    auto res = at::empty({0}, options); // to be resized
    res.sparse_resize_and_clear_(
        self.sizes(), self.sparse_dim(), self.dense_dim());
    return res;
  }
  auto result = at::empty_like(self, options, optional_memory_format);
  return result.zero_();
}
```
and a sparse input goes straight into the `if (options.layout() == kSparse && self.is_sparse())` branch; there is no memory-format check on this path.

When we instead call `empty_like` in Python
```python
res = torch.empty_like(input_coalesced, memory_format=torch.preserve_format)
```
it is routed to
```c++
Tensor empty_like(
    const Tensor& self,
    const TensorOptions& options_,
    c10::optional<c10::MemoryFormat> optional_memory_format) {
  TORCH_CHECK(
    !(options_.has_memory_format() && optional_memory_format.has_value()),
    "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
    "the redundant setter.");
  TensorOptions options =
      self.options()
          .merge_in(options_)
          .merge_in(TensorOptions().memory_format(optional_memory_format));
  TORCH_CHECK(
      !(options.layout() != kStrided &&
          optional_memory_format.has_value()),
      "memory format option is only supported by strided tensors");
  if (options.layout() == kSparse && self.is_sparse()) {
    auto result = at::empty({0}, options); // to be resized
    result.sparse_resize_and_clear_(
        self.sizes(), self.sparse_dim(), self.dense_dim());
    return result;
  }
  // ... rest of the function (strided path) omitted in this excerpt
}
```
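
For completeness, a small sketch, based on the tests added below, of the calls that remain valid after the fix; the example tensor is made up to mirror the coalesced input used in the tests:

```python
import torch

# Hypothetical input, mirroring the coalesced tensor used in the tests.
sparse = torch.sparse_coo_tensor(
    indices=torch.tensor([[0, 1, 2]]),
    values=torch.tensor([3.0, -4.0, 5.0]),
    size=(3,),
).coalesce()

# Without a memory format argument, both factories still produce sparse results.
assert torch.empty_like(sparse).is_sparse
assert torch.zeros_like(sparse).is_sparse

# Explicitly requesting a strided layout together with a memory format is allowed;
# the result is then an ordinary strided tensor.
out = torch.zeros_like(sparse, layout=torch.strided, memory_format=torch.preserve_format)
assert out.layout == torch.strided
```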

cc pearu

Pull Request resolved: #44058

Reviewed By: albanD

Differential Revision: D23672494

Pulled By: mruberry

fbshipit-source-id: af232274dd2b516dd6e875fc986e3090fa285658
vfdev-5 authored and facebook-github-bot committed Sep 17, 2020
1 parent 1fde54d commit 24df3b7
Showing 4 changed files with 133 additions and 39 deletions.
3 changes: 3 additions & 0 deletions aten/src/ATen/native/TensorFactories.cpp
@@ -860,6 +860,9 @@ Tensor zeros_like(
    const TensorOptions& options,
    c10::optional<c10::MemoryFormat> optional_memory_format) {
  if (options.layout() == kSparse && self.is_sparse()) {
    TORCH_CHECK(
        !(optional_memory_format.has_value()),
        "memory format option is only supported by strided tensors");
    auto res = at::empty({0}, options); // to be resized
    res.sparse_resize_and_clear_(
        self.sizes(), self.sparse_dim(), self.dense_dim());
94 changes: 94 additions & 0 deletions test/test_sparse.py
@@ -1611,6 +1611,100 @@ def test_shape(i_shapes, v_shapes, nnzs):
        test_shape([0, 3, 4], [3, 4, 5, 6], [0])
        test_shape([2, 3, 4], [0, 4, 5, 6], [9, 12])

        sparse_tensor, _, _ = self._gen_sparse(len([2, 3]), 9, [2, 3] + [5, 6])
        data = (sparse_tensor, sparse_tensor, sparse_tensor, sparse_tensor.unsqueeze(0))
        mem_formats = [torch.channels_last, torch.contiguous_format, torch.preserve_format, torch.channels_last_3d]
        for x, mem_format in zip(data, mem_formats):

            with self.assertRaisesRegex(RuntimeError, "memory format option is only supported by strided tensors"):
                result = torch.zeros_like(x, memory_format=mem_format)

            result = torch.zeros_like(x, layout=torch.strided, memory_format=mem_format)
            self.assertTrue(result.layout == torch.strided)

        with self.assertRaisesRegex(
            RuntimeError, r"Could not run 'aten::empty_strided' with arguments from the 'Sparse(CPU|CUDA)' backend"
        ):
            dense_tensor = sparse_tensor.to_dense()
            result = torch.zeros_like(dense_tensor, layout=torch.sparse_coo)

    def _assert_sparse_invars(self, t):
        # SparseTensor has the following invariants:
        # - sparse_dim + dense_dim = len(SparseTensor.shape)
        # - SparseTensor._indices().shape = (sparse_dim, nnz)
        # - SparseTensor._values().shape = (nnz, SparseTensor.shape[sparse_dim:])
        self.assertEqual(t.sparse_dim() + t.dense_dim(), len(t.shape))
        self.assertEqual(tuple(t._indices().shape), (t.sparse_dim(), t._nnz()))
        self.assertEqual(tuple(t._values().shape), (t._nnz(), ) + t.shape[t.sparse_dim():])

    def _test_empty_like(self, sparse_tensor):

        result = torch.empty_like(sparse_tensor)
        self.assertTrue(result.is_sparse)
        self._assert_sparse_invars(result)
        self.assertEqual(result.shape, sparse_tensor.shape)
        self.assertEqual(result.dtype, sparse_tensor.dtype)
        self.assertEqual(result.device, sparse_tensor.device)
        self.assertEqual(result.sparse_dim(), sparse_tensor.sparse_dim())
        self.assertEqual(result.dense_dim(), sparse_tensor.dense_dim())

        sparse_tensor, _, _ = self._gen_sparse(len([2, 3]), 9, [2, 3] + [5, 6])
        data = (sparse_tensor, sparse_tensor, sparse_tensor, sparse_tensor.unsqueeze(0))
        mem_formats = [torch.channels_last, torch.contiguous_format, torch.preserve_format, torch.channels_last_3d]
        for x, mem_format in zip(data, mem_formats):

            with self.assertRaisesRegex(RuntimeError, "memory format option is only supported by strided tensors"):
                result = torch.empty_like(x, memory_format=mem_format)

            result = torch.empty_like(x, layout=torch.strided, memory_format=mem_format)
            self.assertTrue(result.layout == torch.strided)

        with self.assertRaisesRegex(
            RuntimeError, r"Could not run 'aten::empty_strided' with arguments from the 'Sparse(CPU|CUDA)' backend"
        ):
            dense_tensor = sparse_tensor.to_dense()
            result = torch.empty_like(dense_tensor, layout=torch.sparse_coo)

    def test_empty_like(self):
        # tests https://github.com/pytorch/pytorch/issues/43699

        if not self.is_uncoalesced:
            input_coalesced = torch.sparse_coo_tensor(
                indices=torch.tensor([[0, 1, 2]]),
                values=torch.tensor([3.0, -4.0, 5.0]),
                size=[3, ],
                device=self.device
            ).coalesce()
            self._test_empty_like(input_coalesced)

            # hybrid sparse input
            input_coalesced = torch.sparse_coo_tensor(
                indices=torch.tensor([[1, 3], [2, 4]]),
                values=torch.tensor([[-1.0, 3.0], [-5.0, 7.0]]),
                size=[4, 5, 2],
                device=self.device
            ).coalesce()
            self._test_empty_like(input_coalesced)

        if self.is_uncoalesced:
            # test uncoalesced input
            input_uncoalesced = torch.sparse_coo_tensor(
                indices=torch.tensor([[0], [1], [2], [0], [1], [2]]).transpose(1, 0),
                values=torch.tensor([2.0, -3.0, -4.0, 1.0, -1.0, 1.5]),
                size=[3, ],
                device=self.device
            )
            self._test_empty_like(input_uncoalesced)

            # test on empty sparse tensor
            input_uncoalesced = torch.sparse_coo_tensor(
                indices=torch.zeros([2, 0]),
                values=torch.zeros([0, 5, 5, 5, 5, 5, 5, 0]),
                size=[0, 0, 5, 5, 5, 5, 5, 5, 0],
                device=self.device
            )
            self._test_empty_like(input_uncoalesced)

    def _test_narrow(self, input, narrow_args):
        expected = input.to_dense().narrow(*narrow_args)
        self.assertEqual(expected, input.narrow_copy(*narrow_args).to_dense())
3 changes: 0 additions & 3 deletions test/test_torch.py
@@ -14428,9 +14428,6 @@ def test_topk_4d(self, device):
        self.assertEqual(val, expected_val, atol=0, rtol=0)
        self.assertEqual(ind, expected_ind, atol=0, rtol=0)




    def test_is_signed(self, device):
        self.assertEqual(torch.IntTensor(5).to(device).is_signed(), True)
        self.assertEqual(torch.ByteTensor(5).to(device).is_signed(), False)
72 changes: 36 additions & 36 deletions tools/autograd/derivatives.yaml
@@ -265,14 +265,14 @@
batch2: batch1.transpose(1, 2).bmm(grad) * alpha

- name: bernoulli(Tensor self, *, Generator? generator=None) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
p: zeros_like(p)

- name: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: bmm(Tensor self, Tensor mat2) -> Tensor
self: grad.bmm(mat2.transpose(1, 2))
@@ -286,10 +286,10 @@
tensors: cat_tensors_backward(grad, to_args_sizes(tensors), dim)

- name: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: ceil(Tensor self) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: cholesky(Tensor self, bool upper=False) -> Tensor
self: cholesky_backward(grad, upper, result)
@@ -446,7 +446,7 @@
self: at::sum_to(grad, self.sizes())

- name: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor
self: fake_quantize_per_tensor_affine_backward(grad, self, scale, zero_point, quant_min, quant_max)
@@ -461,14 +461,14 @@
self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_channel_affine_backward(grad, self, scale, zero_point, axis, quant_min, quant_max) : std::tuple<Tensor, Tensor, Tensor>()"

- name: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
value: grad.sum()

- name: floor(Tensor self) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: fmod.Scalar(Tensor self, Scalar other) -> Tensor
self: grad
@@ -492,7 +492,7 @@
other: zeros_like(other)

- name: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: geqrf(Tensor self) -> (Tensor a, Tensor tau)
self: not_implemented("geqrf")
@@ -633,7 +633,7 @@
self: logdet_backward(grad, self, result)

- name: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
self: logsumexp_backward(grad, self, result, dim, keepdim)
@@ -661,7 +661,7 @@

- name: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
self: grad.clone().masked_fill_(mask, 0)
value: at::where(mask, grad, zeros_like(grad, at::MemoryFormat::Preserve)).sum()
value: at::where(mask, grad, zeros_like(grad)).sum()
mask: non_differentiable

- name: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
@@ -811,7 +811,7 @@
cdist: not_implemented("_cdist_backward")

- name: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
mean: at::zeros(mean.sizes(), grad.options())
@@ -855,7 +855,7 @@
self: prod_backward(grad, self.to(grad.scalar_type()), result, dim, keepdim)

- name: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
self: grad.clone().put_(index, zeros_like(source, at::MemoryFormat::Preserve), accumulate)
self: grad.clone().put_(index, zeros_like(source), accumulate)
index: non_differentiable
source: grad.take(index)

@@ -866,13 +866,13 @@
self: rad2deg_backward(grad)

- name: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: reciprocal(Tensor self) -> Tensor
self: -grad * result * result
@@ -897,7 +897,7 @@
# - name: reshape(Tensor self, IntArrayRef shape)

- name: round(Tensor self) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: rsqrt(Tensor self) -> Tensor
self: -0.5 * grad * result.pow(3)
@@ -926,7 +926,7 @@
self: "GradMode::is_enabled() ? infinitely_differentiable_logit_backward(grad, self, eps) : logit_backward(grad, self, eps)"

- name: sign(Tensor self) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: sin(Tensor self) -> Tensor
self: grad * self.cos()
@@ -1062,7 +1062,7 @@
self: grad.triu(diagonal)

- name: trunc(Tensor self) -> Tensor
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: to_dense(Tensor self) -> Tensor
self: to_dense_backward(grad, self)
@@ -1080,7 +1080,7 @@
grad_in: grad.unfold(dim, size, step)

- name: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: _unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)
self: not_implemented("_unique")
@@ -1111,8 +1111,8 @@

- name: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
condition: non_differentiable
self: where(condition, grad, zeros_like(grad, at::MemoryFormat::Preserve))
other: where(condition, zeros_like(grad, at::MemoryFormat::Preserve), grad)
self: where(condition, grad, zeros_like(grad))
other: where(condition, zeros_like(grad), grad)

# weight_norm_cuda_interface_backward does not have an explicitly defined derivative, so if we do happen
# to be running backward with create_graph=True, fall back to a backward function that uses
@@ -1121,7 +1121,7 @@
v, g: "grad.defined() ? (GradMode::is_enabled() ? _weight_norm_differentiable_backward(grad.contiguous(), v, g, result1, dim) : _weight_norm_cuda_interface_backward(grad.contiguous(), v, g, result1, dim)) : std::tuple<Tensor, Tensor>()"

- name: zero_(Tensor(a!) self) -> Tensor(a!)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: sparse_mask(Tensor self, Tensor mask) -> Tensor
self: grad.to_dense().sparse_mask(mask).to_dense()
@@ -1252,7 +1252,7 @@

- name: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
grad_out: hardshrink_backward(grad, self, lambd)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor
self: hardtanh_backward(grad, self, min_val, max_val)
@@ -1506,17 +1506,17 @@

- name: hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor
grad_output: hardtanh_backward(grad, self, min_val, max_val)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
grad_output: kl_div_double_backward_grad_output(grad, self, target, reduction, log_target)
self: zeros_like(grad, at::MemoryFormat::Preserve)
target: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
target: zeros_like(grad)

- name: l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
grad_output: l1_loss_double_backward_grad_output(grad, self, target, reduction)
self: zeros_like(grad, at::MemoryFormat::Preserve)
target: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
target: zeros_like(grad)

- name: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor
grad_output: log_sigmoid_backward(grad, self, buffer)
@@ -1529,7 +1529,7 @@
- name: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
# self_is_result is always false here since double backward call is an out-of-place call, self is input itself
grad_output: leaky_relu_backward(grad, self, negative_slope, false)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
grad_output: max_pool_double_backward(grad, indices, 2)
@@ -1553,18 +1553,18 @@

- name: nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
grad_output: nll_loss(grad, target, weight, reduction, ignore_index)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
target: non_differentiable

- name: nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
grad_output: nll_loss2d(grad, target, weight, reduction, ignore_index)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)
target: non_differentiable

- name: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
# self_is_result is always false here since double backward call is an out-of-place call, self is input itself
grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: reflection_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
grad_output: reflection_pad1d(grad, padding)
@@ -1605,11 +1605,11 @@

- name: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
grad_output: softshrink_backward(grad, self, lambd)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
grad_output: threshold_backward(grad, self, threshold)
self: zeros_like(grad, at::MemoryFormat::Preserve)
self: zeros_like(grad)

- name: upsample_linear1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None) -> Tensor
grad_output: upsample_linear1d(grad, output_size, align_corners, scales)