add mul_ function for sparse Tensor, mul_(dense, sparse) -> sparse #69416

Status: Closed
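In brief, this PR teaches elementwise mul to accept a sparse COO tensor times a same-shaped dense tensor and return a sparse result; the reverse order, mul(dense, sparse), still raises. A minimal sketch of the new behavior, mirroring the test_sparse_dense_mul test added in test_sparse.py below:

import torch

# Sketch of the behavior this PR adds (illustrative; mirrors the new test).
s = torch.sparse_coo_tensor([[0], [1]], [5.0], (2, 3))  # one nonzero at (0, 1)
d = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])

r = s * d                     # mul(sparse, dense) -> sparse
assert r.is_sparse
assert torch.equal(r.to_dense(), s.to_dense() * d)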
60 changes: 55 additions & 5 deletions aten/src/ATen/native/sparse/SparseTensorMath.cpp
@@ -9,6 +9,7 @@
#include <ATen/NativeFunctions.h>
#include <ATen/InitialTensorOptions.h>
#include <ATen/SparseTensorUtils.h>
#include <ATen/TensorIndexing.h>
#include <ATen/WrapDimUtilsMulti.h>
#include <ATen/native/BinaryOps.h>
#include <ATen/native/Copy.h>
@@ -627,9 +628,6 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, const SparseTen

Tensor mul_sparse(const Tensor& self, const Tensor& other) {
auto commonDtype = at::result_type(self, other);
// Arbitrary (dense, sparse) and (sparse, dense) multiplication is not
// currently supported, but (0dim-dense, sparse) and (sparse, 0dim-dense) is.
// Make sure we use the sparse exemplar for result.
auto result_options = self.is_sparse() ?
self.options().dtype(commonDtype) : other.options().dtype(commonDtype);
Tensor result = at::empty({0}, result_options);
@@ -640,6 +638,8 @@ Tensor& mul_sparse_(Tensor& self, const Tensor& other) {
return at::mul_out(self, self, other); // redispatch!
}

SparseTensor& mul_out_sparse_dense_cpu(SparseTensor& r, const SparseTensor& sparse_, const Tensor& dense);

SparseTensor& mul_out_sparse_cpu(const Tensor& t_, const Tensor& src_, SparseTensor& r) {
if (src_.dim() == 0) {
return mul_out_sparse_zerodim(r, t_, src_);
@@ -651,10 +651,12 @@ SparseTensor& mul_out_sparse_cpu(const Tensor& t_, const Tensor& src_, SparseTen
AT_ASSERT(!t_.is_cuda()); // dispatch argument
TORCH_CHECK(!r.is_cuda(), "mul: expected 'out' to be CPU tensor, but got CUDA tensor");
TORCH_CHECK(!src_.is_cuda(), "mul: expected 'other' to be a CPU tensor, but got a CUDA tensor");
TORCH_CHECK(src_.is_sparse(), "mul(sparse, dense) is not supported");
TORCH_CHECK(t_.is_sparse(), "mul(dense, sparse) is not supported");
TORCH_CHECK(t_.is_sparse(), "mul(dense, sparse) is not supported. Use mul(sparse, dense) instead.");
TORCH_CHECK(t_.sizes().equals(src_.sizes()), "mul: expected 'self' and 'other' to have same sizes, but ", t_.sizes(), " != ", src_.sizes());

if (!src_.is_sparse()) {
return mul_out_sparse_dense_cpu(r, t_, src_);
}
if (src_._nnz() == 0 || t_._nnz() == 0) {
r.resize_as_(src_);
return r.zero_();
@@ -739,6 +741,54 @@ SparseTensor& mul_out_sparse_cpu(const Tensor& t_, const Tensor& src_, SparseTen
return r._coalesced_(true);
}

// --------------------------------------------------------------------
// mul(Tensor, SparseTensor)
// --------------------------------------------------------------------

SparseTensor& mul_out_sparse_dense_cpu(SparseTensor& r, const SparseTensor& sparse_, const Tensor& dense) {
AT_ASSERT(r.is_sparse());
AT_ASSERT(!dense.is_sparse());
AT_ASSERT(sparse_.is_sparse());

AT_ASSERT(!dense.is_cuda()); // dispatch argument
TORCH_CHECK(!r.is_cuda(), "mul: expected 'out' to be CPU tensor, but got CUDA tensor");
TORCH_CHECK(!sparse_.is_cuda(), "mul: expected 'other' to be a CPU tensor, but got a CUDA tensor");

TORCH_CHECK(dense.sizes().equals(sparse_.sizes()), "mul: expected 'self' and 'other' to have same size, but self has size ",
dense.sizes(), " while other has size ", sparse_.sizes(), " (FYI: dense-sparse multiplication does not currently support broadcasting)");

auto commonDtype = promoteTypes(dense.scalar_type(), sparse_.scalar_type());
TORCH_CHECK(canCast(commonDtype, r.scalar_type()), "Can't convert result type ", commonDtype, " to output ", r.scalar_type(), " in mul operation");

SparseTensor sparse = sparse_.coalesce().to(commonDtype);

int64_t nnz = sparse._nnz();
int64_t sparse_dim = sparse.sparse_dim();
int64_t dense_dim = sparse.dense_dim();
Tensor sparse_indices = sparse._indices();
Tensor sparse_values = sparse._values();

std::vector<at::indexing::TensorIndex> indices;

for (int64_t d=0; d<sparse_dim; d++) {
std::vector<at::indexing::TensorIndex> i;
i.push_back(d);
i.push_back(at::indexing::Slice());
indices.push_back(sparse_indices.index(i));
}
for (int64_t d=0; d<dense_dim; d++) {
indices.push_back(at::indexing::Slice());
}

Tensor r_indices = at::empty({sparse_dim, nnz}, sparse_indices.options());
r_indices.copy_(sparse_indices);
Tensor r_values = dense.index(indices).to(commonDtype).mul_(sparse_values).to(r.scalar_type());

r.resize_as_(sparse);
get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values);
return r._coalesced_(true);
}

// --------------------------------------------------------------------
// addmm(D1, S, D2, beta, alpha) -> D [broadcasts]
//
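The heart of mul_out_sparse_dense_cpu above is an advanced-indexing gather: row d of the COO indices tensor holds every stored element's coordinate along sparse dimension d, so indexing the dense tensor with those rows (plus one full slice per dense dimension) pulls out exactly the dense entries aligned with sparse_values. A rough Python equivalent of the same idea (a sketch only; the dtype promotion and out-tensor handling of the real kernel are omitted):

import torch

def mul_sparse_dense_sketch(sparse, dense):
    # Rough Python analogue of mul_out_sparse_dense_cpu; illustrative only.
    sparse = sparse.coalesce()
    idx = sparse._indices()    # shape (sparse_dim, nnz)
    vals = sparse._values()    # shape (nnz, *dense_dims)
    # One 1-D index tensor per sparse dim gathers the dense entries at each
    # stored coordinate; trailing dense dims pass through, like the Slice()
    # indices in the C++ code.
    gathered = dense[tuple(idx[d] for d in range(idx.size(0)))]
    return torch.sparse_coo_tensor(idx, gathered * vals, sparse.shape)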
53 changes: 53 additions & 0 deletions aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu
@@ -438,6 +438,8 @@ struct TensorMulOp {
}
};

SparseTensor& mul_out_sparse_dense_cuda(SparseTensor& r, const SparseTensor& sparse_, const Tensor& dense);

SparseTensor& mul_out_sparse_cuda(const SparseTensor& t_, const SparseTensor& src_, SparseTensor& r_) {
if (src_.dim() == 0) {
return mul_out_sparse_zerodim(r_, t_, src_);
@@ -450,6 +452,11 @@ SparseTensor& mul_out_sparse_cuda(const SparseTensor& t_, const SparseTensor& sr
TORCH_CHECK(r_.is_cuda(), "mul: expected 'out' to be CUDA, but got CPU");
TORCH_CHECK(cuda::check_device({r_, t_, src_}));
TORCH_CHECK(t_.sizes().equals(src_.sizes()), "mul: expected 'self' and 'other' to have same size, but ", t_.sizes(), " != ", src_.sizes());
TORCH_CHECK(t_.is_sparse(), "mul(dense, sparse) is not supported. Use mul(sparse, dense) instead.");

if (!src_.is_sparse()) {
return mul_out_sparse_dense_cuda(r_, t_, src_);
}

SparseTensor t = t_.coalesce();
SparseTensor src = src_.coalesce();
@@ -511,6 +518,52 @@ SparseTensor& mul_out_sparse_cuda(const SparseTensor& t_, const SparseTensor& sr
return r_._coalesced_(true);
}

// --------------------------------------------------------------------
// mul(Tensor, SparseTensor)
// --------------------------------------------------------------------

SparseTensor& mul_out_sparse_dense_cuda(SparseTensor& r, const SparseTensor& sparse_, const Tensor& dense) {
TORCH_CHECK(dense.is_cuda(), "mul: expected 'self' to be a CUDA tensor, but got a CPU tensor");
TORCH_CHECK(sparse_.is_cuda(), "mul: expected 'other' to be a CUDA tensor, but got a CPU tensor");
TORCH_CHECK(r.is_cuda(), "mul: expected 'out' to be a CUDA tensor, but got a CPU tensor");

TORCH_CHECK(cuda::check_device({sparse_, r, dense}));

TORCH_CHECK(dense.sizes().equals(sparse_.sizes()), "mul: expected 'self' and 'other' to have same size, but self has size ",
dense.sizes(), " while other has size ", sparse_.sizes(), " (FYI: dense-sparse multiplication does not currently support broadcasting)");

auto commonDtype = promoteTypes(dense.scalar_type(), sparse_.scalar_type());
TORCH_CHECK(canCast(commonDtype, r.scalar_type()), "Can't convert result type ", commonDtype, " to output ", r.scalar_type(), " in mul operation");

SparseTensor sparse = sparse_.coalesce().to(commonDtype);

int64_t nnz = sparse._nnz();
int64_t sparse_dim = sparse.sparse_dim();
int64_t dense_dim = sparse.dense_dim();
Tensor sparse_indices = sparse._indices();
Tensor sparse_values = sparse._values();

std::vector<at::indexing::TensorIndex> indices;

for (int64_t d=0; d<sparse_dim; d++) {
std::vector<at::indexing::TensorIndex> i;
i.push_back(d);
i.push_back(at::indexing::Slice());
indices.push_back(sparse_indices.index(i));
}
for (int64_t d=0; d<dense_dim; d++) {
indices.push_back(at::indexing::Slice());
}

Tensor r_indices = at::empty({sparse_dim, nnz}, sparse_indices.options());
r_indices.copy_(sparse_indices);
Tensor r_values = dense.index(indices).to(commonDtype).mul_(sparse_values).to(r.scalar_type());

r.resize_as_(sparse);
get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values);
return r._coalesced_(true);
}

// --------------------------------------------------------------------
// sparse.sum() backward
//
20 changes: 8 additions & 12 deletions test/test_sparse.py
@@ -3338,25 +3338,21 @@ def assign_to():

self.assertRaises(TypeError, assign_to)

def test_cpu_sparse_dense_mul(self, device):
# general multiplication is not supported, but 0dim multiplication is supported
def test_sparse_dense_mul(self, device):
s = torch.sparse_coo_tensor([[0], [1]], [5.0], (2, 3), device=device)
t23 = s.to_dense()
t0 = torch.tensor(2.0, device=device)
d = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], device=device)

r = s * 2.0
r2 = s * d
r3 = t23 * d

self.assertEqual(r, 2.0 * s)
self.assertEqual(r, t0 * s)
self.assertEqual(r, s * t0)
if device == 'cpu':
with self.assertRaisesRegex(RuntimeError, r"mul\(sparse, dense\) is not supported"):
s * t23
with self.assertRaisesRegex(RuntimeError, r"mul\(dense, sparse\) is not supported"):
t23 * s
elif device == 'cuda':
with self.assertRaisesRegex(NotImplementedError, "CUDA"):
s * t23
with self.assertRaisesRegex(NotImplementedError, "CUDA"):
t23 * s
self.assertEqual(r2.to_dense(), r3)
self.assertEqual(r2, s * d)


class TestSparseOneOff(TestCase):
15 changes: 12 additions & 3 deletions test/test_type_promotion.py
@@ -729,16 +729,25 @@ def op(t1, t2):
e, d1, s1, d2, s2 = [x.clone() for x in test_tensors]
dense_sparse = op(d1, s2)
self.assertEqual(e, dense_sparse, atol=precision, rtol=rtol, msg=err)
elif op_name == 'mul':
if inplace:
e, d1, s1, d2, s2 = [x.clone() for x in test_tensors]
sparse_dense = op(s1, d2)
self.assertEqual(e, sparse_dense.to_dense(), atol=precision, rtol=rtol, msg=err)
else:
# sparse division only supports division by a scalar
# mul: Didn't find kernel to dispatch to for operator 'aten::_nnz'
self.assertRaises(RuntimeError, lambda: op(d1, s2))

# Test op(sparse, dense) not supported for any ops:
# add(sparse, dense) is not supported. Use add(dense, sparse) instead.
# Test that op(sparse, dense) is not supported for any op except mul:
# for op != mul, op(sparse, dense) raises; use op(dense, sparse) instead.
# for mul, op(dense, sparse) raises; use mul(sparse, dense) instead.
# sparse division only supports division by a scalar
# mul: Didn't find kernel to dispatch to for operator 'aten::_nnz'.
self.assertRaises(RuntimeError, lambda: op(s1, d2))
if op_name != 'mul':
self.assertRaises(RuntimeError, lambda: op(s1, d2))
else:
self.assertRaises(RuntimeError, lambda: op(d1, s2))

# Test op(sparse, scalar)
if not add_sub and not (self.device_type == 'cpu' and dtype1 == torch.half):
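For concreteness, the asymmetry the new comment describes: mul(sparse, dense) now dispatches to the new kernel, while mul(dense, sparse) still raises and points the user at the supported order. A small sketch (illustrative only; the error text comes from the TORCH_CHECK added in SparseTensorMath.cpp):

import torch

s = torch.sparse_coo_tensor([[0], [0]], [1.0], (1, 1))
d = torch.ones(1, 1)

r = torch.mul(s, d)           # mul(sparse, dense): supported by this PR
assert r.is_sparse

try:
    torch.mul(d, s)           # mul(dense, sparse): still raises
except RuntimeError as e:
    assert "Use mul(sparse, dense) instead" in str(e)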