Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .compressed import GCXS
from .compressed import GCXS, CSC, CSR
from .common import stack, concatenate
102 changes: 101 additions & 1 deletion sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from functools import reduce
from collections.abc import Iterable
import scipy.sparse as ss
from scipy.sparse import compressed
from typing import Tuple

from .._sparse_array import SparseArray, _reduce_super_ufunc
from .._coo.common import linear_loc
Expand Down Expand Up @@ -136,6 +138,9 @@ def __init__(
idx_dtype=None,
):

if isinstance(arg, ss.spmatrix):
arg = self.from_scipy_sparse(arg)

if isinstance(arg, np.ndarray):
(arg, shape, compressed_axes, fill_value) = _from_coo(
COO(arg), compressed_axes
Expand All @@ -146,6 +151,16 @@ def __init__(
arg, compressed_axes, idx_dtype
)

elif isinstance(arg, GCXS):
if compressed_axes is not None and arg.compressed_axes != compressed_axes:
arg = arg.change_compressed_axes(self.compressed_axes)
(arg, shape, compressed_axes, fill_value) = (
(arg.data, arg.indices, arg.indptr),
arg.shape,
arg.compressed_axes,
arg.fill_value,
)

if shape is None:
raise ValueError("missing `shape` argument")

Expand All @@ -160,6 +175,7 @@ def __init__(
raise ValueError("data must be a scalar or 1-dimensional.")

self.shape = shape

self.compressed_axes = (
tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
)
Expand Down Expand Up @@ -440,7 +456,7 @@ def todense(self):

def todok(self):
    """
    Convert this array to DOK format.

    The conversion goes through COO: the result of ``self.tocoo()`` is
    handed to ``DOK.from_coo``.

    Returns
    -------
    DOK
        The value returned by ``DOK.from_coo(self.tocoo())``.
    """
    # Only the package-level import is kept; the older
    # ``from ..dok import DOK`` spelling it replaced bound the same name
    # and was redundant.
    # NOTE(review): the import is function-local, presumably to avoid a
    # circular import at module load -- confirm before hoisting it.
    from .. import DOK

    return DOK.from_coo(self.tocoo())  # probably a temporary solution

Expand Down Expand Up @@ -496,6 +512,10 @@ def asformat(self, format, compressed_axes=None):
return self.tocoo()
elif format == "dok":
return self.todok()
elif format == "csr":
return CSR(self)
elif format == "csc":
return CSC(self)
elif format == "gcxs":
if compressed_axes is None:
compressed_axes = self.compressed_axes
Expand Down Expand Up @@ -817,3 +837,83 @@ def _prune(self):
self.indptr = indptr
else:
self.indices = self.indices[mask]


class Compressed2d(GCXS):
    """
    Common base for compressed arrays that are restricted to two dimensions.

    The constructor forwards ``self.compressed_axes`` to ``GCXS.__init__``,
    so a concrete subclass has to make that attribute available before
    construction (for example as a class-level property).
    """

    def __init__(self, arg, shape=None, prune=False, fill_value=0):
        """
        Validate that the input describes a 2-d array, then defer to GCXS.

        Parameters
        ----------
        arg
            Source object. If it exposes a ``shape`` attribute, that shape is
            used and `shape` must be left as ``None``.
        shape : optional
            Shape to use when `arg` has no ``shape`` attribute.
        prune : bool, optional
            Forwarded unchanged to ``GCXS.__init__``.
        fill_value : optional
            Forwarded unchanged to ``GCXS.__init__``.

        Raises
        ------
        ValueError
            If no shape can be determined, or the shape is not 2-d.
        NotImplementedError
            If `shape` is given although `arg` already carries a shape.
        """
        arg_has_shape = hasattr(arg, "shape")
        if shape is None and not arg_has_shape:
            raise ValueError("missing `shape` argument")
        if arg_has_shape and shape is not None:
            raise NotImplementedError("Cannot change shape in constructor")

        # Exactly one of the two shape sources is available at this point.
        effective_shape = arg.shape if shape is None else shape
        nd = len(effective_shape)
        if nd != 2:
            raise ValueError(f"{type(self).__name__} must be 2-d, passed {nd}-d shape.")

        super().__init__(
            arg,
            shape=shape,
            compressed_axes=self.compressed_axes,
            prune=prune,
            fill_value=fill_value,
        )

    def __str__(self):
        """Return a one-line summary: class name, shape, dtype, nnz, fill value."""
        template = "<{}: shape={}, dtype={}, nnz={}, fill_value={}>"
        fields = (
            type(self).__name__,
            self.shape,
            self.dtype,
            self.nnz,
            self.fill_value,
        )
        return template.format(*fields)

    # The repr is the same summary string.
    __repr__ = __str__

    @property
    def ndim(self) -> int:
        """Number of dimensions; always 2 for this family of classes."""
        return 2


class CSR(Compressed2d):
    """
    Two-dimensional compressed array whose compressed axes are fixed to
    ``(0,)``.
    """

    @classmethod
    def from_scipy_sparse(cls, x):
        """
        Build an instance from a SciPy sparse matrix.

        Parameters
        ----------
        x
            Object providing ``asformat``; it is converted with
            ``x.asformat("csr", copy=False)``.

        Returns
        -------
        CSR
            Instance built from the converted matrix's ``data``, ``indices``
            and ``indptr`` arrays and its ``shape``.
        """
        x = x.asformat("csr", copy=False)
        return cls((x.data, x.indices, x.indptr), shape=x.shape)

    @property
    def compressed_axes(self) -> Tuple[int, ...]:
        """The compressed axes; always ``(0,)`` for this class."""
        return (0,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        # The value is fixed by the class. Assignment is tolerated only when
        # it restates the fixed value, so code that assigns the attribute
        # unconditionally keeps working.
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} requires "
                f"compressed_axes={self.compressed_axes}, got {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> "CSC":
        """
        Return the transpose as a ``CSC`` array.

        The same ``data``/``indices``/``indptr`` triple is handed to ``CSC``
        together with the reversed shape.

        Parameters
        ----------
        axes : None
            Must be ``None``; any other value is rejected.
        copy : bool, optional
            If true, ``self.copy()`` is taken first and the result is built
            from that copy.

        Returns
        -------
        CSC

        Raises
        ------
        ValueError
            If `axes` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose only supports axes=None, "
                f"got {axes!r}."
            )
        source = self.copy() if copy else self
        # Pass fill_value along explicitly; otherwise the constructor default
        # of 0 would silently replace a non-zero fill value.
        return CSC(
            (source.data, source.indices, source.indptr),
            source.shape[::-1],
            fill_value=source.fill_value,
        )


class CSC(Compressed2d):
    """
    Two-dimensional compressed array whose compressed axes are fixed to
    ``(1,)``.
    """

    @classmethod
    def from_scipy_sparse(cls, x):
        """
        Build an instance from a SciPy sparse matrix.

        Parameters
        ----------
        x
            Object providing ``asformat``; it is converted with
            ``x.asformat("csc", copy=False)``.

        Returns
        -------
        CSC
            Instance built from the converted matrix's ``data``, ``indices``
            and ``indptr`` arrays and its ``shape``.
        """
        x = x.asformat("csc", copy=False)
        return cls((x.data, x.indices, x.indptr), shape=x.shape)

    @property
    def compressed_axes(self) -> Tuple[int, ...]:
        """The compressed axes; always ``(1,)`` for this class."""
        return (1,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        # The value is fixed by the class. Assignment is tolerated only when
        # it restates the fixed value, so code that assigns the attribute
        # unconditionally keeps working.
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} requires "
                f"compressed_axes={self.compressed_axes}, got {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> CSR:
        """
        Return the transpose as a ``CSR`` array.

        The same ``data``/``indices``/``indptr`` triple is handed to ``CSR``
        together with the reversed shape.

        Parameters
        ----------
        axes : None
            Must be ``None``; any other value is rejected.
        copy : bool, optional
            If true, ``self.copy()`` is taken first and the result is built
            from that copy.

        Returns
        -------
        CSR

        Raises
        ------
        ValueError
            If `axes` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose only supports axes=None, "
                f"got {axes!r}."
            )
        source = self.copy() if copy else self
        # Pass fill_value along explicitly; otherwise the constructor default
        # of 0 would silently replace a non-zero fill value.
        return CSR(
            (source.data, source.indices, source.indptr),
            source.shape[::-1],
            fill_value=source.fill_value,
        )
124 changes: 124 additions & 0 deletions sparse/tests/test_compressed_2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np
from numpy.core.numeric import indices
import pytest
import scipy.sparse
from scipy.sparse import data
from scipy.sparse.construct import random
import scipy.stats

import sparse
from sparse import COO
from sparse._compressed.compressed import GCXS, CSR, CSC
from sparse._utils import assert_eq


@pytest.fixture(scope="module", params=[CSR, CSC])
def cls(request):
    """Parametrized fixture: the class under test, ``CSR`` then ``CSC``."""
    klass = request.param
    return klass


@pytest.fixture(scope="module", params=["f8", "f4", "i8", "i4"])
def dtype(request):
    """Parametrized fixture: dtype code string used to build test arrays."""
    code = request.param
    return code


@pytest.fixture(scope="module")
def random_sparse(cls, dtype):
    """
    Random ``(20, 30)`` array with density 0.25, cast to `dtype` and wrapped
    in `cls`.

    For integer dtypes the stored values are drawn with
    ``np.random.randint(-1000, 1000, n)``; otherwise ``data_rvs=None`` is
    passed to ``sparse.random``.
    """
    data_rvs = None
    if np.issubdtype(dtype, np.integer):

        def data_rvs(n):
            return np.random.randint(-1000, 1000, n)

    source = sparse.random((20, 30), density=0.25, data_rvs=data_rvs)
    return cls(source.astype(dtype))


@pytest.fixture(scope="module")
def random_sparse_small(cls, dtype):
    """
    Random ``(20, 30)`` array with density 0.25 and small integer magnitudes,
    cast to `dtype` and wrapped in `cls`.

    For integer dtypes the stored values are drawn with
    ``np.random.randint(-10, 10, n)``; otherwise ``data_rvs=None`` is passed
    to ``sparse.random``.
    """
    if np.issubdtype(dtype, np.integer):

        def data_rvs(n):
            return np.random.randint(-10, 10, n)

    else:
        data_rvs = None
    # The shape must be 2-d: CSR/CSC raise ValueError for any other
    # dimensionality, so the previous (20, 30, 40) shape made this fixture
    # fail unconditionally.
    return cls(sparse.random((20, 30), density=0.25, data_rvs=data_rvs).astype(dtype))


def test_repr(random_sparse):
    """The repr of an instance mentions the name of its class."""
    class_name = type(random_sparse).__name__
    assert class_name in repr(random_sparse)


def test_bad_constructor_input(cls):
    """An argument without a shape and no explicit `shape` raises ValueError."""
    shape_message = r".*shape.*"
    with pytest.raises(ValueError, match=shape_message):
        cls(arg="hello world")


@pytest.mark.parametrize("n", [0, 1, 3])
def test_bad_nd_input(cls, n):
    """Dense input that is not 2-d is rejected; the message names the rank."""
    dense = np.ones(shape=(5,) * n)
    with pytest.raises(ValueError, match=f"{n}-d"):
        cls(dense)


@pytest.mark.parametrize("source_type", ["gcxs", "coo"])
def test_from_sparse(cls, source_type):
    """Constructing `cls` from another sparse format keeps the contents."""
    source = sparse.random((20, 30), density=0.25, format=source_type)
    converted = cls(source)

    assert_eq(converted, source)


@pytest.mark.parametrize("scipy_type", ["coo", "csr", "csc", "lil"])
@pytest.mark.parametrize("CLS", [CSR, CSC, GCXS])
def test_from_scipy_sparse(scipy_type, CLS, dtype):
    """
    Both ``CLS.from_scipy_sparse`` and the plain constructor agree with
    ``COO.from_scipy_sparse`` for a SciPy sparse input.
    """
    scipy_matrix = scipy.sparse.random(
        20, 30, density=0.2, format=scipy_type, dtype=dtype
    )
    expected = COO.from_scipy_sparse(scipy_matrix)

    # Explicit alternate constructor.
    via_classmethod = CLS.from_scipy_sparse(scipy_matrix)
    assert_eq(expected, via_classmethod)

    # Passing the SciPy matrix straight to the constructor.
    via_constructor = CLS(scipy_matrix)
    assert_eq(expected, via_constructor)


@pytest.mark.parametrize("cls_str", ["coo", "dok", "csr", "csc", "gcxs"])
def test_to_sparse(cls_str, random_sparse):
    """``asformat`` to each supported format name keeps the contents."""
    converted = random_sparse.asformat(cls_str)
    assert_eq(random_sparse, converted)


@pytest.mark.parametrize("copy", [True, False])
def test_transpose(random_sparse, copy):
    """
    Transposing reverses the shape, a double transpose restores both the
    contents and the type, and `copy` decides whether buffers are shared.
    """
    from operator import is_, is_not

    transposed = random_sparse.transpose(copy=copy)
    round_trip = transposed.transpose(copy=copy)

    # With copy=True the buffers must be distinct objects; otherwise they
    # must be the very same objects.
    same_or_not = is_not if copy else is_
    for attr in ("data", "indices", "indptr"):
        assert same_or_not(getattr(random_sparse, attr), getattr(transposed, attr))

    assert random_sparse.shape == transposed.shape[::-1]

    assert_eq(random_sparse, round_trip)
    assert type(random_sparse) is type(round_trip)


def test_transpose_error(random_sparse):
    """Passing a non-None `axes` to ``transpose`` raises ValueError."""
    bad_axes = 1
    with pytest.raises(ValueError):
        random_sparse.transpose(axes=bad_axes)