Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for saving matrices to files in npz format #154

Merged
merged 9 commits into from
May 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/generated/sparse.load_npz.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
load\_npz
=========

.. currentmodule:: sparse

.. autofunction:: load_npz
5 changes: 4 additions & 1 deletion docs/generated/sparse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ API

elemwise

load_npz

nanmax

nanmin
Expand All @@ -43,6 +45,8 @@ API

random

save_npz

stack

tensordot
Expand All @@ -52,4 +56,3 @@ API
triu

where

6 changes: 6 additions & 0 deletions docs/generated/sparse.save_npz.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
save\_npz
=========

.. currentmodule:: sparse

.. autofunction:: save_npz
1 change: 1 addition & 0 deletions sparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .dok import DOK
from .sparse_array import SparseArray
from .utils import random
from .io import save_npz, load_npz

from ._version import get_versions
__version__ = get_versions()['version']
Expand Down
104 changes: 104 additions & 0 deletions sparse/io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import numpy as np

from .coo.core import COO


def save_npz(filename, matrix, compressed=True):
    """ Save a sparse matrix to disk in numpy's ``.npz`` format.
    Note: This is not binary compatible with scipy's ``save_npz()``.
    Will save a file that can only be opened with this package's ``load_npz()``.

    Parameters
    ----------
    filename : string or file
        Either the file name (string) or an open file (file-like object)
        where the data will be saved. If ``filename`` is a string or a Path,
        the ``.npz`` extension will be appended to the file name if it is not
        already there
    matrix : COO
        The matrix to save to disk
    compressed : bool
        Whether to save in compressed or uncompressed mode

    Examples
    --------
    Store sparse matrix to disk, and load it again:

    >>> import os
    >>> import sparse
    >>> import numpy as np
    >>> dense_mat = np.array([[[0., 0.], [0., 0.70677779]], [[0., 0.], [0., 0.86522495]]])
    >>> mat = sparse.COO(dense_mat)
    >>> mat
    <COO: shape=(2, 2, 2), dtype=float64, nnz=2>
    >>> sparse.save_npz('mat.npz', mat)
    >>> loaded_mat = sparse.load_npz('mat.npz')
    >>> loaded_mat
    <COO: shape=(2, 2, 2), dtype=float64, nnz=2>
    >>> loaded_mat.todense()
    array([[[0.        , 0.        ],
            [0.        , 0.70677779]],
    <BLANKLINE>
           [[0.        , 0.        ],
            [0.        , 0.86522495]]])
    >>> os.remove('mat.npz')

    See Also
    --------
    load_npz
    scipy.sparse.save_npz
    scipy.sparse.load_npz
    numpy.savez
    numpy.load

    """

    # The three arrays below are the whole on-disk layout; ``load_npz``
    # reads back exactly these keys.
    nodes = {'data': matrix.data,
             'coords': matrix.coords,
             'shape': matrix.shape}

    if compressed:
        np.savez_compressed(filename, **nodes)
    else:
        np.savez(filename, **nodes)


def load_npz(filename):
    """ Load a sparse matrix in numpy's ``.npz`` format from disk.
    Note: This is not binary compatible with scipy's ``save_npz()``
    output. Will only load files saved by this package.

    Parameters
    ----------
    filename : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods.

    Returns
    -------
    COO
        The sparse matrix at path ``filename``

    Raises
    ------
    RuntimeError
        If the archive lacks any of the ``coords``, ``data`` or ``shape``
        entries written by :obj:`save_npz`.

    Examples
    --------
    See :obj:`save_npz` for usage examples.

    See Also
    --------
    save_npz
    scipy.sparse.save_npz
    scipy.sparse.load_npz
    numpy.savez
    numpy.load

    """

    with np.load(filename) as fp:
        try:
            coords = fp['coords']
            data = fp['data']
            # The shape is stored as an array; convert each entry to a plain
            # Python int so the resulting tuple holds no NumPy scalars.
            shape = tuple(int(dim) for dim in fp['shape'])
        except KeyError:
            raise RuntimeError('The file {} does not contain a valid sparse matrix'.format(filename))

    # NOTE(review): the flags below trust that the file was written by
    # ``save_npz`` from an already-canonical COO - confirm this is acceptable
    # for files from other sources.
    return COO(coords=coords, data=data, shape=shape, sorted=True, has_duplicates=False)
39 changes: 39 additions & 0 deletions sparse/tests/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import tempfile
import shutil
import pytest
import numpy as np

import sparse

from sparse import save_npz, load_npz
from sparse.utils import assert_eq


@pytest.mark.parametrize('compression', [True, False])
def test_save_load_npz_file(compression):
    """Round-trip a random COO array through ``save_npz``/``load_npz``.

    Runs once with compression enabled and once without, and checks that
    the reloaded array equals both the original sparse array and its dense
    form.
    """
    x = sparse.random((2, 3, 4, 5), density=.25)
    y = x.todense()

    dir_name = tempfile.mkdtemp()
    try:
        filename = os.path.join(dir_name, 'mat.npz')

        save_npz(filename, x, compressed=compression)
        z = load_npz(filename)
        assert_eq(x, z)
        assert_eq(y, z.todense())
    finally:
        # Remove the scratch directory even when an assertion above fails,
        # so failing runs do not leave temporary files behind.
        shutil.rmtree(dir_name)


def test_load_wrong_format_exception():
    """``load_npz`` must raise ``RuntimeError`` for a foreign ``.npz`` file.

    The archive is written with plain ``np.savez`` and therefore contains
    none of the ``coords``/``data``/``shape`` entries.
    """
    x = np.array([1, 2, 3])

    dir_name = tempfile.mkdtemp()
    try:
        filename = os.path.join(dir_name, 'mat.npz')

        np.savez(filename, x)
        with pytest.raises(RuntimeError):
            load_npz(filename)
    finally:
        # Clean up the scratch directory regardless of the test outcome.
        shutil.rmtree(dir_name)