Skip to content

Commit

Permalink
BUG: incorrect handling of scipy.sparse.dok formats (pandas-dev#16197) (
Browse files Browse the repository at this point in the history
  • Loading branch information
keitakurita authored and stangirala committed Jun 11, 2017
1 parent 08baae9 commit 3793c31
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ Groupby/Resample/Rolling
Sparse
^^^^^^



- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)

Reshaping
^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
values = Series(data.data, index=data.row, copy=False)
for col, rowvals in values.groupby(data.col):
# get_blocks expects int32 row indices in sorted order
rowvals = rowvals.sort_index()
rows = rowvals.index.values.astype(np.int32)
rows.sort()
blocs, blens = get_blocks(rows)

sdict[columns[col]] = SparseSeries(
Expand Down
28 changes: 25 additions & 3 deletions pandas/tests/sparse/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,8 +1146,8 @@ def test_isnotnull(self):
tm.assert_frame_equal(res.to_dense(), exp)


@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('cd')])
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
Expand All @@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):

# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(2, dtype=dtype)
arr = np.eye(3, dtype=dtype)
# GH 16179
arr[0, 1] = dtype(2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
Expand Down Expand Up @@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
assert sdf.to_coo().dtype == res_dtype


def test_from_scipy_correct_ordering(spmatrix):
# GH 16179
tm.skip_if_no_package('scipy')

arr = np.arange(1, 5).reshape(2, 2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return

sdf = pd.SparseDataFrame(spm)
expected = pd.SparseDataFrame(arr)
tm.assert_sp_frame_equal(sdf, expected)
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())


class TestSparseDataFrameArithmetic(object):

def test_numeric_op_scalar(self):
Expand Down

0 comments on commit 3793c31

Please sign in to comment.