From 3793c31b8eaa4c178b69d98e77309ce9ba417d76 Mon Sep 17 00:00:00 2001 From: keitakurita Date: Fri, 12 May 2017 08:00:04 +0900 Subject: [PATCH] BUG: incorrect handling of scipy.sparse.dok formats (#16179) (#16191) --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/sparse/frame.py | 2 +- pandas/tests/sparse/test_frame.py | 28 +++++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index cede553dd145b9..9a3035e4334c7a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -70,8 +70,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ - - +- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 3c8f6e8c6257dd..461dd50c5da6e0 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None, values = Series(data.data, index=data.row, copy=False) for col, rowvals in values.groupby(data.col): # get_blocks expects int32 row indices in sorted order + rowvals = rowvals.sort_index() rows = rowvals.index.values.astype(np.int32) - rows.sort() blocs, blens = get_blocks(rows) sdict[columns[col]] = SparseSeries( diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 0312b76ec30a5d..654d12b782f374 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1146,8 +1146,8 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 -@pytest.mark.parametrize('columns', [None, list('cd')]) +@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('def')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) 
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): @@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results - arr = np.eye(2, dtype=dtype) + arr = np.eye(3, dtype=dtype) + # GH 16179 + arr[0, 1] = dtype(2) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype @@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value): assert sdf.to_coo().dtype == res_dtype +def test_from_scipy_correct_ordering(spmatrix): + # GH 16179 + tm.skip_if_no_package('scipy') + + arr = np.arange(1, 5).reshape(2, 2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm) + expected = pd.SparseDataFrame(arr) + tm.assert_sp_frame_equal(sdf, expected) + tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + + class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self):