BUG: incorrect handling of scipy.sparse.dok formats (pandas-dev#16197) (pandas-dev#16191)
stangirala · Jun 11, 2017 · 3793c31
1 parent 08baae9
commit 3793c31
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 6 deletions.
diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
@@ -70,8 +70,7 @@ Groupby/Resample/Rolling
 Sparse
 ^^^^^^
 
-
-
+- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
         values = Series(data.data, index=data.row, copy=False)
         for col, rowvals in values.groupby(data.col):
             # get_blocks expects int32 row indices in sorted order
+            rowvals = rowvals.sort_index()
             rows = rowvals.index.values.astype(np.int32)
-            rows.sort()
             blocs, blens = get_blocks(rows)
 
             sdict[columns[col]] = SparseSeries(

diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
@@ -1146,8 +1146,8 @@ def test_isnotnull(self):
         tm.assert_frame_equal(res.to_dense(), exp)
 
 
-@pytest.mark.parametrize('index', [None, list('ab')])  # noqa: F811
-@pytest.mark.parametrize('columns', [None, list('cd')])
+@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
+@pytest.mark.parametrize('columns', [None, list('def')])
 @pytest.mark.parametrize('fill_value', [None, 0, np.nan])
 @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
 def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
@@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
 
     # Make one ndarray and from it one sparse matrix, both to be used for
     # constructing frames and comparing results
-    arr = np.eye(2, dtype=dtype)
+    arr = np.eye(3, dtype=dtype)
+    # GH 16179
+    arr[0, 1] = dtype(2)
     try:
         spm = spmatrix(arr)
         assert spm.dtype == arr.dtype
@@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype
 
 
+def test_from_scipy_correct_ordering(spmatrix):
+    # GH 16179
+    tm.skip_if_no_package('scipy')
+
+    arr = np.arange(1, 5).reshape(2, 2)
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # can just skip testing it thoroughly
+        return
+
+    sdf = pd.SparseDataFrame(spm)
+    expected = pd.SparseDataFrame(arr)
+    tm.assert_sp_frame_equal(sdf, expected)
+    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+
+
 class TestSparseDataFrameArithmetic(object):
 
     def test_numeric_op_scalar(self):