From 3793c31b8eaa4c178b69d98e77309ce9ba417d76 Mon Sep 17 00:00:00 2001
From: keitakurita <keita.kurita@gmail.com>
Date: Fri, 12 May 2017 08:00:04 +0900
Subject: [PATCH] BUG: incorrect handling of scipy.sparse.dok formats (#16179)
 (#16191)

---
 doc/source/whatsnew/v0.20.2.txt   |  3 +--
 pandas/core/sparse/frame.py       |  2 +-
 pandas/tests/sparse/test_frame.py | 28 +++++++++++++++++++++++++---
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index cede553dd145b9..9a3035e4334c7a 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -70,8 +70,7 @@ Groupby/Resample/Rolling
 Sparse
 ^^^^^^
 
-
-
+- Bug in construction of ``SparseDataFrame`` from ``scipy.sparse.dok_matrix`` (:issue:`16179`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
index 3c8f6e8c6257dd..461dd50c5da6e0 100644
--- a/pandas/core/sparse/frame.py
+++ b/pandas/core/sparse/frame.py
@@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
         values = Series(data.data, index=data.row, copy=False)
         for col, rowvals in values.groupby(data.col):
             # get_blocks expects int32 row indices in sorted order
+            rowvals = rowvals.sort_index()
             rows = rowvals.index.values.astype(np.int32)
-            rows.sort()
             blocs, blens = get_blocks(rows)
 
             sdict[columns[col]] = SparseSeries(
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index 0312b76ec30a5d..654d12b782f374 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -1146,8 +1146,8 @@ def test_isnotnull(self):
         tm.assert_frame_equal(res.to_dense(), exp)
 
 
-@pytest.mark.parametrize('index', [None, list('ab')])  # noqa: F811
-@pytest.mark.parametrize('columns', [None, list('cd')])
+@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
+@pytest.mark.parametrize('columns', [None, list('def')])
 @pytest.mark.parametrize('fill_value', [None, 0, np.nan])
 @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
 def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
@@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
 
     # Make one ndarray and from it one sparse matrix, both to be used for
     # constructing frames and comparing results
-    arr = np.eye(2, dtype=dtype)
+    arr = np.eye(3, dtype=dtype)
+    # GH 16179
+    arr[0, 1] = dtype(2)
     try:
         spm = spmatrix(arr)
         assert spm.dtype == arr.dtype
@@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype
 
 
+def test_from_scipy_correct_ordering(spmatrix):
+    # GH 16179
+    tm.skip_if_no_package('scipy')
+
+    arr = np.arange(1, 5).reshape(2, 2)
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # can just skip testing it thoroughly
+        return
+
+    sdf = pd.SparseDataFrame(spm)
+    expected = pd.SparseDataFrame(arr)
+    tm.assert_sp_frame_equal(sdf, expected)
+    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+
+
 class TestSparseDataFrameArithmetic(object):
 
     def test_numeric_op_scalar(self):