Skip to content

Commit

Permalink
API: SparseSeries comparison now returns sparse
Browse files Browse the repository at this point in the history
Now that #13985 has been fixed, ``SparseSeries`` comparison ops can return
``SparseSeries`` (they return a normal ``Series`` on current master).
Also fixed a bug where a ``SparseArray`` created from a ``SparseSeries``
may not inherit its ``dtype``.

Author: sinhrks <sinhrks@gmail.com>

Closes #13999 from sinhrks/sparse_comparison and squashes the following commits:

eafc94c [sinhrks] API: SparseSeries comparison now returns sparse
  • Loading branch information
sinhrks authored and jreback committed Aug 20, 2016
1 parent 5c78ee6 commit 51b20de
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 9 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,9 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan`
- Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`)
- Bug in single row slicing on multi-type ``SparseDataFrame``s, types were previously forced to float (:issue:`13917`)
- Bug in sparse indexing using ``SparseArray`` with ``bool`` dtype may return incorrect result (:issue:`13985`)
- Bug in ``SparseArray`` creation from a ``SparseSeries`` where the ``dtype`` may be lost (:issue:`13999`)
- Bug in ``SparseSeries`` comparison with dense where a normal ``Series`` was returned rather than a ``SparseSeries`` (:issue:`13999`)


.. _whatsnew_0190.indexer_dtype:

Expand Down
11 changes: 9 additions & 2 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,17 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
values.fill(data)
data = values

if isinstance(data, ABCSparseSeries):
data = data.values
is_sparse_array = isinstance(data, SparseArray)

if dtype is not None:
dtype = np.dtype(dtype)
is_sparse_array = isinstance(data, SparseArray)
if is_sparse_array:
# temp, always inherit passed SparseArray dtype
# can be removed after GH 13849
dtype = data.dtype

if fill_value is None:
if is_sparse_array:
fill_value = data.fill_value
Expand All @@ -211,7 +219,6 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
raise AssertionError("Non array-like type {0} must have"
" the same length as the"
" index".format(type(values)))

# Create array, do *not* copy data by default
if copy:
try:
Expand Down
2 changes: 1 addition & 1 deletion pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ def from_coo(cls, A, dense_index=False):
# overwrite basic arithmetic to use SparseSeries version
# force methods to overwrite previous definitions.
ops.add_special_arithmetic_methods(SparseSeries, _arith_method,
comp_method=None,
comp_method=_arith_method,
bool_method=None, use_numexpr=False,
force=True)

Expand Down
5 changes: 0 additions & 5 deletions pandas/sparse/tests/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,6 @@ class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
def _assert(self, a, b):
# Compare the two results with the Series-specific equality assertion
# from pandas' testing helpers (``tm``).
tm.assert_series_equal(a, b)

def _check_bool_result(self, res):
# ToDo: Must return SparseSeries after GH 667
tm.assertIsInstance(res, self._base)
self.assertEqual(res.dtype, np.bool)

def test_alignment(self):
da = pd.Series(np.arange(4))
db = pd.Series(np.arange(4), index=[1, 2, 3, 4])
Expand Down
28 changes: 27 additions & 1 deletion pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np

from pandas import _np_version_under1p8
from pandas.sparse.api import SparseArray
from pandas.sparse.api import SparseArray, SparseSeries
from pandas._sparse import IntIndex
from pandas.util.testing import assert_almost_equal, assertRaisesRegexp
import pandas.util.testing as tm
Expand Down Expand Up @@ -102,6 +102,32 @@ def test_constructor_spindex_dtype(self):
self.assertEqual(arr.dtype, np.int64)
self.assertEqual(arr.fill_value, 0)

def test_sparseseries_roundtrip(self):
    """Round-trip SparseArray -> SparseSeries -> SparseArray (GH 13999).

    For each sparse index ``kind`` and a range of ``fill_value`` settings,
    build a ``SparseArray``, wrap it in a ``SparseSeries`` and construct a
    new ``SparseArray`` from that series.  The result must compare equal to
    the starting array under ``tm.assert_sp_array_equal``, both when the
    ``dtype`` is passed explicitly and when it is left to be inherited.
    """
    # GH 13999
    for kind in ['integer', 'block']:
        for fill in [1, np.nan, 0]:
            # float data (contains NaN), dtype left to be inferred
            arr = SparseArray([np.nan, 1, np.nan, 2, 3], kind=kind,
                              fill_value=fill)
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

            arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                              kind=kind, fill_value=fill)
            # explicit dtype on the way back
            res = SparseArray(SparseSeries(arr), dtype=np.int64)
            tm.assert_sp_array_equal(arr, res)

            # dtype omitted on the way back
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

        for fill in [True, False, np.nan]:
            arr = SparseArray([True, False, True, True], dtype=np.bool,
                              kind=kind, fill_value=fill)
            # explicit dtype on the way back, mirroring the int64 case
            res = SparseArray(SparseSeries(arr), dtype=np.bool)
            tm.assert_sp_array_equal(arr, res)

            # dtype omitted on the way back
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

def test_get_item(self):

self.assertTrue(np.isnan(self.arr[1]))
Expand Down

0 comments on commit 51b20de

Please sign in to comment.