Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: deprecate SparseArray.values #26421

Merged
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,10 @@ Deprecations

- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).


.. _whatsnew_0250.prior_deprecations:
Expand Down
19 changes: 14 additions & 5 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ from numpy cimport (ndarray,
cnp.import_array()

cimport pandas._libs.util as util
from pandas._libs.lib import maybe_convert_objects
from pandas._libs.lib import maybe_convert_objects, values_from_object


cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
Expand All @@ -28,6 +28,14 @@ cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
return np.empty(size, dtype='O')


cdef bint _is_sparse_array(object obj):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this be better-suited for pandas._libs.util? Or keep here since this is the only file using it and it's temporary?

Copy link
Member Author

@jorisvandenbossche jorisvandenbossche May 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, exactly for those reasons (It's only used here, and should be removed again once we get rid of this deprecation), I would keep it here (it's not mean to be a general utility)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not the right location — it should be in util.
Your argument is not correct; just because we will eventually remove it does not mean it should not live with similar code.

# TODO can be removed one SparseArray.values is removed (GH26421)
if hasattr(obj, '_subtyp'):
Copy link
Contributor

@jreback jreback May 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this idiom should be getattr

if obj._subtyp == 'sparse_array':
return True
return False


cdef class Reducer:
"""
Performs generic reduction operation on a C or Fortran-contiguous ndarray
Expand Down Expand Up @@ -146,7 +154,8 @@ cdef class Reducer:
else:
res = self.f(chunk)

if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if i == 0:
result = _get_result_array(res,
Expand Down Expand Up @@ -432,7 +441,8 @@ cdef class SeriesGrouper:
cdef inline _extract_result(object res):
""" extract the result object, it might be a 0-dim ndarray
or a len-1 0-dim, or a scalar """
if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if not np.isscalar(res):
if util.is_array(res):
Expand Down Expand Up @@ -635,8 +645,7 @@ def reduce(arr, f, axis=0, dummy=None, labels=None):
raise Exception('Cannot use shortcut')

# pass as an ndarray
if hasattr(labels, 'values'):
labels = labels.values
labels = values_from_object(labels)

reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
return reducer.get_result()
26 changes: 23 additions & 3 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,37 @@ static TypeContext *createTypeContext(void) {
return pc;
}


/*
 * Return 1 if obj is a pandas SparseArray, 0 otherwise.
 *
 * Detection is duck-typed: SparseArray exposes a `_subtyp` attribute equal
 * to "sparse_array".  Any error raised while probing is cleared so this
 * helper never leaves an exception pending.
 *
 * TODO can be removed again once SparseArray.values is removed (GH26421)
 */
static int is_sparse_array(PyObject *obj) {
    int result = 0;

    if (PyObject_HasAttrString(obj, "_subtyp")) {
        PyObject *_subtype = PyObject_GetAttrString(obj, "_subtyp");
        PyObject *sparse_array = PyUnicode_FromString("sparse_array");

        // PyUnicode_Compare returns -1 both for "less than" and on error;
        // only a clean 0 means equal, so errors fall through to result = 0
        if (_subtype != NULL && sparse_array != NULL &&
            PyUnicode_Compare(_subtype, sparse_array) == 0) {
            result = 1;
        }

        // original version leaked both temporaries on every call
        Py_XDECREF(_subtype);
        Py_XDECREF(sparse_array);

        if (PyErr_Occurred()) {
            PyErr_Clear();
        }
    }
    return result;
}


static PyObject *get_values(PyObject *obj) {
PyObject *values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
PyObject *values = NULL;

if (!is_sparse_array(obj)) {
values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
}

if (values && !PyArray_CheckExact(values)) {

if (PyObject_HasAttrString(values, "to_numpy")) {
values = PyObject_CallMethod(values, "to_numpy", NULL);
}

if (PyObject_HasAttrString(values, "values")) {
if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) {
PyObject *subvals = get_values(values);
PyErr_Clear();
PRINTMARK();
Expand Down
49 changes: 15 additions & 34 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
astype_nansafe, construct_1d_arraylike_from_scalar, find_common_type,
infer_dtype_from_scalar, maybe_convert_platform)
infer_dtype_from_scalar)
from pandas.core.dtypes.common import (
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
is_integer, is_list_like, is_object_dtype, is_scalar, is_string_dtype,
pandas_dtype)
is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.generic import (
ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
Expand Down Expand Up @@ -890,7 +889,16 @@ def npoints(self):
def values(self):
    """
    Dense values

    .. deprecated:: 0.25.0

        Use ``np.asarray(...)`` or the ``.to_dense()`` method instead.
    """
    # Emit the deprecation before densifying; stacklevel=2 points the
    # warning at the caller's attribute access rather than at this method.
    warnings.warn(
        "The SparseArray.values attribute is deprecated and will be "
        "removed in a future version. You can use `np.asarray(...)` or "
        "the `.to_dense()` method instead.",
        FutureWarning,
        stacklevel=2,
    )
    return self.to_dense()

def isna(self):
Expand Down Expand Up @@ -1076,7 +1084,7 @@ def __getitem__(self, key):
if is_integer(key):
return self._get_val_at(key)
elif isinstance(key, tuple):
data_slice = self.values[key]
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
# special case to preserve dtypes
if key == slice(None):
Expand Down Expand Up @@ -1635,7 +1643,7 @@ def __array_wrap__(self, array, context=None):
from pandas.core.dtypes.generic import ABCSparseSeries

ufunc, inputs, _ = context
inputs = tuple(x.values if isinstance(x, ABCSparseSeries) else x
inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x
for x in inputs)
return self.__array_ufunc__(ufunc, '__call__', *inputs)

Expand Down Expand Up @@ -1854,37 +1862,10 @@ def _maybe_to_sparse(array):
array must be SparseSeries or SparseArray
"""
if isinstance(array, ABCSparseSeries):
array = array.values.copy()
array = array.array.copy()
return array


def _sanitize_values(arr):
    """
    Return an ndarray for our input, in a platform independent manner.
    """
    # pandas containers expose the underlying ndarray via .values
    if hasattr(arr, 'values'):
        return arr.values

    # promote a bare scalar to a one-element list so the branches below
    # always see a sequence
    if is_scalar(arr):
        arr = [arr]

    if isinstance(arr, np.ndarray):
        # already an ndarray: nothing to do
        return arr

    if is_list_like(arr) and len(arr) > 0:
        # platform-independent dtype conversion for non-empty sequences
        return maybe_convert_platform(arr)

    return np.asarray(arr)


def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
"""
Convert ndarray to sparse format
Expand All @@ -1902,7 +1883,7 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
(sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)
"""

arr = _sanitize_values(arr)
arr = com.values_from_object(arr)

if arr.ndim > 1:
raise TypeError("expected dimension <= 1 data")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
# with a .values attribute.
aligned_args = {k: kwargs[k]
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray)}
if not isinstance(kwargs[k], ABCExtensionArray) and
hasattr(kwargs[k], 'values')}

for b in self.blocks:
if filter is not None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2272,10 +2272,10 @@ def _cast_sparse_series_op(left, right, opname):
# TODO: This should be moved to the array?
if is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if opname in ('floordiv', 'mod') and (right.values == 0).any():
if opname in ('floordiv', 'mod') and (right.to_dense() == 0).any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we not be using np.asarray generally, rather than .to_dense()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are equivalent (although to_dense actually does a bit less as it specified the dtype and asarray does some inference (not sure for that difference though)).

left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))
elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
elif opname in ('rfloordiv', 'rmod') and (left.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
# .take returns SparseArray
new = values.take(indexer)
if need_mask:
new = new.values
new = new.to_dense()
# convert integer to float if necessary. need to do a lot
# more than that, handle boolean etc also
new, fill_value = maybe_upcast(new, fill_value=fill_value)
Expand Down
44 changes: 26 additions & 18 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,9 +433,9 @@ def test_constructor_bool(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)

def test_constructor_bool_fill_value(self):
arr = SparseArray([True, False, True], dtype=None)
Expand Down Expand Up @@ -463,9 +463,9 @@ def test_constructor_float32(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)

def test_astype(self):
# float -> float
Expand Down Expand Up @@ -514,7 +514,7 @@ def test_astype_all(self, any_real_dtype):
assert res.dtype == SparseDtype(typ, 1)
assert res.sp_values.dtype == typ

tm.assert_numpy_array_equal(np.asarray(res.values),
tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
vals.astype(typ))

@pytest.mark.parametrize('array, dtype, expected', [
Expand Down Expand Up @@ -596,7 +596,6 @@ def test_copy_shallow(self):
assert arr2.sp_index is self.arr.sp_index

def test_values_asarray(self):
assert_almost_equal(self.arr.values, self.arr_data)
assert_almost_equal(self.arr.to_dense(), self.arr_data)

@pytest.mark.parametrize('data,shape,dtype', [
Expand Down Expand Up @@ -627,7 +626,7 @@ def test_dense_repr(self, vals, fill_value, method):

def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

for i in range(len(self.arr)):
_checkit(i)
Expand All @@ -641,11 +640,11 @@ def test_getitem_arraylike_mask(self):

def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.values[:-3])
exp = SparseArray(self.arr.to_dense()[:-3])
tm.assert_sp_array_equal(result, exp)

result = self.arr[-4:]
exp = SparseArray(self.arr.values[-4:])
exp = SparseArray(self.arr.to_dense()[-4:])
tm.assert_sp_array_equal(result, exp)

# two corner cases from Series
Expand All @@ -654,7 +653,7 @@ def test_getslice(self):
tm.assert_sp_array_equal(result, exp)

result = self.arr[:-12]
exp = SparseArray(self.arr.values[:0])
exp = SparseArray(self.arr.to_dense()[:0])
tm.assert_sp_array_equal(result, exp)

def test_getslice_tuple(self):
Expand Down Expand Up @@ -702,16 +701,16 @@ def test_binary_operators(self, op):

def _check_op(op, first, second):
res = op(first, second)
exp = SparseArray(op(first.values, second.values),
exp = SparseArray(op(first.to_dense(), second.to_dense()),
fill_value=first.fill_value)
assert isinstance(res, SparseArray)
assert_almost_equal(res.values, exp.values)
assert_almost_equal(res.to_dense(), exp.to_dense())

res2 = op(first, second.values)
res2 = op(first, second.to_dense())
assert isinstance(res2, SparseArray)
tm.assert_sp_array_equal(res, res2)

res3 = op(first.values, second)
res3 = op(first.to_dense(), second)
assert isinstance(res3, SparseArray)
tm.assert_sp_array_equal(res, res3)

Expand All @@ -720,13 +719,13 @@ def _check_op(op, first, second):

# Ignore this if the actual op raises (e.g. pow).
try:
exp = op(first.values, 4)
exp = op(first.to_dense(), 4)
exp_fv = op(first.fill_value, 4)
except ValueError:
pass
else:
assert_almost_equal(res4.fill_value, exp_fv)
assert_almost_equal(res4.values, exp)
assert_almost_equal(res4.to_dense(), exp)

with np.errstate(all="ignore"):
for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
Expand Down Expand Up @@ -1230,3 +1229,12 @@ def test_map_missing():

result = arr.map({0: 10, 1: 11})
tm.assert_sp_array_equal(result, expected)


def test_deprecated_values():
    # GH 26421: accessing SparseArray.values must raise a FutureWarning
    # and still return the dense representation
    sparse = SparseArray([0, 1, 2])

    with tm.assert_produces_warning(FutureWarning):
        dense = sparse.values

    tm.assert_numpy_array_equal(dense, sparse.to_dense())
6 changes: 3 additions & 3 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_constructor(self):
assert isinstance(self.iseries.sp_index, IntIndex)

assert self.zbseries.fill_value == 0
tm.assert_numpy_array_equal(self.zbseries.values.values,
tm.assert_numpy_array_equal(self.zbseries.values.to_dense(),
self.bseries.to_dense().fillna(0).values)

# pass SparseSeries
Expand Down Expand Up @@ -322,7 +322,7 @@ def test_constructor_ndarray(self):
def test_constructor_nonnan(self):
arr = [0, 0, 0, nan, nan]
sp_series = SparseSeries(arr, fill_value=0)
tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
tm.assert_numpy_array_equal(sp_series.values.to_dense(), np.array(arr))
assert len(sp_series) == 5
assert sp_series.shape == (5, )

Expand Down Expand Up @@ -514,7 +514,7 @@ def _compare(idx):
sparse_result = sp.take(idx)
assert isinstance(sparse_result, SparseSeries)
tm.assert_almost_equal(dense_result,
sparse_result.values.values)
sparse_result.values.to_dense())

_compare([1., 2., 3., 4., 5., 0.])
_compare([7, 2, 9, 0, 4])
Expand Down
2 changes: 1 addition & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,7 @@ def assert_sp_array_equal(left, right, check_dtype=True, check_kind=True,
assert_attr_equal('fill_value', left, right)
if check_dtype:
assert_attr_equal('dtype', left, right)
assert_numpy_array_equal(left.values, right.values,
assert_numpy_array_equal(left.to_dense(), right.to_dense(),
check_dtype=check_dtype)


Expand Down
Loading