Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: deprecate get_values #26409

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,9 @@ Other deprecations
Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).
- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version.
Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`).
- The :meth:`Series.get_values`, :meth:`DataFrame.get_values`, :meth:`Index.get_values`,
:meth:`SparseArray.get_values` and :meth:`Categorical.get_values` methods are deprecated.
One of ``np.asarray(..)`` or :meth:`~Series.to_numpy` can be used instead (:issue:`19617`).
- :meth:`Timedelta.resolution` is deprecated and replaced with :meth:`Timedelta.resolution_string`. In a future version, :meth:`Timedelta.resolution` will be changed to behave like the standard library :attr:`timedelta.resolution` (:issue:`21344`).
- :func:`read_table` has been undeprecated. (:issue:`25220`)
- :attr:`Index.dtype_str` is deprecated. (:issue:`18262`)
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ def values_from_object(obj: object):
""" return my values or the object if we are say an ndarray """
func: object

func = getattr(obj, 'get_values', None)
if getattr(obj, '_typ', '') == 'dataframe':
return obj.values

func = getattr(obj, '_internal_get_values', None)
if func is not None:
obj = func()

Expand Down
14 changes: 12 additions & 2 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,19 @@ static PyObject *get_values(PyObject *obj) {
}
}

if (!values && PyObject_HasAttrString(obj, "get_values")) {
if (!values && PyObject_HasAttrString(obj, "_internal_get_values")) {
PRINTMARK();
values = PyObject_CallMethod(obj, "get_values", NULL);
values = PyObject_CallMethod(obj, "_internal_get_values", NULL);
if (values && !PyArray_CheckExact(values)) {
PRINTMARK();
Py_DECREF(values);
values = NULL;
}
}

if (!values && PyObject_HasAttrString(obj, "get_block_values")) {
PRINTMARK();
values = PyObject_CallMethod(obj, "get_block_values", NULL);
if (values && !PyArray_CheckExact(values)) {
PRINTMARK();
Py_DECREF(values);
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,7 +1590,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

if is_sparse(arr):
arr = arr.get_values()
arr = arr.to_dense()
elif isinstance(arr, (ABCIndexClass, ABCSeries)):
arr = arr.values

Expand Down
9 changes: 8 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,8 @@ def get_values(self):
"""
Return the values.

.. deprecated:: 0.25.0

For internal compatibility with pandas formatting.

Returns
Expand All @@ -1491,6 +1493,11 @@ def get_values(self):
A numpy array of the same dtype as categorical.categories.dtype or
Index if datetime / periods.
"""
warn("The 'get_values' method is deprecated and will be removed in a "
"future version", FutureWarning, stacklevel=2)
return self._internal_get_values()

def _internal_get_values(self):
# if we are a datetime and period index, return Index to keep metadata
if is_datetimelike(self.categories):
return self.categories.take(self._codes, fill_value=np.nan)
Expand Down Expand Up @@ -1923,7 +1930,7 @@ def __iter__(self):
"""
Returns an Iterator over the values of this Categorical.
"""
return iter(self.get_values().tolist())
return iter(self._internal_get_values().tolist())

def __contains__(self, key):
"""
Expand Down
19 changes: 16 additions & 3 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def _sparse_array_op(

if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
with np.errstate(all='ignore'):
result = op(left.get_values(), right.get_values())
result = op(left.to_dense(), right.to_dense())
fill = op(_get_fill(left), _get_fill(right))

if left.sp_index.ngaps == 0:
Expand Down Expand Up @@ -1468,8 +1468,21 @@ def to_dense(self):
"""
return np.asarray(self, dtype=self.sp_values.dtype)

# TODO: Look into deprecating this in favor of `to_dense`.
get_values = to_dense
def get_values(self):
"""
Convert SparseArray to a NumPy array.

.. deprecated:: 0.25.0
Use `to_dense` instead.

"""
warnings.warn(
"The 'get_values' method is deprecated and will be removed in a "
"future version. Use the 'to_dense' method instead.",
FutureWarning, stacklevel=2)
return self._internal_get_values()

_internal_get_values = to_dense

# ------------------------------------------------------------------------
# IO
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _concat_categorical(to_concat, axis=0):
return union_categoricals(categoricals)

# extract the categoricals & coerce to object if needed
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
to_concat = [x._internal_get_values() if is_categorical_dtype(x.dtype)
else np.asarray(x).ravel() if not is_datetime64tz_dtype(x)
else np.asarray(x.astype(object)) for x in to_concat]
result = _concat_compat(to_concat)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1616,7 +1616,8 @@ def to_records(self, index=True, convert_datetime64=None,
else:
ix_vals = [self.index.values]

arrays = ix_vals + [self[c].get_values() for c in self.columns]
arrays = ix_vals + [self[c]._internal_get_values()
for c in self.columns]

count = 0
index_names = list(self.index.names)
Expand All @@ -1632,7 +1633,7 @@ def to_records(self, index=True, convert_datetime64=None,
names = [str(name) for name in itertools.chain(index_names,
self.columns)]
else:
arrays = [self[c].get_values() for c in self.columns]
arrays = [self[c]._internal_get_values() for c in self.columns]
names = [str(c) for c in self.columns]
index_names = []

Expand Down
10 changes: 10 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5220,6 +5220,9 @@ def get_values(self):
"""
Return an ndarray after converting sparse values to dense.

.. deprecated:: 0.25.0
Use ``np.asarray(..)`` or :attr:`DataFrame.values` instead.

This is the same as ``.values`` for non-sparse data. For sparse
data contained in a `SparseArray`, the data are first
converted to a dense representation.
Expand Down Expand Up @@ -5259,6 +5262,13 @@ def get_values(self):
[nan, 2.],
[nan, 3.]])
"""
warnings.warn(
"The 'get_values' method is deprecated and will be removed in a "
"future version. Use '.values' or 'np.asarray(..)' instead.",
FutureWarning, stacklevel=2)
return self._internal_get_values()

def _internal_get_values(self):
return self.values

def get_dtype_counts(self):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,7 +1118,7 @@ def nunique(self, dropna=True):
"""
ids, _, _ = self.grouper.group_info

val = self.obj.get_values()
val = self.obj._internal_get_values()

try:
sorter = np.lexsort((val, ids))
Expand Down Expand Up @@ -1192,7 +1192,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
bins=bins)

ids, _, _ = self.grouper.group_info
val = self.obj.get_values()
val = self.obj._internal_get_values()

# groupby removes null keys from groupings
mask = ids != -1
Expand Down Expand Up @@ -1306,7 +1306,7 @@ def count(self):
Count of values within each group.
"""
ids, _, ngroups = self.grouper.group_info
val = self.obj.get_values()
val = self.obj._internal_get_values()

mask = (ids != -1) & ~isna(val)
ids = ensure_platform_int(ids)
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3764,6 +3764,9 @@ def get_values(self):
"""
Return `Index` data as a `numpy.ndarray`.

.. deprecated:: 0.25.0
Use :meth:`Index.to_numpy` or :attr:`Index.array` instead.

Returns
-------
numpy.ndarray
Expand Down Expand Up @@ -3802,6 +3805,13 @@ def get_values(self):
>>> midx.get_values().ndim
1
"""
warnings.warn(
"The 'get_values' method is deprecated and will be removed in a "
"future version. Use '.to_numpy()' or '.array' instead.",
FutureWarning, stacklevel=2)
return self._internal_get_values()

def _internal_get_values(self):
return self.values

@Appender(IndexOpsMixin.memory_usage.__doc__)
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,10 @@ def _wrap_setop_result(self, other, result):
name = get_op_result_name(self, other)
return self._shallow_copy(result, name=name)

def get_values(self):
""" return the underlying data as an ndarray """
return self._data.get_values()
def _internal_get_values(self):
# override base Index version to get the numpy array representation of
# the underlying Categorical
return self._data._internal_get_values()

def tolist(self):
return self._data.tolist()
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1246,7 +1246,7 @@ def values(self):
for i in range(self.nlevels):
vals = self._get_level_values(i)
if is_categorical_dtype(vals):
vals = vals.get_values()
vals = vals._internal_get_values()
if (isinstance(vals.dtype, ExtensionDtype)
or hasattr(vals, '_box_values')):
vals = vals.astype(object)
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ def get_values(self, dtype=None):
return self.values.astype(object)
return self.values

def get_block_values(self, dtype=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we really need this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this still needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the JSON C code does handle blocks (if I remove the get_block_values part in objToJSON.c that I introduced in this PR, a couple of tests fail).
I could give this the same name as for Series/Index (i.e. _internal_get_values), but I prefer a distinct name to make it clear that the JSON code is handling blocks and not Series/Index (and that also makes it clear that all other places where _internal_get_values is used are not handling blocks). That will also make it easier to isolate and try to remove the block handling in the JSON C code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #27164 for follow-up on this JSON issue

"""
This is used in the JSON C code
"""
return self.get_values(dtype=dtype)

def to_dense(self):
return self.values.view()

Expand Down Expand Up @@ -2921,7 +2927,7 @@ def to_dense(self):
# Categorical.get_values returns a DatetimeIndex for datetime
# categories, so we can't simply use `np.asarray(self.values)` like
# other types.
return self.values.get_values()
return self.values._internal_get_values()

def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
Expand Down Expand Up @@ -3222,7 +3228,7 @@ def _putmask_preserve(nv, n):
dtype, _ = maybe_promote(n.dtype)

if is_extension_type(v.dtype) and is_object_dtype(dtype):
v = v.get_values(dtype)
v = v._internal_get_values(dtype)
else:
v = v.astype(dtype)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1891,7 +1891,7 @@ def wrapper(self, other, axis=None):
name=res_name, dtype='bool')

else:
values = self.get_values()
values = self.to_numpy()

with np.errstate(all='ignore'):
res = na_op(values, other)
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,11 +506,21 @@ def get_values(self):
"""
Same as values (but handles sparseness conversions); is a view.

.. deprecated:: 0.25.0
Use :meth:`Series.to_numpy` or :attr:`Series.array` instead.

Returns
-------
numpy.ndarray
Data of the Series.
"""
warnings.warn(
"The 'get_values' method is deprecated and will be removed in a "
"future version. Use '.to_numpy()' or '.array' instead.",
FutureWarning, stacklevel=2)
return self._internal_get_values()

def _internal_get_values(self):
return self._data.get_values()

@property
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def xs(self, key, axis=0, copy=False):
return data

i = self.index.get_loc(key)
data = self.take([i]).get_values()[0]
data = self.take([i])._internal_get_values()[0]
return Series(data, index=self.columns)

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -694,9 +694,10 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
if col not in self:
continue
if row_indexer is not None:
new_arrays[col] = algos.take_1d(self[col].get_values(),
row_indexer,
fill_value=fill_value)
new_arrays[col] = algos.take_1d(
self[col]._internal_get_values(),
row_indexer,
fill_value=fill_value)
else:
new_arrays[col] = self[col]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def __repr__(self):
def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
filter_type=None, **kwds):
""" perform a reduction operation """
return op(self.get_values(), skipna=skipna, **kwds)
return op(self.array.to_dense(), skipna=skipna, **kwds)

def __getstate__(self):
# pickling
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _get_footer(self):
return str(footer)

def _get_formatted_values(self):
return format_array(self.categorical.get_values(), None,
return format_array(self.categorical._internal_get_values(), None,
float_format=None, na_rep=self.na_rep)

def to_string(self):
Expand Down Expand Up @@ -1196,7 +1196,7 @@ def _format_strings(self):

if is_categorical_dtype(values.dtype):
# Categorical is special for now, so that we can preserve tzinfo
array = values.get_values()
array = values._internal_get_values()
else:
array = np.asarray(values)

Expand Down
Loading