Skip to content

Commit

Permalink
DEPR: is_period_dtype, is_sparse (#52642)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 22, 2023
1 parent 681af4c commit e38d786
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 42 deletions.
4 changes: 4 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5239,6 +5239,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
Write to a feather file.

.. ipython:: python
   :okwarning:

   df.to_feather("example.feather")
Expand Down Expand Up @@ -5382,6 +5383,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
more columns in the output file. Thus, this code:

.. ipython:: python
   :okwarning:

   df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
   df.to_parquet("test.parquet", engine="pyarrow")
Expand All @@ -5398,6 +5400,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
:func:`~pandas.DataFrame.to_parquet`:

.. ipython:: python
   :okwarning:

   df.to_parquet("test.parquet", index=False)
Expand All @@ -5420,6 +5423,7 @@ Partitioning Parquet files
Parquet supports partitioning of data based on the values of one or more columns.

.. ipython:: python
   :okwarning:

   df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]})
   df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None)
Expand Down
3 changes: 3 additions & 0 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Suppose our raw dataset on disk has many columns::
That can be generated by the following code snippet:

.. ipython:: python
   :okwarning:

   import pandas as pd
   import numpy as np
Expand Down Expand Up @@ -106,6 +107,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you
can store larger datasets in memory.

.. ipython:: python
   :okwarning:

   ts = make_timeseries(freq="30S", seed=0)
   ts.to_parquet("timeseries.parquet")
Expand Down Expand Up @@ -183,6 +185,7 @@ Suppose we have an even larger "logical dataset" on disk that's a directory of p
files. Each file in the directory represents a different year of the entire dataset.

.. ipython:: python
   :okwarning:

   import pathlib
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,7 @@ As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
**New behavior**:

.. ipython:: python
   :okwarning:

   pi = pd.PeriodIndex(["2016-08-01"], freq="D")
   pi
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ Deprecations
- Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
- Deprecated :func:`is_interval_dtype`, check ``isinstance(dtype, pd.IntervalDtype)`` instead (:issue:`52607`)
- Deprecated :func:`is_period_dtype`, check ``isinstance(dtype, pd.PeriodDtype)`` instead (:issue:`52642`)
- Deprecated :func:`is_sparse`, check ``isinstance(dtype, pd.SparseDtype)`` instead (:issue:`52642`)
- Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
Expand Down
4 changes: 3 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def pytest_collection_modifyitems(items, config) -> None:
ignored_doctest_warnings = [
("is_int64_dtype", "is_int64_dtype is deprecated"),
("is_interval_dtype", "is_interval_dtype is deprecated"),
("is_period_dtype", "is_period_dtype is deprecated"),
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
# Docstring divides by zero to show behavior difference
("missing.mask_zero_div_zero", "divide by zero encountered"),
(
Expand All @@ -149,7 +152,6 @@ def pytest_collection_modifyitems(items, config) -> None:
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
]

for item in items:
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
is_bool_dtype,
is_dtype_equal,
is_float_dtype,
is_sparse,
is_string_dtype,
pandas_dtype,
)
Expand All @@ -65,6 +64,7 @@

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_regular_range
from pandas.core.arrays.sparse.dtype import SparseDtype
import pandas.core.common as com

from pandas.tseries.frequencies import get_period_alias
Expand Down Expand Up @@ -2038,7 +2038,11 @@ def _sequence_to_dt64ns(
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")

if data_dtype == object or is_string_dtype(data_dtype) or is_sparse(data_dtype):
if (
data_dtype == object
or is_string_dtype(data_dtype)
or isinstance(data_dtype, SparseDtype)
):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
from pandas.core.dtypes.common import (
ensure_object,
is_dtype_equal,
is_period_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -172,7 +171,9 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
_typ = "periodarray" # ABCPeriodArray
_internal_fill_value = np.int64(iNaT)
_recognized_scalars = (Period,)
_is_recognized_dtype = is_period_dtype # check_compatible_with checks freq match
_is_recognized_dtype = lambda x: isinstance(
x, PeriodDtype
) # check_compatible_with checks freq match
_infer_matches = ("period",)

@property
Expand Down
33 changes: 22 additions & 11 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ def is_sparse(arr) -> bool:
Returns `False` if the parameter has more than one dimension.
"""
warnings.warn(
"is_sparse is deprecated and will be removed in a future "
"version. Check `isinstance(dtype, pd.SparseDtype)` instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
from pandas.core.arrays.sparse import SparseDtype

dtype = getattr(arr, "dtype", arr)
Expand Down Expand Up @@ -399,6 +405,12 @@ def is_period_dtype(arr_or_dtype) -> bool:
>>> is_period_dtype(pd.PeriodIndex([], freq="A"))
True
"""
warnings.warn(
"is_period_dtype is deprecated and will be removed in a future version. "
"Use `isinstance(dtype, pd.PeriodDtype)` instead",
FutureWarning,
stacklevel=find_stack_level(),
)
if isinstance(arr_or_dtype, ExtensionDtype):
# GH#33400 fastpath for dtype object
return arr_or_dtype.type is Period
Expand Down Expand Up @@ -539,7 +551,7 @@ def is_string_dtype(arr_or_dtype) -> bool:
>>> is_string_dtype(pd.Series([1, 2], dtype=object))
False
"""
if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
return is_all_strings(arr_or_dtype)

def condition(dtype) -> bool:
Expand Down Expand Up @@ -585,7 +597,7 @@ def is_dtype_equal(source, target) -> bool:
# GH#38516 ensure we get the same behavior from
# is_dtype_equal(CDT, "category") and CDT == "category"
try:
src = get_dtype(source)
src = _get_dtype(source)
if isinstance(src, ExtensionDtype):
return src == target
except (TypeError, AttributeError, ImportError):
Expand All @@ -594,8 +606,8 @@ def is_dtype_equal(source, target) -> bool:
return is_dtype_equal(target, source)

try:
source = get_dtype(source)
target = get_dtype(target)
source = _get_dtype(source)
target = _get_dtype(target)
return source == target
except (TypeError, AttributeError, ImportError):
# invalid comparison
Expand Down Expand Up @@ -875,7 +887,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool:
return False

try:
tipo = get_dtype(arr_or_dtype)
tipo = _get_dtype(arr_or_dtype)
except TypeError:
return False
return (isinstance(tipo, np.dtype) and tipo.kind == "M") or isinstance(
Expand Down Expand Up @@ -923,7 +935,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
if arr_or_dtype is None:
return False
try:
tipo = get_dtype(arr_or_dtype)
tipo = _get_dtype(arr_or_dtype)
except TypeError:
return False
return tipo == DT64NS_DTYPE or (
Expand Down Expand Up @@ -1214,7 +1226,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
if arr_or_dtype is None:
return False
try:
dtype = get_dtype(arr_or_dtype)
dtype = _get_dtype(arr_or_dtype)
except (TypeError, ValueError):
return False

Expand Down Expand Up @@ -1373,13 +1385,13 @@ def _is_dtype(arr_or_dtype, condition) -> bool:
if arr_or_dtype is None:
return False
try:
dtype = get_dtype(arr_or_dtype)
dtype = _get_dtype(arr_or_dtype)
except (TypeError, ValueError):
return False
return condition(dtype)


def get_dtype(arr_or_dtype) -> DtypeObj:
def _get_dtype(arr_or_dtype) -> DtypeObj:
"""
Get the dtype instance associated with an array
or dtype object.
Expand Down Expand Up @@ -1510,7 +1522,7 @@ def infer_dtype_from_object(dtype) -> type:
try:
return infer_dtype_from_object(getattr(np, dtype))
except (AttributeError, TypeError):
# Handles cases like get_dtype(int) i.e.,
# Handles cases like _get_dtype(int) i.e.,
# Python objects that are valid dtypes
# (unlike user-defined types, in general)
#
Expand Down Expand Up @@ -1653,7 +1665,6 @@ def is_all_strings(value: ArrayLike) -> bool:
"ensure_float64",
"ensure_python_int",
"ensure_str",
"get_dtype",
"infer_dtype_from_object",
"INT64_DTYPE",
"is_1d_only_ea_dtype",
Expand Down
29 changes: 17 additions & 12 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def test_get_dtype_error_catch(func):
or func is com.is_interval_dtype
or func is com.is_datetime64tz_dtype
or func is com.is_categorical_dtype
or func is com.is_period_dtype
):
warn = FutureWarning

Expand All @@ -197,14 +198,16 @@ def test_is_object():
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_is_sparse(check_scipy):
assert com.is_sparse(SparseArray([1, 2, 3]))
msg = "is_sparse is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert com.is_sparse(SparseArray([1, 2, 3]))

assert not com.is_sparse(np.array([1, 2, 3]))
assert not com.is_sparse(np.array([1, 2, 3]))

if check_scipy:
import scipy.sparse
if check_scipy:
import scipy.sparse

assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))


@td.skip_if_no_scipy
Expand Down Expand Up @@ -264,12 +267,14 @@ def test_is_timedelta64_dtype():


def test_is_period_dtype():
assert not com.is_period_dtype(object)
assert not com.is_period_dtype([1, 2, 3])
assert not com.is_period_dtype(pd.Period("2017-01-01"))
msg = "is_period_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not com.is_period_dtype(object)
assert not com.is_period_dtype([1, 2, 3])
assert not com.is_period_dtype(pd.Period("2017-01-01"))

assert com.is_period_dtype(PeriodDtype(freq="D"))
assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
assert com.is_period_dtype(PeriodDtype(freq="D"))
assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))


def test_is_interval_dtype():
Expand Down Expand Up @@ -681,7 +686,7 @@ def test_is_complex_dtype():
],
)
def test_get_dtype(input_param, result):
assert com.get_dtype(input_param) == result
assert com._get_dtype(input_param) == result


@pytest.mark.parametrize(
Expand All @@ -700,7 +705,7 @@ def test_get_dtype_fails(input_param, expected_error_message):
# 2020-02-02 npdev changed error message
expected_error_message += f"|Cannot interpret '{input_param}' as a data type"
with pytest.raises(TypeError, match=expected_error_message):
com.get_dtype(input_param)
com._get_dtype(input_param)


@pytest.mark.parametrize(
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,10 @@ def test_construction(self):
for s in ["period[D]", "Period[D]", "D"]:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Day()
assert is_period_dtype(dt)

for s in ["period[3D]", "Period[3D]", "3D"]:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Day(3)
assert is_period_dtype(dt)

for s in [
"period[26H]",
Expand All @@ -444,7 +442,6 @@ def test_construction(self):
]:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Hour(26)
assert is_period_dtype(dt)

def test_cannot_use_custom_businessday(self):
# GH#52534
Expand Down Expand Up @@ -530,20 +527,22 @@ def test_equality(self, dtype):
assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D"))

def test_basic(self, dtype):
assert is_period_dtype(dtype)
msg = "is_period_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert is_period_dtype(dtype)

pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")

assert is_period_dtype(pidx.dtype)
assert is_period_dtype(pidx)
assert is_period_dtype(pidx.dtype)
assert is_period_dtype(pidx)

s = Series(pidx, name="A")
s = Series(pidx, name="A")

assert is_period_dtype(s.dtype)
assert is_period_dtype(s)
assert is_period_dtype(s.dtype)
assert is_period_dtype(s)

assert not is_period_dtype(np.dtype("float64"))
assert not is_period_dtype(1.0)
assert not is_period_dtype(np.dtype("float64"))
assert not is_period_dtype(1.0)

def test_freq_argument_required(self):
# GH#27388
Expand Down Expand Up @@ -1132,6 +1131,7 @@ def test_is_dtype_no_warning(check):
check is is_categorical_dtype
or check is is_interval_dtype
or check is is_datetime64tz_dtype
or check is is_period_dtype
):
warn = FutureWarning

Expand Down
Loading

0 comments on commit e38d786

Please sign in to comment.