DEPR: is_period_dtype, is_sparse (#52642)

pandas-dev · Apr 22, 2023 · e38d786 · e38d786
1 parent 681af4c
commit e38d786
Show file tree

Hide file tree

Showing 11 changed files with 74 additions and 42 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -5239,6 +5239,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
 Write to a feather file.
 
 .. ipython:: python
+   :okwarning:
 
    df.to_feather("example.feather")
 
@@ -5382,6 +5383,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
 more columns in the output file. Thus, this code:
 
 .. ipython:: python
+   :okwarning:
 
     df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
     df.to_parquet("test.parquet", engine="pyarrow")
@@ -5398,6 +5400,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
 :func:`~pandas.DataFrame.to_parquet`:
 
 .. ipython:: python
+   :okwarning:
 
     df.to_parquet("test.parquet", index=False)
 
@@ -5420,6 +5423,7 @@ Partitioning Parquet files
 Parquet supports partitioning of data based on the values of one or more columns.
 
 .. ipython:: python
+   :okwarning:
 
     df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]})
     df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None)

diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
@@ -42,6 +42,7 @@ Suppose our raw dataset on disk has many columns::
 That can be generated by the following code snippet:
 
 .. ipython:: python
+   :okwarning:
 
    import pandas as pd
    import numpy as np
@@ -106,6 +107,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you
 can store larger datasets in memory.
 
 .. ipython:: python
+   :okwarning:
 
    ts = make_timeseries(freq="30S", seed=0)
    ts.to_parquet("timeseries.parquet")
@@ -183,6 +185,7 @@ Suppose we have an even larger "logical dataset" on disk that's a directory of p
 files. Each file in the directory represents a different year of the entire dataset.
 
 .. ipython:: python
+   :okwarning:
 
    import pathlib
 

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -905,6 +905,7 @@ As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
 **New behavior**:
 
 .. ipython:: python
+   :okwarning:
 
    pi = pd.PeriodIndex(["2016-08-01"], freq="D")
    pi

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -234,6 +234,8 @@ Deprecations
 - Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
 - Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
 - Deprecated :func:`is_interval_dtype`, check ``isinstance(dtype, pd.IntervalDtype)`` instead (:issue:`52607`)
+- Deprecated :func:`is_period_dtype`, check ``isinstance(dtype, pd.PeriodDtype)`` instead (:issue:`52642`)
+- Deprecated :func:`is_sparse`, check ``isinstance(dtype, pd.SparseDtype)`` instead (:issue:`52642`)
 - Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
 - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
 - Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -137,7 +137,10 @@ def pytest_collection_modifyitems(items, config) -> None:
     ignored_doctest_warnings = [
         ("is_int64_dtype", "is_int64_dtype is deprecated"),
         ("is_interval_dtype", "is_interval_dtype is deprecated"),
+        ("is_period_dtype", "is_period_dtype is deprecated"),
         ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
+        ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
+        ("is_sparse", "is_sparse is deprecated"),
         # Docstring divides by zero to show behavior difference
         ("missing.mask_zero_div_zero", "divide by zero encountered"),
         (
@@ -149,7 +152,6 @@ def pytest_collection_modifyitems(items, config) -> None:
             "(Series|DataFrame).bool is now deprecated and will be removed "
             "in future version of pandas",
         ),
-        ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
     ]
 
     for item in items:

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -52,7 +52,6 @@
     is_bool_dtype,
     is_dtype_equal,
     is_float_dtype,
-    is_sparse,
     is_string_dtype,
     pandas_dtype,
 )
@@ -65,6 +64,7 @@
 
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
+from pandas.core.arrays.sparse.dtype import SparseDtype
 import pandas.core.common as com
 
 from pandas.tseries.frequencies import get_period_alias
@@ -2038,7 +2038,11 @@ def _sequence_to_dt64ns(
     if out_unit is not None:
         out_dtype = np.dtype(f"M8[{out_unit}]")
 
-    if data_dtype == object or is_string_dtype(data_dtype) or is_sparse(data_dtype):
+    if (
+        data_dtype == object
+        or is_string_dtype(data_dtype)
+        or isinstance(data_dtype, SparseDtype)
+    ):
         # TODO: We do not have tests specific to string-dtypes,
         #  also complex or categorical or other extension
         copy = False

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -56,7 +56,6 @@
 from pandas.core.dtypes.common import (
     ensure_object,
     is_dtype_equal,
-    is_period_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
@@ -172,7 +171,9 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):  # type: ignore[misc]
     _typ = "periodarray"  # ABCPeriodArray
     _internal_fill_value = np.int64(iNaT)
     _recognized_scalars = (Period,)
-    _is_recognized_dtype = is_period_dtype  # check_compatible_with checks freq match
+    _is_recognized_dtype = lambda x: isinstance(
+        x, PeriodDtype
+    )  # check_compatible_with checks freq match
     _infer_matches = ("period",)
 
     @property

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -207,6 +207,12 @@ def is_sparse(arr) -> bool:
 
     Returns `False` if the parameter has more than one dimension.
     """
+    warnings.warn(
+        "is_sparse is deprecated and will be removed in a future "
+        "version. Check `isinstance(dtype, pd.SparseDtype)` instead.",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
     from pandas.core.arrays.sparse import SparseDtype
 
     dtype = getattr(arr, "dtype", arr)
@@ -399,6 +405,12 @@ def is_period_dtype(arr_or_dtype) -> bool:
     >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
     True
     """
+    warnings.warn(
+        "is_period_dtype is deprecated and will be removed in a future version. "
+        "Use `isinstance(dtype, pd.PeriodDtype)` instead",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
     if isinstance(arr_or_dtype, ExtensionDtype):
         # GH#33400 fastpath for dtype object
         return arr_or_dtype.type is Period
@@ -539,7 +551,7 @@ def is_string_dtype(arr_or_dtype) -> bool:
     >>> is_string_dtype(pd.Series([1, 2], dtype=object))
     False
     """
-    if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
+    if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
         return is_all_strings(arr_or_dtype)
 
     def condition(dtype) -> bool:
@@ -585,7 +597,7 @@ def is_dtype_equal(source, target) -> bool:
             # GH#38516 ensure we get the same behavior from
             #  is_dtype_equal(CDT, "category") and CDT == "category"
             try:
-                src = get_dtype(source)
+                src = _get_dtype(source)
                 if isinstance(src, ExtensionDtype):
                     return src == target
             except (TypeError, AttributeError, ImportError):
@@ -594,8 +606,8 @@ def is_dtype_equal(source, target) -> bool:
         return is_dtype_equal(target, source)
 
     try:
-        source = get_dtype(source)
-        target = get_dtype(target)
+        source = _get_dtype(source)
+        target = _get_dtype(target)
         return source == target
     except (TypeError, AttributeError, ImportError):
         # invalid comparison
@@ -875,7 +887,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool:
         return False
 
     try:
-        tipo = get_dtype(arr_or_dtype)
+        tipo = _get_dtype(arr_or_dtype)
     except TypeError:
         return False
     return (isinstance(tipo, np.dtype) and tipo.kind == "M") or isinstance(
@@ -923,7 +935,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        tipo = get_dtype(arr_or_dtype)
+        tipo = _get_dtype(arr_or_dtype)
     except TypeError:
         return False
     return tipo == DT64NS_DTYPE or (
@@ -1214,7 +1226,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        dtype = get_dtype(arr_or_dtype)
+        dtype = _get_dtype(arr_or_dtype)
     except (TypeError, ValueError):
         return False
 
@@ -1373,13 +1385,13 @@ def _is_dtype(arr_or_dtype, condition) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        dtype = get_dtype(arr_or_dtype)
+        dtype = _get_dtype(arr_or_dtype)
     except (TypeError, ValueError):
         return False
     return condition(dtype)
 
 
-def get_dtype(arr_or_dtype) -> DtypeObj:
+def _get_dtype(arr_or_dtype) -> DtypeObj:
     """
     Get the dtype instance associated with an array
     or dtype object.
@@ -1510,7 +1522,7 @@ def infer_dtype_from_object(dtype) -> type:
         try:
             return infer_dtype_from_object(getattr(np, dtype))
         except (AttributeError, TypeError):
-            # Handles cases like get_dtype(int) i.e.,
+            # Handles cases like _get_dtype(int) i.e.,
             # Python objects that are valid dtypes
             # (unlike user-defined types, in general)
             #
@@ -1653,7 +1665,6 @@ def is_all_strings(value: ArrayLike) -> bool:
     "ensure_float64",
     "ensure_python_int",
     "ensure_str",
-    "get_dtype",
     "infer_dtype_from_object",
     "INT64_DTYPE",
     "is_1d_only_ea_dtype",

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -177,6 +177,7 @@ def test_get_dtype_error_catch(func):
         or func is com.is_interval_dtype
         or func is com.is_datetime64tz_dtype
         or func is com.is_categorical_dtype
+        or func is com.is_period_dtype
     ):
         warn = FutureWarning
 
@@ -197,14 +198,16 @@ def test_is_object():
     "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
 )
 def test_is_sparse(check_scipy):
-    assert com.is_sparse(SparseArray([1, 2, 3]))
+    msg = "is_sparse is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert com.is_sparse(SparseArray([1, 2, 3]))
 
-    assert not com.is_sparse(np.array([1, 2, 3]))
+        assert not com.is_sparse(np.array([1, 2, 3]))
 
-    if check_scipy:
-        import scipy.sparse
+        if check_scipy:
+            import scipy.sparse
 
-        assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
+            assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
 
 
 @td.skip_if_no_scipy
@@ -264,12 +267,14 @@ def test_is_timedelta64_dtype():
 
 
 def test_is_period_dtype():
-    assert not com.is_period_dtype(object)
-    assert not com.is_period_dtype([1, 2, 3])
-    assert not com.is_period_dtype(pd.Period("2017-01-01"))
+    msg = "is_period_dtype is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert not com.is_period_dtype(object)
+        assert not com.is_period_dtype([1, 2, 3])
+        assert not com.is_period_dtype(pd.Period("2017-01-01"))
 
-    assert com.is_period_dtype(PeriodDtype(freq="D"))
-    assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
+        assert com.is_period_dtype(PeriodDtype(freq="D"))
+        assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
 
 
 def test_is_interval_dtype():
@@ -681,7 +686,7 @@ def test_is_complex_dtype():
     ],
 )
 def test_get_dtype(input_param, result):
-    assert com.get_dtype(input_param) == result
+    assert com._get_dtype(input_param) == result
 
 
 @pytest.mark.parametrize(
@@ -700,7 +705,7 @@ def test_get_dtype_fails(input_param, expected_error_message):
     # 2020-02-02 npdev changed error message
     expected_error_message += f"|Cannot interpret '{input_param}' as a data type"
     with pytest.raises(TypeError, match=expected_error_message):
-        com.get_dtype(input_param)
+        com._get_dtype(input_param)
 
 
 @pytest.mark.parametrize(

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -427,12 +427,10 @@ def test_construction(self):
         for s in ["period[D]", "Period[D]", "D"]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Day()
-            assert is_period_dtype(dt)
 
         for s in ["period[3D]", "Period[3D]", "3D"]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Day(3)
-            assert is_period_dtype(dt)
 
         for s in [
             "period[26H]",
@@ -444,7 +442,6 @@ def test_construction(self):
         ]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Hour(26)
-            assert is_period_dtype(dt)
 
     def test_cannot_use_custom_businessday(self):
         # GH#52534
@@ -530,20 +527,22 @@ def test_equality(self, dtype):
         assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D"))
 
     def test_basic(self, dtype):
-        assert is_period_dtype(dtype)
+        msg = "is_period_dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert is_period_dtype(dtype)
 
-        pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
+            pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
 
-        assert is_period_dtype(pidx.dtype)
-        assert is_period_dtype(pidx)
+            assert is_period_dtype(pidx.dtype)
+            assert is_period_dtype(pidx)
 
-        s = Series(pidx, name="A")
+            s = Series(pidx, name="A")
 
-        assert is_period_dtype(s.dtype)
-        assert is_period_dtype(s)
+            assert is_period_dtype(s.dtype)
+            assert is_period_dtype(s)
 
-        assert not is_period_dtype(np.dtype("float64"))
-        assert not is_period_dtype(1.0)
+            assert not is_period_dtype(np.dtype("float64"))
+            assert not is_period_dtype(1.0)
 
     def test_freq_argument_required(self):
         # GH#27388
@@ -1132,6 +1131,7 @@ def test_is_dtype_no_warning(check):
         check is is_categorical_dtype
         or check is is_interval_dtype
         or check is is_datetime64tz_dtype
+        or check is is_period_dtype
     ):
         warn = FutureWarning