CLN: Remove inf_as_na (#57428)

pandas-dev · Feb 19, 2024 · c379331 · c379331
1 parent 997e1b8
commit c379331
Show file tree

Hide file tree

Showing 20 changed files with 44 additions and 382 deletions.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -146,6 +146,7 @@ Removal of prior version deprecations/changes
 - Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
 - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
 - Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
+- Removed option ``mode.use_inf_as_na``; convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
 - Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records` (:issue:`51697`)
 - Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
 - Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`)

diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd
@@ -7,8 +7,8 @@ from numpy cimport (
 cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
 cpdef bint check_na_tuples_nonequal(object left, object right)
 
-cpdef bint checknull(object val, bint inf_as_na=*)
-cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)
+cpdef bint checknull(object val)
+cpdef ndarray[uint8_t] isnaobj(ndarray arr)
 
 cdef bint is_null_datetime64(v)
 cdef bint is_null_timedelta64(v)

diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
@@ -11,6 +11,6 @@ def is_matching_na(
 ) -> bool: ...
 def isposinf_scalar(val: object) -> bool: ...
 def isneginf_scalar(val: object) -> bool: ...
-def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
-def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
+def checknull(val: object) -> bool: ...
+def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -137,7 +137,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
     return False
 
 
-cpdef bint checknull(object val, bint inf_as_na=False):
+cpdef bint checknull(object val):
     """
     Return boolean describing whether the input is NA-like, defined here as any
     of:
@@ -152,8 +152,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
     Parameters
     ----------
     val : object
-    inf_as_na : bool, default False
-        Whether to treat INF and -INF as NA values.
 
     Returns
     -------
@@ -164,8 +162,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
     elif util.is_float_object(val) or util.is_complex_object(val):
         if val != val:
             return True
-        elif inf_as_na:
-            return val == INF or val == NEGINF
         return False
     elif cnp.is_timedelta64_object(val):
         return cnp.get_timedelta64_value(val) == NPY_NAT
@@ -184,7 +180,7 @@ cdef bint is_decimal_na(object val):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
+cpdef ndarray[uint8_t] isnaobj(ndarray arr):
     """
     Return boolean mask denoting which elements of a 1-D array are na-like,
     according to the criteria defined in `checknull`:
@@ -217,7 +213,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
         #  equivalents to `val = values[i]`
         val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
         cnp.PyArray_ITER_NEXT(it)
-        is_null = checknull(val, inf_as_na=inf_as_na)
+        is_null = checknull(val)
         # Dereference pointer (set value)
         (<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
         cnp.PyArray_ITER_NEXT(it2)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -406,35 +406,6 @@ def is_terminal() -> bool:
 with cf.config_prefix("mode"):
     cf.register_option("sim_interactive", False, tc_sim_interactive_doc)
 
-use_inf_as_na_doc = """
-: boolean
-    True means treat None, NaN, INF, -INF as NA (old way),
-    False means None and NaN are null, but INF, -INF are not NA
-    (new way).
-
-    This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
-"""
-
-# We don't want to start importing everything at the global context level
-# or we'll hit circular deps.
-
-
-def use_inf_as_na_cb(key) -> None:
-    # TODO(3.0): enforcing this deprecation will close GH#52501
-    from pandas.core.dtypes.missing import _use_inf_as_na
-
-    _use_inf_as_na(key)
-
-
-with cf.config_prefix("mode"):
-    cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)
-
-cf.deprecate_option(
-    # GH#51684
-    "mode.use_inf_as_na",
-    "use_inf_as_na option is deprecated and will be removed in a future "
-    "version. Convert inf values to NaN before operating instead.",
-)
 
 # TODO better name?
 copy_on_write_doc = """

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 from decimal import Decimal
-from functools import partial
 from typing import (
     TYPE_CHECKING,
     overload,
@@ -13,8 +12,6 @@
 
 import numpy as np
 
-from pandas._config import get_option
-
 from pandas._libs import lib
 import pandas._libs.missing as libmissing
 from pandas._libs.tslibs import (
@@ -64,8 +61,6 @@
 isposinf_scalar = libmissing.isposinf_scalar
 isneginf_scalar = libmissing.isneginf_scalar
 
-nan_checker = np.isnan
-INF_AS_NA = False
 _dtype_object = np.dtype("object")
 _dtype_str = np.dtype(str)
 
@@ -180,95 +175,57 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
 isnull = isna
 
 
-def _isna(obj, inf_as_na: bool = False):
+def _isna(obj):
     """
-    Detect missing values, treating None, NaN or NA as null. Infinite
-    values will also be treated as null if inf_as_na is True.
+    Detect missing values, treating None, NaN or NA as null.
 
     Parameters
     ----------
     obj: ndarray or object value
         Input array or scalar value.
-    inf_as_na: bool
-        Whether to treat infinity as null.
 
     Returns
     -------
     boolean ndarray or boolean
     """
     if is_scalar(obj):
-        return libmissing.checknull(obj, inf_as_na=inf_as_na)
+        return libmissing.checknull(obj)
     elif isinstance(obj, ABCMultiIndex):
         raise NotImplementedError("isna is not defined for MultiIndex")
     elif isinstance(obj, type):
         return False
     elif isinstance(obj, (np.ndarray, ABCExtensionArray)):
-        return _isna_array(obj, inf_as_na=inf_as_na)
+        return _isna_array(obj)
     elif isinstance(obj, ABCIndex):
         # Try to use cached isna, which also short-circuits for integer dtypes
         #  and avoids materializing RangeIndex._values
         if not obj._can_hold_na:
             return obj.isna()
-        return _isna_array(obj._values, inf_as_na=inf_as_na)
+        return _isna_array(obj._values)
 
     elif isinstance(obj, ABCSeries):
-        result = _isna_array(obj._values, inf_as_na=inf_as_na)
+        result = _isna_array(obj._values)
         # box
         result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
         return result
     elif isinstance(obj, ABCDataFrame):
         return obj.isna()
     elif isinstance(obj, list):
-        return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na)
+        return _isna_array(np.asarray(obj, dtype=object))
     elif hasattr(obj, "__array__"):
-        return _isna_array(np.asarray(obj), inf_as_na=inf_as_na)
+        return _isna_array(np.asarray(obj))
     else:
         return False
 
 
-def _use_inf_as_na(key) -> None:
-    """
-    Option change callback for na/inf behaviour.
-
-    Choose which replacement for numpy.isnan / -numpy.isfinite is used.
-
-    Parameters
-    ----------
-    flag: bool
-        True means treat None, NaN, INF, -INF as null (old way),
-        False means None and NaN are null, but INF, -INF are not null
-        (new way).
-
-    Notes
-    -----
-    This approach to setting global module values is discussed and
-    approved here:
-
-    * https://stackoverflow.com/questions/4859217/
-      programmatically-creating-variables-in-python/4859312#4859312
-    """
-    inf_as_na = get_option(key)
-    globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na)
-    if inf_as_na:
-        globals()["nan_checker"] = lambda x: ~np.isfinite(x)
-        globals()["INF_AS_NA"] = True
-    else:
-        globals()["nan_checker"] = np.isnan
-        globals()["INF_AS_NA"] = False
-
-
-def _isna_array(
-    values: ArrayLike, inf_as_na: bool = False
-) -> npt.NDArray[np.bool_] | NDFrame:
+def _isna_array(values: ArrayLike) -> npt.NDArray[np.bool_] | NDFrame:
     """
     Return an array indicating which values of the input array are NaN / NA.
 
     Parameters
     ----------
     values: ndarray or ExtensionArray
         The input array whose elements are to be checked.
-    inf_as_na: bool
-        Whether or not to treat infinite values as NA.
 
     Returns
     -------
@@ -280,73 +237,47 @@ def _isna_array(
 
     if not isinstance(values, np.ndarray):
         # i.e. ExtensionArray
-        if inf_as_na and isinstance(dtype, CategoricalDtype):
-            result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
-        else:
-            # error: Incompatible types in assignment (expression has type
-            # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
-            # type "ndarray[Any, dtype[bool_]]")
-            result = values.isna()  # type: ignore[assignment]
+        # error: Incompatible types in assignment (expression has type
+        # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
+        # type "ndarray[Any, dtype[bool_]]")
+        result = values.isna()  # type: ignore[assignment]
     elif isinstance(values, np.rec.recarray):
         # GH 48526
-        result = _isna_recarray_dtype(values, inf_as_na=inf_as_na)
+        result = _isna_recarray_dtype(values)
     elif is_string_or_object_np_dtype(values.dtype):
-        result = _isna_string_dtype(values, inf_as_na=inf_as_na)
+        result = _isna_string_dtype(values)
     elif dtype.kind in "mM":
         # this is the NaT pattern
         result = values.view("i8") == iNaT
     else:
-        if inf_as_na:
-            result = ~np.isfinite(values)
-        else:
-            result = np.isnan(values)
+        result = np.isnan(values)
 
     return result
 
 
-def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
+def _isna_string_dtype(values: np.ndarray) -> npt.NDArray[np.bool_]:
     # Working around NumPy ticket 1542
     dtype = values.dtype
 
     if dtype.kind in ("S", "U"):
         result = np.zeros(values.shape, dtype=bool)
     else:
         if values.ndim in {1, 2}:
-            result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
+            result = libmissing.isnaobj(values)
         else:
             # 0-D, reached via e.g. mask_missing
-            result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
+            result = libmissing.isnaobj(values.ravel())
             result = result.reshape(values.shape)
 
     return result
 
 
-def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_:
-    is_inf_in_record = np.zeros(len(record_as_array), dtype=bool)
-    for i, value in enumerate(record_as_array):
-        is_element_inf = False
-        try:
-            is_element_inf = np.isinf(value)
-        except TypeError:
-            is_element_inf = False
-        is_inf_in_record[i] = is_element_inf
-
-    return np.any(is_inf_in_record)
-
-
-def _isna_recarray_dtype(
-    values: np.rec.recarray, inf_as_na: bool
-) -> npt.NDArray[np.bool_]:
+def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]:
     result = np.zeros(values.shape, dtype=bool)
     for i, record in enumerate(values):
         record_as_array = np.array(record.tolist())
         does_record_contain_nan = isna_all(record_as_array)
-        does_record_contain_inf = False
-        if inf_as_na:
-            does_record_contain_inf = bool(_has_record_inf_value(record_as_array))
-        result[i] = np.any(
-            np.logical_or(does_record_contain_nan, does_record_contain_inf)
-        )
+        result[i] = np.any(does_record_contain_nan)
 
     return result
 
@@ -788,7 +719,7 @@ def isna_all(arr: ArrayLike) -> bool:
 
     dtype = arr.dtype
     if lib.is_np_dtype(dtype, "f"):
-        checker = nan_checker
+        checker = np.isnan
 
     elif (lib.is_np_dtype(dtype, "mM")) or isinstance(
         dtype, (DatetimeTZDtype, PeriodDtype)
@@ -800,9 +731,7 @@ def isna_all(arr: ArrayLike) -> bool:
     else:
         # error: Incompatible types in assignment (expression has type "Callable[[Any],
         # Any]", variable has type "ufunc")
-        checker = lambda x: _isna_array(  # type: ignore[assignment]
-            x, inf_as_na=INF_AS_NA
-        )
+        checker = _isna_array  # type: ignore[assignment]
 
     return all(
         checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8102,8 +8102,7 @@ def isna(self) -> Self:
         NA values, such as None or :attr:`numpy.NaN`, get mapped to True
         values.
         Everything else gets mapped to False values. Characters such as empty
-        strings ``''`` or :attr:`numpy.inf` are not considered NA values
-        (unless you set ``pandas.options.mode.use_inf_as_na = True``).
+        strings ``''`` or :attr:`numpy.inf` are not considered NA values.
 
         Returns
         -------
@@ -8174,8 +8173,7 @@ def notna(self) -> Self:
 
         Return a boolean same-sized object indicating if the values are not NA.
         Non-missing values get mapped to True. Characters such as empty
-        strings ``''`` or :attr:`numpy.inf` are not considered NA values
-        (unless you set ``pandas.options.mode.use_inf_as_na = True``).
+        strings ``''`` or :attr:`numpy.inf` are not considered NA values.
         NA values, such as None or :attr:`numpy.NaN`, get mapped to False
         values.
 

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1205,10 +1205,6 @@ def _format(x):
                     return "None"
                 elif x is NA:
                     return str(NA)
-                elif lib.is_float(x) and np.isinf(x):
-                    # TODO(3.0): this will be unreachable when use_inf_as_na
-                    #  deprecation is enforced
-                    return str(x)
                 elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)):
                     return "NaT"
                 return self.na_rep