Skip to content

Commit

Permalink
CLN: Remove inf_as_na (#57428)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Feb 19, 2024
1 parent 997e1b8 commit c379331
Show file tree
Hide file tree
Showing 20 changed files with 44 additions and 382 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ Removal of prior version deprecations/changes
- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records` (:issue:`51697`)
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
- Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ from numpy cimport (
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
cpdef bint check_na_tuples_nonequal(object left, object right)

cpdef bint checknull(object val, bint inf_as_na=*)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)
cpdef bint checknull(object val)
cpdef ndarray[uint8_t] isnaobj(ndarray arr)

cdef bint is_null_datetime64(v)
cdef bint is_null_timedelta64(v)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def is_matching_na(
) -> bool: ...
def isposinf_scalar(val: object) -> bool: ...
def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def checknull(val: object) -> bool: ...
def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
10 changes: 3 additions & 7 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
return False


cpdef bint checknull(object val, bint inf_as_na=False):
cpdef bint checknull(object val):
"""
Return boolean describing if the input is NA-like, defined here as any
of:
Expand All @@ -152,8 +152,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
Parameters
----------
val : object
inf_as_na : bool, default False
Whether to treat INF and -INF as NA values.
Returns
-------
Expand All @@ -164,8 +162,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
elif util.is_float_object(val) or util.is_complex_object(val):
if val != val:
return True
elif inf_as_na:
return val == INF or val == NEGINF
return False
elif cnp.is_timedelta64_object(val):
return cnp.get_timedelta64_value(val) == NPY_NAT
Expand All @@ -184,7 +180,7 @@ cdef bint is_decimal_na(object val):

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
"""
Return boolean mask denoting which elements of a 1-D array are na-like,
according to the criteria defined in `checknull`:
Expand Down Expand Up @@ -217,7 +213,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
# equivalents to `val = values[i]`
val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
cnp.PyArray_ITER_NEXT(it)
is_null = checknull(val, inf_as_na=inf_as_na)
is_null = checknull(val)
# Dereference pointer (set value)
(<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
cnp.PyArray_ITER_NEXT(it2)
Expand Down
29 changes: 0 additions & 29 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,35 +406,6 @@ def is_terminal() -> bool:
with cf.config_prefix("mode"):
cf.register_option("sim_interactive", False, tc_sim_interactive_doc)

use_inf_as_na_doc = """
: boolean
True means treat None, NaN, INF, -INF as NA (old way),
False means None and NaN are null, but INF, -INF are not NA
(new way).
This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
"""

# We don't want to start importing everything at the global context level
# or we'll hit circular deps.


def use_inf_as_na_cb(key) -> None:
# TODO(3.0): enforcing this deprecation will close GH#52501
from pandas.core.dtypes.missing import _use_inf_as_na

_use_inf_as_na(key)


with cf.config_prefix("mode"):
cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)

cf.deprecate_option(
# GH#51684
"mode.use_inf_as_na",
"use_inf_as_na option is deprecated and will be removed in a future "
"version. Convert inf values to NaN before operating instead.",
)

# TODO better name?
copy_on_write_doc = """
Expand Down
117 changes: 23 additions & 94 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from __future__ import annotations

from decimal import Decimal
from functools import partial
from typing import (
TYPE_CHECKING,
overload,
Expand All @@ -13,8 +12,6 @@

import numpy as np

from pandas._config import get_option

from pandas._libs import lib
import pandas._libs.missing as libmissing
from pandas._libs.tslibs import (
Expand Down Expand Up @@ -64,8 +61,6 @@
isposinf_scalar = libmissing.isposinf_scalar
isneginf_scalar = libmissing.isneginf_scalar

nan_checker = np.isnan
INF_AS_NA = False
_dtype_object = np.dtype("object")
_dtype_str = np.dtype(str)

Expand Down Expand Up @@ -180,95 +175,57 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
isnull = isna


def _isna(obj, inf_as_na: bool = False):
def _isna(obj):
"""
Detect missing values, treating None, NaN or NA as null. Infinite
values will also be treated as null if inf_as_na is True.
Detect missing values, treating None, NaN or NA as null.
Parameters
----------
obj: ndarray or object value
Input array or scalar value.
inf_as_na: bool
Whether to treat infinity as null.
Returns
-------
boolean ndarray or boolean
"""
if is_scalar(obj):
return libmissing.checknull(obj, inf_as_na=inf_as_na)
return libmissing.checknull(obj)
elif isinstance(obj, ABCMultiIndex):
raise NotImplementedError("isna is not defined for MultiIndex")
elif isinstance(obj, type):
return False
elif isinstance(obj, (np.ndarray, ABCExtensionArray)):
return _isna_array(obj, inf_as_na=inf_as_na)
return _isna_array(obj)
elif isinstance(obj, ABCIndex):
# Try to use cached isna, which also short-circuits for integer dtypes
# and avoids materializing RangeIndex._values
if not obj._can_hold_na:
return obj.isna()
return _isna_array(obj._values, inf_as_na=inf_as_na)
return _isna_array(obj._values)

elif isinstance(obj, ABCSeries):
result = _isna_array(obj._values, inf_as_na=inf_as_na)
result = _isna_array(obj._values)
# box
result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
return result
elif isinstance(obj, ABCDataFrame):
return obj.isna()
elif isinstance(obj, list):
return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na)
return _isna_array(np.asarray(obj, dtype=object))
elif hasattr(obj, "__array__"):
return _isna_array(np.asarray(obj), inf_as_na=inf_as_na)
return _isna_array(np.asarray(obj))
else:
return False


def _use_inf_as_na(key) -> None:
"""
Option change callback for na/inf behaviour.
Choose which replacement for numpy.isnan / -numpy.isfinite is used.
Parameters
----------
flag: bool
True means treat None, NaN, INF, -INF as null (old way),
False means None and NaN are null, but INF, -INF are not null
(new way).
Notes
-----
This approach to setting global module values is discussed and
approved here:
* https://stackoverflow.com/questions/4859217/
programmatically-creating-variables-in-python/4859312#4859312
"""
inf_as_na = get_option(key)
globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na)
if inf_as_na:
globals()["nan_checker"] = lambda x: ~np.isfinite(x)
globals()["INF_AS_NA"] = True
else:
globals()["nan_checker"] = np.isnan
globals()["INF_AS_NA"] = False


def _isna_array(
values: ArrayLike, inf_as_na: bool = False
) -> npt.NDArray[np.bool_] | NDFrame:
def _isna_array(values: ArrayLike) -> npt.NDArray[np.bool_] | NDFrame:
"""
Return an array indicating which values of the input array are NaN / NA.
Parameters
----------
values: ndarray or ExtensionArray
The input array whose elements are to be checked.
inf_as_na: bool
Whether or not to treat infinite values as NA.
Returns
-------
Expand All @@ -280,73 +237,47 @@ def _isna_array(

if not isinstance(values, np.ndarray):
# i.e. ExtensionArray
if inf_as_na and isinstance(dtype, CategoricalDtype):
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
else:
# error: Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
# type "ndarray[Any, dtype[bool_]]")
result = values.isna() # type: ignore[assignment]
# error: Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
# type "ndarray[Any, dtype[bool_]]")
result = values.isna() # type: ignore[assignment]
elif isinstance(values, np.rec.recarray):
# GH 48526
result = _isna_recarray_dtype(values, inf_as_na=inf_as_na)
result = _isna_recarray_dtype(values)
elif is_string_or_object_np_dtype(values.dtype):
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
result = _isna_string_dtype(values)
elif dtype.kind in "mM":
# this is the NaT pattern
result = values.view("i8") == iNaT
else:
if inf_as_na:
result = ~np.isfinite(values)
else:
result = np.isnan(values)
result = np.isnan(values)

return result


def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
def _isna_string_dtype(values: np.ndarray) -> npt.NDArray[np.bool_]:
# Working around NumPy ticket 1542
dtype = values.dtype

if dtype.kind in ("S", "U"):
result = np.zeros(values.shape, dtype=bool)
else:
if values.ndim in {1, 2}:
result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
result = libmissing.isnaobj(values)
else:
# 0-D, reached via e.g. mask_missing
result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
result = libmissing.isnaobj(values.ravel())
result = result.reshape(values.shape)

return result


def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_:
is_inf_in_record = np.zeros(len(record_as_array), dtype=bool)
for i, value in enumerate(record_as_array):
is_element_inf = False
try:
is_element_inf = np.isinf(value)
except TypeError:
is_element_inf = False
is_inf_in_record[i] = is_element_inf

return np.any(is_inf_in_record)


def _isna_recarray_dtype(
values: np.rec.recarray, inf_as_na: bool
) -> npt.NDArray[np.bool_]:
def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]:
result = np.zeros(values.shape, dtype=bool)
for i, record in enumerate(values):
record_as_array = np.array(record.tolist())
does_record_contain_nan = isna_all(record_as_array)
does_record_contain_inf = False
if inf_as_na:
does_record_contain_inf = bool(_has_record_inf_value(record_as_array))
result[i] = np.any(
np.logical_or(does_record_contain_nan, does_record_contain_inf)
)
result[i] = np.any(does_record_contain_nan)

return result

Expand Down Expand Up @@ -788,7 +719,7 @@ def isna_all(arr: ArrayLike) -> bool:

dtype = arr.dtype
if lib.is_np_dtype(dtype, "f"):
checker = nan_checker
checker = np.isnan

elif (lib.is_np_dtype(dtype, "mM")) or isinstance(
dtype, (DatetimeTZDtype, PeriodDtype)
Expand All @@ -800,9 +731,7 @@ def isna_all(arr: ArrayLike) -> bool:
else:
# error: Incompatible types in assignment (expression has type "Callable[[Any],
# Any]", variable has type "ufunc")
checker = lambda x: _isna_array( # type: ignore[assignment]
x, inf_as_na=INF_AS_NA
)
checker = _isna_array # type: ignore[assignment]

return all(
checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len)
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8102,8 +8102,7 @@ def isna(self) -> Self:
NA values, such as None or :attr:`numpy.NaN`, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings ``''`` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
Returns
-------
Expand Down Expand Up @@ -8174,8 +8173,7 @@ def notna(self) -> Self:
Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings ``''`` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
NA values, such as None or :attr:`numpy.NaN`, get mapped to False
values.
Expand Down
4 changes: 0 additions & 4 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,10 +1205,6 @@ def _format(x):
return "None"
elif x is NA:
return str(NA)
elif lib.is_float(x) and np.isinf(x):
# TODO(3.0): this will be unreachable when use_inf_as_na
# deprecation is enforced
return str(x)
elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)):
return "NaT"
return self.na_rep
Expand Down

0 comments on commit c379331

Please sign in to comment.