Skip to content

Commit

Permalink
CLN: assorted (#53086)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed May 5, 2023
1 parent b0140bf commit 5fa7f31
Show file tree
Hide file tree
Showing 27 changed files with 64 additions and 96 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ Metadata

Other
^^^^^
- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are presnet (:issue:`52840`)
- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`)
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
- Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t:
def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
cdef:
Py_ssize_t i, xi, yi, N, K
bint minpv
int64_t minpv
float64_t[:, ::1] result
ndarray[uint8_t, ndim=2] mask
int64_t nobs = 0
Expand All @@ -357,7 +357,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
if minp is None:
minpv = 1
else:
minpv = <int>minp
minpv = <int64_t>minp

result = np.empty((K, K), dtype=np.float64)
mask = np.isfinite(mat).view(np.uint8)
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/index.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from pandas._typing import npt
from pandas import MultiIndex
from pandas.core.arrays import ExtensionArray

multiindex_nulls_shift: int

class IndexEngine:
over_size_threshold: bool
def __init__(self, values: np.ndarray) -> None: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,5 @@ class BlockValuesRefs:
referenced_blocks: list[weakref.ref]
def __init__(self, blk: SharedBlock | None = ...) -> None: ...
def add_reference(self, blk: SharedBlock) -> None: ...
def add_index_reference(self, index: object) -> None: ...
def add_index_reference(self, index: Index) -> None: ...
def has_reference(self) -> bool: ...
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ cdef class BlockValuesRefs:

Parameters
----------
index: object
index : Index
The index that the new reference should point to.
"""
self.referenced_blocks.append(weakref.ref(index))
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2612,7 +2612,7 @@ def maybe_convert_objects(ndarray[object] objects,
return tdi._data._ndarray
seen.object_ = True

if seen.period_:
elif seen.period_:
if is_period_array(objects):
from pandas import PeriodIndex
pi = PeriodIndex(objects)
Expand All @@ -2621,7 +2621,7 @@ def maybe_convert_objects(ndarray[object] objects,
return pi._data
seen.object_ = True

if seen.interval_:
elif seen.interval_:
if is_interval_array(objects):
from pandas import IntervalIndex
ii = IntervalIndex(objects)
Expand All @@ -2631,7 +2631,7 @@ def maybe_convert_objects(ndarray[object] objects,

seen.object_ = True

if seen.nat_:
elif seen.nat_:
if not seen.object_ and not seen.numeric_ and not seen.bool_:
# all NaT, None, or nan (at least one NaT)
# see GH#49340 for discussion of desired behavior
Expand Down
4 changes: 1 addition & 3 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,9 +852,7 @@ def _constructor_sliced(self):


class SubclassedCategorical(Categorical):
@property
def _constructor(self):
return SubclassedCategorical
pass


def _make_skipna_wrapper(alternative, skipna_alternative=None):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1240,7 +1240,7 @@ def take(
if not is_array_like(arr):
arr = np.asarray(arr)

indices = np.asarray(indices, dtype=np.intp)
indices = ensure_platform_int(indices)

if allow_fill:
# Pandas style, -1 means NA
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1086,15 +1086,10 @@ def agg(self):
result = super().agg()
if result is None:
f = self.f
kwargs = self.kwargs

# string, list-like, and dict-like are entirely handled in super
assert callable(f)

# we can be called from an inner function which
# passes this meta-data
kwargs.pop("_level", None)

# try a regular apply, this evaluates lambdas
# row-by-row; however if the lambda expects a Series
# expression, e.g.: lambda x: x-x.quantile(0.25)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
if values.dtype == object:
dtype, _ = infer_dtype_from(other)

if isinstance(dtype, np.dtype) and dtype.kind in "mM":
if lib.is_np_dtype(dtype, "mM"):
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
if not is_list_like(other):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,7 @@ def _validate_listlike(self, value):
"Cannot set a Categorical with another, "
"without identical categories"
)
# is_dtype_equal implies categories_match_up_to_permutation
# dtype equality implies categories_match_up_to_permutation
value = self._encode_with_my_categories(value)
return value._codes

Expand Down
7 changes: 1 addition & 6 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_regular_range
from pandas.core.arrays.sparse.dtype import SparseDtype
import pandas.core.common as com

from pandas.tseries.frequencies import get_period_alias
Expand Down Expand Up @@ -2035,11 +2034,7 @@ def _sequence_to_dt64ns(
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")

if (
data_dtype == object
or is_string_dtype(data_dtype)
or isinstance(data_dtype, SparseDtype)
):
if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
Expand Down
10 changes: 1 addition & 9 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
Expand Down Expand Up @@ -172,20 +171,13 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:

return type(self)(self._data[item], newmask)

@doc(ExtensionArray.fillna)
@doc(ExtensionArray.fillna)
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
value, method = validate_fillna_kwargs(value, method)

mask = self._mask

if is_array_like(value):
if len(value) != len(self):
raise ValueError(
f"Length of 'value' does not match. Got ({len(value)}) "
f" expected {len(self)}"
)
value = value[mask]
value = missing.check_value_size(value, mask, len(self))

if mask.any():
if method is not None:
Expand Down
14 changes: 9 additions & 5 deletions pandas/core/flags.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from __future__ import annotations

from typing import TYPE_CHECKING
import weakref

if TYPE_CHECKING:
from pandas.core.generic import NDFrame


class Flags:
"""
Expand Down Expand Up @@ -44,9 +48,9 @@ class Flags:
<Flags(allows_duplicate_labels=True)>
"""

_keys = {"allows_duplicate_labels"}
_keys: set[str] = {"allows_duplicate_labels"}

def __init__(self, obj, *, allows_duplicate_labels) -> None:
def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None:
self._allows_duplicate_labels = allows_duplicate_labels
self._obj = weakref.ref(obj)

Expand Down Expand Up @@ -95,21 +99,21 @@ def allows_duplicate_labels(self, value: bool) -> None:

self._allows_duplicate_labels = value

def __getitem__(self, key):
def __getitem__(self, key: str):
if key not in self._keys:
raise KeyError(key)

return getattr(self, key)

def __setitem__(self, key, value) -> None:
def __setitem__(self, key: str, value) -> None:
if key not in self._keys:
raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}")
setattr(self, key, value)

def __repr__(self) -> str:
return f"<Flags(allows_duplicate_labels={self.allows_duplicate_labels})>"

def __eq__(self, other):
def __eq__(self, other) -> bool:
if isinstance(other, type(self)):
return self.allows_duplicate_labels == other.allows_duplicate_labels
return False
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6546,7 +6546,7 @@ def sort_values(
axis: Axis = ...,
ascending=...,
inplace: Literal[True],
kind: str = ...,
kind: SortKind = ...,
na_position: str = ...,
ignore_index: bool = ...,
key: ValueKeyFunc = ...,
Expand All @@ -6560,7 +6560,7 @@ def sort_values(
axis: Axis = 0,
ascending: bool | list[bool] | tuple[bool, ...] = True,
inplace: bool = False,
kind: str = "quicksort",
kind: SortKind = "quicksort",
na_position: str = "last",
ignore_index: bool = False,
key: ValueKeyFunc = None,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6206,7 +6206,7 @@ def _check_inplace_setting(self, value) -> bool_t:
"""check whether we allow in-place setting with this type of value"""
if self._is_mixed_type and not self._mgr.is_numeric_mixed_type:
# allow an actual np.nan through
if is_float(value) and np.isnan(value) or value is lib.no_default:
if (is_float(value) and np.isnan(value)) or value is lib.no_default:
return True

raise TypeError(
Expand Down
15 changes: 3 additions & 12 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2868,8 +2868,9 @@ def fillna(self, value=None, downcast=None):
DataFrame.fillna : Fill NaN values of a DataFrame.
Series.fillna : Fill NaN Values of a Series.
"""
if not is_scalar(value):
raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")

value = self._require_scalar(value)
if self.hasnans:
result = self.putmask(self._isnan, value)
if downcast is None:
Expand Down Expand Up @@ -3211,7 +3212,7 @@ def union(self, other, sort=None):

elif not len(other) or self.equals(other):
# NB: whether this (and the `if not len(self)` check below) come before
# or after the is_dtype_equal check above affects the returned dtype
# or after the dtype equality check above affects the returned dtype
result = self._get_reconciled_name_object(other)
if sort is True:
return result.sort_values()
Expand Down Expand Up @@ -5119,16 +5120,6 @@ def _validate_fill_value(self, value):
raise TypeError
return value

@final
def _require_scalar(self, value):
"""
Check that this is a scalar value that we can use for setitem-like
operations without changing dtype.
"""
if not is_scalar(value):
raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
return value

def _is_memory_usage_qualified(self) -> bool:
"""
Return a boolean if we need a qualified .info display.
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,11 +1116,7 @@ def _engine(self):
# calculating the indexer are shifted to 0
sizes = np.ceil(
np.log2(
[
len(level)
+ libindex.multiindex_nulls_shift # type: ignore[attr-defined]
for level in self.levels
]
[len(level) + libindex.multiindex_nulls_shift for level in self.levels]
)
)

Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,7 @@ def should_store(self, value: ArrayLike) -> bool:
-------
bool
"""
# faster equivalent to is_dtype_equal(value.dtype, self.dtype)
try:
return value.dtype == self.dtype
except TypeError:
return False
return value.dtype == self.dtype

# ---------------------------------------------------------------------
# Apply/Reduce and Helpers
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/ops/invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
from __future__ import annotations

import operator
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from pandas._typing import npt

def invalid_comparison(left, right, op) -> np.ndarray:

def invalid_comparison(left, right, op) -> npt.NDArray[np.bool_]:
"""
If a comparison has mismatched types and is not necessarily meaningful,
follow python3 conventions by:
Expand Down
16 changes: 6 additions & 10 deletions pandas/core/ops/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,10 @@

import numpy as np

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
is_scalar,
)

from pandas.core import roperator


def _fill_zeros(result, x, y):
def _fill_zeros(result: np.ndarray, x, y):
"""
If this is a reversed op, then flip x,y
Expand All @@ -46,11 +40,11 @@ def _fill_zeros(result, x, y):
Mask the nan's from x.
"""
if is_float_dtype(result.dtype):
if result.dtype.kind == "f":
return result

is_variable_type = hasattr(y, "dtype")
is_scalar_type = is_scalar(y)
is_scalar_type = not isinstance(y, np.ndarray)

if not is_variable_type and not is_scalar_type:
# e.g. test_series_ops_name_retention with mod we get here with list/tuple
Expand All @@ -59,7 +53,7 @@ def _fill_zeros(result, x, y):
if is_scalar_type:
y = np.array(y)

if is_integer_dtype(y.dtype):
if y.dtype.kind in "iu":
ymask = y == 0
if ymask.any():
# GH#7325, mask and nans must be broadcastable
Expand Down Expand Up @@ -143,7 +137,9 @@ def dispatch_fill_zeros(op, left, right, result):
----------
op : function (operator.add, operator.div, ...)
left : object (np.ndarray for non-reversed ops)
We have excluded ExtensionArrays here
right : object (np.ndarray for reversed ops)
We have excluded ExtensionArrays here
result : ndarray
Returns
Expand Down

0 comments on commit 5fa7f31

Please sign in to comment.