Skip to content

Commit

Permalink
CLN: assorted (#52569)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 10, 2023
1 parent c537b36 commit 2d5ad57
Show file tree
Hide file tree
Showing 28 changed files with 65 additions and 148 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ Performance improvements
- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)
- Performance improvement in :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` when ``verify_integrity=True`` (:issue:`51873`)
- Performance improvement in :func:`factorize` for object columns not containing strings (:issue:`51921`)
- Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@
from pandas._config.display import detect_console_encoding


def using_copy_on_write():
def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"


def using_nullable_dtypes():
def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
11 changes: 6 additions & 5 deletions pandas/_libs/tslibs/timestamps.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,13 @@ class Timestamp(datetime):
def astimezone(self, tz: _tzinfo | None) -> Self: ... # type: ignore[override]
def ctime(self) -> str: ...
def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
# Return type "datetime" of "strptime" incompatible with return type "Timestamp"
# in supertype "datetime"
@classmethod
def strptime( # type: ignore[override]
cls, date_string: str, format: str
) -> datetime: ...
def strptime(
# Note: strptime is actually disabled and raises NotImplementedError
cls,
date_string: str,
format: str,
) -> Self: ...
def utcoffset(self) -> timedelta | None: ...
def tzname(self) -> str | None: ...
def dst(self) -> timedelta | None: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
_check_isinstance(left, right, PeriodArray)

assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("dtype", left, right, obj=obj)


def assert_datetime_array_equal(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:

if (
len(values) > 0
and is_numeric_dtype(values)
and is_numeric_dtype(values.dtype)
and not is_signed_integer_dtype(comps)
):
# GH#46485 Use object to avoid upcast to float64 later
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def transform(self) -> DataFrame | Series:
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
# Series]"
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
obj.index # type:ignore[arg-type]
obj.index # type: ignore[arg-type]
):
raise ValueError("Function did not transform")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ def _concat_same_type(cls, to_concat) -> Self:
"""
chunks = [array for ea in to_concat for array in ea._pa_array.iterchunks()]
if to_concat[0].dtype == "string":
# StringDtype has no attrivute pyarrow_dtype
# StringDtype has no attribute pyarrow_dtype
pa_dtype = pa.string()
else:
pa_dtype = to_concat[0].dtype.pyarrow_dtype
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
missing as libmissing,
)

from pandas.core.dtypes.common import (
is_list_like,
is_numeric_dtype,
)
from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -180,7 +177,7 @@ def coerce_to_array(
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
if copy:
values = values.copy()
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
elif isinstance(values, np.ndarray) and values.dtype.kind in "iufcb":
mask_values = isna(values)

values_bool = np.zeros(len(values), dtype=bool)
Expand Down
9 changes: 1 addition & 8 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@
from pandas.core import (
algorithms,
nanops,
ops,
)
from pandas.core.algorithms import (
checked_add_with_arr,
Expand Down Expand Up @@ -903,13 +902,7 @@ def _cmp_method(self, other, op):

dtype = getattr(other, "dtype", None)
if is_object_dtype(dtype):
# We have to use comp_method_OBJECT_ARRAY instead of numpy
# comparison otherwise it would fail to raise when
# comparing tz-aware and tz-naive
result = ops.comp_method_OBJECT_ARRAY(
op, np.asarray(self.astype(object)), other
)
return result
return op(np.asarray(self, dtype=object), other)

if other is NaT:
if op is operator.ne:
Expand Down
15 changes: 0 additions & 15 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1805,21 +1805,6 @@ def _formatter(self, boxed: bool = False):
# This will infer the correct formatter from the dtype of the values.
return None

# ------------------------------------------------------------------------
# GroupBy Methods

def _groupby_op(
self,
*,
how: str,
has_dropped_na: bool,
min_count: int,
ngroups: int,
ids: npt.NDArray[np.intp],
**kwargs,
):
raise NotImplementedError(f"{self.dtype} dtype not supported")


def _make_sparse(
arr: np.ndarray,
Expand Down
19 changes: 0 additions & 19 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@
DT64NS_DTYPE,
TD64NS_DTYPE,
ensure_object,
is_bool_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_integer_dtype,
is_object_dtype,
is_scalar,
is_string_or_object_np_dtype,
Expand Down Expand Up @@ -431,23 +429,6 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
notnull = notna


def isna_compat(arr, fill_value=np.nan) -> bool:
"""
Parameters
----------
arr: a numpy array
fill_value: fill value, default to np.nan
Returns
-------
True if we can fill using this fill_value
"""
if isna(fill_value):
dtype = arr.dtype
return not (is_bool_dtype(dtype) or is_integer_dtype(dtype))
return True


def array_equivalent(
left,
right,
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,8 @@
axis : int or str, optional
Axis to target. Can be either the axis name ('index', 'columns')
or number (0, 1).""",
"replace_iloc": """
This differs from updating with ``.loc`` or ``.iloc``, which require
you to specify a location to update with some value.""",
}

_numeric_only_doc = """numeric_only : bool, default False
Include only float, int, boolean data.
"""

_merge_doc = """
Merge DataFrame or named Series objects with a database-style join.
Expand Down Expand Up @@ -5736,7 +5729,7 @@ def set_index(

# error: Argument 1 to "append" of "list" has incompatible type
# "Union[Index, Series]"; expected "Index"
arrays.append(col) # type:ignore[arg-type]
arrays.append(col) # type: ignore[arg-type]
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
# error: Argument 1 to "append" of "list" has incompatible type
Expand Down Expand Up @@ -7791,10 +7784,7 @@ def _flex_arith_method(
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(
other,
self.shape,
)
other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

with np.errstate(all="ignore"):
Expand Down
21 changes: 3 additions & 18 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,12 @@
"axes": "keywords for axes",
"klass": "Series/DataFrame",
"axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame", # noqa:E501
"args_transpose": "axes to permute (int or label for object)",
"inplace": """
inplace : bool, default False
If True, performs operation inplace and returns None.""",
"optional_by": """
by : str or list of str
Name or list of names to sort by""",
"replace_iloc": """
This differs from updating with ``.loc`` or ``.iloc``, which require
you to specify a location to update with some value.""",
}


Expand Down Expand Up @@ -264,22 +260,11 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
# ----------------------------------------------------------------------
# Constructors

def __init__(
self,
data: Manager,
copy: bool_t = False,
attrs: Mapping[Hashable, Any] | None = None,
) -> None:
# copy kwarg is retained for mypy compat, is not used

def __init__(self, data: Manager) -> None:
object.__setattr__(self, "_is_copy", None)
object.__setattr__(self, "_mgr", data)
object.__setattr__(self, "_item_cache", {})
if attrs is None:
attrs = {}
else:
attrs = dict(attrs)
object.__setattr__(self, "_attrs", attrs)
object.__setattr__(self, "_attrs", {})
object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))

@final
Expand Down Expand Up @@ -313,6 +298,7 @@ def _init_mgr(
mgr = mgr.astype(dtype=dtype)
return mgr

@final
def _as_manager(self, typ: str, copy: bool_t = True) -> Self:
"""
Private helper function to create a DataFrame with specific manager.
Expand Down Expand Up @@ -7314,7 +7300,6 @@ def replace(
_shared_docs["replace"],
klass=_shared_doc_kwargs["klass"],
inplace=_shared_doc_kwargs["inplace"],
replace_iloc=_shared_doc_kwargs["replace_iloc"],
)
def replace(
self,
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def _outer_indexer(

@cache_readonly
def _can_hold_strings(self) -> bool:
return not is_numeric_dtype(self)
return not is_numeric_dtype(self.dtype)

_engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
np.dtype(np.int8): libindex.Int8Engine,
Expand Down Expand Up @@ -3307,6 +3307,8 @@ def _wrap_setop_result(self, other: Index, result) -> Index:

@final
def intersection(self, other, sort: bool = False):
# default sort keyword is different here from other setops intentionally
# done in GH#25063
"""
Form the intersection of two Index objects.
Expand Down
3 changes: 0 additions & 3 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
contains,
)
from pandas.core.construction import extract_array
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
Expand All @@ -47,8 +46,6 @@
DtypeObj,
npt,
)
_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "CategoricalIndex"})


@inherit_names(
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@
)

from pandas import Index
from pandas.core.internals.blocks import Block
from pandas.core.internals.blocks import (
Block,
BlockPlacement,
)


def _concatenate_array_managers(
Expand Down Expand Up @@ -317,7 +320,9 @@ def _maybe_reindex_columns_na_proxy(
return new_mgrs_indexers


def _get_mgr_concatenation_plan(mgr: BlockManager):
def _get_mgr_concatenation_plan(
mgr: BlockManager,
) -> list[tuple[BlockPlacement, JoinUnit]]:
"""
Construct concatenation plan for given block manager.
Expand Down
18 changes: 6 additions & 12 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
Expand Down Expand Up @@ -669,7 +668,6 @@ def _mask_datetimelike_result(
return result


@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
Expand Down Expand Up @@ -808,38 +806,34 @@ def get_median(x, _mask=None):
# empty set so return nans of shape "everything but the passed axis"
# since "axis" is where the reduction would occur if we had a nonempty
# array
res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
res = _get_empty_reduction_result(values.shape, axis)

else:
# otherwise return a scalar value
res = get_median(values, mask) if notempty else np.nan
return _wrap_results(res, dtype)


def get_empty_reduction_result(
shape: tuple[int, ...],
def _get_empty_reduction_result(
shape: Shape,
axis: AxisInt,
dtype: np.dtype | type[np.floating],
fill_value: Any,
) -> np.ndarray:
"""
The result from a reduction on an empty ndarray.
Parameters
----------
shape : Tuple[int]
shape : Tuple[int, ...]
axis : int
dtype : np.dtype
fill_value : Any
Returns
-------
np.ndarray
"""
shp = np.array(shape)
dims = np.arange(len(shape))
ret = np.empty(shp[dims != axis], dtype=dtype)
ret.fill(fill_value)
ret = np.empty(shp[dims != axis], dtype=np.float64)
ret.fill(np.nan)
return ret


Expand Down

0 comments on commit 2d5ad57

Please sign in to comment.