diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bb208785f46fa..1b9759c451765 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -35,6 +35,7 @@ Timestamp, delta_to_nanoseconds, iNaT, + ints_to_pydatetime, to_offset, ) from pandas._libs.tslibs.fields import ( @@ -406,6 +407,19 @@ def astype(self, dtype, copy: bool = True): dtype = pandas_dtype(dtype) if is_object_dtype(dtype): + if self.dtype.kind == "M": + # *much* faster than self._box_values + # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff + i8data = self.asi8.ravel() + converted = ints_to_pydatetime( + i8data, + # error: "DatetimeLikeArrayMixin" has no attribute "tz" + tz=self.tz, # type: ignore[attr-defined] + freq=self.freq, + box="timestamp", + ) + return converted.reshape(self.shape) + return self._box_values(self.asi8.ravel()).reshape(self.shape) elif isinstance(dtype, ExtensionDtype): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index da69e70b89072..72cc28c1dd66d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -528,6 +528,15 @@ def is_categorical_dtype(arr_or_dtype) -> bool: return CategoricalDtype.is_dtype(arr_or_dtype) +def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: + """ + Faster alternative to is_string_dtype, assumes we have a np.dtype object. + """ + # error: Non-overlapping equality check (left operand type: "dtype[Any]", + # right operand type: "Type[object]") + return dtype == object or dtype.kind in "SU" # type: ignore[comparison-overlap] + + def is_string_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the string dtype. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d2733cddf8cee..656441b6a5136 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -33,7 +33,7 @@ is_integer_dtype, is_object_dtype, is_scalar, - is_string_dtype, + is_string_or_object_np_dtype, needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ( @@ -242,7 +242,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False): result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na) else: result = values.isna() - elif is_string_dtype(dtype): + elif is_string_or_object_np_dtype(values.dtype): result = _isna_string_dtype(values, inf_as_na=inf_as_na) elif needs_i8_conversion(dtype): # this is the NaT pattern @@ -376,7 +376,10 @@ def isna_compat(arr, fill_value=np.nan) -> bool: def array_equivalent( - left, right, strict_nan: bool = False, dtype_equal: bool = False + left, + right, + strict_nan: bool = False, + dtype_equal: bool = False, ) -> bool: """ True if two arrays, left and right, have equal non-NaN elements, and NaNs @@ -421,13 +424,13 @@ def array_equivalent( if dtype_equal: # fastpath when we require that the dtypes match (Block.equals) - if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype): + if left.dtype.kind in ["f", "c"]: return _array_equivalent_float(left, right) elif is_datetimelike_v_numeric(left.dtype, right.dtype): return False elif needs_i8_conversion(left.dtype): return _array_equivalent_datetimelike(left, right) - elif is_string_dtype(left.dtype): + elif is_string_or_object_np_dtype(left.dtype): # TODO: fastpath for pandas' StringDtype return _array_equivalent_object(left, right, strict_nan) else: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 14f6557fbc31e..be5cb81506efd 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -154,6 +154,7 @@ def get_is_dtype_funcs(): """ fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] + fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj return [getattr(com, fname) for fname in fnames] diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 90bda69eaf139..3a754b5aaeba0 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -20,14 +20,12 @@ def random_text(nobs=100): - df = [] - for i in range(nobs): - idx = np.random.randint(len(letters), size=2) - idx.sort() + # Construct a DataFrame where each row is a random slice from 'letters' + idxs = np.random.randint(len(letters), size=(nobs, 2)) + idxs.sort(axis=1) + strings = [letters[x[0] : x[1]] for x in idxs] - df.append([letters[idx[0] : idx[1]]]) - - return DataFrame(df, columns=["letters"]) + return DataFrame(strings, columns=["letters"]) class TestCaching: