Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into deprecate-nonkeyword-args-clip
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed May 19, 2021
2 parents 37faa10 + bda839c commit e14c5dd
Show file tree
Hide file tree
Showing 20 changed files with 346 additions and 388 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ Other enhancements
- Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` and that consists of NumPy scalars will now result in a dtype whose precision is the maximum among the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
-
- :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)

.. ---------------------------------------------------------------------------
Expand Down
19 changes: 14 additions & 5 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...

def is_interval_array(values: np.ndarray) -> bool: ...
def is_period_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
Expand All @@ -67,50 +66,60 @@ def map_infer(
@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_period: Literal[False] = ...,
convert_to_nullable_integer: Literal[False] = ...,
) -> np.ndarray: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = False,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[False] = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
convert_period: Literal[True] = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

Expand Down
77 changes: 54 additions & 23 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,7 @@ cdef class Seen:
bint coerce_numeric # coerce data to numeric
bint timedelta_ # seen_timedelta
bint datetimetz_ # seen_datetimetz
bint period_ # seen_period

def __cinit__(self, bint coerce_numeric=False):
"""
Expand All @@ -1210,6 +1211,7 @@ cdef class Seen:
self.datetime_ = False
self.timedelta_ = False
self.datetimetz_ = False
self.period_ = False
self.coerce_numeric = coerce_numeric

cdef inline bint check_uint64_conflict(self) except -1:
Expand Down Expand Up @@ -1996,18 +1998,35 @@ cpdef bint is_time_array(ndarray values, bint skipna=False):
return validator.validate(values)


cdef class PeriodValidator(TemporalValidator):
cdef inline bint is_value_typed(self, object value) except -1:
return is_period_object(value)
cdef bint is_period_array(ndarray[object] values):
"""
Is this an ndarray of Period objects (or NaT) with a single `freq`?
"""
cdef:
Py_ssize_t i, n = len(values)
int dtype_code = -10000 # i.e. c_FreqGroup.FR_UND
object val

cdef inline bint is_valid_null(self, object value) except -1:
return checknull_with_nat(value)
if len(values) == 0:
return False

for val in values:
if is_period_object(val):
if dtype_code == -10000:
dtype_code = val._dtype._dtype_code
elif dtype_code != val._dtype._dtype_code:
# mismatched freqs
return False
elif checknull_with_nat(val):
pass
else:
# Not a Period or NaT-like
return False

cpdef bint is_period_array(ndarray values):
cdef:
PeriodValidator validator = PeriodValidator(len(values), skipna=True)
return validator.validate(values)
if dtype_code == -10000:
# we saw all-NaTs, no actual Periods
return False
return True


cdef class IntervalValidator(Validator):
Expand Down Expand Up @@ -2249,9 +2268,13 @@ def maybe_convert_numeric(

@cython.boundscheck(False)
@cython.wraparound(False)
def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
bint safe=False, bint convert_datetime=False,
def maybe_convert_objects(ndarray[object] objects,
*,
bint try_float=False,
bint safe=False,
bint convert_datetime=False,
bint convert_timedelta=False,
bint convert_period=False,
bint convert_to_nullable_integer=False) -> "ArrayLike":
"""
Type inference function-- convert object array to proper dtype
Expand All @@ -2272,6 +2295,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
convert_timedelta : bool, default False
If an array-like object contains only timedelta values or NaT is
encountered, whether to convert and return an array of m8[ns] dtype.
convert_period : bool, default False
If an array-like object contains only (homogeneous-freq) Period values
or NaT, whether to convert and return a PeriodArray.
convert_to_nullable_integer : bool, default False
If an array-like object contains only integer values (and NaN) is
encountered, whether to convert and return an IntegerArray.
Expand All @@ -2292,7 +2318,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
int64_t[:] itimedeltas
Seen seen = Seen()
object val
float64_t fval, fnan
float64_t fval, fnan = np.nan

n = len(objects)

Expand All @@ -2311,8 +2337,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
timedeltas = np.empty(n, dtype='m8[ns]')
itimedeltas = timedeltas.view(np.int64)

fnan = np.nan

for i in range(n):
val = objects[i]
if itemsize_max != -1:
Expand All @@ -2330,7 +2354,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
idatetimes[i] = NPY_NAT
if convert_timedelta:
itimedeltas[i] = NPY_NAT
if not (convert_datetime or convert_timedelta):
if not (convert_datetime or convert_timedelta or convert_period):
seen.object_ = True
break
elif val is np.nan:
Expand All @@ -2343,14 +2367,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
elif util.is_float_object(val):
floats[i] = complexes[i] = val
seen.float_ = True
elif util.is_datetime64_object(val):
if convert_datetime:
idatetimes[i] = convert_to_tsobject(
val, None, None, 0, 0).value
seen.datetime_ = True
else:
seen.object_ = True
break
elif is_timedelta(val):
if convert_timedelta:
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
Expand Down Expand Up @@ -2396,6 +2412,13 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
else:
seen.object_ = True
break
elif is_period_object(val):
if convert_period:
seen.period_ = True
break
else:
seen.object_ = True
break
elif try_float and not isinstance(val, str):
# this will convert Decimal objects
try:
Expand All @@ -2419,6 +2442,14 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
return dti._data
seen.object_ = True

if seen.period_:
if is_period_array(objects):
from pandas import PeriodIndex
pi = PeriodIndex(objects)

# unbox to PeriodArray
return pi._data

if not seen.object_:
result = None
if not safe:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func)
return lib.map_infer(values, self._box_func, convert=False)

def __iter__(self):
if self.ndim > 1:
Expand Down
14 changes: 9 additions & 5 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,17 +682,21 @@ def _try_cast(
subarr = construct_1d_object_array_from_listlike(arr)
return subarr

if dtype is None and isinstance(arr, list):
# filter out cases that we _dont_ want to go through maybe_cast_to_datetime
varr = np.array(arr, copy=False)
if varr.dtype != object or varr.size == 0:
return varr
arr = varr

try:
# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
if is_integer_dtype(dtype):
# this will raise if we have e.g. floats

# error: Argument 2 to "maybe_cast_to_integer_array" has incompatible type
# "Union[dtype, ExtensionDtype, None]"; expected "Union[ExtensionDtype, str,
# dtype, Type[str], Type[float], Type[int], Type[complex], Type[bool],
# Type[object]]"
maybe_cast_to_integer_array(arr, dtype) # type: ignore[arg-type]
dtype = cast(np.dtype, dtype)
maybe_cast_to_integer_array(arr, dtype)
subarr = arr
else:
subarr = maybe_cast_to_datetime(arr, dtype)
Expand Down
37 changes: 23 additions & 14 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,7 @@ def soft_convert_objects(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
period: bool = True,
copy: bool = True,
) -> ArrayLike:
"""
Expand All @@ -1327,6 +1328,7 @@ def soft_convert_objects(
datetime : bool, default True
numeric: bool, default True
timedelta : bool, default True
period : bool, default True
copy : bool, default True
Returns
Expand All @@ -1348,7 +1350,10 @@ def soft_convert_objects(
# bound of nanosecond-resolution 64-bit integers.
try:
converted = lib.maybe_convert_objects(
values, convert_datetime=datetime, convert_timedelta=timedelta
values,
convert_datetime=datetime,
convert_timedelta=timedelta,
convert_period=period,
)
except (OutOfBoundsDatetime, ValueError):
return values
Expand Down Expand Up @@ -1475,7 +1480,9 @@ def maybe_castable(dtype: np.dtype) -> bool:
return dtype.name not in POSSIBLY_CAST_DTYPES


def maybe_infer_to_datetimelike(value: np.ndarray | list):
def maybe_infer_to_datetimelike(
value: np.ndarray,
) -> np.ndarray | DatetimeArray | TimedeltaArray:
"""
we might have a array (or single object) that is datetime like,
and no dtype is passed don't change the value unless we find a
Expand All @@ -1486,18 +1493,19 @@ def maybe_infer_to_datetimelike(value: np.ndarray | list):
Parameters
----------
value : np.ndarray or list
value : np.ndarray[object]
Returns
-------
np.ndarray, DatetimeArray, or TimedeltaArray
"""
if not isinstance(value, (np.ndarray, list)):
if not isinstance(value, np.ndarray) or value.dtype != object:
# Caller is responsible for passing only ndarray[object]
raise TypeError(type(value)) # pragma: no cover

v = np.array(value, copy=False)

# we only care about object dtypes
if not is_object_dtype(v.dtype):
return value

shape = v.shape
if v.ndim != 1:
v = v.ravel()
Expand Down Expand Up @@ -1575,6 +1583,8 @@ def maybe_cast_to_datetime(
"""
try to cast the array/value to a datetimelike dtype, converting float
nan to iNaT
We allow a list *only* when dtype is not None.
"""
from pandas.core.arrays.datetimes import sequence_to_datetimes
from pandas.core.arrays.timedeltas import sequence_to_td64ns
Expand Down Expand Up @@ -1666,11 +1676,10 @@ def maybe_cast_to_datetime(
value = maybe_infer_to_datetimelike(value)

elif isinstance(value, list):
# only do this if we have an array and the dtype of the array is not
# setup already we are not an integer/object, so don't bother with this
# conversion

value = maybe_infer_to_datetimelike(value)
# we only get here with dtype=None, which we do not allow
raise ValueError(
"maybe_cast_to_datetime allows a list *only* if dtype is not None"
)

return value

Expand Down Expand Up @@ -2017,7 +2026,7 @@ def maybe_cast_to_integer_array(
if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
raise OverflowError("Trying to coerce negative values to unsigned integers")

if is_float_dtype(arr) or is_object_dtype(arr):
if is_float_dtype(arr.dtype) or is_object_dtype(arr.dtype):
raise ValueError("Trying to coerce float values to integers")


Expand Down

0 comments on commit e14c5dd

Please sign in to comment.