Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement astype portion of #24024 #24405

Merged
merged 20 commits into from
Dec 28, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg

from pandas.core.dtypes.common import (
is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
is_period_dtype, is_timedelta64_dtype, needs_i8_conversion)
is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype,
is_list_like, is_object_dtype, is_offsetlike, is_period_dtype,
is_string_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -403,9 +404,49 @@ def __getitem__(self, key):
return self._simple_new(result, **attribs)

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``, covering the targets shared by datetime-like arrays.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; normalized with ``pandas_dtype`` before dispatch.
    copy : bool, default True
        Only consulted for integer targets, where the ``asi8`` values are
        copied before being returned.

    Returns
    -------
    object
        Depends on the target: the result of ``self._box_values`` for
        object, the result of ``self._format_native_types`` for string,
        the ``asi8`` values for any integer dtype, a ``Categorical`` for
        categorical, and ``np.asarray(self, dtype=dtype)`` otherwise.

    Raises
    ------
    TypeError
        For float targets, and for datetime64/timedelta64 targets that
        are not equal to ``self.dtype``.
    """
    # Some notes on cases we don't have to handle here in the base class:
    #   1. PeriodArray.astype handles period -> period
    #   2. DatetimeArray.astype handles conversion between tz.
    #   3. DatetimeArray.astype handles datetime -> period
    from pandas import Categorical
    dtype = pandas_dtype(dtype)

    if is_object_dtype(dtype):
        return self._box_values(self.asi8)
    elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
        return self._format_native_types()
    elif is_integer_dtype(dtype):
        # we deliberately ignore int32 vs. int64 here.
        # See https://github.com/pandas-dev/pandas/issues/24381 for more.
        values = self.asi8
        if copy:
            values = values.copy()
        return values
    elif (is_datetime_or_timedelta_dtype(dtype) and
          not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        msg = 'Cannot cast {name} to dtype {dtype}'
        raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
    elif is_categorical_dtype(dtype):
        return Categorical(self, dtype=dtype)
    else:
        return np.asarray(self, dtype=dtype)

def view(self, dtype=None):
"""
New view on this array with the same data.

Parameters
----------
dtype : numpy dtype, optional

Returns
-------
ndarray
With the specified `dtype`.
"""
return self._data.view(dtype=dtype)

# ------------------------------------------------------------------
# ExtensionArray Interface
Expand Down
34 changes: 32 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype,
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal,
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -469,6 +470,35 @@ def __iter__(self):
for v in converted:
yield v

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``, handling the datetime-specific targets here.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; normalized with ``pandas_dtype`` before dispatch.
    copy : bool, default True
        When ``dtype`` equals a tz-aware ``self.dtype``, selects between
        ``self.copy()`` and ``self``. Otherwise it is only forwarded to
        the ``DatetimeLikeArrayMixin.astype`` fallback.

    Returns
    -------
    object
        Depends on the branch taken: the result of ``tz_localize`` or
        ``tz_convert`` (its ``_data`` when the target has no tz),
        ``self`` or a copy of it, the result of ``to_period``, or
        whatever the mixin fallback returns.
    """
    # We handle
    #   --> datetime
    #   --> period
    # DatetimeLikeArrayMixin Super handles the rest.
    dtype = pandas_dtype(dtype)

    if (is_datetime64_ns_dtype(dtype) and
            not is_dtype_equal(dtype, self.dtype)):
        # GH#18951: datetime64_ns dtype but not equal means different tz
        new_tz = getattr(dtype, 'tz', None)
        if getattr(self.dtype, 'tz', None) is None:
            return self.tz_localize(new_tz)
        result = self.tz_convert(new_tz)
        if new_tz is None:
            # Do we want .astype('datetime64[ns]') to be an ndarray?
            # The astype in Block._astype expects this to return an
            # ndarray, but we could maybe work around it there.
            result = result._data
        return result
    elif (is_datetime64tz_dtype(self.dtype) and
          is_dtype_equal(self.dtype, dtype)):
        if copy:
            return self.copy()
        return self
    elif is_period_dtype(dtype):
        return self.to_period(freq=dtype.freq)
    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed... it'd be nice to leave a bunch of TODO: Use super for places like this.

Actually... I think Python 2 will force us to make this change when we switch from inheritance to composition, since we won't be able to call the unbound method with a DatetimeIndex anymore (I think).


# ----------------------------------------------------------------
# ExtensionArray Interface

Expand Down
43 changes: 6 additions & 37 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype,
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_string_dtype, pandas_dtype)
_TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
from pandas.core.dtypes.missing import isna, notna
Expand Down Expand Up @@ -593,42 +591,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
# ------------------------------------------------------------------

def astype(self, dtype, copy=True):
# TODO: Figure out something better here...
# We have DatetimeLikeArrayMixin ->
# super(...), which ends up being... DatetimeIndexOpsMixin?
# this is complicated.
# need a pandas_astype(arr, dtype).
from pandas import Categorical

# We handle Period[T] -> Period[U]
# Our parent handles everything else.
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return np.asarray(self, dtype=object)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
elif is_integer_dtype(dtype):
values = self._data

if values.dtype != dtype:
# int32 vs. int64
values = values.astype(dtype)

elif copy:
values = values.copy()

return values
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
elif is_categorical_dtype(dtype):
return Categorical(self, dtype=dtype)
elif is_period_dtype(dtype):
if is_period_dtype(dtype):
return self.asfreq(dtype.freq)
else:
return np.asarray(self, dtype=dtype)
return super(PeriodArray, self).astype(dtype, copy=copy)

@property
def flags(self):
Expand Down
25 changes: 24 additions & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
Expand Down Expand Up @@ -231,6 +232,28 @@ def _validate_fill_value(self, fill_value):
"Got '{got}'.".format(got=fill_value))
return fill_value

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``, handling timedelta64 targets here.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; normalized with ``pandas_dtype`` before dispatch.
    copy : bool, default True
        Forwarded to ``self._data.astype`` for non-nanosecond timedelta64
        targets, selects between ``self.copy()`` and ``self`` for
        timedelta64[ns], and is forwarded to the mixin fallback otherwise.

    Returns
    -------
    object
        For a non-nanosecond timedelta64 target: the converted values
        cast to ``'i8'``, or the result of ``self._maybe_mask_results``
        when ``self._hasnans`` is true. For timedelta64[ns]: ``self`` or
        a copy of it. Otherwise whatever
        ``DatetimeLikeArrayMixin.astype`` returns.
    """
    # We handle
    #   --> timedelta64[ns]
    #   --> timedelta64
    # DatetimeLikeArrayMixin super call handles other cases
    dtype = pandas_dtype(dtype)

    if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
        # essentially this is division
        result = self._data.astype(dtype, copy=copy)
        if self._hasnans:
            values = self._maybe_mask_results(result,
                                              fill_value=None,
                                              convert='float64')
            return values
        return result.astype('i8')
    elif is_timedelta64_ns_dtype(dtype):
        if copy:
            return self.copy()
        return self
    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)

# ----------------------------------------------------------------
# Rendering Methods

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype,
is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
from .generic import (
ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries)
ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass,
ABCMultiIndex, ABCSeries, ABCTimedeltaArray)
from .inference import is_list_like

isposinf_scalar = libmissing.isposinf_scalar
Expand Down Expand Up @@ -108,7 +109,8 @@ def _isna_new(obj):
elif isinstance(obj, ABCMultiIndex):
raise NotImplementedError("isna is not defined for MultiIndex")
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
ABCExtensionArray)):
ABCExtensionArray,
ABCDatetimeArray, ABCTimedeltaArray)):
return _isna_ndarraylike(obj)
elif isinstance(obj, ABCGeneric):
return obj._constructor(obj._data.isna(func=isna))
Expand Down
40 changes: 19 additions & 21 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
ensure_int64, is_bool_dtype, is_categorical_dtype,
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_scalar, is_string_dtype)
ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer,
is_integer_dtype, is_list_like, is_period_dtype, is_scalar, pandas_dtype)
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries

from pandas.core import algorithms, ops
Expand All @@ -40,6 +38,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
# override DatetimeLikeArrayMixin method
copy = Index.copy
unique = Index.unique
view = Index.view

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
Expand Down Expand Up @@ -527,24 +526,23 @@ def _maybe_box_as_values(self, values, **attribs):
# - sort_values
return values

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
elif is_integer_dtype(dtype):
# TODO(DatetimeArray): use self._values here.
# Can't use ._values currently, because that returns a
# DatetimeIndex, which throws us in an infinite loop.
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
if is_dtype_equal(self.dtype, dtype) and copy is False:
# Ensure that self.astype(self.dtype) is self
return self

new_values = self._eadata.astype(dtype, copy=copy)

# we pass `dtype` to the Index constructor, for cases like
# dtype=object to disable inference. But, DTA.astype ignores
# integer sign and size, so we need to detect that case and
# just choose int64.
dtype = pandas_dtype(dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure this is necessary, as it already coerces properly; doing it here is very weird.

In [2]: pd.Index([1,2,3],dtype='int32')
Out[2]: Int64Index([1, 2, 3], dtype='int64')

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you address this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in the last 8 hours, no. May need to wait on Tom to clarify, since all of this was taken from 24024.

(the fact that these things get closer attention in smaller doses reassures me that splitting is a good idea, even if it does cause rebasing hassles in the parent PR)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, sounds ok then.

Copy link
Contributor

@TomAugspurger TomAugspurger Dec 26, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What’s the question here? Why do we do the integer check? Astype ignores the sign and size. I suppose the index constructor just ignores the size?

Copy link
Contributor

@TomAugspurger TomAugspurger Dec 28, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if you saw the part about uint vs. int.

So I'm just going to decide that the expected behavior for {Datetime,Timedelta,Period}Index.astype("uint{8,16,32,64}") is to return a UInt64Index. That means we can remove this check and just pass new_values through with the original dtype.

@jbrockmendel do you want to do that here? It's not at all tested, and will need a release note.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok by me (of course it's weird to do this, but hey)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you want to do that here? It's not at all tested, and will need a release note.

I tried this, pretty much just deleting ten lines here, and ended up getting two failures in pandas/tests/indexes/interval/test_astype.py. I can fix this by changing dtype=dtype to dtype=new_values.dtype in the call that wraps self._eadata.astype. Is that what you have in mind?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Attempt #2 at this also failed. Any other ideas?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I missed this note last night. I implemented this in 3fca810 if you could take a look.

if is_integer_dtype(dtype):
dtype = np.dtype("int64")

return Index(new_values, dtype=dtype, name=self.name)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

@Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
def _time_shift(self, periods, freq=None):
Expand Down
27 changes: 9 additions & 18 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.common import (
_NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, is_dtype_equal, is_float,
is_integer, is_list_like, is_period_dtype, is_scalar, is_string_like,
pandas_dtype)
_NS_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar,
is_string_like)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.datetimes import (
DatetimeArrayMixin as DatetimeArray, _to_m8)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimelike import (
DatetimeIndexOpsMixin, wrap_array_method, wrap_field_accessor)
from pandas.core.indexes.numeric import Int64Index
Expand Down Expand Up @@ -603,20 +602,6 @@ def intersection(self, other):

# --------------------------------------------------------------------

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)
if (is_datetime64_ns_dtype(dtype) and
not is_dtype_equal(dtype, self.dtype)):
# GH 18951: datetime64_ns dtype but not equal means different tz
new_tz = getattr(dtype, 'tz', None)
if getattr(self.dtype, 'tz', None) is None:
return self.tz_localize(new_tz)
return self.tz_convert(new_tz)
elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
return super(DatetimeIndex, self).astype(dtype, copy=copy)

def _get_time_micros(self):
values = self.asi8
if self.tz is not None and not timezones.is_utc(self.tz):
Expand Down Expand Up @@ -1089,10 +1074,16 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
# --------------------------------------------------------------------
# Wrapping DatetimeArray

@property
def _eadata(self):
    # Wrap the index's ``_data`` in a DatetimeArray, passing along
    # this index's ``tz`` and ``freq``.
    return DatetimeArray._simple_new(self._data,
                                     tz=self.tz, freq=self.freq)

# Compat for frequency inference, see GH#23789
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
_is_unique = Index.is_unique
astype = DatetimeIndexOpsMixin.astype

_timezone = cache_readonly(DatetimeArray._timezone.fget)
is_normalized = cache_readonly(DatetimeArray.is_normalized.fget)
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
# ------------------------------------------------------------------------
# Data

@property
def _eadata(self):
    # Return ``self._data`` unchanged.
    return self._data

@property
def _ndarray_values(self):
return self._data._ndarray_values
Expand Down Expand Up @@ -539,16 +543,13 @@ def asof_locs(self, where, mask):
def astype(self, dtype, copy=True, how='start'):
dtype = pandas_dtype(dtype)

# We have a few special-cases for `dtype`.
# Failing those, we fall back to astyping the values

if is_datetime64_any_dtype(dtype):
# 'how' is index-specific, isn't part of the EA interface.
tz = getattr(dtype, 'tz', None)
return self.to_timestamp(how=how).tz_localize(tz)

jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
result = self._data.astype(dtype, copy=copy)
return Index(result, name=self.name, dtype=dtype, copy=False)
# TODO: should probably raise on `how` here, so we don't ignore it.
return super(PeriodIndex, self).astype(dtype, copy=copy)

@Substitution(klass='PeriodIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down
Loading