Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/REF: TimedeltaIndex.__new__ #23539

Merged
merged 19 commits into from
Nov 11, 2018
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
afc2d30
simplify+fix+test TimedeltaIndex constructor
jbrockmendel Nov 7, 2018
e4b06ca
tests for datetime64 data being invalid, floats being valid iff non-l…
jbrockmendel Nov 7, 2018
231a5c1
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 7, 2018
1ff432b
comments and whitespace
jbrockmendel Nov 7, 2018
645e99c
GH references
jbrockmendel Nov 7, 2018
9c89746
deprecate instead of raising for datetime64 dtypes
jbrockmendel Nov 7, 2018
ef3f277
implement sequence_to_td64ns, deprecate datetime64 data, add and test…
jbrockmendel Nov 8, 2018
b20eda9
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 8, 2018
3f76c02
catch warnings
jbrockmendel Nov 8, 2018
9d79205
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 8, 2018
d73bee6
revert float changes, and tests
jbrockmendel Nov 8, 2018
f39b806
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 10, 2018
ccc7fcf
change ValueError-->TypeError
jbrockmendel Nov 10, 2018
6fda27e
double quotes
jbrockmendel Nov 10, 2018
c63796a
test that no copy is made with int64 data
jbrockmendel Nov 10, 2018
e9b5da6
update tests to TypeError
jbrockmendel Nov 10, 2018
b43e936
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 11, 2018
da6b286
dtype=object instead of 'O'
jbrockmendel Nov 11, 2018
898444f
use pytest.raises instead of tm.assert_raises_regex
jbrockmendel Nov 11, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,7 @@ Deprecations
- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
- Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
jreback marked this conversation as resolved.
Show resolved Hide resolved

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):

result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

# NB: Among other things not yet ported from the DatetimeIndex
# constructor, this does not call _deepcopy_if_needed
Expand Down
179 changes: 172 additions & 7 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import warnings

import numpy as np

from pandas._libs import tslibs
from pandas._libs.tslibs import Timedelta, Timestamp, NaT
from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64, parse_timedelta_unit)

from pandas import compat

from pandas.core.dtypes.common import (
_TD_DTYPE, is_list_like)
from pandas.core.dtypes.generic import ABCSeries
_TD_DTYPE,
is_object_dtype,
is_string_dtype,
is_float_dtype,
is_integer_dtype,
is_timedelta64_dtype,
is_datetime64_dtype,
is_list_like,
ensure_int64)
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
from pandas.core.dtypes.missing import isna

import pandas.core.common as com
Expand Down Expand Up @@ -137,9 +147,7 @@ def __new__(cls, values, freq=None):

result = cls._simple_new(values, freq=freq)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)

return result

Expand Down Expand Up @@ -395,6 +403,163 @@ def f(x):
# ---------------------------------------------------------------------
# Constructor Helpers

def sequence_to_td64ns(data, copy=False, unit='ns', errors='raise'):
    """
    Convert a list-like of timedelta-like data to a timedelta64[ns] ndarray.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
        Whether the returned array must not share memory with `data`.
        Skipped when the conversion itself already produced a new array.
    unit : str, default "ns"
        Passed through `parse_timedelta_unit`. Integer and float data are
        treated as multiples of this unit.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        Forwarded to `objects_to_td64ns` for object- or string-dtyped data.

    Returns
    -------
    converted : ndarray[timedelta64[ns]]
    inferred_freq : Tick or None
        The ``freq`` of `data` if it is a TimedeltaIndex/TimedeltaArrayMixin,
        otherwise None.

    Raises
    ------
    ValueError : data cannot be converted to timedelta64[ns]
    TypeError : the dtype of data is not one of the supported dtypes

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting `errors="ignore"` will not cause
    errors to be ignored here; they are caught and subsequently ignored at a
    higher level.
    """
    inferred_freq = None
    unit = parse_timedelta_unit(unit)

    # Unwrap whatever we have into a np.ndarray
    if not hasattr(data, 'dtype'):
        # e.g. list, tuple
        if np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        # np.asarray rather than np.array(..., copy=False): under NumPy 2 the
        # latter raises when a copy is unavoidable, as it is for a list
        data = np.asarray(data)
    elif isinstance(data, ABCSeries):
        data = data._values
    elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
        inferred_freq = data.freq
        data = data._data

    # Convert whatever we have into timedelta64[ns] dtype
    if is_object_dtype(data) or is_string_dtype(data):
        # no need to make a copy, need to convert if string-dtyped
        data = objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(data):
        # treat as multiples of the given unit
        data, copy_made = ints_to_td64ns(data, unit=unit)
        copy = copy and not copy_made

    elif is_float_dtype(data):
        # treat as multiples of the given unit.  If after converting to nanos,
        # there are fractional components left, these are truncated
        # (i.e. NOT rounded)
        mask = np.isnan(data)
        coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
        data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
        data[mask] = iNaT
        # the arithmetic above already produced a new array
        copy = False

    elif is_timedelta64_dtype(data):
        if data.dtype != _TD_DTYPE:
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
            # astype made a copy, so we can avoid re-copying below
            copy = False

    elif is_datetime64_dtype(data):
        # GH#23539
        warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
                      "deprecated, will raise a TypeError in a future "
                      "version",
                      FutureWarning, stacklevel=3)
        data = ensure_int64(data).view(_TD_DTYPE)

    else:
        raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
                        .format(dtype=data.dtype))

    # Honor the (possibly downgraded) copy request
    data = np.array(data, copy=True) if copy else np.asarray(data)
    assert data.dtype == 'm8[ns]', data
    return data, inferred_freq


def ints_to_td64ns(data, unit="ns"):
    """
    Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
    the integers as multiples of the given timedelta unit.

    Parameters
    ----------
    data : np.ndarray with integer-dtype
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of. None is
        treated as "ns".

    Returns
    -------
    converted : ndarray[timedelta64[ns]]
    copy_made : bool
        Whether a copy was made while converting, so that the caller can
        avoid copying a second time.
    """
    copy_made = False
    unit = unit if unit is not None else "ns"

    if data.dtype != np.int64:
        # converting to int64 makes a copy, so we can avoid
        # re-copying later
        data = data.astype(np.int64)
        copy_made = True

    if unit != "ns":
        # reinterpret the int64 values as counts of `unit` ...
        dtype_str = "timedelta64[{unit}]".format(unit=unit)
        data = data.view(dtype_str)

        # ... then rescale them to nanoseconds
        # TODO: watch out for overflows when converting from lower-resolution
        data = data.astype("timedelta64[ns]")
        # the astype conversion makes a copy, so we can avoid re-copying later
        copy_made = True

    else:
        # already nanoseconds: a view is enough
        data = data.view("timedelta64[ns]")

    return data, copy_made


def objects_to_td64ns(data, unit="ns", errors="raise"):
    """
    Convert an object-dtyped or string-dtyped array into a
    timedelta64[ns]-dtyped array.

    Parameters
    ----------
    data : ndarray or Index
    unit : str, default "ns"
        Forwarded to `array_to_timedelta64`.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        Forwarded to `array_to_timedelta64`.

    Returns
    -------
    ndarray[timedelta64[ns]]

    Raises
    ------
    ValueError : data cannot be converted to timedelta64[ns]

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting `errors="ignore"` will not cause
    errors to be ignored here; they are caught and subsequently ignored at a
    higher level.
    """
    # coerce Index to np.ndarray, converting string-dtype if necessary.
    # np.asarray rather than np.array(..., copy=False): under NumPy 2 the
    # latter raises whenever the object-dtype conversion requires a copy.
    values = np.asarray(data, dtype=np.object_)

    result = array_to_timedelta64(values,
                                  unit=unit, errors=errors)
    return result.view('timedelta64[ns]')


def _generate_regular_range(start, end, periods, offset):
stride = offset.nanos
if periods is None:
Expand Down
31 changes: 16 additions & 15 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,21 @@ def __new__(cls, data=None,
dayfirst=False, yearfirst=False, dtype=None,
copy=False, name=None, verify_integrity=True):

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if is_scalar(data):
raise ValueError("{cls}() must be called with a "
"collection of some kind, {data} was passed"
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
.format(cls=cls.__name__, data=repr(data)))

# - Cases checked above all return/raise before reaching here - #

# This allows to later ensure that the 'copy' parameter is honored:
if isinstance(data, Index):
ref_to_data = data._data
Expand All @@ -253,20 +268,8 @@ def __new__(cls, data=None,
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if not isinstance(data, (np.ndarray, Index, ABCSeries,
DatetimeArrayMixin)):
if is_scalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
% repr(data))
# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)
Expand Down Expand Up @@ -328,9 +331,7 @@ def __new__(cls, data=None,
cls._validate_frequency(subarr, freq, ambiguous=ambiguous)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr._deepcopy_if_needed(ref_to_data, copy)

Expand Down
59 changes: 25 additions & 34 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.timedeltas import (
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8)
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8,
sequence_to_td64ns)
from pandas.core.arrays import datetimelike as dtl

from pandas.core.indexes.base import Index
Expand All @@ -33,10 +34,9 @@
TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op,
wrap_array_method, wrap_field_accessor)
from pandas.core.tools.timedeltas import (
to_timedelta, _coerce_scalar_to_timedelta_type)
_coerce_scalar_to_timedelta_type)
from pandas._libs import (lib, index as libindex,
join as libjoin, Timedelta, NaT)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64


class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin,
Expand Down Expand Up @@ -139,12 +139,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
periods=None, closed=None, dtype=None, copy=False,
name=None, verify_integrity=True):

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
return data._shallow_copy()

freq, freq_infer = dtl.maybe_infer_freq(freq)

if data is None:
Expand All @@ -154,32 +148,31 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
result.name = name
return result

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)

if is_scalar(data):
raise ValueError('TimedeltaIndex() must be called with a '
raise ValueError('{cls}() must be called with a '
'collection of some kind, {data} was passed'
.format(data=repr(data)))
jreback marked this conversation as resolved.
Show resolved Hide resolved

# convert if not already
if getattr(data, 'dtype', None) != _TD_DTYPE:
data = to_timedelta(data, unit=unit, box=False)
elif copy:
data = np.array(data, copy=True)

data = np.array(data, copy=False)
if data.dtype == np.object_:
data = array_to_timedelta64(data)
if data.dtype != _TD_DTYPE:
if is_timedelta64_dtype(data):
# non-nano unit
# TODO: watch out for overflows
data = data.astype(_TD_DTYPE)
.format(cls=cls.__name__, data=repr(data)))

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
data = ensure_int64(data).view(_TD_DTYPE)
return data._shallow_copy()

assert data.dtype == 'm8[ns]', data.dtype
# - Cases checked above all return/raise before reaching here - #

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
if inferred_freq is not None:
if freq is not None and freq != inferred_freq:
raise ValueError('Inferred frequency {inferred} from passed '
'values does not conform to passed frequency '
'{passed}'
.format(inferred=inferred_freq,
passed=freq.freqstr))
elif freq_infer:
freq = inferred_freq
freq_infer = False
verify_integrity = False

subarr = cls._simple_new(data, name=name, freq=freq)
# check that we are matching freqs
Expand All @@ -188,9 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
cls._validate_frequency(subarr, freq)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr

Expand Down
Loading