From f8efbeffb63d119b69d90673573f274afe88905c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 13 Nov 2018 13:04:50 -0800 Subject: [PATCH 01/21] start untangling DatetimeIndex constructor; deprecate passing of timedelta64 data --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/tslibs/conversion.pyx | 4 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/indexes/datetimes.py | 70 ++++++++++++++----- pandas/core/tools/datetimes.py | 3 + .../indexes/datetimes/test_construction.py | 8 +++ 6 files changed, 68 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d836ef3441e89..9e1924a80f8ec 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -978,6 +978,7 @@ Deprecations `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`?????`) .. _whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f88671b41a16a..319a76988683f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -858,8 +858,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - bool if True, treat all vals as DST. If False, treat them as non-DST - 'NaT' will return NaT where there are ambiguous times - nonexistent : str - If arraylike, must have the same length as vals + nonexistent : {None, "NaT", "shift", "raise"} + How to handle non-existent times when converting wall times to UTC .. versionadded:: 0.24.0 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 926228f267049..fececf43a83e3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -187,7 +187,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): _freq = None @classmethod - def _simple_new(cls, values, freq=None, tz=None, **kwargs): + def _simple_new(cls, values, freq=None, tz=None): """ we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b754b2705d034..b094784540e02 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -16,10 +16,11 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, - is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, - is_string_like, pandas_dtype) + _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, + is_datetime64tz_dtype, is_dtype_equal, is_extension_type, is_float, + is_float_dtype, is_integer, is_list_like, is_object_dtype, is_period_dtype, + is_scalar, is_string_dtype, is_string_like, is_timedelta64_dtype, + pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -252,19 +253,55 @@ def __new__(cls, data=None, # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): - # other iterable of some kind - if not isinstance(data, (list, tuple)): + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator data = list(data) - data = np.asarray(data, dtype='O') + data = np.asarray(data) + copy = False elif isinstance(data, ABCSeries): data = data._values - # data must be Index or np.ndarray here - if not (is_datetime64_dtype(data) or is_datetimetz(data) or - is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): - data = tools.to_datetime(data, dayfirst=dayfirst, - yearfirst=yearfirst) + # By this point we are assured to have either a numpy array or Index + + if is_float_dtype(data): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: Why do we treat this differently from integer dtypes? + + elif is_timedelta64_dtype(data): + warnings.warn("Passing timedelta64-dtype data to {cls} is " + "deprecated, will raise a TypeError in a future " + "version".format(cls=cls.__name__), + FutureWarning, stacklevel=2) + data = data.view(_NS_DTYPE) + + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriondIndex + # test_setops.test_join_does_not_recur fails + raise TypeError("Passing PeriodDtype data to {cls} is invalid. " + "Use `data.to_timestamp()` instead" + .format(cls=cls.__name__)) + + elif is_extension_type(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + if is_object_dtype(data) or is_string_dtype(data): + # TODO: We do not have tests specific to string-dtypes, + # also complex or categorical or other extension + copy = False + if lib.infer_dtype(data) == 'integer': + # TODO: This is an ugly special-casing; can we avoid it? + data = data.astype(np.int64) + else: + data = tools.to_datetime(data, dayfirst=dayfirst, + yearfirst=yearfirst) if isinstance(data, DatetimeArray): if tz is None: @@ -281,6 +318,7 @@ def __new__(cls, data=None, subarr = data._data if freq is None: + # TODO: Should this be the stronger condition of `freq_infer`? freq = data.freq verify_integrity = False elif issubclass(data.dtype.type, np.datetime64): @@ -319,8 +357,7 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) @classmethod - def _simple_new(cls, values, name=None, freq=None, tz=None, - dtype=None, **kwargs): + def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): """ we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor @@ -328,8 +365,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes assert isinstance(values, np.ndarray), type(values) - result = super(DatetimeIndex, cls)._simple_new(values, freq, tz, - **kwargs) + result = super(DatetimeIndex, cls)._simple_new(values, freq, tz) result.name = name result._reset_identity() return result diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0eb2ffeab28f1..d71e42f56a09f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -302,6 +302,9 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, return result except ValueError as e: + if "unless utc=True" not in str(e): + # raised by array_to_datetime + raise e try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 42a75f277faa6..1cf323ac51fb4 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -18,6 +18,14 @@ class TestDatetimeIndex(object): + def test_td64_deprecation(self): + # GH#????? + data = np.array([0], dtype='m8[ns]') + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(data) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), From 9d20bc93a2acca821ed463ac884e78566b7dd2d6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 13 Nov 2018 13:10:48 -0800 Subject: [PATCH 02/21] add GH references --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/indexes/datetimes/test_construction.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9e1924a80f8ec..6ea95eaa20246 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -978,7 +978,7 @@ Deprecations `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) -- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`?????`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) .. _whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 1cf323ac51fb4..571313229ce18 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -19,7 +19,7 @@ class TestDatetimeIndex(object): def test_td64_deprecation(self): - # GH#????? + # GH#23675 data = np.array([0], dtype='m8[ns]') with tm.assert_produces_warning(FutureWarning): result = DatetimeIndex(data) From aef3f4cfb45e7d4794b4c8b8f10e65f9b216863d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 13 Nov 2018 13:23:11 -0800 Subject: [PATCH 03/21] Fix incorrect usage of DatetimeIndex --- pandas/tests/scalar/timedelta/test_timedelta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 58064213d9b3b..a7be6b20ba3ba 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -548,7 +548,7 @@ def test_overflow(self): # mean result = (s - s.min()).mean() - expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) + expected = pd.Timedelta((pd.TimedeltaIndex((s - s.min())).asi8 / len(s) ).sum()) # the computation is converted to float so From 66ae42b82e52bbfac3802aca2a218d84d19a887b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 13 Nov 2018 17:57:59 -0800 Subject: [PATCH 04/21] dummy commit to force CI From d0e8ee33303783b929341a25aafa0c5076a90554 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 14 Nov 2018 08:24:40 -0800 Subject: [PATCH 05/21] more explicit name, test with TimedeltaIndex --- pandas/tests/indexes/datetimes/test_construction.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 571313229ce18..183e3d31a2dc0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -18,7 +18,7 @@ class TestDatetimeIndex(object): - def test_td64_deprecation(self): + def test_dti_with_timedelta64_data_deprecation(self): # GH#23675 data = np.array([0], dtype='m8[ns]') with tm.assert_produces_warning(FutureWarning): @@ -26,6 +26,11 @@ def test_td64_deprecation(self): assert result[0] == Timestamp('1970-01-01') + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), From e1f4e1701d5448e3cd6aa5e6353afa46704d8d9d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 14 Nov 2018 09:02:05 -0800 Subject: [PATCH 06/21] remove comment --- pandas/core/indexes/datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b094784540e02..21eb2e486fede 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -297,7 +297,6 @@ def __new__(cls, data=None, # also complex or categorical or other extension copy = False if lib.infer_dtype(data) == 'integer': - # TODO: This is an ugly special-casing; can we avoid it? data = data.astype(np.int64) else: data = tools.to_datetime(data, dayfirst=dayfirst, From a4c8c779cedae8cbb12e15ab5f0da8f13ade92c2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 14 Nov 2018 09:05:07 -0800 Subject: [PATCH 07/21] make exception catching less specific --- pandas/core/tools/datetimes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d71e42f56a09f..0eb2ffeab28f1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -302,9 +302,6 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, return result except ValueError as e: - if "unless utc=True" not in str(e): - # raised by array_to_datetime - raise e try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) From 7464d15151d76b445ec6c80e43beb9acf851f821 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 15 Nov 2018 08:25:24 -0800 Subject: [PATCH 08/21] checks in both to_datetime and DatetimeIndex.__new__ --- pandas/core/arrays/datetimes.py | 145 +++++++++++++++++++++++++++++++ pandas/core/indexes/datetimes.py | 69 +++++---------- pandas/core/tools/datetimes.py | 6 ++ 3 files changed, 173 insertions(+), 47 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 86f758a03168f..1619f8bd6bcf1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -18,6 +18,10 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, + is_float_dtype, + is_timedelta64_dtype, + is_period_dtype, + is_extension_type, is_object_dtype, is_int64_dtype, is_datetime64tz_dtype, @@ -1408,6 +1412,147 @@ def to_julian_date(self): DatetimeArrayMixin._add_datetimelike_methods() +# ------------------------------------------------------------------- +# Constructor Helpers + +def maybe_infer_tz(tz, inferred_tz): + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError('data is already tz-aware {inferred_tz}, unable to ' + 'set specified tz: {tz}' + .format(inferred_tz=inferred_tz, tz=tz)) + return tz + + +def dtype_conversions(data, copy, has_format=False): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. + + Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + has_format : bool, default False + Indicates if the data will be passed to a parsing function with a + `format` kwarg. + + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + + Raises + ------ + TypeError : PeriodDType data is passed + """ + + if is_float_dtype(data) and not has_format: + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: Why do we treat this differently from integer dtypes? + + elif is_timedelta64_dtype(data): + warnings.warn("Passing timedelta64-dtype data to {cls} is " + "deprecated, will raise a TypeError in a future " + "version".format(cls="TimedeltaIndex/Array"), + FutureWarning, stacklevel=3) + data = data.view(_NS_DTYPE) + + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriondIndex + # test_setops.test_join_does_not_recur fails + raise TypeError("Passing PeriodDtype data to {cls} is invalid. " + "Use `data.to_timestamp()` instead" + .format(cls="TimedeltaIndex/Array")) + + elif is_extension_type(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + +def _from_objects(data, dayfirst, yearfirst): + """ + Convert data to array of timestamps. + + Parameters + ---------- + data : np.ndarray[object] + dayfirst : bool + yearfirst : bool + + Raises + ------ + ValueError : if data cannot be converted to datetimes + """ + errors = "raise" + tz = None + require_iso8601 = False + + # if str-dtype, convert + data = np.array(data, copy=False, dtype=np.object_) + + try: + result, tz_parsed = tslib.array_to_datetime( + data, + errors=errors, + utc=tz == 'utc', + dayfirst=dayfirst, + yearfirst=yearfirst, + require_iso8601=require_iso8601 + ) + except ValueError as e: + try: + values, tz = conversion.datetime_to_datetime64(data) + # If tzaware, these values represent unix timestamps, so we + # return them as i8 to distinguish from wall times + return values.view('i8'), tz + except (ValueError, TypeError): + raise e + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + # Return i8 values to denote unix timestamps + return result.view('i8'), tz_parsed + elif is_datetime64_dtype(result): + # returning M8[ns] denotes wall-times; since tz is None + # the distinction is a thin one + return result, tz + elif is_object_dtype(result): + # e.g. an Index of datetime objects; raise and let the + # calling function salvage the result if desired + raise ValueError(result) + else: # pragma: no cover + raise TypeError(result) + + def _generate_regular_range(cls, start, end, periods, freq): """ Generate a range of dates with the spans between dates described by diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a05d7a4ca69c2..bbe7113cbcd35 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -27,7 +27,8 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8) + DatetimeArrayMixin as DatetimeArray, _from_objects, _to_m8, + dtype_conversions, maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs @@ -265,32 +266,7 @@ def __new__(cls, data=None, # By this point we are assured to have either a numpy array or Index - if is_float_dtype(data): - # Note: we must cast to datetime64[ns] here in order to treat these - # as wall-times instead of UTC timestamps. - data = data.astype(_NS_DTYPE) - copy = False - # TODO: Why do we treat this differently from integer dtypes? - - elif is_timedelta64_dtype(data): - warnings.warn("Passing timedelta64-dtype data to {cls} is " - "deprecated, will raise a TypeError in a future " - "version".format(cls=cls.__name__), - FutureWarning, stacklevel=2) - data = data.view(_NS_DTYPE) - - elif is_period_dtype(data): - # Note: without explicitly raising here, PeriondIndex - # test_setops.test_join_does_not_recur fails - raise TypeError("Passing PeriodDtype data to {cls} is invalid. " - "Use `data.to_timestamp()` instead" - .format(cls=cls.__name__)) - - elif is_extension_type(data) and not is_datetime64tz_dtype(data): - # Includes categorical - # TODO: We have no tests for these - data = np.array(data, dtype=np.object_) - copy = False + data, copy = dtype_conversions(data, copy) if is_object_dtype(data) or is_string_dtype(data): # TODO: We do not have tests specific to string-dtypes, @@ -299,36 +275,35 @@ def __new__(cls, data=None, if lib.infer_dtype(data) == 'integer': data = data.astype(np.int64) else: - data = tools.to_datetime(data, dayfirst=dayfirst, - yearfirst=yearfirst) - - if isinstance(data, DatetimeArray): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if not timezones.tz_compare(tz, data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) - + # data comes back here as either i8 to denote UTC timestamps + # or M8[ns] to denote wall times + data, inferred_tz = _from_objects(data, dayfirst=dayfirst, + yearfirst=yearfirst) + tz = maybe_infer_tz(tz, inferred_tz) + + if freq is None and hasattr(data, "freq"): #TODO: move earlier + # i.e. DatetimeArray/Index + # TODO: Should this be the stronger condition of `freq_infer`? + freq = data.freq + verify_integrity = False + + if is_datetime64tz_dtype(data): + tz = maybe_infer_tz(tz, data.tz) subarr = data._data - if freq is None: - # TODO: Should this be the stronger condition of `freq_infer`? - freq = data.freq - verify_integrity = False - elif issubclass(data.dtype.type, np.datetime64): + elif is_datetime64_dtype(data): + # DatetimeIndex or ndarray[datetime64] + data = getattr(data, "_data", data) if data.dtype != _NS_DTYPE: data = conversion.ensure_datetime64ns(data) + if tz is not None: # Convert tz-naive to UTC tz = timezones.maybe_get_tz(tz) data = conversion.tz_localize_to_utc(data.view('i8'), tz, ambiguous=ambiguous) subarr = data.view(_NS_DTYPE) + else: # must be integer dtype otherwise # assume this data are epoch timestamps diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0eb2ffeab28f1..2c615a1b8b475 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -183,6 +183,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex + from pandas.core.arrays.datetimes import dtype_conversions + if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -220,6 +222,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + arg = dtype_conversions(arg, False, has_format=format is not None)[0] + arg = ensure_object(arg) require_iso8601 = False From 3c822f149f8ccc53d34d8f6e6b7bb38f438414da Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 15 Nov 2018 08:29:31 -0800 Subject: [PATCH 09/21] name and docstring --- pandas/core/arrays/datetimes.py | 9 ++++++++- pandas/core/indexes/datetimes.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1619f8bd6bcf1..b3b244a5fc7da 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1497,7 +1497,7 @@ def dtype_conversions(data, copy, has_format=False): return data, copy -def _from_objects(data, dayfirst, yearfirst): +def _objects_to_datetime64ns(data, dayfirst, yearfirst): """ Convert data to array of timestamps. @@ -1507,6 +1507,13 @@ def _from_objects(data, dayfirst, yearfirst): dayfirst : bool yearfirst : bool + Returns + ------- + result : ndarray + np.int64 dtype if returned values represent UTC timestamps + np.datetime64[ns] if returned values represent wall times + inferred_tz : tzinfo or None + Raises ------ ValueError : if data cannot be converted to datetimes diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index bbe7113cbcd35..0b44ebd862244 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -27,7 +27,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _from_objects, _to_m8, + DatetimeArrayMixin as DatetimeArray, _objects_to_datetime64ns, _to_m8, dtype_conversions, maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com @@ -277,8 +277,8 @@ def __new__(cls, data=None, else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times - data, inferred_tz = _from_objects(data, dayfirst=dayfirst, - yearfirst=yearfirst) + data, inferred_tz = _objects_to_datetime64ns( + data, dayfirst=dayfirst, yearfirst=yearfirst) tz = maybe_infer_tz(tz, inferred_tz) if freq is None and hasattr(data, "freq"): #TODO: move earlier From ba7e5e8575337ab0b4c6269d6ec5ea70080fcbb0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 15 Nov 2018 08:31:10 -0800 Subject: [PATCH 10/21] isort and flake8 fixup --- pandas/core/indexes/datetimes.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0b44ebd862244..0e9bfc4cf6b06 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -16,11 +16,10 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_extension_type, is_float, - is_float_dtype, is_integer, is_list_like, is_object_dtype, is_period_dtype, - is_scalar, is_string_dtype, is_string_like, is_timedelta64_dtype, - pandas_dtype) + _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_float, + is_integer, is_list_like, is_object_dtype, is_period_dtype, is_scalar, + is_string_dtype, is_string_like, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -281,7 +280,7 @@ def __new__(cls, data=None, data, dayfirst=dayfirst, yearfirst=yearfirst) tz = maybe_infer_tz(tz, inferred_tz) - if freq is None and hasattr(data, "freq"): #TODO: move earlier + if freq is None and hasattr(data, "freq"): # i.e. DatetimeArray/Index # TODO: Should this be the stronger condition of `freq_infer`? freq = data.freq From 3ba9da7e8ea6941fcc3e35cc6d4fec3307a8bfd9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 15 Nov 2018 08:32:46 -0800 Subject: [PATCH 11/21] move freq check earlier --- pandas/core/indexes/datetimes.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0e9bfc4cf6b06..a9fbb54d5952f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -249,6 +249,11 @@ def __new__(cls, data=None, name = data.name freq, freq_infer = dtl.maybe_infer_freq(freq) + if freq is None and hasattr(data, "freq"): + # i.e. DatetimeArray/Index + # TODO: Should this be the stronger condition of `freq_infer`? + freq = data.freq + verify_integrity = False # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) @@ -280,12 +285,6 @@ def __new__(cls, data=None, data, dayfirst=dayfirst, yearfirst=yearfirst) tz = maybe_infer_tz(tz, inferred_tz) - if freq is None and hasattr(data, "freq"): - # i.e. DatetimeArray/Index - # TODO: Should this be the stronger condition of `freq_infer`? - freq = data.freq - verify_integrity = False - if is_datetime64tz_dtype(data): tz = maybe_infer_tz(tz, data.tz) subarr = data._data From f1d3fd8b2c15b463b815c03dc8a588bc2aa4fa07 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 16 Nov 2018 10:57:06 -0800 Subject: [PATCH 12/21] improve exc message --- pandas/core/arrays/datetimes.py | 10 ++++------ pandas/core/tools/datetimes.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b3b244a5fc7da..db8ec20711ad2 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1475,18 +1475,16 @@ def dtype_conversions(data, copy, has_format=False): # TODO: Why do we treat this differently from integer dtypes? elif is_timedelta64_dtype(data): - warnings.warn("Passing timedelta64-dtype data to {cls} is " - "deprecated, will raise a TypeError in a future " - "version".format(cls="TimedeltaIndex/Array"), + warnings.warn("Passing timedelta64-dtype data is deprecated, will " + "raise a TypeError in a future version", FutureWarning, stacklevel=3) data = data.view(_NS_DTYPE) elif is_period_dtype(data): # Note: without explicitly raising here, PeriondIndex # test_setops.test_join_does_not_recur fails - raise TypeError("Passing PeriodDtype data to {cls} is invalid. " - "Use `data.to_timestamp()` instead" - .format(cls="TimedeltaIndex/Array")) + raise TypeError("Passing PeriodDtype data is invalid. " + "Use `data.to_timestamp()` instead") elif is_extension_type(data) and not is_datetime64tz_dtype(data): # Includes categorical diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2c615a1b8b475..54bd859ea067d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -224,7 +224,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation - arg = dtype_conversions(arg, False, has_format=format is not None)[0] + arg = dtype_conversions(arg, copy=False, has_format=format is not None)[0] arg = ensure_object(arg) require_iso8601 = False From 1471a2b07fbe2482a32913f6c283a615223e7304 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 16 Nov 2018 11:00:33 -0800 Subject: [PATCH 13/21] tests for to_datetime and PeriodDtype --- .../indexes/datetimes/test_construction.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 183e3d31a2dc0..ff9bc13ef9b74 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -18,6 +18,21 @@ class TestDatetimeIndex(object): + def test_dti_with_period_data_raises(self): + data = pd.PeriodIndex(['2016Q1', '2017Q1']) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + result = DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + result = to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + result = DatetimeIndex(pd.period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + result = to_datetime(pd.period_array(data)) + def test_dti_with_timedelta64_data_deprecation(self): # GH#23675 data = np.array([0], dtype='m8[ns]') @@ -26,11 +41,21 @@ def test_dti_with_timedelta64_data_deprecation(self): assert result[0] == Timestamp('1970-01-01') + with tm.assert_produces_warning(FutureWarning): + result = to_datetime(data) + + assert result[0] == Timestamp('1970-01-01') + with tm.assert_produces_warning(FutureWarning): result = DatetimeIndex(pd.TimedeltaIndex(data)) assert result[0] == Timestamp('1970-01-01') + with tm.assert_produces_warning(FutureWarning): + result = to_datetime(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), From 11b5f6c345a616659306ae2ba6395f2118f09d53 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 16 Nov 2018 11:35:48 -0800 Subject: [PATCH 14/21] use objects_to_datetime64ns in to_datetime --- pandas/core/arrays/datetimes.py | 26 +++--- pandas/core/indexes/datetimes.py | 4 +- pandas/core/tools/datetimes.py | 81 +++++++++---------- .../indexes/datetimes/test_construction.py | 16 ++-- 4 files changed, 67 insertions(+), 60 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b17046fc50173..448dd526659ad 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1498,7 +1498,9 @@ def dtype_conversions(data, copy, has_format=False): return data, copy -def _objects_to_datetime64ns(data, dayfirst, yearfirst): +def objects_to_datetime64ns(data, dayfirst, yearfirst, + tz=None, errors="raise", + require_iso8601=False, allow_object=False): """ Convert data to array of timestamps. @@ -1507,21 +1509,24 @@ def _objects_to_datetime64ns(data, dayfirst, yearfirst): data : np.ndarray[object] dayfirst : bool yearfirst : bool + tz : {None, 'utc'} + errors : {'raise', 'ignore', 'coerce'} + allow_object : bool Returns ------- result : ndarray np.int64 dtype if returned values represent UTC timestamps np.datetime64[ns] if returned values represent wall times + object if mixed timezones inferred_tz : tzinfo or None Raises ------ ValueError : if data cannot be converted to datetimes """ - errors = "raise" - tz = None - require_iso8601 = False + assert errors in ["raise", "ignore", "coerce"] + assert tz is None or tz == "utc" # if str-dtype, convert data = np.array(data, copy=False, dtype=np.object_) @@ -1537,10 +1542,10 @@ def _objects_to_datetime64ns(data, dayfirst, yearfirst): ) except ValueError as e: try: - values, tz = conversion.datetime_to_datetime64(data) + values, tz_parsed = conversion.datetime_to_datetime64(data) # If tzaware, these values represent unix timestamps, so we # return them as i8 to distinguish from wall times - return values.view('i8'), tz + return values.view('i8'), tz_parsed except (ValueError, TypeError): raise e @@ -1552,11 +1557,12 @@ def _objects_to_datetime64ns(data, dayfirst, yearfirst): elif is_datetime64_dtype(result): # returning M8[ns] denotes wall-times; since tz is None # the distinction is a thin one - return result, tz + return result, tz_parsed elif is_object_dtype(result): - # e.g. an Index of datetime objects; raise and let the - # calling function salvage the result if desired - raise ValueError(result) + if allow_object: + # allowed by to_datetime, not by DatetimeIndex constructor + return result, tz_parsed + raise TypeError(result) else: # pragma: no cover raise TypeError(result) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 345a6f2186470..3064c2cc4ee03 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -26,7 +26,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _objects_to_datetime64ns, _to_m8, + DatetimeArrayMixin as DatetimeArray, objects_to_datetime64ns, _to_m8, dtype_conversions, maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com @@ -281,7 +281,7 @@ def __new__(cls, data=None, else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times - data, inferred_tz = _objects_to_datetime64ns( + data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst) tz = maybe_infer_tz(tz, inferred_tz) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 54bd859ea067d..4fb7bf2bf66ec 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -183,7 +183,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex - from pandas.core.arrays.datetimes import dtype_conversions + from pandas.core.arrays.datetimes import ( + dtype_conversions, objects_to_datetime64ns) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -242,10 +243,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, require_iso8601 = not infer_datetime_format format = None - try: - result = None + result = None + tz_parsed = None - if format is not None: + if format is not None: + try: # shortcut formatting here if format == '%Y%m%d': try: @@ -274,45 +276,42 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if errors == 'raise': raise result = arg - - if result is None and (format is None or infer_datetime_format): - result, tz_parsed = tslib.array_to_datetime( - arg, - errors=errors, - utc=tz == 'utc', - dayfirst=dayfirst, - yearfirst=yearfirst, - require_iso8601=require_iso8601 - ) - if tz_parsed is not None: - if box: - # We can take a shortcut since the datetime64 numpy array - # is in UTC - return DatetimeIndex._simple_new(result, name=name, - tz=tz_parsed) - else: - # Convert the datetime64 numpy array to an numpy array - # of datetime objects - result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() - for ts in result] - return np.array(result, dtype=object) - + except ValueError as e: + try: + values, tz = conversion.datetime_to_datetime64(arg) + return DatetimeIndex._simple_new(values, name=name, tz=tz) + except (ValueError, TypeError): + raise e + + if result is None: + assert format is None or infer_datetime_format + result, tz_parsed = objects_to_datetime64ns( + arg, dayfirst=dayfirst, yearfirst=yearfirst, + tz=tz, errors=errors, require_iso8601=require_iso8601, + allow_object=True) + + if tz_parsed is not None: if box: - # Ensure we return an Index in all cases where box=True - if is_datetime64_dtype(result): - return DatetimeIndex(result, tz=tz, name=name) - elif is_object_dtype(result): - # e.g. an Index of datetime objects - from pandas import Index - return Index(result, name=name) - return result + # We can take a shortcut since the datetime64 numpy array + # is in UTC + return DatetimeIndex._simple_new(result, name=name, + tz=tz_parsed) + else: + # Convert the datetime64 numpy array to an numpy array + # of datetime objects + result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() + for ts in result] + return np.array(result, dtype=object) - except ValueError as e: - try: - values, tz = conversion.datetime_to_datetime64(arg) - return DatetimeIndex._simple_new(values, name=name, tz=tz) - except (ValueError, TypeError): - raise e + if box: + # Ensure we return an Index in all cases where box=True + if is_datetime64_dtype(result): + return DatetimeIndex(result, tz=tz, name=name) + elif is_object_dtype(result): + # e.g. an Index of datetime objects + from pandas import Index + return Index(result, name=name) + return result def _adjust_to_origin(arg, origin, unit): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index ff9bc13ef9b74..dc2060dcab89f 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -13,25 +13,27 @@ from pandas import ( DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, to_datetime) +from pandas.core.arrays import period_array import pandas.util.testing as tm class TestDatetimeIndex(object): def test_dti_with_period_data_raises(self): - data = pd.PeriodIndex(['2016Q1', '2017Q1']) + # GH#23675 + data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') with pytest.raises(TypeError, match="PeriodDtype data is invalid"): - result = DatetimeIndex(data) + DatetimeIndex(data) with pytest.raises(TypeError, match="PeriodDtype data is invalid"): - result = to_datetime(data) + to_datetime(data) with pytest.raises(TypeError, match="PeriodDtype data is invalid"): - result = DatetimeIndex(pd.period_array(data)) + DatetimeIndex(period_array(data)) with pytest.raises(TypeError, match="PeriodDtype data is invalid"): - result = to_datetime(pd.period_array(data)) + to_datetime(period_array(data)) def test_dti_with_timedelta64_data_deprecation(self): # GH#23675 @@ -41,7 +43,7 @@ def test_dti_with_timedelta64_data_deprecation(self): assert result[0] == Timestamp('1970-01-01') - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = to_datetime(data) assert result[0] == Timestamp('1970-01-01') @@ -51,7 +53,7 @@ def test_dti_with_timedelta64_data_deprecation(self): assert result[0] == Timestamp('1970-01-01') - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = to_datetime(pd.TimedeltaIndex(data)) assert result[0] == Timestamp('1970-01-01') From 9f56d23b5cd677f3e956cea52292ba3403dbb42e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 16 Nov 2018 16:00:53 -0800 Subject: [PATCH 15/21] isort fixup --- pandas/core/indexes/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3064c2cc4ee03..f12118f877f03 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -26,8 +26,8 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, objects_to_datetime64ns, _to_m8, - dtype_conversions, maybe_infer_tz) + DatetimeArrayMixin as DatetimeArray, _to_m8, dtype_conversions, + maybe_infer_tz, objects_to_datetime64ns) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs From be4d4723cb227ae1500b8415dbc5395f9f3a9e66 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 18 Nov 2018 07:37:33 -0800 Subject: [PATCH 16/21] requested edits, name changes --- pandas/core/arrays/datetimes.py | 10 +++++----- pandas/core/indexes/datetimes.py | 4 ++-- pandas/core/tools/datetimes.py | 13 +++++++------ 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 182cccccc2780..17d4ac19a2254 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1447,7 +1447,7 @@ def maybe_infer_tz(tz, inferred_tz): return tz -def dtype_conversions(data, copy, has_format=False): +def maybe_convert_dtype(data, copy, has_format=False): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. @@ -1499,7 +1499,7 @@ def dtype_conversions(data, copy, has_format=False): def objects_to_datetime64ns(data, dayfirst, yearfirst, - tz=None, errors="raise", + utc=False, errors="raise", require_iso8601=False, allow_object=False): """ Convert data to array of timestamps. @@ -1509,7 +1509,8 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, data : np.ndarray[object] dayfirst : bool yearfirst : bool - tz : {None, 'utc'} + utc : bool, default False + Whether to convert timezone-aware timestamps to UTC errors : {'raise', 'ignore', 'coerce'} allow_object : bool @@ -1526,7 +1527,6 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, ValueError : if data cannot be converted to datetimes """ assert errors in ["raise", "ignore", "coerce"] - assert tz is None or tz == "utc" # if str-dtype, convert data = np.array(data, copy=False, dtype=np.object_) @@ -1535,7 +1535,7 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, result, tz_parsed = tslib.array_to_datetime( data, errors=errors, - utc=tz == 'utc', + utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f12118f877f03..bda69e8954a0e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -26,7 +26,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8, dtype_conversions, + DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype, maybe_infer_tz, objects_to_datetime64ns) from pandas.core.base import _shared_docs import pandas.core.common as com @@ -270,7 +270,7 @@ def __new__(cls, data=None, # By this point we are assured to have either a numpy array or Index - data, copy = dtype_conversions(data, copy) + data, copy = maybe_convert_dtype(data, copy) if is_object_dtype(data) or is_string_dtype(data): # TODO: We do not have tests specific to string-dtypes, diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 4fb7bf2bf66ec..3b1b032784043 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -184,7 +184,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, """ from pandas import DatetimeIndex from pandas.core.arrays.datetimes import ( - dtype_conversions, objects_to_datetime64ns) + maybe_convert_dtype, objects_to_datetime64ns) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -225,7 +225,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation - arg = dtype_conversions(arg, copy=False, has_format=format is not None)[0] + arg, _ = maybe_convert_dtype(arg, copy=False, + has_format=format is not None) arg = ensure_object(arg) require_iso8601 = False @@ -285,9 +286,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if result is None: assert format is None or infer_datetime_format + utc = tz == 'utc' result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, - tz=tz, errors=errors, require_iso8601=require_iso8601, + utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True) if tz_parsed is not None: @@ -299,9 +301,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, else: # Convert the datetime64 numpy array to an numpy array # of datetime objects - result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() - for ts in result] - return np.array(result, dtype=object) + result = DatetimeIndex(result, tz=tz_parsed).to_pydatetime() + return result if box: # Ensure we return an Index in all cases where box=True From 6b60da2e8db570d42b1b995655b34ba4d9a0b0c6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 14:53:46 -0800 Subject: [PATCH 17/21] comments, remove has_format --- pandas/core/arrays/datetimes.py | 9 ++++----- pandas/core/tools/datetimes.py | 10 +++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8852c36d1c99b..facdc9583a561 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1463,7 +1463,7 @@ def maybe_infer_tz(tz, inferred_tz): return tz -def maybe_convert_dtype(data, copy, has_format=False): +def maybe_convert_dtype(data, copy): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. @@ -1472,9 +1472,6 @@ def maybe_convert_dtype(data, copy, has_format=False): ---------- data : np.ndarray or pd.Index copy : bool - has_format : bool, default False - Indicates if the data will be passed to a parsing function with a - `format` kwarg. Returns ------- @@ -1486,7 +1483,7 @@ def maybe_convert_dtype(data, copy, has_format=False): TypeError : PeriodDType data is passed """ - if is_float_dtype(data) and not has_format: + if is_float_dtype(data): # Note: we must cast to datetime64[ns] here in order to treat these # as wall-times instead of UTC timestamps. data = data.astype(_NS_DTYPE) @@ -1529,6 +1526,8 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, Whether to convert timezone-aware timestamps to UTC errors : {'raise', 'ignore', 'coerce'} allow_object : bool + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. Returns ------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3b1b032784043..66fb9dc3fb8fd 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -225,8 +225,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation - arg, _ = maybe_convert_dtype(arg, copy=False, - has_format=format is not None) + orig_arg = arg + arg, _ = maybe_convert_dtype(arg, copy=False) arg = ensure_object(arg) require_iso8601 = False @@ -252,7 +252,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, errors=errors) + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") @@ -278,6 +281,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise result = arg except ValueError as e: + # Fallback to try to convert datetime objects try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) From b3d5bb7564ddb912f64459dc2d82c0f3fac99f4d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 26 Nov 2018 15:12:45 -0800 Subject: [PATCH 18/21] dummy commit to force CI From fd5af185ea1db715f3c13290f48000debcefea34 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 2 Dec 2018 14:11:52 -0800 Subject: [PATCH 19/21] Flesh out comment --- pandas/core/tools/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index dc02d7ddf5932..7a87e33c7f97e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -269,7 +269,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise result = arg except ValueError as e: - # Fallback to try to convert datetime objects + # Fallback to try to convert datetime objects if timezone-aware + # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) From 2cdd2152b3c20a8bde9d38cd24d5d5c64eac20dd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 2 Dec 2018 17:26:34 -0800 Subject: [PATCH 20/21] comment --- pandas/core/arrays/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1e47417a5a3f3..0fa15d6e72f78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1570,6 +1570,8 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, return result, tz_parsed raise TypeError(result) else: # pragma: no cover + # GH#23675 this TypeError should never be hit, whereas the TypeError + # in the object-dtype branch above is reachable. raise TypeError(result) From 03d5b353bc367cc06417716c58d40f990d3eee7d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 2 Dec 2018 17:33:43 -0800 Subject: [PATCH 21/21] comment more --- pandas/core/arrays/datetimes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 82d9d89e67385..45630f8109932 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1566,8 +1566,12 @@ def objects_to_datetime64ns(data, dayfirst, yearfirst, # the distinction is a thin one return result, tz_parsed elif is_object_dtype(result): + # GH#23675 when called via `pd.to_datetime`, returning an object-dtype + # array is allowed. When called via `pd.DatetimeIndex`, we can + # only accept datetime64 dtype, so raise TypeError if object-dtype + # is returned, as that indicates the values can be recognized as + # datetimes but they have conflicting timezones/awareness if allow_object: - # allowed by to_datetime, not by DatetimeIndex constructor return result, tz_parsed raise TypeError(result) else: # pragma: no cover