From 162bd1d5cdaca7c1be51ffbbe79d3792f26a4657 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 1 Nov 2015 11:13:00 +0900 Subject: [PATCH] BUG/API: Index creation with different tz coerces DatetimeIndex --- doc/source/whatsnew/v0.18.0.txt | 3 +- pandas/core/index.py | 9 +- pandas/src/inference.pyx | 31 +++++- pandas/tests/test_index.py | 183 ++++++++++++++++++++++++++++++++ pandas/util/testing.py | 6 +- 5 files changed, 227 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index c1b7ff82f4c76..3e864918f06a5 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -187,7 +187,6 @@ Bug Fixes - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) - - Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`) @@ -197,3 +196,5 @@ Bug Fixes - Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`) - Bug in ``df.replace`` while replacing value in mixed dtype ``Dataframe`` (:issue:`11698`) +- Bug in ``Index`` creation from ``Timestamp`` with mixed tz coerces to UTC (:issue:`11488`) + diff --git a/pandas/core/index.py b/pandas/core/index.py index fa23f2e1efe3f..1433d755d294d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -179,8 +179,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, elif inferred != 'string': if (inferred.startswith('datetime') or tslib.is_timestamp_array(subarr)): - from pandas.tseries.index import DatetimeIndex - return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) + + if (lib.is_datetime_with_singletz_array(subarr) or + 'tz' in kwargs): + # only when subarr has the same tz + from pandas.tseries.index import DatetimeIndex + return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) + elif (inferred.startswith('timedelta') or lib.is_timedelta_array(subarr)): from pandas.tseries.tdi import TimedeltaIndex 
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 74bd437373c19..1a5703eb91053 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -1,6 +1,6 @@ import sys cimport util -from tslib import NaT +from tslib import NaT, get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -431,6 +431,35 @@ def is_datetime64_array(ndarray values): return False return null_count != n + +cpdef is_datetime_with_singletz_array(ndarray[object] values): + """ + Check values have the same tzinfo attribute. + Doesn't check values are datetime-like types. + """ + + cdef Py_ssize_t i, j, n = len(values) + cdef object base_val, base_tz, val, tz + + if n == 0: + return False + + for i in range(n): + base_val = values[i] + if base_val is not NaT: + base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) + + for j in range(i, n): + val = values[j] + if val is not NaT: + tz = getattr(val, 'tzinfo', None) + if base_tz != tz and base_tz != get_timezone(tz): + return False + break + + return True + + def is_timedelta_array(ndarray values): cdef Py_ssize_t i, null_count = 0, n = len(values) cdef object v diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index bc9d303dc3b1a..e2fa6a90429dc 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -3493,6 +3493,189 @@ def test_construction_with_alt(self): def test_pickle_compat_construction(self): pass + def test_construction_index_with_mixed_timezones(self): + # GH 11488 + # no tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], 
name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # passing tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), Timestamp('2011-01-03 00:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # length = 1 + result = Index([Timestamp('2011-01-01')], 
name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # length = 1 with tz + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # GH 11488 + result = Index([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + pd.NaT, Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in 
Index(dtype=object) + result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # passing tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'), + pd.NaT, Timestamp('2011-01-03 00:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # all NaT + result = Index([pd.NaT, pd.NaT], name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # all NaT with tz + result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = 
DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # different tz coerces tz-naive to tz-aware + result = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # tz mismatch involving tz-aware values raises TypeError/ValueError + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + + with tm.assertRaises(TypeError): + DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 
10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') + def test_get_loc(self): idx = pd.date_range('2000-01-01', periods=3) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index dd3f300bcf266..858a0e1570240 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -718,7 +718,11 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): # np.nan return True - result = left_attr == right_attr + try: + result = left_attr == right_attr + except TypeError: + # datetimetz on rhs may raise TypeError + result = False if not isinstance(result, bool): result = result.all()