Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/API: Index creation with different tz coerces DatetimeIndex #11696

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.18.0.txt
Expand Up @@ -187,7 +187,6 @@ Bug Fixes
- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)



- Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`)


Expand All @@ -197,3 +196,5 @@ Bug Fixes
- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`)

- Bug in ``df.replace`` while replacing value in mixed dtype ``Dataframe`` (:issue:`11698`)
- Bug in ``Index`` creation from ``Timestamp`` with mixed tz coerces to UTC (:issue:`11488`)

9 changes: 7 additions & 2 deletions pandas/core/index.py
Expand Up @@ -179,8 +179,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
elif inferred != 'string':
if (inferred.startswith('datetime') or
tslib.is_timestamp_array(subarr)):
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)

if (lib.is_datetime_with_singletz_array(subarr) or
'tz' in kwargs):
# only when subarr has the same tz
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)

elif (inferred.startswith('timedelta') or
lib.is_timedelta_array(subarr)):
from pandas.tseries.tdi import TimedeltaIndex
Expand Down
31 changes: 30 additions & 1 deletion pandas/src/inference.pyx
@@ -1,6 +1,6 @@
import sys
cimport util
from tslib import NaT
from tslib import NaT, get_timezone
from datetime import datetime, timedelta
iNaT = util.get_nat()

Expand Down Expand Up @@ -431,6 +431,35 @@ def is_datetime64_array(ndarray values):
return False
return null_count != n


cpdef is_datetime_with_singletz_array(ndarray[object] values):
"""
Check values have the same tzinfo attribute.
Doesn't check values are datetime-like types.
"""

cdef Py_ssize_t i, j, n = len(values)
cdef object base_val, base_tz, val, tz

if n == 0:
return False

for i in range(n):
base_val = values[i]
if base_val is not NaT:
base_tz = get_timezone(getattr(base_val, 'tzinfo', None))

for j in range(i, n):
val = values[j]
if val is not NaT:
tz = getattr(val, 'tzinfo', None)
if base_tz != tz and base_tz != get_timezone(tz):
return False
break

return True


def is_timedelta_array(ndarray values):
cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
Expand Down
183 changes: 183 additions & 0 deletions pandas/tests/test_index.py
Expand Up @@ -3493,6 +3493,189 @@ def test_construction_with_alt(self):
def test_pickle_compat_construction(self):
pass

def test_construction_index_with_mixed_timezones(self):
# GH 11488
# no tz results in DatetimeIndex
result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)

# same tz results in DatetimeIndex
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

# same tz results in DatetimeIndex (DST)
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

# different tz results in Index(dtype=object)
result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertFalse(isinstance(result, DatetimeIndex))

result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertFalse(isinstance(result, DatetimeIndex))

# passing tz results in DatetimeIndex
result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), Timestamp('2011-01-03 00:00')],
tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# length = 1
result = Index([Timestamp('2011-01-01')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)

# length = 1 with tz
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

def test_construction_index_with_mixed_timezones_with_NaT(self):
# GH 11488
result = Index([pd.NaT, Timestamp('2011-01-01'),
pd.NaT, Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'),
pd.NaT, Timestamp('2011-01-02')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)

# same tz results in DatetimeIndex
result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

# same tz results in DatetimeIndex (DST)
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
pd.NaT, Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

# different tz results in Index(dtype=object)
result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertFalse(isinstance(result, DatetimeIndex))

result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertFalse(isinstance(result, DatetimeIndex))

# passing tz results in DatetimeIndex
result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'),
pd.NaT, Timestamp('2011-01-03 00:00')],
tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# all NaT
result = Index([pd.NaT, pd.NaT], name='idx')
exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)

# all NaT with tz
result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNotNone(result.tz)
self.assertEqual(result.tz, exp.tz)

def test_construction_dti_with_mixed_timezones(self):
# GH 11488 (not changed, added explicit tests)

# no tz results in DatetimeIndex
result = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# same tz results in DatetimeIndex
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# same tz results in DatetimeIndex (DST)
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# different tz coerces tz-naive to tz-awareIndex(dtype=object)
result = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')],
tz='US/Eastern', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

# tz mismatch affecting to tz-aware raises TypeError/ValueError
with tm.assertRaises(ValueError):
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')

with tm.assertRaises(TypeError):
DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='Asia/Tokyo', name='idx')

with tm.assertRaises(ValueError):
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx')

def test_get_loc(self):
idx = pd.date_range('2000-01-01', periods=3)

Expand Down
6 changes: 5 additions & 1 deletion pandas/util/testing.py
Expand Up @@ -718,7 +718,11 @@ def assert_attr_equal(attr, left, right, obj='Attributes'):
# np.nan
return True

result = left_attr == right_attr
try:
result = left_attr == right_attr
except TypeError:
# datetimetz on rhs may raise TypeError
result = False
if not isinstance(result, bool):
result = result.all()

Expand Down