Skip to content

Commit

Permalink
BUG/API: Index creation with different tz coerces DatetimeIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Dec 8, 2015
1 parent 43edd83 commit 162bd1d
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 5 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ Bug Fixes
- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)



- Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`)


Expand All @@ -197,3 +196,5 @@ Bug Fixes
- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`)

- Bug in ``df.replace`` while replacing value in mixed dtype ``DataFrame`` (:issue:`11698`)
- Bug in ``Index`` creation from ``Timestamp`` values with mixed timezones, where the result was coerced to UTC (:issue:`11488`)

9 changes: 7 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
elif inferred != 'string':
if (inferred.startswith('datetime') or
tslib.is_timestamp_array(subarr)):
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)

if (lib.is_datetime_with_singletz_array(subarr) or
'tz' in kwargs):
# only when subarr has the same tz
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)

elif (inferred.startswith('timedelta') or
lib.is_timedelta_array(subarr)):
from pandas.tseries.tdi import TimedeltaIndex
Expand Down
31 changes: 30 additions & 1 deletion pandas/src/inference.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
cimport util
from tslib import NaT
from tslib import NaT, get_timezone
from datetime import datetime, timedelta
iNaT = util.get_nat()

Expand Down Expand Up @@ -431,6 +431,35 @@ def is_datetime64_array(ndarray values):
return False
return null_count != n


cpdef is_datetime_with_singletz_array(ndarray[object] values):
    """
    Report whether all non-NaT elements of ``values`` share one timezone.

    The first element that is not NaT supplies the reference timezone
    (normalised through ``get_timezone``); every non-NaT element from that
    position onwards must match it.  Elements are NOT checked for being
    datetime-like: a value without a ``tzinfo`` attribute is simply treated
    as having ``tzinfo=None``.

    Returns False for an empty array and True for an array holding only NaT.
    """
    cdef Py_ssize_t start, pos, n = len(values)
    cdef object ref_val, ref_tz, item, item_tz

    if n == 0:
        return False

    # skip leading NaT entries to find the element that defines the tz
    start = 0
    while start < n:
        ref_val = values[start]
        if ref_val is not NaT:
            break
        start += 1

    if start == n:
        # only NaT present, so there is nothing that could disagree
        return True

    ref_tz = get_timezone(getattr(ref_val, 'tzinfo', None))

    for pos in range(start, n):
        item = values[pos]
        if item is NaT:
            continue
        item_tz = getattr(item, 'tzinfo', None)
        # compare against the raw tzinfo first, then its normalised form
        if ref_tz != item_tz and ref_tz != get_timezone(item_tz):
            return False

    return True


def is_timedelta_array(ndarray values):
cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
Expand Down
183 changes: 183 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3493,6 +3493,189 @@ def test_construction_with_alt(self):
def test_pickle_compat_construction(self):
    # NOTE(review): intentionally empty; presumably overrides an inherited
    # test that does not apply to this index type -- confirm against the
    # base test class.
    pass

def test_construction_index_with_mixed_timezones(self):
    """Check the type ``Index`` infers from Timestamps with mixed tz.

    GH 11488: ``Index`` should become a ``DatetimeIndex`` only when every
    value shares a single timezone (or none), or when ``tz`` is passed
    explicitly; otherwise it should stay an object-dtype ``Index``.
    """
    # GH 11488
    # no tz results in DatetimeIndex
    result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')],
                   name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')],
                        name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNone(result.tz)

    # same tz results in DatetimeIndex
    result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                    Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')],
                   name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-01-02 10:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # same tz results in DatetimeIndex (DST)
    result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                    Timestamp('2011-08-01 10:00', tz='US/Eastern')],
                   name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-08-01 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # different tz results in Index(dtype=object)
    # (tz-naive mixed with tz-aware)
    result = Index([Timestamp('2011-01-01 10:00'),
                    Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   name='idx')
    exp = Index([Timestamp('2011-01-01 10:00'),
                 Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertNotIsInstance(result, DatetimeIndex)

    # (two different tz-aware zones)
    result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                    Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   name='idx')
    exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                 Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertNotIsInstance(result, DatetimeIndex)

    # passing tz results in DatetimeIndex
    result = Index([Timestamp('2011-01-01 10:00'),
                    Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   tz='Asia/Tokyo', name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 19:00'),
                         Timestamp('2011-01-03 00:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # length = 1
    result = Index([Timestamp('2011-01-01')], name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNone(result.tz)

    # length = 1 with tz
    result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')],
                   name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

def test_construction_index_with_mixed_timezones_with_NaT(self):
    """Check ``Index`` type inference with mixed tz when NaT is present.

    GH 11488: NaT entries must not affect the outcome -- a single shared
    timezone (or none) gives a ``DatetimeIndex``, differing timezones give
    an object-dtype ``Index`` unless ``tz`` is passed explicitly.
    """
    # GH 11488
    # no tz results in DatetimeIndex
    result = Index([pd.NaT, Timestamp('2011-01-01'),
                    pd.NaT, Timestamp('2011-01-02')], name='idx')
    exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'),
                         pd.NaT, Timestamp('2011-01-02')], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNone(result.tz)

    # same tz results in DatetimeIndex
    result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                    pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')],
                   name='idx')
    exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'),
                         pd.NaT, Timestamp('2011-01-02 10:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # same tz results in DatetimeIndex (DST)
    result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                    pd.NaT,
                    Timestamp('2011-08-01 10:00', tz='US/Eastern')],
                   name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT,
                         Timestamp('2011-08-01 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # different tz results in Index(dtype=object)
    # (tz-naive mixed with tz-aware)
    result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
                    pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   name='idx')
    exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
                 pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertNotIsInstance(result, DatetimeIndex)

    # (two different tz-aware zones)
    result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                    pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   name='idx')
    exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                 pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertNotIsInstance(result, DatetimeIndex)

    # passing tz results in DatetimeIndex
    result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
                    pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                   tz='Asia/Tokyo', name='idx')
    exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'),
                         pd.NaT, Timestamp('2011-01-03 00:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # all NaT
    result = Index([pd.NaT, pd.NaT], name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNone(result.tz)

    # all NaT with tz
    result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

def test_construction_dti_with_mixed_timezones(self):
    """Pin ``DatetimeIndex`` construction behaviour with mixed timezones.

    These cases call ``DatetimeIndex`` directly rather than ``Index``; per
    GH 11488 this behaviour was not changed, the tests only make it
    explicit.
    """
    # GH 11488 (not changed, added explicit tests)

    # no tz results in DatetimeIndex
    result = DatetimeIndex([Timestamp('2011-01-01'),
                            Timestamp('2011-01-02')], name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01'),
                         Timestamp('2011-01-02')], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex
    result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                            Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-01-02 10:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex (DST)
    result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                            Timestamp('2011-08-01 10:00', tz='US/Eastern')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-08-01 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # tz-naive mixed with tz-aware: the naive value is coerced to the
    # tz of the aware one (10:00 naive is expected as 05:00 US/Eastern)
    result = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                            Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 05:00'),
                         Timestamp('2011-01-02 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertIsInstance(result, DatetimeIndex)

    # tz mismatch involving tz-aware values raises TypeError/ValueError
    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      name='idx')

    with tm.assertRaises(TypeError):
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='Asia/Tokyo', name='idx')

    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='US/Eastern', name='idx')

def test_get_loc(self):
idx = pd.date_range('2000-01-01', periods=3)

Expand Down
6 changes: 5 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,11 @@ def assert_attr_equal(attr, left, right, obj='Attributes'):
# np.nan
return True

result = left_attr == right_attr
try:
result = left_attr == right_attr
except TypeError:
# datetimetz on rhs may raise TypeError
result = False
if not isinstance(result, bool):
result = result.all()

Expand Down

0 comments on commit 162bd1d

Please sign in to comment.