Skip to content

Commit

Permalink
BUG: DatetimeIndex constructor to handle dtype & tz with conflicts
Browse files Browse the repository at this point in the history
BUG: construct Series of all NaT w/tz

xref #11736
  • Loading branch information
jreback committed Feb 1, 2016
1 parent 90d71f6 commit eaad93a
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 59 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.18.0.txt
Expand Up @@ -522,6 +522,7 @@ Bug Fixes
- Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`)
- Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`)
- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`)
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`)



Expand Down Expand Up @@ -583,5 +584,3 @@ Bug Fixes
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)

- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`)
- Bug in ``pd.concat`` while concatenating tz-aware series with time series. (:issue:`11755`)
10 changes: 8 additions & 2 deletions pandas/core/common.py
Expand Up @@ -1643,15 +1643,21 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
raise TypeError("cannot convert datetimelike to "
"dtype [%s]" % dtype)
elif is_datetime64tz:
pass

# our NaT doesn't support tz's
# this will coerce to DatetimeIndex with
# a matching dtype below
if lib.isscalar(value) and isnull(value):
value = [value]

elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
if dtype.name == 'timedelta64[ns]':
dtype = _TD_DTYPE
else:
raise TypeError("cannot convert timedeltalike to "
"dtype [%s]" % dtype)

if np.isscalar(value):
if lib.isscalar(value):
if value == tslib.iNaT or isnull(value):
value = tslib.iNaT
else:
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/series.py
Expand Up @@ -2903,7 +2903,7 @@ def create_from_value(value, index, dtype):
# return a new empty value suitable for the dtype

if is_datetimetz(dtype):
subarr = DatetimeIndex([value] * len(index))
subarr = DatetimeIndex([value] * len(index), dtype=dtype)
else:
if not isinstance(dtype, (np.dtype, type(np.dtype))):
dtype = dtype.dtype
Expand Down Expand Up @@ -2937,7 +2937,8 @@ def create_from_value(value, index, dtype):

# a 1-element ndarray
if len(subarr) != len(index) and len(subarr) == 1:
subarr = create_from_value(subarr[0], index, subarr)
subarr = create_from_value(subarr[0], index,
subarr.dtype)

elif subarr.ndim > 1:
if isinstance(data, np.ndarray):
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/indexes/test_datetimelike.py
Expand Up @@ -108,10 +108,6 @@ def test_construction_with_alt(self):
expected = i.tz_localize(None).tz_localize('UTC')
self.assert_index_equal(i2, expected)

i2 = DatetimeIndex(i, tz='UTC')
expected = i.tz_convert('UTC')
self.assert_index_equal(i2, expected)

# incompat tz/dtype
self.assertRaises(ValueError, lambda: DatetimeIndex(
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/series/test_constructors.py
Expand Up @@ -473,6 +473,11 @@ def test_constructor_with_datetime_tz(self):
self.assertTrue(s.dtype == 'object')
self.assertTrue(lib.infer_dtype(s) == 'datetime')

# with all NaT
s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
assert_series_equal(s, expected)

def test_constructor_periodindex(self):
# GH7932
# converting a PeriodIndex when put in a Series
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/test_groupby.py
Expand Up @@ -3943,10 +3943,17 @@ def test_groupby_multi_timezone(self):
result = df.groupby('tz').date.apply(
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))

expected = pd.to_datetime(Series(
['2000-01-28 22:47:00', '2000-01-29 22:48:00',
'2000-01-31 00:49:00', '2000-01-31 22:50:00',
'2000-01-01 21:50:00']))
expected = Series([Timestamp('2000-01-28 16:47:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-29 16:48:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-30 16:49:00-0800',
tz='America/Los_Angeles'),
Timestamp('2000-01-31 16:50:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-01 16:50:00-0500',
tz='America/New_York')],
dtype=object)
assert_series_equal(result, expected)

tz = 'America/Chicago'
Expand Down
6 changes: 1 addition & 5 deletions pandas/tools/merge.py
Expand Up @@ -979,12 +979,8 @@ def get_result(self):

# stack blocks
if self.axis == 0:
to_concat = [x._values for x in self.objs]
typs = com.get_dtype_kinds(to_concat)
new_data = com._concat_compat(to_concat)
new_data = com._concat_compat([x._values for x in self.objs])
name = com._consensus_name_attr(self.objs)
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
return (Series(new_data, index=self.new_axes[0], name=name)
.__finalize__(self, method='concat'))

Expand Down
76 changes: 46 additions & 30 deletions pandas/tools/tests/test_merge.py
Expand Up @@ -1024,46 +1024,62 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)

def test_concat_Nat_series(self):
def test_concat_NaT_series(self):
# GH 11693
# test for merging NaT series with datetime series.
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern"))
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern"))
y[:] = pd.NaT
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
tm.assert_series_equal(pd.concat([x,y]), expected)
x = Series(date_range('20151124 08:00', '20151124 09:00',
freq='1h', tz='US/Eastern'))
y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
expected = Series([x[0], x[1], pd.NaT, pd.NaT])

result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# all NaT with tz
x[:] = pd.NaT
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]')
tm.assert_series_equal(pd.concat([x,y]), expected)
expected = Series(pd.NaT, index=range(4),
dtype='datetime64[ns, US/Eastern]')
result = pd.concat([y, y], ignore_index=True)
tm.assert_series_equal(result, expected)

#without tz
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h'))
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h'))
# without tz
x = pd.Series(pd.date_range('20151124 08:00',
'20151124 09:00', freq='1h'))
y = pd.Series(pd.date_range('20151124 10:00',
'20151124 11:00', freq='1h'))
y[:] = pd.NaT
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
tm.assert_series_equal(pd.concat([x, y]), expected)
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
result = pd.concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

#all NaT without tz
# all NaT without tz
x[:] = pd.NaT
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]')
tm.assert_series_equal(pd.concat([x,y]), expected)
expected = pd.Series(pd.NaT, index=range(4),
dtype='datetime64[ns]')
result = pd.concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

def test_concat_tz_series(self):
#tz and no tz
#GH 11755
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
tm.assert_series_equal(pd.concat([x,y]), expected)

#tz and object
#GH 11887
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
y = pd.Series(['a', 'b'])
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
tm.assert_series_equal(pd.concat([x,y]), expected)
# GH 11755
# tz and no tz
x = Series(date_range('20151124 08:00',
'20151124 09:00',
freq='1h', tz='UTC'))
y = Series(date_range('2012-01-01', '2012-01-02'))
expected = Series([x[0], x[1], y[0], y[1]],
dtype='object')
result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# GH 11887
# concat tz and object
x = Series(date_range('20151124 08:00',
'20151124 09:00',
freq='1h', tz='UTC'))
y = Series(['a', 'b'])
expected = Series([x[0], x[1], y[0], y[1]],
dtype='object')
result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
Expand Down
14 changes: 8 additions & 6 deletions pandas/tseries/common.py
Expand Up @@ -258,11 +258,12 @@ def convert_to_pydatetime(x, axis):

# if dtype is of datetimetz or timezone
if x.dtype.kind == _NS_DTYPE.kind:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)
if hasattr(x, 'tz'):
if getattr(x, 'tz', None) is not None:
x = x.asobject
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)

elif x.dtype == _TD_DTYPE:
shape = x.shape
Expand All @@ -276,10 +277,11 @@ def convert_to_pydatetime(x, axis):
# datetimetz
if 'datetimetz' in typs:

# if to_concat have 'datetime' or 'object', then we need to coerce to object
# if to_concat have 'datetime' or 'object'
# then we need to coerce to object
if 'datetime' in typs or 'object' in typs:
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
return np.concatenate(to_concat,axis=axis)
return np.concatenate(to_concat, axis=axis)

# we require ALL of the same tz for datetimetz
tzs = set([getattr(x, 'tz', None) for x in to_concat]) - set([None])
Expand Down
29 changes: 28 additions & 1 deletion pandas/tseries/index.py
Expand Up @@ -242,6 +242,19 @@ def __new__(cls, data=None,
raise ValueError("Must provide freq argument if no data is "
"supplied")

# if dtype has an embedded tz, capture it
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and str(tz) != str(dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass

if data is None:
return cls._generate(start, end, periods, name, freq,
tz=tz, normalize=normalize, closed=closed,
Expand Down Expand Up @@ -272,7 +285,15 @@ def __new__(cls, data=None,
data.name = name

if tz is not None:
return data.tz_localize(tz, ambiguous=ambiguous)

# we might already be localized to this tz
# so passing the same tz is ok
# however any other tz is a no-no
if data.tz is None:
return data.tz_localize(tz, ambiguous=ambiguous)
elif str(tz) != str(data.tz):
raise TypeError("Already tz-aware, use tz_convert "
"to convert.")

return data

Expand All @@ -288,6 +309,12 @@ def __new__(cls, data=None,
if tz is None:
tz = data.tz

else:
# the tz's must match
if str(tz) != str(data.tz):
raise TypeError("Already tz-aware, use tz_convert "
"to convert.")

subarr = data.values

if freq is None:
Expand Down
36 changes: 35 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
Expand Up @@ -74,7 +74,7 @@ def test_index_unique(self):
dups_local = self.dups.index.tz_localize('US/Eastern')
dups_local.name = 'foo'
result = dups_local.unique()
expected = DatetimeIndex(expected, tz='US/Eastern')
expected = DatetimeIndex(expected).tz_localize('US/Eastern')
self.assertTrue(result.tz is not None)
self.assertEqual(result.name, 'foo')
self.assertTrue(result.equals(expected))
Expand Down Expand Up @@ -2473,6 +2473,40 @@ def test_constructor_datetime64_tzformat(self):
tz='Asia/Tokyo')
self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

def test_constructor_dtype(self):

# passing a dtype with a tz should localize
idx = DatetimeIndex(['2013-01-01',
'2013-01-02'],
dtype='datetime64[ns, US/Eastern]')
expected = DatetimeIndex(['2013-01-01', '2013-01-02']
).tz_localize('US/Eastern')
self.assertTrue(idx.equals(expected))

idx = DatetimeIndex(['2013-01-01',
'2013-01-02'],
tz='US/Eastern')
self.assertTrue(idx.equals(expected))

# if we already have a tz and its not the same, then raise
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
dtype='datetime64[ns, US/Eastern]')

self.assertRaises(ValueError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns]'))

# this is effectively trying to convert tz's
self.assertRaises(TypeError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns, CET]'))
self.assertRaises(ValueError,
lambda: DatetimeIndex(
idx, tz='CET',
dtype='datetime64[ns, US/Eastern]'))
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
self.assertTrue(idx.equals(result))

def test_constructor_name(self):
idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A',
name='TEST')
Expand Down
4 changes: 2 additions & 2 deletions pandas/tslib.pyx
Expand Up @@ -3554,8 +3554,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
trans, deltas, typ = _get_dst_info(tz2)
trans_len = len(trans)

#if all NaT, return all NaT
if (utc_dates==iNaT).all():
# if all NaT, return all NaT
if (utc_dates==NPY_NAT).all():
return utc_dates

# use first non-NaT element
Expand Down

0 comments on commit eaad93a

Please sign in to comment.