BUG: concat of tz series with NaT #12195

Closed (wants to merge 2 commits)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.0.txt
@@ -522,6 +522,7 @@ Bug Fixes
- Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`)
- Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`)
- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`)
- Bug in ``pd.concat`` when concatenating tz-aware ``NaT`` series (:issue:`11693`, :issue:`11755`)



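As a quick illustration of the fix (input data and expected dtypes taken from the new tests added in pandas/tools/tests/test_merge.py below):

```python
import pandas as pd

# a tz-aware series and an all-NaT series with the same tz-aware dtype
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00',
                            freq='1h', tz='US/Eastern'))
y = pd.Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')

# with the fix this concatenates cleanly, keeping the NaT values (GH 11693)
result = pd.concat([x, y], ignore_index=True)

# concatenating two all-NaT tz-aware series keeps the tz-aware dtype
all_nat = pd.concat([y, y], ignore_index=True)
print(all_nat.dtype)  # datetime64[ns, US/Eastern]
```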
10 changes: 8 additions & 2 deletions pandas/core/common.py
@@ -1643,15 +1643,21 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
raise TypeError("cannot convert datetimelike to "
"dtype [%s]" % dtype)
elif is_datetime64tz:
pass

# our NaT doesn't support tz's
# this will coerce to DatetimeIndex with
# a matching dtype below
if lib.isscalar(value) and isnull(value):
value = [value]

elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
if dtype.name == 'timedelta64[ns]':
dtype = _TD_DTYPE
else:
raise TypeError("cannot convert timedeltalike to "
"dtype [%s]" % dtype)

if np.isscalar(value):
if lib.isscalar(value):
if value == tslib.iNaT or isnull(value):
value = tslib.iNaT
else:
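The branch above handles a scalar ``NaT`` paired with a tz-aware dtype by wrapping it in a list so it can coerce to a ``DatetimeIndex`` with a matching dtype. A short sketch of the user-facing effect, mirroring the new case in pandas/tests/series/test_constructors.py:

```python
import pandas as pd

# a scalar NaT broadcast against an index, with a tz-aware dtype
s = pd.Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')

# per the new constructor test, this equals an all-NaT tz-aware series
expected = pd.Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
```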
5 changes: 3 additions & 2 deletions pandas/core/series.py
@@ -2903,7 +2903,7 @@ def create_from_value(value, index, dtype):
# return a new empty value suitable for the dtype

if is_datetimetz(dtype):
subarr = DatetimeIndex([value] * len(index))
subarr = DatetimeIndex([value] * len(index), dtype=dtype)
else:
if not isinstance(dtype, (np.dtype, type(np.dtype))):
dtype = dtype.dtype
@@ -2937,7 +2937,8 @@ def create_from_value(value, index, dtype):

# a 1-element ndarray
if len(subarr) != len(index) and len(subarr) == 1:
subarr = create_from_value(subarr[0], index, subarr)
subarr = create_from_value(subarr[0], index,
subarr.dtype)

elif subarr.ndim > 1:
if isinstance(data, np.ndarray):
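For reference, ``create_from_value`` now forwards the target dtype to ``DatetimeIndex``, so a broadcast scalar (including ``NaT``) keeps the tz instead of dropping it. A minimal sketch of that call, with hypothetical values for illustration:

```python
import pandas as pd

# roughly what create_from_value builds for a datetimetz dtype
value = pd.NaT
index = range(3)
dtype = 'datetime64[ns, US/Eastern]'

subarr = pd.DatetimeIndex([value] * len(index), dtype=dtype)
print(subarr.dtype)  # datetime64[ns, US/Eastern], all values NaT
```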
4 changes: 0 additions & 4 deletions pandas/tests/indexes/test_datetimelike.py
@@ -108,10 +108,6 @@ def test_construction_with_alt(self):
expected = i.tz_localize(None).tz_localize('UTC')
self.assert_index_equal(i2, expected)

i2 = DatetimeIndex(i, tz='UTC')
expected = i.tz_convert('UTC')
self.assert_index_equal(i2, expected)

# incompat tz/dtype
self.assertRaises(ValueError, lambda: DatetimeIndex(
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))
5 changes: 5 additions & 0 deletions pandas/tests/series/test_constructors.py
@@ -473,6 +473,11 @@ def test_constructor_with_datetime_tz(self):
self.assertTrue(s.dtype == 'object')
self.assertTrue(lib.infer_dtype(s) == 'datetime')

# with all NaT
s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
assert_series_equal(s, expected)

def test_constructor_periodindex(self):
# GH7932
# converting a PeriodIndex when put in a Series
15 changes: 11 additions & 4 deletions pandas/tests/test_groupby.py
@@ -3943,10 +3943,17 @@ def test_groupby_multi_timezone(self):
result = df.groupby('tz').date.apply(
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))

expected = pd.to_datetime(Series(
['2000-01-28 22:47:00', '2000-01-29 22:48:00',
'2000-01-31 00:49:00', '2000-01-31 22:50:00',
'2000-01-01 21:50:00']))
expected = Series([Timestamp('2000-01-28 16:47:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-29 16:48:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-30 16:49:00-0800',
tz='America/Los_Angeles'),
Timestamp('2000-01-31 16:50:00-0600',
tz='America/Chicago'),
Timestamp('2000-01-01 16:50:00-0500',
tz='America/New_York')],
dtype=object)
assert_series_equal(result, expected)

tz = 'America/Chicago'
57 changes: 57 additions & 0 deletions pandas/tools/tests/test_merge.py
@@ -1024,6 +1024,63 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)

def test_concat_NaT_series(self):
# GH 11693
        # test for concatenating NaT series with datetime series.
x = Series(date_range('20151124 08:00', '20151124 09:00',
freq='1h', tz='US/Eastern'))
y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
expected = Series([x[0], x[1], pd.NaT, pd.NaT])

result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# all NaT with tz
expected = Series(pd.NaT, index=range(4),
dtype='datetime64[ns, US/Eastern]')
result = pd.concat([y, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# without tz
x = pd.Series(pd.date_range('20151124 08:00',
'20151124 09:00', freq='1h'))
y = pd.Series(pd.date_range('20151124 10:00',
'20151124 11:00', freq='1h'))
y[:] = pd.NaT
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
result = pd.concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# all NaT without tz
x[:] = pd.NaT
expected = pd.Series(pd.NaT, index=range(4),
dtype='datetime64[ns]')
result = pd.concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

def test_concat_tz_series(self):
# GH 11755
# tz and no tz
x = Series(date_range('20151124 08:00',
'20151124 09:00',
freq='1h', tz='UTC'))
y = Series(date_range('2012-01-01', '2012-01-02'))
expected = Series([x[0], x[1], y[0], y[1]],
dtype='object')
result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

# GH 11887
# concat tz and object
x = Series(date_range('20151124 08:00',
'20151124 09:00',
freq='1h', tz='UTC'))
y = Series(['a', 'b'])
expected = Series([x[0], x[1], y[0], y[1]],
dtype='object')
result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1': [0, 1], 'col_left': [
18 changes: 13 additions & 5 deletions pandas/tseries/common.py
@@ -255,14 +255,16 @@ def _concat_compat(to_concat, axis=0):

def convert_to_pydatetime(x, axis):
# coerce to an object dtype
if x.dtype == _NS_DTYPE:

if hasattr(x, 'tz'):
# if dtype is of datetimetz or timezone
if x.dtype.kind == _NS_DTYPE.kind:
if getattr(x, 'tz', None) is not None:
x = x.asobject
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)

shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)
elif x.dtype == _TD_DTYPE:
shape = x.shape
x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
@@ -275,6 +277,12 @@ def convert_to_pydatetime(x, axis):
# datetimetz
if 'datetimetz' in typs:

        # if to_concat has 'datetime' or 'object'
# then we need to coerce to object
if 'datetime' in typs or 'object' in typs:
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
return np.concatenate(to_concat, axis=axis)

# we require ALL of the same tz for datetimetz
tzs = set([getattr(x, 'tz', None) for x in to_concat]) - set([None])
if len(tzs) == 1:
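The early return above is what makes mixing tz-aware values with tz-naive datetimes or plain objects fall back to object dtype rather than raising. A minimal sketch, mirroring the new test_concat_tz_series test:

```python
import pandas as pd

# tz-aware plus tz-naive: the result is coerced to object dtype
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00',
                            freq='1h', tz='UTC'))
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))

result = pd.concat([x, y], ignore_index=True)
print(result.dtype)  # object, per the test expectation
```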
29 changes: 28 additions & 1 deletion pandas/tseries/index.py
@@ -242,6 +242,19 @@ def __new__(cls, data=None,
raise ValueError("Must provide freq argument if no data is "
"supplied")

        # if dtype has an embedded tz, capture it
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and str(tz) != str(dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass

if data is None:
return cls._generate(start, end, periods, name, freq,
tz=tz, normalize=normalize, closed=closed,
@@ -272,7 +285,15 @@ def __new__(cls, data=None,
data.name = name

if tz is not None:
return data.tz_localize(tz, ambiguous=ambiguous)

# we might already be localized to this tz
# so passing the same tz is ok
# however any other tz is a no-no
if data.tz is None:
return data.tz_localize(tz, ambiguous=ambiguous)
elif str(tz) != str(data.tz):
raise TypeError("Already tz-aware, use tz_convert "
"to convert.")

return data

@@ -288,6 +309,12 @@ def __new__(cls, data=None,
if tz is None:
tz = data.tz

else:
# the tz's must match
if str(tz) != str(data.tz):
raise TypeError("Already tz-aware, use tz_convert "
"to convert.")

subarr = data.values

if freq is None:
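In user terms, the constructor change means a dtype with an embedded tz behaves like passing ``tz=`` directly, while conflicting tz information raises. A minimal sketch, mirroring the new test_constructor_dtype test below:

```python
import pandas as pd

# a dtype with an embedded tz localizes the values
idx = pd.DatetimeIndex(['2013-01-01', '2013-01-02'],
                       dtype='datetime64[ns, US/Eastern]')

# equivalent to passing the tz explicitly
same = pd.DatetimeIndex(['2013-01-01', '2013-01-02'], tz='US/Eastern')
assert idx.equals(same)

# per the tests: supplying both a tz and a dtype with a different tz
# raises ValueError, and re-wrapping an already tz-aware index with a
# different tz raises TypeError (use tz_convert instead)
```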
36 changes: 35 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
@@ -74,7 +74,7 @@ def test_index_unique(self):
dups_local = self.dups.index.tz_localize('US/Eastern')
dups_local.name = 'foo'
result = dups_local.unique()
expected = DatetimeIndex(expected, tz='US/Eastern')
expected = DatetimeIndex(expected).tz_localize('US/Eastern')
self.assertTrue(result.tz is not None)
self.assertEqual(result.name, 'foo')
self.assertTrue(result.equals(expected))
@@ -2473,6 +2473,40 @@ def test_constructor_datetime64_tzformat(self):
tz='Asia/Tokyo')
self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

def test_constructor_dtype(self):

# passing a dtype with a tz should localize
idx = DatetimeIndex(['2013-01-01',
'2013-01-02'],
dtype='datetime64[ns, US/Eastern]')
expected = DatetimeIndex(['2013-01-01', '2013-01-02']
).tz_localize('US/Eastern')
self.assertTrue(idx.equals(expected))

idx = DatetimeIndex(['2013-01-01',
'2013-01-02'],
tz='US/Eastern')
self.assertTrue(idx.equals(expected))

        # if we already have a tz and it's not the same, then raise
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
dtype='datetime64[ns, US/Eastern]')

self.assertRaises(ValueError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns]'))

# this is effectively trying to convert tz's
self.assertRaises(TypeError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns, CET]'))
self.assertRaises(ValueError,
lambda: DatetimeIndex(
idx, tz='CET',
dtype='datetime64[ns, US/Eastern]'))
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
self.assertTrue(idx.equals(result))

def test_constructor_name(self):
idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A',
name='TEST')
4 changes: 4 additions & 0 deletions pandas/tslib.pyx
@@ -3554,6 +3554,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
trans, deltas, typ = _get_dst_info(tz2)
trans_len = len(trans)

# if all NaT, return all NaT
if (utc_dates==NPY_NAT).all():
return utc_dates

# use first non-NaT element
# if all-NaT, return all-NaT
if (result==NPY_NAT).all():
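A user-level sketch of the path this guards, assuming the Series.dt.tz_convert accessor; the short circuit simply returns the all-NaT array unchanged instead of looking up DST transitions:

```python
import pandas as pd

# converting an all-NaT tz-aware series should stay all NaT
s = pd.Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
converted = s.dt.tz_convert('UTC')
print(converted.isnull().all())  # True
```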