Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: tz info lost by set_index and reindex #7092

Merged
merged 1 commit into from May 12, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Expand Up @@ -495,6 +495,8 @@ Bug Fixes
- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`)
- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
- Bug in ``quantile`` with datetime values (:issue:`6965`)
- Bug in ``DataFrame.set_index``, ``reindex`` and ``pivot`` not preserving ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`)
- Bug in ``MultiIndex.get_level_values`` not preserving ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`)

pandas 0.13.1
-------------
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Expand Up @@ -2220,7 +2220,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
for i in range(self.index.nlevels):
arrays.append(self.index.get_level_values(i))
else:
arrays.append(np.asarray(self.index))
arrays.append(self.index)

to_remove = []
for col in keys:
Expand All @@ -2232,9 +2232,12 @@ def set_index(self, keys, drop=True, append=False, inplace=False,

level = col.get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, (Series, Index)):
elif isinstance(col, Series):
level = col.values
names.append(col.name)
elif isinstance(col, Index):
level = col
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
level = col
names.append(None)
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/index.py
Expand Up @@ -114,9 +114,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,

# no class inference!
if fastpath:
subarr = data.view(cls)
subarr.name = name
return subarr
return cls._simple_new(data, name)

from pandas.tseries.period import PeriodIndex
if isinstance(data, (np.ndarray, ABCSeries)):
Expand Down Expand Up @@ -185,6 +183,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
subarr._set_names([name])
return subarr

@classmethod
def _simple_new(cls, values, name, **kwargs):
    """
    Build an instance of ``cls`` directly from ``values`` without any
    class inference.

    ``values`` is reinterpreted through ``values.view(cls)`` and the
    resulting object gets ``name`` assigned to it.  Any additional keyword
    arguments are accepted and ignored here.
    """
    viewed = values.view(cls)
    viewed.name = name
    return viewed

def is_(self, other):
"""
More flexible, faster check like ``is`` but that works through views
Expand Down Expand Up @@ -2588,11 +2592,12 @@ def get_level_values(self, level):
values : ndarray
"""
num = self._get_level_number(level)
unique_vals = self.levels[num] # .values
unique = self.levels[num] # .values
labels = self.labels[num]
values = Index(com.take_1d(unique_vals.values, labels,
fill_value=unique_vals._na_value))
values.name = self.names[num]
filled = com.take_1d(unique.values, labels, fill_value=unique._na_value)
values = unique._simple_new(filled, self.names[num],
freq=getattr(unique, 'freq', None),
tz=getattr(unique, 'tz', None))
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/reshape.py
Expand Up @@ -82,11 +82,10 @@ def __init__(self, values, index, level=-1, value_columns=None):
labels = index.labels

def _make_index(lev, lab):
if isinstance(lev, PeriodIndex):
i = lev.copy()
else:
i = lev.__class__(_make_index_array_level(lev.values, lab))
i.name = lev.name
values = _make_index_array_level(lev.values, lab)
i = lev._simple_new(values, lev.name,
freq=getattr(lev, 'freq', None),
tz=getattr(lev, 'tz', None))
return i

self.new_index_levels = [_make_index(lev, lab)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/test_index.py
Expand Up @@ -180,6 +180,19 @@ def test_index_ctor_infer_periodindex(self):
assert_array_equal(rs, xp)
tm.assert_isinstance(rs, PeriodIndex)

def test_constructor_simple_new(self):
    # For integer, float and object data, passing an index and its own
    # name back through _simple_new must give an index equal to the
    # one we started with.
    cases = [
        ([1, 2, 3, 4, 5], 'int'),
        ([1.1, np.nan, 2.2, 3.0], 'float'),
        (['A', 'B', 'C', np.nan], 'obj'),
    ]
    for data, label in cases:
        source = Index(data, name=label)
        rebuilt = source._simple_new(source, label)
        self.assert_(rebuilt.equals(source))

def test_copy(self):
i = Index([], name='Foo')
i_copy = i.copy()
Expand Down
72 changes: 72 additions & 0 deletions pandas/tests/test_multilevel.py
Expand Up @@ -1989,6 +1989,78 @@ def test_datetimeindex(self):
self.assert_(idx.levels[0].equals(expected1))
self.assert_(idx.levels[1].equals(idx2))

def test_set_index_datetime(self):
    # GH 3950
    stamps = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
              '2011-07-19 09:00:00']

    frame = pd.DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
                          'datetime': stamps + stamps,
                          'value': range(6)})
    frame.index = pd.to_datetime(frame.pop('datetime'), utc=True)
    frame.index = frame.index.tz_localize('UTC').tz_convert('US/Pacific')

    pacific = pd.DatetimeIndex(stamps)
    pacific = pacific.tz_localize('UTC').tz_convert('US/Pacific')

    # appending the label column keeps the tz-aware level intact
    frame = frame.set_index('label', append=True)
    self.assert_(frame.index.levels[0].equals(pacific))
    self.assert_(frame.index.levels[1].equals(pd.Index(['a', 'b'])))

    # ... and so does swapping the two levels
    frame = frame.swaplevel(0, 1)
    self.assert_(frame.index.levels[0].equals(pd.Index(['a', 'b'])))
    self.assert_(frame.index.levels[1].equals(pacific))

    frame = DataFrame(np.random.random(6))
    idx1 = pd.DatetimeIndex(stamps + stamps, tz='US/Eastern')
    idx2 = pd.DatetimeIndex(['2012-04-01 09:00'] * 3 +
                            ['2012-04-02 09:00'] * 3,
                            tz='US/Eastern')
    idx3 = pd.date_range('2011-01-01 09:00', periods=6, tz='Asia/Tokyo')

    # build a three-level index one tz-aware DatetimeIndex at a time
    frame = frame.set_index(idx1)
    for extra in (idx2, idx3):
        frame = frame.set_index(extra, append=True)

    expected_levels = [
        pd.DatetimeIndex(stamps, tz='US/Eastern'),
        pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'],
                         tz='US/Eastern'),
        idx3,
    ]
    for pos, wanted in enumerate(expected_levels):
        self.assert_(frame.index.levels[pos].equals(wanted))

    # GH 7092
    for pos, wanted in enumerate((idx1, idx2, idx3)):
        self.assert_(frame.index.get_level_values(pos).equals(wanted))

def test_set_index_period(self):
    # GH 6631
    frame = DataFrame(np.random.random(6))

    monthly = pd.period_range('2011-01-01', periods=3, freq='M')
    idx1 = monthly.append(monthly)
    hourly = pd.period_range('2013-01-01 09:00', periods=2, freq='H')
    idx2 = hourly.append(hourly).append(hourly)
    idx3 = pd.period_range('2005', periods=6, freq='Y')

    # build a three-level index one PeriodIndex at a time
    frame = frame.set_index(idx1)
    for extra in (idx2, idx3):
        frame = frame.set_index(extra, append=True)

    # the unique values of each level are the un-repeated ranges
    expected_levels = [
        pd.period_range('2011-01-01', periods=3, freq='M'),
        pd.period_range('2013-01-01 09:00', periods=2, freq='H'),
        idx3,
    ]
    for pos, wanted in enumerate(expected_levels):
        self.assert_(frame.index.levels[pos].equals(wanted))

    # the full per-row values of each level match the indexes passed in
    for pos, wanted in enumerate((idx1, idx2, idx3)):
        self.assert_(frame.index.get_level_values(pos).equals(wanted))


if __name__ == '__main__':

Expand Down
42 changes: 39 additions & 3 deletions pandas/tools/tests/test_pivot.py
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from numpy.testing import assert_equal

import pandas
import pandas as pd
from pandas import DataFrame, Series, Index, MultiIndex, Grouper
from pandas.tools.merge import concat
from pandas.tools.pivot import pivot_table, crosstab
Expand Down Expand Up @@ -181,6 +181,42 @@ def test_pivot_index_with_nan(self):
columns = Index(['C1','C2','C3','C4'],name='b'))
tm.assert_frame_equal(result, expected)

def test_pivot_with_tz(self):
    # GH 5878
    # Pivoting on tz-aware datetime columns is expected to produce
    # tz-aware DatetimeIndex labels on both axes.
    def _aware(zone):
        return lambda d: pd.Timestamp(d, tz=zone)

    def _row_index():
        return pd.DatetimeIndex(['2013/01/01 09:00', '2013/01/02 09:00'],
                                name='dt1', tz='US/Pacific')

    dt1_values = [datetime.datetime(2013, 1, day, 9, 0)
                  for day in (1, 2, 1, 2)]
    dt2_values = [datetime.datetime(2014, 1, day, 9, 0)
                  for day in (1, 1, 2, 2)]
    df = DataFrame({'dt1': dt1_values,
                    'dt2': dt2_values,
                    'data1': range(4), 'data2': range(4)})

    df['dt1'] = df['dt1'].apply(_aware('US/Pacific'))
    df['dt2'] = df['dt2'].apply(_aware('Asia/Tokyo'))

    tokyo_labels = ['2014/01/01 09:00', '2014/01/02 09:00']

    # no ``values`` given: columns become a (value column, dt2) MultiIndex
    top_level = Index(['data1', 'data1', 'data2', 'data2'])
    bottom_level = pd.DatetimeIndex(tokyo_labels * 2,
                                    name='dt2', tz='Asia/Tokyo')
    exp_col = pd.MultiIndex.from_arrays([top_level, bottom_level])
    expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                         index=_row_index(),
                         columns=exp_col)

    pv = df.pivot(index='dt1', columns='dt2')
    tm.assert_frame_equal(pv, expected)

    # a single ``values`` column: columns are just the dt2 labels
    expected = DataFrame([[0, 2], [1, 3]],
                         index=_row_index(),
                         columns=pd.DatetimeIndex(tokyo_labels,
                                                  name='dt2',
                                                  tz='Asia/Tokyo'))

    pv = df.pivot(index='dt1', columns='dt2', values='data1')
    tm.assert_frame_equal(pv, expected)

def test_margins(self):
def _check_output(res, col, index=['A', 'B'], columns=['C']):
cmarg = res['All'][:-1]
Expand Down Expand Up @@ -235,7 +271,7 @@ def test_pivot_integer_columns(self):
d = datetime.date.min
data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
[d + datetime.timedelta(i) for i in range(20)], [1.0]))
df = pandas.DataFrame(data)
df = DataFrame(data)
table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])

df2 = df.rename(columns=str)
Expand Down Expand Up @@ -286,7 +322,7 @@ def test_pivot_columns_lexsorted(self):
iproduct = np.random.randint(0, len(products), n)
items['Index'] = products['Index'][iproduct]
items['Symbol'] = products['Symbol'][iproduct]
dr = pandas.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
dr = pd.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
dates = dr[np.random.randint(0, len(dr), n)]
items['Year'] = dates.year
items['Month'] = dates.month
Expand Down
7 changes: 7 additions & 0 deletions pandas/tseries/period.py
Expand Up @@ -669,6 +669,13 @@ def _from_arraylike(cls, data, freq, tz):

return data, freq

@classmethod
def _simple_new(cls, values, name, freq=None, **kwargs):
    """
    Build an instance of ``cls`` directly from ``values``.

    ``values`` is reinterpreted through ``values.view(cls)``; ``name`` and
    ``freq`` are then assigned on the resulting object.  Any additional
    keyword arguments are accepted and ignored here.
    """
    viewed = values.view(cls)
    viewed.name = name
    viewed.freq = freq
    return viewed

def __contains__(self, key):
if not isinstance(key, Period) or key.freq != self.freq:
if isinstance(key, compat.string_types):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tseries/tests/test_period.py
Expand Up @@ -1194,6 +1194,14 @@ def test_constructor_datetime64arr(self):

self.assertRaises(ValueError, PeriodIndex, vals, freq='D')

def test_constructor_simple_new(self):
    # _simple_new should reproduce the original index whether it is fed
    # the PeriodIndex itself or its 'i8' representation.
    idx = period_range('2007-01', name='p', periods=20, freq='M')

    for as_int64 in (False, True):
        values = idx.astype('i8') if as_int64 else idx
        result = idx._simple_new(values, 'p', freq=idx.freq)
        self.assert_(result.equals(idx))

def test_is_(self):
create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
end='12/1/2009')
Expand Down Expand Up @@ -1390,6 +1398,17 @@ def test_frame_setitem(self):
tm.assert_isinstance(rs.index, PeriodIndex)
self.assert_(rs.index.equals(rng))

def test_period_set_index_reindex(self):
    # GH 6631
    # set_index and reindex with a PeriodIndex must leave the frame with
    # an index equal to the one that was passed in.
    frame = DataFrame(np.random.random(6))
    monthly = period_range('2011/01/01', periods=6, freq='M')
    annual = period_range('2013', periods=6, freq='A')

    frame = frame.set_index(monthly)
    self.assert_(frame.index.equals(monthly))

    frame = frame.reindex(annual)
    self.assert_(frame.index.equals(annual))

def test_nested_dict_frame_constructor(self):
rng = period_range('1/1/2000', periods=5)
df = DataFrame(randn(10, 5), columns=rng)
Expand Down
11 changes: 11 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Expand Up @@ -2510,6 +2510,17 @@ def test_dti_reset_index_round_trip(self):
self.assertEquals(df.index[0], stamp)
self.assertEquals(df.reset_index()['Date'][0], stamp)

def test_dti_set_index_reindex(self):
    # GH 6631
    # set_index and reindex with a tz-aware DatetimeIndex must leave the
    # frame with an index equal to the one that was passed in.
    frame = DataFrame(np.random.random(6))
    eastern = date_range('2011/01/01', periods=6, freq='M',
                         tz='US/Eastern')
    tokyo = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')

    frame = frame.set_index(eastern)
    self.assert_(frame.index.equals(eastern))

    frame = frame.reindex(tokyo)
    self.assert_(frame.index.equals(tokyo))

def test_datetimeindex_union_join_empty(self):
dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D')
empty = Index([])
Expand Down