Skip to content

Commit

Permalink
Merge pull request #7092 from sinhrks/appendtz
Browse files Browse the repository at this point in the history
BUG: tz info lost by set_index and reindex
  • Loading branch information
jreback committed May 12, 2014
2 parents 2995737 + b716e67 commit c59b217
Show file tree
Hide file tree
Showing 10 changed files with 184 additions and 17 deletions.
2 changes: 2 additions & 0 deletions doc/source/release.rst
Expand Up @@ -495,6 +495,8 @@ Bug Fixes
- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`)
- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
- Bug in ``quantile`` with datetime values (:issue:`6965`)
- Bug where ``DataFrame.set_index``, ``reindex`` and ``pivot`` do not preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`)
- Bug where ``MultiIndex.get_level_values`` does not preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`)

pandas 0.13.1
-------------
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Expand Up @@ -2220,7 +2220,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
for i in range(self.index.nlevels):
arrays.append(self.index.get_level_values(i))
else:
arrays.append(np.asarray(self.index))
arrays.append(self.index)

to_remove = []
for col in keys:
Expand All @@ -2232,9 +2232,12 @@ def set_index(self, keys, drop=True, append=False, inplace=False,

level = col.get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, (Series, Index)):
elif isinstance(col, Series):
level = col.values
names.append(col.name)
elif isinstance(col, Index):
level = col
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
level = col
names.append(None)
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/index.py
Expand Up @@ -114,9 +114,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,

# no class inference!
if fastpath:
subarr = data.view(cls)
subarr.name = name
return subarr
return cls._simple_new(data, name)

from pandas.tseries.period import PeriodIndex
if isinstance(data, (np.ndarray, ABCSeries)):
Expand Down Expand Up @@ -185,6 +183,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
subarr._set_names([name])
return subarr

@classmethod
def _simple_new(cls, values, name, **kwargs):
result = values.view(cls)
result.name = name
return result

def is_(self, other):
"""
More flexible, faster check like ``is`` but that works through views
Expand Down Expand Up @@ -2588,11 +2592,12 @@ def get_level_values(self, level):
values : ndarray
"""
num = self._get_level_number(level)
unique_vals = self.levels[num] # .values
unique = self.levels[num] # .values
labels = self.labels[num]
values = Index(com.take_1d(unique_vals.values, labels,
fill_value=unique_vals._na_value))
values.name = self.names[num]
filled = com.take_1d(unique.values, labels, fill_value=unique._na_value)
values = unique._simple_new(filled, self.names[num],
freq=getattr(unique, 'freq', None),
tz=getattr(unique, 'tz', None))
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/reshape.py
Expand Up @@ -82,11 +82,10 @@ def __init__(self, values, index, level=-1, value_columns=None):
labels = index.labels

def _make_index(lev, lab):
if isinstance(lev, PeriodIndex):
i = lev.copy()
else:
i = lev.__class__(_make_index_array_level(lev.values, lab))
i.name = lev.name
values = _make_index_array_level(lev.values, lab)
i = lev._simple_new(values, lev.name,
freq=getattr(lev, 'freq', None),
tz=getattr(lev, 'tz', None))
return i

self.new_index_levels = [_make_index(lev, lab)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/test_index.py
Expand Up @@ -180,6 +180,19 @@ def test_index_ctor_infer_periodindex(self):
assert_array_equal(rs, xp)
tm.assert_isinstance(rs, PeriodIndex)

def test_constructor_simple_new(self):
    # _simple_new must hand back an index equal to the source and carrying
    # the requested name, for integer, float (with NaN) and object (with
    # NaN) data alike.
    cases = [
        ([1, 2, 3, 4, 5], 'int'),
        ([1.1, np.nan, 2.2, 3.0], 'float'),
        (['A', 'B', 'C', np.nan], 'obj'),
    ]
    for data, name in cases:
        idx = Index(data, name=name)
        result = idx._simple_new(idx, name)
        self.assert_(result.equals(idx))
        # NOTE(review): ``equals`` is understood to compare elements only,
        # so the name is asserted separately -- confirm against Index.equals
        self.assertEqual(result.name, name)

def test_copy(self):
i = Index([], name='Foo')
i_copy = i.copy()
Expand Down
72 changes: 72 additions & 0 deletions pandas/tests/test_multilevel.py
Expand Up @@ -1989,6 +1989,78 @@ def test_datetimeindex(self):
self.assert_(idx.levels[0].equals(expected1))
self.assert_(idx.levels[1].equals(idx2))

def test_set_index_datetime(self):
    # GH 3950
    stamps = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
              '2011-07-19 09:00:00']
    labels = pd.Index(['a', 'b'])

    frame = pd.DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
                          'datetime': stamps * 2,
                          'value': range(6)})
    frame.index = pd.to_datetime(frame.pop('datetime'), utc=True)
    frame.index = frame.index.tz_localize('UTC').tz_convert('US/Pacific')

    expected = pd.DatetimeIndex(stamps)
    expected = expected.tz_localize('UTC').tz_convert('US/Pacific')

    # appending the label column keeps the datetime level as level 0
    frame = frame.set_index('label', append=True)
    levels = frame.index.levels
    self.assert_(levels[0].equals(expected))
    self.assert_(levels[1].equals(labels))

    # after swapping, the same two levels appear in reverse order
    frame = frame.swaplevel(0, 1)
    levels = frame.index.levels
    self.assert_(levels[0].equals(labels))
    self.assert_(levels[1].equals(expected))

    # three stacked DatetimeIndex levels built with explicit time zones
    frame = DataFrame(np.random.random(6))
    idx1 = pd.DatetimeIndex(stamps * 2, tz='US/Eastern')
    idx2 = pd.DatetimeIndex(['2012-04-01 09:00'] * 3 +
                            ['2012-04-02 09:00'] * 3,
                            tz='US/Eastern')
    idx3 = pd.date_range('2011-01-01 09:00', periods=6, tz='Asia/Tokyo')

    frame = frame.set_index(idx1)
    for extra in (idx2, idx3):
        frame = frame.set_index(extra, append=True)

    expected1 = pd.DatetimeIndex(stamps, tz='US/Eastern')
    expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'],
                                 tz='US/Eastern')

    for pos, unique in enumerate([expected1, expected2, idx3]):
        self.assert_(frame.index.levels[pos].equals(unique))

    # GH 7092
    for pos, full in enumerate([idx1, idx2, idx3]):
        self.assert_(frame.index.get_level_values(pos).equals(full))

def test_set_index_period(self):
    # GH 6631
    frame = DataFrame(np.random.random(6))

    monthly = pd.period_range('2011-01-01', periods=3, freq='M')
    hourly = pd.period_range('2013-01-01 09:00', periods=2, freq='H')

    # repeat the short ranges so every level has six entries
    idx1 = monthly.append(monthly)
    idx2 = hourly.append(hourly).append(hourly)
    idx3 = pd.period_range('2005', periods=6, freq='Y')

    frame = frame.set_index(idx1)
    for extra in (idx2, idx3):
        frame = frame.set_index(extra, append=True)

    expected1 = pd.period_range('2011-01-01', periods=3, freq='M')
    expected2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H')

    for pos, unique in enumerate([expected1, expected2, idx3]):
        self.assert_(frame.index.levels[pos].equals(unique))

    for pos, full in enumerate([idx1, idx2, idx3]):
        self.assert_(frame.index.get_level_values(pos).equals(full))


if __name__ == '__main__':

Expand Down
42 changes: 39 additions & 3 deletions pandas/tools/tests/test_pivot.py
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from numpy.testing import assert_equal

import pandas
import pandas as pd
from pandas import DataFrame, Series, Index, MultiIndex, Grouper
from pandas.tools.merge import concat
from pandas.tools.pivot import pivot_table, crosstab
Expand Down Expand Up @@ -181,6 +181,42 @@ def test_pivot_index_with_nan(self):
columns = Index(['C1','C2','C3','C4'],name='b'))
tm.assert_frame_equal(result, expected)

def test_pivot_with_tz(self):
    # GH 5878
    dt1_values = [datetime.datetime(2013, 1, day, 9, 0)
                  for day in (1, 2, 1, 2)]
    dt2_values = [datetime.datetime(2014, 1, day, 9, 0)
                  for day in (1, 1, 2, 2)]
    df = DataFrame({'dt1': dt1_values,
                    'dt2': dt2_values,
                    'data1': range(4), 'data2': range(4)})

    def localized(column, zone):
        # rebuild every value of the column as a Timestamp in ``zone``
        return df[column].apply(lambda d: pd.Timestamp(d, tz=zone))

    df['dt1'] = localized('dt1', 'US/Pacific')
    df['dt2'] = localized('dt2', 'Asia/Tokyo')

    row_stamps = ['2013/01/01 09:00', '2013/01/02 09:00']
    col_stamps = ['2014/01/01 09:00', '2014/01/02 09:00']

    # pivot without ``values``: columns become (data column, dt2) pairs
    exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
    exp_col2 = pd.DatetimeIndex(col_stamps * 2,
                                name='dt2', tz='Asia/Tokyo')
    exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
    exp_index = pd.DatetimeIndex(row_stamps, name='dt1', tz='US/Pacific')
    expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                         index=exp_index,
                         columns=exp_col)

    pv = df.pivot(index='dt1', columns='dt2')
    tm.assert_frame_equal(pv, expected)

    # pivot on a single value column: columns are the dt2 index itself
    exp_index = pd.DatetimeIndex(row_stamps, name='dt1', tz='US/Pacific')
    exp_columns = pd.DatetimeIndex(col_stamps, name='dt2', tz='Asia/Tokyo')
    expected = DataFrame([[0, 2], [1, 3]],
                         index=exp_index,
                         columns=exp_columns)

    pv = df.pivot(index='dt1', columns='dt2', values='data1')
    tm.assert_frame_equal(pv, expected)

def test_margins(self):
def _check_output(res, col, index=['A', 'B'], columns=['C']):
cmarg = res['All'][:-1]
Expand Down Expand Up @@ -235,7 +271,7 @@ def test_pivot_integer_columns(self):
d = datetime.date.min
data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
[d + datetime.timedelta(i) for i in range(20)], [1.0]))
df = pandas.DataFrame(data)
df = DataFrame(data)
table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])

df2 = df.rename(columns=str)
Expand Down Expand Up @@ -286,7 +322,7 @@ def test_pivot_columns_lexsorted(self):
iproduct = np.random.randint(0, len(products), n)
items['Index'] = products['Index'][iproduct]
items['Symbol'] = products['Symbol'][iproduct]
dr = pandas.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
dr = pd.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
dates = dr[np.random.randint(0, len(dr), n)]
items['Year'] = dates.year
items['Month'] = dates.month
Expand Down
7 changes: 7 additions & 0 deletions pandas/tseries/period.py
Expand Up @@ -669,6 +669,13 @@ def _from_arraylike(cls, data, freq, tz):

return data, freq

@classmethod
def _simple_new(cls, values, name, freq=None, **kwargs):
result = values.view(cls)
result.name = name
result.freq = freq
return result

def __contains__(self, key):
if not isinstance(key, Period) or key.freq != self.freq:
if isinstance(key, compat.string_types):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tseries/tests/test_period.py
Expand Up @@ -1194,6 +1194,14 @@ def test_constructor_datetime64arr(self):

self.assertRaises(ValueError, PeriodIndex, vals, freq='D')

def test_constructor_simple_new(self):
    # _simple_new must rebuild an equal PeriodIndex, carrying the requested
    # name and freq, both from the index itself and from its 'i8' form.
    idx = period_range('2007-01', name='p', periods=20, freq='M')

    for values in (idx, idx.astype('i8')):
        result = idx._simple_new(values, 'p', freq=idx.freq)
        self.assert_(result.equals(idx))
        # the attributes _simple_new is responsible for setting are
        # asserted explicitly rather than left to ``equals``
        self.assertEqual(result.name, 'p')
        self.assertEqual(result.freq, idx.freq)

def test_is_(self):
create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
end='12/1/2009')
Expand Down Expand Up @@ -1390,6 +1398,17 @@ def test_frame_setitem(self):
tm.assert_isinstance(rs.index, PeriodIndex)
self.assert_(rs.index.equals(rng))

def test_period_set_index_reindex(self):
    # GH 6631
    frame = DataFrame(np.random.random(6))
    monthly = period_range('2011/01/01', periods=6, freq='M')
    annual = period_range('2013', periods=6, freq='A')

    # the index installed by set_index must equal the one passed in
    indexed = frame.set_index(monthly)
    self.assert_(indexed.index.equals(monthly))

    # and likewise for the index produced by reindex
    reindexed = indexed.reindex(annual)
    self.assert_(reindexed.index.equals(annual))

def test_nested_dict_frame_constructor(self):
rng = period_range('1/1/2000', periods=5)
df = DataFrame(randn(10, 5), columns=rng)
Expand Down
11 changes: 11 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Expand Up @@ -2510,6 +2510,17 @@ def test_dti_reset_index_round_trip(self):
self.assertEquals(df.index[0], stamp)
self.assertEquals(df.reset_index()['Date'][0], stamp)

def test_dti_set_index_reindex(self):
    # GH 6631
    frame = DataFrame(np.random.random(6))
    eastern = date_range('2011/01/01', periods=6, freq='M',
                         tz='US/Eastern')
    tokyo = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')

    # the index installed by set_index must equal the tz-aware one passed in
    indexed = frame.set_index(eastern)
    self.assert_(indexed.index.equals(eastern))

    # and likewise for the index produced by reindex
    reindexed = indexed.reindex(tokyo)
    self.assert_(reindexed.index.equals(tokyo))

def test_datetimeindex_union_join_empty(self):
dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D')
empty = Index([])
Expand Down

0 comments on commit c59b217

Please sign in to comment.