Skip to content

Commit

Permalink
BUG: DTI.value_counts doesn't preserve tz
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Jul 25, 2014
1 parent 415fbfc commit 4b5a7c7
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 16 deletions.
6 changes: 6 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,12 @@ Bug Fixes



- Bug in ``DatetimeIndex.value_counts`` where the timezone was not preserved (:issue:`7735`)
- Bug in ``PeriodIndex.value_counts`` where the result was indexed by an ``Int64Index`` instead of a ``PeriodIndex`` (:issue:`7735`)







Expand Down
10 changes: 6 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
from pandas.core.series import Series
from pandas.tools.tile import cut

is_period = getattr(values, 'inferred_type', None) == 'period'
values = Series(values).values
is_category = com.is_categorical_dtype(values.dtype)

Expand All @@ -212,11 +213,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
values = cat.codes

dtype = values.dtype
if com.is_integer_dtype(dtype):
values = com._ensure_int64(values)
keys, counts = htable.value_count_int64(values)

elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) or is_period:
values = values.view(np.int64)
keys, counts = htable.value_count_int64(values)

Expand All @@ -227,6 +225,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
# convert the keys back to the dtype we came in
keys = keys.astype(dtype)

elif com.is_integer_dtype(dtype):
values = com._ensure_int64(values)
keys, counts = htable.value_count_int64(values)

else:
values = com._ensure_object(values)
mask = com.isnull(values)
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,18 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
counts : Series
"""
from pandas.core.algorithms import value_counts
return value_counts(self.values, sort=sort, ascending=ascending,
normalize=normalize, bins=bins, dropna=dropna)
from pandas.tseries.api import DatetimeIndex, PeriodIndex
result = value_counts(self, sort=sort, ascending=ascending,
normalize=normalize, bins=bins, dropna=dropna)

if isinstance(self, PeriodIndex):
# preserve freq
result.index = self._simple_new(result.index.values, self.name,
freq=self.freq)
elif isinstance(self, DatetimeIndex):
result.index = self._simple_new(result.index.values, self.name,
tz=getattr(self, 'tz', None))
return result

def unique(self):
"""
Expand Down Expand Up @@ -542,5 +552,3 @@ def __sub__(self, other):

def _add_delta(self, other):
return NotImplemented


59 changes: 51 additions & 8 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ def test_value_counts_unique_nunique(self):
# skips int64 because it doesn't allow to include nan or None
continue

if o.values.dtype == 'datetime64[ns]' and _np_version_under1p7:
# Unable to assign None
if ((isinstance(o, Int64Index) and not isinstance(o,
(DatetimeIndex, PeriodIndex)))):
# skips int64 because it doesn't allow to include nan or None
continue

# special assign to the numpy array
Expand All @@ -283,12 +284,8 @@ def test_value_counts_unique_nunique(self):
else:
o = klass(np.repeat(values, range(1, len(o) + 1)))

if isinstance(o, DatetimeIndex):
expected_s_na = Series(list(range(10, 2, -1)) + [3], index=values[9:0:-1])
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1])
else:
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=values[9:0:-1], dtype='int64')
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1], dtype='int64')
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=values[9:0:-1], dtype='int64')
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1], dtype='int64')

tm.assert_series_equal(o.value_counts(dropna=False), expected_s_na)
tm.assert_series_equal(o.value_counts(), expected_s)
Expand Down Expand Up @@ -709,6 +706,28 @@ def test_sub_isub(self):
rng -= 1
tm.assert_index_equal(rng, expected)

def test_value_counts(self):
    """Exercise ``DatetimeIndex.value_counts`` for several timezones (GH 7735).

    For each timezone (including ``None``) the counts are compared with a
    Series whose index is built with that same ``tz``. Three cases are
    checked: a run of repeated hourly stamps, an index containing ``NaT``
    with the default arguments, and the same index with ``dropna=False``.
    """
    # GH 7735
    timezones = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']
    for tz in timezones:
        hourly = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
        # the stamp at position n is repeated n + 1 times, so the last
        # stamp is the most frequent one
        repeats = range(1, len(hourly) + 1)
        repeated = DatetimeIndex(np.repeat(hourly.values, repeats), tz=tz)

        expected_index = pd.date_range('2011-01-01 18:00', freq='-1H',
                                       periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=expected_index,
                          dtype='int64')
        tm.assert_series_equal(repeated.value_counts(), expected)

        with_nat = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                                  '2013-01-01 09:00', '2013-01-01 08:00',
                                  '2013-01-01 08:00', pd.NaT], tz=tz)

        # default call: the expected counts do not include NaT
        expected_index = DatetimeIndex(['2013-01-01 09:00',
                                        '2013-01-01 08:00'], tz=tz)
        expected = Series([3, 2], index=expected_index)
        tm.assert_series_equal(with_nat.value_counts(), expected)

        # dropna=False: NaT is expected as the last, least frequent entry
        expected_index = DatetimeIndex(['2013-01-01 09:00',
                                        '2013-01-01 08:00', pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=expected_index)
        tm.assert_series_equal(with_nat.value_counts(dropna=False), expected)


class TestPeriodIndexOps(Ops):
_allowed = '_allow_period_index_ops'
Expand Down Expand Up @@ -968,6 +987,30 @@ def test_sub_isub(self):
rng -= 1
tm.assert_index_equal(rng, expected)

def test_value_counts(self):
    """Exercise ``PeriodIndex.value_counts`` with hourly periods (GH 7735).

    The counts are compared with a Series indexed by a ``PeriodIndex`` of
    frequency ``'H'``. Three cases are checked: a run of repeated periods,
    an index containing ``NaT`` with the default arguments, and the same
    index with ``dropna=False``.
    """
    # GH 7735
    hourly = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
    # the period at position n is repeated n + 1 times, so the last
    # period is the most frequent one
    repeats = range(1, len(hourly) + 1)
    repeated = PeriodIndex(np.repeat(hourly.values, repeats), freq='H')

    descending_labels = [
        '2011-01-01 18:00', '2011-01-01 17:00', '2011-01-01 16:00',
        '2011-01-01 15:00', '2011-01-01 14:00', '2011-01-01 13:00',
        '2011-01-01 12:00', '2011-01-01 11:00', '2011-01-01 10:00',
        '2011-01-01 09:00',
    ]
    expected_index = PeriodIndex(descending_labels, freq='H')
    expected = Series(range(10, 0, -1), index=expected_index, dtype='int64')
    tm.assert_series_equal(repeated.value_counts(), expected)

    with_nat = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                            '2013-01-01 09:00', '2013-01-01 08:00',
                            '2013-01-01 08:00', pd.NaT], freq='H')

    # default call: the expected counts do not include NaT
    expected_index = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                                 freq='H')
    expected = Series([3, 2], index=expected_index)
    tm.assert_series_equal(with_nat.value_counts(), expected)

    # dropna=False: NaT is expected as the last, least frequent entry
    expected_index = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00',
                                  pd.NaT], freq='H')
    expected = Series([3, 2, 1], index=expected_index)
    tm.assert_series_equal(with_nat.value_counts(dropna=False), expected)


if __name__ == '__main__':
import nose
Expand Down

0 comments on commit 4b5a7c7

Please sign in to comment.