Skip to content

Commit

Permalink
BUG: resample with nunique
Browse files Browse the repository at this point in the history
closes #12352

Author: Jeff Reback <jeff@reback.net>

Closes #12364 from jreback/resample_fix and squashes the following commits:

d19c5fe [Jeff Reback] BUG: resample with nunique
  • Loading branch information
jreback committed Feb 17, 2016
1 parent 5d1857c commit f1aad46
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Expand Up @@ -1035,7 +1035,7 @@ Bug Fixes
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`, :issue:`12217`)
- Bug in ``pd.read_stata`` with version <= 108 files (:issue:`12232`)
- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`)

- Bug in resampling with ``.nunique`` and a sparse index (:issue:`12352`)

- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`)
- Bug in subtraction of ``Series`` of a single tz-aware ``Timestamp`` (:issue:`12290`)
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/groupby.py
Expand Up @@ -2819,8 +2819,16 @@ def nunique(self, dropna=True):
inc[idx] = 1

out = np.add.reduceat(inc, idx).astype('int64', copy=False)
return Series(out if ids[0] != -1 else out[1:],
index=self.grouper.result_index,
res = out if ids[0] != -1 else out[1:]
ri = self.grouper.result_index

# we might have duplications among the bins
if len(res) != len(ri):
res, out = np.zeros(len(ri), dtype=out.dtype), res
res[ids] = out

return Series(res,
index=ri,
name=self.name)

@deprecate_kwarg('take_last', 'keep',
Expand Down
28 changes: 28 additions & 0 deletions pandas/tseries/tests/test_resample.py
Expand Up @@ -1526,6 +1526,34 @@ def test_resample_timegrouper(self):
result = df.groupby(pd.Grouper(freq='M', key='A')).count()
assert_frame_equal(result, expected)

def test_resample_nunique(self):

    # GH 12352
    # Regression test: ``nunique`` on a daily resample / time-grouper where
    # the two observations (2015-06-05 and 2015-06-08) are separated by days
    # that contain no rows at all.
    df = DataFrame({
        'ID': {pd.Timestamp('2015-06-05 00:00:00'): '0010100903',
               pd.Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {pd.Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))

    # reference result computed bin-by-bin through ``apply``
    expected = df.groupby(pd.TimeGrouper('D')).ID.apply(
        lambda x: x.nunique())
    self.assertEqual(expected.name, 'ID')

    # check both entry points; the loop variable must be used here,
    # otherwise the groupby object ``g`` is never exercised
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    # TODO
    # this should have name
    # https://github.com/pydata/pandas/issues/12363
    expected.name = None
    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)

def test_resample_group_info(self): # GH10914
for n, k in product((10000, 100000), (10, 100, 1000)):
dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
Expand Down

0 comments on commit f1aad46

Please sign in to comment.