Skip to content

Commit

Permalink
Fix up groupby-standard deviation again on object dtype keys
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed Mar 4, 2019
1 parent f3c2d5d commit 0e09333
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
11 changes: 7 additions & 4 deletions dask/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,17 @@ def _apply_chunk(df, *index, **kwargs):
def _var_chunk(df, *index):
if is_series_like(df):
df = df.to_frame()
df = df._get_numeric_data()

df = df.copy()
cols = df._get_numeric_data().columns

g = _groupby_raise_unaligned(df, by=index)
x = g.sum()

n = g.count().rename(columns=lambda c: c + '-count')
n = g[x.columns].count().rename(columns=lambda c: c + '-count')

df2 = df ** 2
g2 = _groupby_raise_unaligned(df2, by=index)
df[cols] = df[cols] ** 2
g2 = _groupby_raise_unaligned(df, by=index)
x2 = g2.sum().rename(columns=lambda c: c + '-x2')

x2.index = x.index
Expand Down
6 changes: 6 additions & 0 deletions dask/dataframe/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,3 +1535,9 @@ def test_std_object_dtype(func):
ddf = dd.from_pandas(df, npartitions=2)

assert_eq(func(df), func(ddf))


def test_timeseries():
    """Groupby-std on the demo timeseries must agree with pandas.

    Takes the first two partitions of ``dask.datasets.timeseries()``,
    groups by the ``name`` column and checks the dask standard deviation
    against the result pandas produces for the same rows.
    NOTE(review): ``name`` is presumably an object-dtype key, which is the
    case the accompanying commit targets -- confirm against the dataset.
    """
    df = dask.datasets.timeseries().partitions[:2]

    # Build an independent pandas reference from the very same collection.
    # Comparing the dask expression with an identical dask expression can
    # only reveal exceptions, never incorrect values.
    expected = df.compute().groupby('name').std()

    assert_eq(df.groupby('name').std(), expected)

0 comments on commit 0e09333

Please sign in to comment.