Remove explicit references to Pandas in dask.dataframe.groupby
mrocklin committed May 6, 2019
Parent: 8fdc88f · Commit: 4f378ba
Showing 1 changed file with 8 additions and 5 deletions.
dask/dataframe/groupby.py: 8 additions & 5 deletions
@@ -11,7 +11,7 @@
 from .core import (DataFrame, Series, aca, map_partitions,
                    new_dd_object, no_default, split_out_on_index,
                    _extract_meta)
-from .methods import drop_columns
+from .methods import drop_columns, concat
 from .shuffle import shuffle
 from .utils import (make_meta, insert_meta_param_description,
                     raise_on_meta_error, is_series_like, is_dataframe_like)
@@ -264,7 +264,7 @@ def _var_chunk(df, *index):
     x2 = g2.sum().rename(columns=lambda c: c + '-x2')
 
     x2.index = x.index
-    return pd.concat([x, x2, n], axis=1)
+    return concat([x, x2, n], axis=1)
 
 
 def _var_combine(g, levels):
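The change above swaps pd.concat for the concat imported from dask.dataframe.methods in the first hunk, so the per-chunk variance pieces are concatenated by dask's own helper rather than by a hard-coded pandas call, leaving room for non-pandas, pandas-like backends (cuDF, for example). A rough sketch of that kind of type-based dispatch, with hypothetical names and not dask's actual implementation:

import pandas as pd

def concat(dfs, axis=0):
    # Hypothetical dispatching wrapper: choose the concat implementation
    # that matches the library the inputs come from instead of assuming
    # pandas. (Illustrative only.)
    first = dfs[0]
    if isinstance(first, (pd.DataFrame, pd.Series)):
        return pd.concat(dfs, axis=axis)
    # Otherwise defer to the library that defines the input's type,
    # assuming it exposes a pandas-like concat (cudf does, for instance).
    lib = __import__(type(first).__module__.split('.')[0])
    return lib.concat(dfs, axis=axis)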
@@ -298,7 +298,7 @@ def _nunique_df_chunk(df, *index, **kwargs):
 
     g = _groupby_raise_unaligned(df, by=index)
     if len(df) > 0:
-        grouped = g[[name]].apply(pd.DataFrame.drop_duplicates)
+        grouped = g[[name]].apply(M.drop_duplicates)
         # we set the index here to force a possibly duplicate index
         # for our reduce step
         if isinstance(levels, list):
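pd.DataFrame.drop_duplicates is an unbound pandas method, so applying it per group only works when every group really is a pandas DataFrame. M in dask.utils builds method callers by attribute access, so M.drop_duplicates simply invokes .drop_duplicates() on whatever object each group turns out to be. A minimal stand-in for that pattern:

import operator

import pandas as pd

class MethodNamespace:
    # Stand-in for dask.utils.M: attribute access returns a callable
    # that invokes the method of that name on its argument.
    def __getattr__(self, name):
        return operator.methodcaller(name)

M = MethodNamespace()

df = pd.DataFrame({"a": [1, 1, 2]})
print(M.drop_duplicates(df))  # equivalent to df.drop_duplicates()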
@@ -671,7 +671,10 @@ def _groupby_apply_funcs(df, *index, **kwargs):
         else:
             result[result_column] = r
 
-    return pd.DataFrame(result)
+    if is_dataframe_like(df):
+        return type(df)(result)
+    else:
+        return type(df.head(0).to_frame())(result)
 
 
 def _compute_sum_of_squares(grouped, column):
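Rather than always materializing a pandas DataFrame, the chunk result is now rebuilt with the constructor of whatever frame type came in: type(df)(result) when df is already DataFrame-like, and otherwise the DataFrame class of the matching library, recovered by converting an empty slice of the Series via df.head(0).to_frame(). A small sketch of the idea; is_dataframe_like here is a simplified stand-in for dask's helper of the same name:

import pandas as pd

def is_dataframe_like(obj):
    # Simplified duck-type check; dask's real helper inspects more attributes.
    return hasattr(obj, "dtypes") and hasattr(obj, "columns")

def build_result(df, result):
    # Rebuild ``result`` (a dict of columns) using the caller's own
    # DataFrame type instead of hard-coding pandas.
    if is_dataframe_like(df):
        return type(df)(result)
    # df is Series-like: an empty .to_frame() yields the matching
    # DataFrame class without copying data.
    return type(df.head(0).to_frame())(result)

print(build_result(pd.Series([1.0, 2.0]), {"x": [1, 2], "x2": [1, 4]}))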
@@ -688,7 +691,7 @@ def _agg_finalize(df, aggregate_funcs, finalize_funcs, level):
     for result_column, func, kwargs in finalize_funcs:
         result[result_column] = func(df, **kwargs)
 
-    return pd.DataFrame(result)
+    return type(df)(result)
 
 
 def _apply_func_to_column(df_like, column, func):
