Skip to content

Commit

Permalink
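Fix metadata in shuffle: return meta for empty partitions in apply_and_enforce and preserve the index name on the shuffled frame's meta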
Browse files (browse the repository at this point in the history)
  • Loading branch information
mrocklin committed Feb 26, 2019
1 parent a6fcc36 commit 4bab77e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3793,6 +3793,8 @@ def apply_and_enforce(*args, **kwargs):
meta = kwargs.pop('_meta')
df = func(*args, **kwargs)
if is_dataframe_like(df) or is_series_like(df) or is_index_like(df):
if not len(df):
return meta
if is_dataframe_like(df):
# Need nan_to_num otherwise nan comparison gives False
if not np.array_equal(np.nan_to_num(meta.columns),
Expand Down
1 change: 1 addition & 0 deletions dask/dataframe/shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def shuffle(df, index, shuffle=None, npartitions=None, max_branch=32,
npartitions=npartitions or df.npartitions,
meta=pd.Series([0]))
df2 = df.assign(_partitions=partitions)
df2._meta.index.name = df._meta.index.name
df3 = rearrange_by_column(df2, '_partitions', npartitions=npartitions,
max_branch=max_branch, shuffle=shuffle,
compute=compute)
Expand Down

0 comments on commit 4bab77e

Please sign in to comment.