Skip to content

Commit

Permalink
Fix shuffle code to work with pyarrow 13 (dask#8009)
Browse files Browse the repository at this point in the history
(cherry picked from commit b7e5f8f)
  • Loading branch information
jorisvandenbossche authored and phofl committed Jul 24, 2023
1 parent 62c4b06 commit b3991af
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions distributed/shuffle/_worker_plugin.py
Expand Up @@ -951,7 +951,7 @@ def split_by_worker(
# bytestream such that it cannot be deserialized anymore
t = pa.Table.from_pandas(df, preserve_index=True)
t = t.sort_by("_worker")
codes = np.asarray(t.select(["_worker"]))[0]
codes = np.asarray(t["_worker"])
t = t.drop(["_worker"])
del df

Expand Down Expand Up @@ -983,7 +983,7 @@ def split_by_partition(t: pa.Table, column: str) -> dict[Any, pa.Table]:
partitions.sort()
t = t.sort_by(column)

partition = np.asarray(t.select([column]))[0]
partition = np.asarray(t[column])
splits = np.where(partition[1:] != partition[:-1])[0] + 1
splits = np.concatenate([[0], splits])

Expand Down

0 comments on commit b3991af

Please sign in to comment.