Skip to content

Commit d3d608c

Browse files
authored May 23, 2022
Fix chunk index error in auto_merge_chunks (#3057)
1 parent 61c0c51 commit d3d608c

File tree

2 files changed

+8
-10
lines changed

2 files changed

+8
-10
lines changed
 

‎mars/dataframe/tests/test_utils.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -614,7 +614,10 @@ def get_chunks_meta(self, data_keys: List[str], **_) -> List[Dict]:
614614
assert len(df2.chunks) == 2
615615
assert isinstance(df2.chunks[0].op, DataFrameConcat)
616616
assert len(df2.chunks[0].op.inputs) == 3
617-
assert df2.chunks[1] is df.chunks[-1]
617+
assert isinstance(df2.chunks[1].op, DataFrameConcat)
618+
assert len(df2.chunks[1].op.inputs) == 1
619+
assert df2.chunks[1].shape == df.chunks[-1].shape
620+
assert df2.chunks[1].index == (1, 0)
618621

619622
# mock situation that df not executed
620623
df2 = auto_merge_chunks(FakeContext(False), df, 3 * memory_size)

‎mars/dataframe/utils.py

+4 -9
Original file line number | Diff line number | Diff line change
@@ -1389,15 +1389,10 @@ def _concat_chunks(merge_chunks: List[ChunkType], output_index: int):
13891389
to_merge_chunks.append(chunk)
13901390
acc_memory_size += chunk_memory_size
13911391
# process the last chunk
1392-
if len(to_merge_chunks) > 1:
1393-
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
1394-
out_chunks.append(merged_chunk)
1395-
n_split.append(merged_chunk.shape[0])
1396-
else:
1397-
assert len(to_merge_chunks) == 1
1398-
last_chunk = to_merge_chunks[0]
1399-
out_chunks.append(last_chunk)
1400-
n_split.append(last_chunk.shape[0])
1392+
assert len(to_merge_chunks) >= 1
1393+
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
1394+
out_chunks.append(merged_chunk)
1395+
n_split.append(merged_chunk.shape[0])
14011396

14021397
new_op = df_or_series.op.copy()
14031398
params = df_or_series.params.copy()

0 commit comments

Comments
 (0)
Please sign in to comment.