In [1]:
import cubed as xp
from IPython.display import display

In [2]:
import numpy as np

from cubed.vendor.rechunker import algorithm
from cubed.vendor.rechunker.algorithm import rechunking_plan


def evaluate_stage_v2(shape, read_chunks, int_chunks, write_chunks):
    """Return the ``(read_tasks, write_tasks)`` counts for one rechunking stage.

    The task count is whatever
    ``algorithm.calculate_single_stage_io_ops(shape, read_chunks, write_chunks)``
    reports. That number is used as the read count only when ``write_chunks``
    differs from ``read_chunks``, and as the write count only when
    ``read_chunks`` differs from ``int_chunks``; otherwise the respective
    count is 0.

    Parameters
    ----------
    shape, read_chunks, write_chunks
        Forwarded unchanged to ``calculate_single_stage_io_ops``.
    int_chunks
        Intermediate chunks; only compared against ``read_chunks``.

    Returns
    -------
    tuple
        ``(read_tasks, write_tasks)``.
    """
    io_ops = algorithm.calculate_single_stage_io_ops(shape, read_chunks, write_chunks)

    n_reads = 0
    if write_chunks != read_chunks:
        n_reads = io_ops

    n_writes = 0
    if read_chunks != int_chunks:
        n_writes = io_ops

    return n_reads, n_writes


def evaluate_plan(stages, shape, itemsize):
    """Add up the read and write task counts of every stage in a plan.

    Parameters
    ----------
    stages
        Iterable of ``(read_chunks, int_chunks, write_chunks)`` triples.
    shape
        Array shape, forwarded to ``evaluate_stage_v2`` for each stage.
    itemsize
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(total_reads, total_writes)``; ``(0, 0)`` for an empty plan.
    """
    per_stage = [
        evaluate_stage_v2(shape, read_chunks, int_chunks, write_chunks)
        for read_chunks, int_chunks, write_chunks in stages
    ]
    total_reads = sum(reads for reads, _ in per_stage)
    total_writes = sum(writes for _, writes in per_stage)
    return total_reads, total_writes


def print_summary(stages, shape, itemsize):
    """Print a per-stage and overall report for a rechunking plan.

    For each ``(read_chunks, int_chunks, write_chunks)`` stage this prints the
    chunk chain, the read/write task counts from ``evaluate_stage_v2`` and the
    size of one intermediate chunk in MB (``itemsize * prod(int_chunks) / 1e6``).
    The totals from ``evaluate_plan`` are printed last.

    Parameters
    ----------
    stages
        Iterable of ``(read_chunks, int_chunks, write_chunks)`` triples.
    shape
        Array shape, forwarded to the evaluation helpers.
    itemsize
        Bytes per element, used for the intermediate chunk size.
    """
    for index, stage in enumerate(stages):
        chain = " -> ".join(str(chunks) for chunks in stage)
        print(f"stage={index}: " + chain)

        read_chunks, int_chunks, write_chunks = stage
        n_reads, n_writes = evaluate_stage_v2(
            shape, read_chunks, int_chunks, write_chunks
        )
        print(f"  Tasks: {n_reads} reads, {n_writes} writes")

        # Size of a single intermediate chunk, in units of 1e6 bytes.
        split_mb = itemsize * np.prod(int_chunks) / 1e6
        print(f"  Split chunks: {split_mb:1.3f} MB")

    overall_reads, overall_writes = evaluate_plan(stages, shape, itemsize)
    print("Overall:")
    print(f"  Reads count: {overall_reads:1.3e}")
    print(f"  Write count: {overall_writes:1.3e}")


def rechunker_plan(shape, source_chunks, target_chunks, **kwargs):
    """Build a multistage plan bracketed by explicit source and target stages.

    The stages returned by ``algorithm.multistage_rechunking_plan`` are kept
    as-is and wrapped with two extra ``(read, int, write)`` triples:

    * a leading ``(source_chunks, source_chunks, <read chunks of first stage>)``
    * a trailing ``(<write chunks of last stage>, target_chunks, target_chunks)``

    Parameters
    ----------
    shape, source_chunks, target_chunks
        Forwarded positionally to ``multistage_rechunking_plan``.
    **kwargs
        Forwarded unchanged to ``multistage_rechunking_plan``.

    Returns
    -------
    list
        The wrapped list of stage triples.
    """
    planned = algorithm.multistage_rechunking_plan(
        shape, source_chunks, target_chunks, **kwargs
    )
    first_read = planned[0][0]
    last_write = planned[-1][-1]

    full_plan = [(source_chunks, source_chunks, first_read)]
    full_plan.extend(planned)
    full_plan.append((last_write, target_chunks, target_chunks))
    return full_plan


In [3]:
# Bytes per array element; 4 matches the float32 dtype used when the example
# arrays are built.
itemsize = 4
# Full 2-D array shape: 350640 entries on axis 0 and 721 * 1440 = 1038240 on axis 1.
# NOTE(review): presumably axis 0 is time steps and axis 1 a flattened
# 721 x 1440 grid — confirm.
shape = (350640, 721 * 1440)
# Source layout: each chunk holds 31 entries of axis 0 and spans all of axis 1.
source_chunks = (31, 721 * 1440)
# Target layout: each chunk spans all of axis 0 and holds 10 * 10 = 100 entries of axis 1.
target_chunks = (350640, 10 * 10)

In [5]:
# Build the multistage plan for the example arrays. `itemsize` comes from the
# configuration cell rather than being repeated as a literal, so the plan
# cannot drift from that constant; the memory bounds are 10e6 and 500e6 bytes.
plan = rechunker_plan(
    shape,
    source_chunks,
    target_chunks,
    itemsize=itemsize,
    min_mem=int(10e6),
    max_mem=int(500e6),
)
for i, stage in enumerate(plan):
    read_chunks, int_chunks, write_chunks = stage
    print(f"Stage {i}")
    # Show the array once with the stage's read chunking and once with its
    # intermediate chunking.
    for chunks in (read_chunks, int_chunks):
        arr = xp.empty(shape, dtype=xp.float32, chunks=chunks)
        display(arr)
    # write_chunks is not displayed: it is the same as int_chunks in this
    # example, apart from stage 0 (the wrapper stage added by rechunker_plan),
    # whose write_chunks is the read_chunks of stage 1.

Stage 0


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,128.7 MB
Shape,"(350640, 1038240)","(31, 1038240)"
Count,1 arrays in Plan,11311 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 128.7 MB Shape (350640, 1038240) (31, 1038240) Count 1 arrays in Plan 11311 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,128.7 MB
Shape,"(350640, 1038240)","(31, 1038240)"
Count,1 arrays in Plan,11311 Chunks
Type,float32,np.ndarray


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,128.7 MB
Shape,"(350640, 1038240)","(31, 1038240)"
Count,1 arrays in Plan,11311 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 128.7 MB Shape (350640, 1038240) (31, 1038240) Count 1 arrays in Plan 11311 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,128.7 MB
Shape,"(350640, 1038240)","(31, 1038240)"
Count,1 arrays in Plan,11311 Chunks
Type,float32,np.ndarray


Stage 1


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,386.2 MB
Shape,"(350640, 1038240)","(93, 1038240)"
Count,1 arrays in Plan,3771 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 386.2 MB Shape (350640, 1038240) (93, 1038240) Count 1 arrays in Plan 3771 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,386.2 MB
Shape,"(350640, 1038240)","(93, 1038240)"
Count,1 arrays in Plan,3771 Chunks
Type,float32,np.ndarray


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,25.5 MB
Shape,"(350640, 1038240)","(93, 68639)"
Count,1 arrays in Plan,60336 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 25.5 MB Shape (350640, 1038240) (93, 68639) Count 1 arrays in Plan 60336 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,25.5 MB
Shape,"(350640, 1038240)","(93, 68639)"
Count,1 arrays in Plan,60336 Chunks
Type,float32,np.ndarray


Stage 2


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,397.3 MB
Shape,"(350640, 1038240)","(1447, 68639)"
Count,1 arrays in Plan,3888 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 397.3 MB Shape (350640, 1038240) (1447, 68639) Count 1 arrays in Plan 3888 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,397.3 MB
Shape,"(350640, 1038240)","(1447, 68639)"
Count,1 arrays in Plan,3888 Chunks
Type,float32,np.ndarray


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,26.3 MB
Shape,"(350640, 1038240)","(1447, 4537)"
Count,1 arrays in Plan,55647 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 26.3 MB Shape (350640, 1038240) (1447, 4537) Count 1 arrays in Plan 55647 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,26.3 MB
Shape,"(350640, 1038240)","(1447, 4537)"
Count,1 arrays in Plan,55647 Chunks
Type,float32,np.ndarray


Stage 3


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,408.8 MB
Shape,"(350640, 1038240)","(22528, 4537)"
Count,1 arrays in Plan,3664 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 408.8 MB Shape (350640, 1038240) (22528, 4537) Count 1 arrays in Plan 3664 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,408.8 MB
Shape,"(350640, 1038240)","(22528, 4537)"
Count,1 arrays in Plan,3664 Chunks
Type,float32,np.ndarray


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,27.0 MB
Shape,"(350640, 1038240)","(22528, 300)"
Count,1 arrays in Plan,55376 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 27.0 MB Shape (350640, 1038240) (22528, 300) Count 1 arrays in Plan 55376 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,27.0 MB
Shape,"(350640, 1038240)","(22528, 300)"
Count,1 arrays in Plan,55376 Chunks
Type,float32,np.ndarray


Stage 4


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,420.8 MB
Shape,"(350640, 1038240)","(350640, 300)"
Count,1 arrays in Plan,3461 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 420.8 MB Shape (350640, 1038240) (350640, 300) Count 1 arrays in Plan 3461 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,420.8 MB
Shape,"(350640, 1038240)","(350640, 300)"
Count,1 arrays in Plan,3461 Chunks
Type,float32,np.ndarray


Unnamed: 0,Array,Chunk
Bytes,1.5 TB,140.3 MB
Shape,"(350640, 1038240)","(350640, 100)"
Count,1 arrays in Plan,10383 Chunks
Type,float32,np.ndarray
"Array Chunk Bytes 1.5 TB 140.3 MB Shape (350640, 1038240) (350640, 100) Count 1 arrays in Plan 10383 Chunks Type float32 np.ndarray",1038240  350640,

Unnamed: 0,Array,Chunk
Bytes,1.5 TB,140.3 MB
Shape,"(350640, 1038240)","(350640, 100)"
Count,1 arrays in Plan,10383 Chunks
Type,float32,np.ndarray
