In [1]:
import pickle
import numpy as np
import zarr
from pathlib import Path

def partition_sliding_window(data, window_size, stride):
    """
    Partition a 2-D array into fixed-size tiles using a sliding window.

    Windows are visited in row-major order: the row offset is the outer
    loop and the column offset is the inner loop. Only windows that fit
    entirely inside ``data`` are produced, so trailing rows/columns that
    cannot fill a whole window are dropped.

    Args:
    - data (numpy.ndarray): A numpy array of shape (H, W).
    - window_size (tuple): A tuple of two positive integers (h, w), the size
      of the sliding window. A window equal to the full array is allowed.
    - stride (tuple): A tuple of two positive integers, the step of the
      window along rows and columns respectively.

    Returns:
    - List[numpy.ndarray]: A list of numpy arrays, each of shape
      (window_size[0], window_size[1]). Each tile is obtained by basic
      slicing, i.e. it is a view into ``data`` rather than a copy.

    Raises:
    - TypeError: If ``data`` is not a numpy array.
    - ValueError: If ``data`` is not 2-D, if ``window_size`` or ``stride``
      does not have length 2, if any window dimension or stride is not
      positive, or if the window is larger than ``data`` along either axis.
    """
    # Validate inputs
    if not isinstance(data, np.ndarray):
        raise TypeError("Input data must be a numpy array")

    if data.ndim != 2:
        raise ValueError("Input data must be of shape (H, W)")

    if len(window_size) != 2 or len(stride) != 2:
        raise ValueError("window_size and stride must be tuples of length 2")

    H, W = data.shape
    h, w = window_size
    h_stride, w_stride = stride

    # Validate window_size and stride.
    # A zero or negative window would not fail on its own: slicing would
    # silently return empty or wrongly-shaped tiles, so reject it up front.
    if h <= 0 or w <= 0:
        raise ValueError("Window size values must be greater than zero")

    if h > H or w > W:
        raise ValueError("Window size must be smaller than the input dimensions")

    if h_stride <= 0 or w_stride <= 0:
        raise ValueError("Stride values must be greater than zero")

    # The `+ 1` makes the last valid top-left offset (H - h, W - w) inclusive.
    return [
        data[i:i + h, j:j + w]
        for i in range(0, H - h + 1, h_stride)
        for j in range(0, W - w + 1, w_stride)
    ]


In [3]:
import zarr

# Edge length of the square tiles stored in the Zarr datasets; used for both
# the sliding window and its stride, so tiles do not overlap.
TILE_SIZE = 4096
TILE_SHAPE = (TILE_SIZE, TILE_SIZE)


def _open_or_create_tiles(group, name):
    """Return ``group[name]``, creating it as an empty stack of complex64 tiles if absent."""
    if name in group:
        return group[name]
    # Start with zero tiles along axis 0; the array is resized as files are ingested.
    # One tile per chunk.
    return group.zeros(name, shape=(0, *TILE_SHAPE), chunks=(1, *TILE_SHAPE), dtype='complex64')


# Collect the raw pickles, skipping every path that contains 'focus' (the
# focused counterpart of each raw file is looked up inside the loop).
# Path.glob does not guarantee an order, so sort for a reproducible dataset layout.
raw_files_list = sorted(x for x in Path('data/Processed/').glob('**/*.pkl') if 'focus' not in str(x))
print(len(raw_files_list))

# Open or create a Zarr group.
# NOTE: mode='a' keeps existing content, and the loop below always appends, so
# re-running this cell against an existing R2F.zarr stores the same tiles again.
root = zarr.open_group('R2F.zarr', mode='a')

# Reuse the datasets if they already exist, otherwise create them empty
raw_ds = _open_or_create_tiles(root, 'raw')
focus_ds = _open_or_create_tiles(root, 'gt')

for r in raw_files_list:
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only run this on trusted data.
    # Context managers close the file handles promptly instead of leaking them.
    with open(r, 'rb') as raw_fh:
        data_raw = pickle.load(raw_fh)
    with open(r.parent / ('focused_' + r.name), 'rb') as focus_fh:
        data_focus = pickle.load(focus_fh)
    assert data_focus.shape == data_raw['echo'].shape, 'Shapes of data_focus and data_raw are not equal'

    focus_partitions = partition_sliding_window(data_focus, TILE_SHAPE, TILE_SHAPE)
    print('Number of Focus partitions: ', len(focus_partitions))

    raw_partitions = partition_sliding_window(data_raw['echo'], TILE_SHAPE, TILE_SHAPE)
    print('Number of Raw partitions: ', len(raw_partitions))

    # Remember where the new tiles begin before growing the datasets; an
    # explicit start offset avoids the `[-0:]` pitfall of negative slicing.
    raw_start = raw_ds.shape[0]
    focus_start = focus_ds.shape[0]

    # Resize datasets to accommodate new data
    raw_ds.resize((raw_start + len(raw_partitions), *TILE_SHAPE))
    focus_ds.resize((focus_start + len(focus_partitions), *TILE_SHAPE))

    # Append the data
    raw_ds[raw_start:] = raw_partitions
    focus_ds[focus_start:] = focus_partitions


5
Number of Focus partitions:  40
Number of Raw partitions:  40
Number of Focus partitions:  45
Number of Raw partitions:  45
Number of Focus partitions:  28
Number of Raw partitions:  28
Number of Focus partitions:  35
Number of Raw partitions:  35
Number of Focus partitions:  54
Number of Raw partitions:  54


In [4]:
# Shell escape: print the total on-disk size of the R2F.zarr store in human-readable units (needs a Unix-like `du`).
!du -sh R2F.zarr # Show the size of the dataset

38G	R2F.zarr
