# Dataloader Example

This script demonstrates how to efficiently load and iterate over a satellite Earth observation dataset stored in the Zarr format, using a modular PyTorch DataLoader setup. 

In [1]:
from data_loader import get_zarr_dataloader, NormalizeChannels
from tqdm import tqdm

# Location of the Zarr dataset to read from
zarr_path = "burned_area_dataset.zarr"

# Which split to load; the accepted values are "trainval" and "test"
dataset_set = "trainval"

# Gather every loader option in one place so the configuration is easy to
# scan and tweak, then hand the whole set to the factory in a single call.
loader_options = dict(
    # Path to the Zarr archive
    zarr_path=zarr_path,
    # Dataset subset to use
    dataset_set=dataset_set,
    # Number of samples per batch
    batch_size=16,
    # Enable shuffling (useful for training)
    shuffle=True,
    # Number of parallel workers for loading
    num_workers=4,
    # Normalize input channels to [0, 1]
    transform=NormalizeChannels(min_max=True),
    # Only load data for the "segmentation" task
    task_filter="segmentation",
    # Include auxiliary metadata fields
    metadata_keys=["sensor", "timestamp"],
)

# Build the PyTorch DataLoader backed by the Zarr dataset
dataloader = get_zarr_dataloader(**loader_options)

# Wrap the DataLoader in a progress bar, then walk through it batch by batch
progress = tqdm(dataloader, desc="Processing Batches")
for idx, batch in enumerate(progress):
    # Each batch lists its task names under 'tasks'; handle them one at a time
    for task in batch['tasks']:
        # Fetch the image and label entries stored under this task's keys
        images, labels = batch[f'{task}_img'], batch[f'{task}_label']
        # Placeholder: run model inference, compute the loss, or do any
        # further processing on `images` / `labels` here

Processing Batches: 100%|██████████| 487/487 [00:25<00:00, 18.93it/s]
