# Dataloader usage examples

In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In this first example we are defining a dataloader that loads samples in chunks, ie given a full SAR products, it loads all the patches from one chunk at a time (```patch_order="chunk"```). 
The dataloader does also takes samples from the huggingface repository in case they were not found locally (```online = True```)

In [2]:
from dataloader import get_sar_dataloader


loader = get_sar_dataloader(
    data_dir="/Data/sar_focusing",
    level_from="rcmc",
    level_to="az",
    batch_size=16,
    num_workers=0,
    patch_mode="rectangular", 
    patch_size = (1, 1000),
    buffer = (1000, 1000),
    stride = (1, 1000),
    shuffle_files = False,
    patch_order="chunk", 
    complex_valued = True,
    save_samples = False, 
    backend="zarr", 
    verbose=False, 
    samples_per_prod = 1000,
    cache_size = 1000, 
    online = True, 
    max_products = 20
)
for i, (x_batch, y_batch) in enumerate(loader):
    print(f"Batch {i}: x {x_batch.shape}, y {y_batch.shape}")

  from .autonotebook import tqdm as notebook_tqdm


[PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230731t121147-20230731t121217-049667-05f8f1.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240103t121147-20240103t121217-051942-0646ac.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240517t151240-20240517t151255-053912-068d91.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240626t053141-20240626t053207-054490-06a193.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240825t053115-20240825t053148-055365-06c086.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20240830t121144-20240830t121215-055442-06c352.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20241005t121146-20241005t121216-055967-06d7fb.zarr'), PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20241008t151239-20241008t151255-056012-06d9c9.zarr'),

For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


Successfully downloaded "s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc/zarr.json" to "/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc/zarr.json".
Files in s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/az: ['c', 'zarr.json']
Files in s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr: ['az', 'raw', 'rc', 'rcmc', 'zarr.json']
Files in s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc: ['c', 'zarr.json']
Downloading "s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc/c/0/0" from "sirbastiano94/Maya4" to "/Data/sar_focusing"...
Successfully downloaded "s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc/c/0/0" to "/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr/rcmc/c/0/0".
Batch 0: x torch.Size([16, 1, 1001]), y torch.Size([16, 1, 1000])
Batch 1: x torch.Size([16, 1, 1001]), y torch.Si

In the following snippet we are just loading 10000 vertical samples from a single SAR product that had already been downloaded locally. Here the column order is chosen as the sampling strategy

In [3]:
from dataloader import get_sar_dataloader, SARTransform
import functools
from utils import normalize, RC_MIN, RC_MAX, GT_MIN, GT_MAX

transforms = SARTransform(
    transform_raw = functools.partial(normalize, array_min=RC_MIN, array_max=RC_MAX),
    transform_rc = functools.partial(normalize, array_min=RC_MIN, array_max=RC_MAX),
    transform_rcmc =functools.partial(normalize, array_min=RC_MIN, array_max=RC_MAX),
    transform_az = functools.partial(normalize, array_min=GT_MIN, array_max=GT_MAX)
)
loader = get_sar_dataloader(
    data_dir="/Data/sar_focusing",
    level_from="rcmc",
    level_to="az",
    batch_size=16,
    num_workers=0,
    patch_mode="rectangular", 
    patch_size = (1, 1000),
    buffer = (1000, 1000),
    stride = (1, 300),
    transform=transforms,
    shuffle_files = False,
    patch_order="col", 
    complex_valued = True,
    save_samples = False, 
    backend="zarr", 
    verbose=True, 
    samples_per_prod = 10000,
    cache_size = 100, 
    online = False, 
    max_products=1
)
for i, (x_batch, y_batch) in enumerate(loader):
    print(f"Batch {i}: x {x_batch.shape}, y {y_batch.shape}")


Selected only files:  [PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Files list calculation took 0.00 seconds.
Zarr stores initialization took 2.03 seconds.
[PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1000, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1001, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1002, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1003, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1004, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw

In the following snippet we are just loading 2000 vertical samples from a single SAR product that had already been downloaded locally. Here the chunk order is chosen as the sampling strategy

In [4]:
from dataloader import get_sar_dataloader


loader = get_sar_dataloader(
    data_dir="/Data/sar_focusing",
    level_from="rcmc",
    level_to="az",
    batch_size=16,
    num_workers=0,
    patch_mode="rectangular", 
    patch_size = (1, 1000),
    buffer = (1000, 1000),
    stride = (1, 300),
    transform=transforms,
    shuffle_files = False,
    patch_order="chunk", 
    complex_valued = True,
    save_samples = False, 
    backend="zarr", 
    verbose=True, 
    samples_per_prod = 2000,
    cache_size = 1000, 
    online = False, 
    max_products=1
)
for i, (x_batch, y_batch) in enumerate(loader):
    print(f"Batch {i}: x {x_batch.shape}, y {y_batch.shape}")


Selected only files:  [PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Files list calculation took 0.00 seconds.
Zarr stores initialization took 2.09 seconds.
[PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1000, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1001, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1002, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1003, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1004, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw

In the following snippet we are just loading 2000 vertical samples from a single SAR product that had already been downloaded locally. Here the row order is chosen as the sampling strategy

In [5]:
from dataloader import get_sar_dataloader


loader = get_sar_dataloader(
    data_dir="/Data/sar_focusing",
    level_from="rcmc",
    level_to="az",
    batch_size=16,
    num_workers=0,
    patch_mode="rectangular", 
    patch_size = (1, 1000),
    buffer = (1000, 1000),
    stride = (1, 300),
    transform=transforms,
    shuffle_files = False,
    patch_order="chunk", 
    complex_valued = True,
    save_samples = False, 
    backend="zarr", 
    verbose=True, 
    samples_per_prod = 2000,
    cache_size = 1000, 
    online = False, 
    max_products=1
)
for i, (x_batch, y_batch) in enumerate(loader):
    print(f"Batch {i}: x {x_batch.shape}, y {y_batch.shape}")


Selected only files:  [PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Files list calculation took 0.00 seconds.
Zarr stores initialization took 2.00 seconds.
[PosixPath('/Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr')]
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1000, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1001, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1002, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1003, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr, patch at (1004, 1000)
Sampling from file /Data/sar_focusing/s1a-s1-raw