In [None]:
from huggingface_hub import hf_hub_download, snapshot_download

In [None]:
import os
from pathlib import Path
from huggingface_hub import snapshot_download

def download_hf_dataset(repo_id: str, folder_name: str, local_dir: str = None) -> Path:
    """
    Download one folder of a Hugging Face Hub dataset repository.

    Only files matching ``{folder_name}/**`` are requested from the Hub, with
    ``local_dir`` (or ``folder_name`` when ``local_dir`` is omitted or empty)
    used as the download target. After the download, the content of the
    target directory is printed as an indented tree.

    Args:
        repo_id (str): Repository ID (e.g., 'sirbastiano94/Maya4').
        folder_name (str): Folder inside the repository to download.
        local_dir (str, optional): Local target directory handed to
            ``snapshot_download`` (defaults to ``folder_name``).

    Returns:
        Path: The local target directory, i.e. ``Path(local_dir or folder_name)``.
        NOTE(review): the downloaded folder itself presumably ends up at
        ``<returned path>/<folder_name>`` - confirm against the Hub client.

    Raises:
        FileNotFoundError: If the target directory does not exist after the
            download call returns.
        Exception: Any error raised during the download is printed and then
            re-raised unchanged.
    """
    local_path = Path(local_dir or folder_name)
    print(f'Downloading {folder_name} from {repo_id} to {local_path}...')
    try:
        # NOTE(review): `resume_download` and `local_dir_use_symlinks` appear to be
        # deprecated in recent huggingface_hub releases - confirm against the
        # installed version before removing them.
        snapshot_download(
            repo_id=repo_id,
            repo_type='dataset',
            local_dir=str(local_path),
            allow_patterns=[f'{folder_name}/**'],
            max_workers=10,
            resume_download=True,
            local_dir_use_symlinks=False
        )
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not local_path.exists():
            raise FileNotFoundError(f'Directory {local_path} not found after download')
        print(f'Successfully downloaded {folder_name} to {local_path}')
        print('Downloaded files:')
        for root, dirs, files in os.walk(local_path):
            # Sorting in place makes os.walk descend in a deterministic order.
            dirs.sort()
            # Depth relative to the target directory. A plain str.replace() on
            # the path would also strip repeated occurrences further down the
            # tree and under-count the nesting level.
            depth = len(Path(root).relative_to(local_path).parts)
            indent = ' ' * 2 * depth
            print(f'{indent}{os.path.basename(root)}/')
            for f in sorted(files):
                print(f'{indent}  {f}')
        return local_path
    except Exception as e:
        print(f'Download failed: {e}')
        raise

# Usage: download one Zarr archive of the dataset into the local data directory.
# Alternative archive: 's1a-s1-raw-s-hh-20230508t121142-20230508t121213-048442-05d3c0.zarr'
repo_id = 'sirbastiano94/Maya4'
folder_name = 's1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr'
downloaded_path = download_hf_dataset(
    repo_id=repo_id,
    folder_name=folder_name,
    local_dir='/Data/sar_focusing',
)

In [1]:
import sys
import os
# Put the parent of the working directory and the working directory itself on
# the module search path (presumably so the project modules imported in later
# cells can be found - confirm against the repository layout).
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate sys.path entries.
for _search_dir in (
    os.path.abspath(os.path.join(os.getcwd(), '..')),
    os.path.abspath(os.getcwd()),
):
    if _search_dir not in sys.path:
        sys.path.append(_search_dir)

In [None]:
from dataloader import get_sar_dataloader


# Build a dataloader over the local data directory and iterate it once,
# printing the shape of every (x, y) batch it yields.
loader = get_sar_dataloader(
    # data location and storage backend
    data_dir="/Data/sar_focusing",
    backend="zarr",
    # source and target levels
    level_from="rc",
    level_to="az",
    # patch geometry
    patch_mode="square",
    patch_size=(1, 1000),
    buffer=(1000, 1000),
    stride=(1, 1000),
    # batching and ordering
    batch_size=16,
    num_workers=0,
    shuffle_files=False,
    shuffle_patches=False,
    # remaining options (semantics are defined by get_sar_dataloader)
    complex_valued=True,
    save_samples=False,
    verbose=True,
    k=1000,
    cache_size=1000,
    online=True,
)
for i, (x_batch, y_batch) in enumerate(loader):
    print(f"Batch {i}: x {x_batch.shape}, y {y_batch.shape}")

# patch calculation + patch extraction - 7.9 s

In [None]:
from api import fetch_chunk_from_hf_zarr

# Request the chunk at (y=7000, x=15000) of the 'az' level of one archive,
# using the local data directory. The call is the last expression of the cell,
# so its return value is shown as the cell output.
fetch_chunk_from_hf_zarr(
    zarr_archive="s1a-s1-raw-s-hh-20230731t121147-20230731t121217-049667-05f8f1.zarr",
    level='az',
    y=7000,
    x=15000,
    local_dir="/Data/sar_focusing",
)

In [6]:
import zarr
# Open the locally downloaded archive in read-only mode.
# Other paths previously used here (relative to /Data/sar_focusing/):
#   s1a-s1-raw-s-hh-20230731t121147-20230731t121217-049667-05f8f1.zarr
#   s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b_bis.zarr/s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr
store = zarr.open(
    "/Data/sar_focusing/s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr",
    mode="r",
)

In [None]:
# Display element [0][0] of the 'az' entry of the opened archive as a quick
# check that the data can be read.
# NOTE(review): if 'az' is a Zarr array, `store['az'][0, 0]` may avoid loading
# the whole first row - confirm before changing.
store['az'][0][0]