In [2]:
import os
import logging
from pathlib import Path
from huggingface_hub import snapshot_download

def download_folder_from_hf(
    repo_id: str,
    folder_name: str,
    local_dir: str,
    repo_type: str = 'dataset',
    overwrite: bool = False
) -> str:
    """Download a specific folder from a Hugging Face Hub repository.

    Only files matching ``<folder_name>/**`` are requested from the Hub, and
    they are placed under ``local_dir / folder_name``.

    Args:
        repo_id (str): The repository ID (e.g., 'username/repo-name').
            Surrounding whitespace is ignored.
        folder_name (str): Repository-relative folder to download. Leading and
            trailing '/' are ignored so the folder always resolves inside
            ``local_dir``.
        local_dir (str): Target local directory for saving the folder.
        repo_type (str): Repository type: 'dataset', 'model', or 'space'.
            Defaults to 'dataset'. ``None`` is passed through unchanged.
        overwrite (bool): When True, files are re-downloaded even if a local
            copy exists. When False, an existing non-empty folder is returned
            as-is without contacting the Hub. Defaults to False.

    Returns:
        str: Path to the downloaded folder.

    Raises:
        ValueError: If `repo_id` or `folder_name` is empty or not a string, or
            if `repo_type` is not one of the supported values.
        RuntimeError: If the download fails or the folder is missing afterwards.
    """
    if not isinstance(repo_id, str) or not repo_id.strip():
        raise ValueError("`repo_id` cannot be empty.")
    if not isinstance(folder_name, str) or not folder_name.strip().strip("/"):
        raise ValueError("`folder_name` cannot be empty.")
    if repo_type not in (None, 'dataset', 'model', 'space'):
        raise ValueError(
            f"`repo_type` must be 'dataset', 'model', or 'space', got {repo_type!r}."
        )

    repo_id = repo_id.strip()
    # A leading '/' would make `local_dir / folder` discard `local_dir`
    # (pathlib treats it as absolute); a trailing '/' would turn the pattern
    # into 'name//**', which matches no repository file.
    folder_rel = folder_name.strip("/")

    local_dir = Path(local_dir).resolve()
    target_folder = local_dir / folder_rel

    # No-op when the root logger already has handlers configured.
    logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')

    if target_folder.exists():
        if overwrite:
            logging.warning("Overwriting existing folder: %s", target_folder)
        elif target_folder.is_dir() and any(target_folder.iterdir()):
            logging.info(
                "Folder already exists at: %s. Skipping download.", target_folder
            )
            return str(target_folder)
        # Otherwise the path is an empty directory (or not a directory at
        # all), so there is nothing usable to return: fall through and download.

    logging.info(
        "Downloading '%s' from '%s' into '%s'...", folder_rel, repo_id, local_dir
    )

    # Keep the try body limited to the network call so that only genuine
    # download errors are wrapped with the generic failure message.
    try:
        snapshot_path = snapshot_download(
            repo_id=repo_id,
            repo_type=repo_type,
            allow_patterns=f"{folder_rel}/**",
            local_dir=str(local_dir),
            # NOTE(review): newer huggingface_hub releases appear to deprecate
            # this argument; confirm against the installed version.
            local_dir_use_symlinks=False,
            # Without this, `overwrite=True` would only log a warning and
            # leave existing local files untouched.
            force_download=overwrite,
        )
    except Exception as e:
        logging.error("Download failed: %s", e)
        raise RuntimeError(
            f"Failed to download folder '{folder_rel}' from '{repo_id}'"
        ) from e

    final_path = Path(snapshot_path) / folder_rel

    # The pattern filter silently matches nothing when the folder does not
    # exist in the repository, so verify the result explicitly.
    if not final_path.exists():
        message = f"Expected folder not found after download: {final_path}"
        logging.error("Download failed: %s", message)
        raise RuntimeError(message)

    logging.info("Download completed successfully at: %s", final_path)
    return str(final_path)


# Example usage: download one folder of the repository into a local directory.
# NOTE: `local_base` is a machine-specific absolute path; adjust before reuse.
repo_id = 'sirbastiano94/Maya4'
folder = 's1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr'
local_base = '/Users/roberto.delprete/Library/CloudStorage/OneDrive-ESA/Desktop/Repos/SARPYX/data/maya4_data'

try:
    path = download_folder_from_hf(
        repo_id=repo_id,
        folder_name=folder,
        local_dir=local_base,
        overwrite=False
    )
except (ValueError, RuntimeError) as err:
    # The helper raises ValueError for invalid arguments and RuntimeError for
    # failed downloads; report both the same way instead of letting a
    # ValueError escape as a raw traceback.
    print(f"\n[ERROR] {err}")
else:
    print(f"\nFolder downloaded to: {path}")

[INFO] Folder already exists at: /Users/roberto.delprete/Library/CloudStorage/OneDrive-ESA/Desktop/Repos/SARPYX/data/maya4_data/s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr. Skipping download.



Folder downloaded to: /Users/roberto.delprete/Library/CloudStorage/OneDrive-ESA/Desktop/Repos/SARPYX/data/maya4_data/s1a-s1-raw-s-hh-20240130t151239-20240130t151254-052337-06541b.zarr
