In [17]:
from typing import Iterable, List, Optional, Sequence, Tuple, Union
import zipfile
from pathlib import Path


import cdsapi
from huggingface_hub import hf_hub_download

def _area_region_tag(area: Sequence[float]) -> str:
    """Return a filename-safe tag that uniquely identifies an (N, W, S, E) box."""
    return "area_" + "_".join(str(float(bound)) for bound in area)


def _extract_zip_member(zip_path: Path, member: str, target: Path) -> None:
    """Stream `member` from `zip_path` into `target`, never leaving a partial file."""
    partial = target.with_name(target.name + ".part")
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            # zf.open raises KeyError for a missing member *before* any output
            # file is created, so a bad archive cannot poison the cache.
            with zf.open(member) as src, open(partial, "wb") as dst:
                while True:
                    chunk = src.read(1 << 20)  # 1 MiB at a time; files can be large
                    if not chunk:
                        break
                    dst.write(chunk)
        partial.replace(target)
    except BaseException:
        if partial.exists():
            partial.unlink()
        raise


def download_cams_forecast(
    *,
    date: str,                                 # "YYYY-MM-DD"
    lead_hours: int = 24,                      # inclusive: 0..lead_hours
    times: Sequence[str] = ("00:00",),         # e.g., ("00:00",) or ("00:00","12:00")
    area: Optional[Tuple[float, float, float, float]] = None,  # (N, W, S, E) or None
    global_domain: bool = False,               # if True, ignores 'area'
    download_dir: Union[str, Path] = "~/downloads/cams",
    variables: Optional[List[str]] = None,     # surface + 3D variable names
    pressure_levels: Optional[List[str]] = None,  # CAMS pressure levels as strings
    prefix_tag: Optional[str] = None,          # extra tag in filenames, e.g. "utah"
    overwrite: bool = False,                   # re-download even if files exist
) -> dict:
    """
    Download CAMS global atmospheric composition forecasts (surface + pressure levels),
    optionally subsetted to a geographic area, and unpack to NetCDF.

    Also fetches the Aurora air-pollution static pickle from the Hugging Face hub
    into 'download_dir' if it is not already there.

    Returns a dict of string paths:
      {
        "static_path": <pickle>,
        "zip_path": <zip>,
        "surface_nc": <surface-level .nc>,
        "atmos_nc": <pressure-level .nc>
      }

    Raises
    ------
    ValueError
        If 'area' is used and is not a 4-tuple, if 'times' is empty, or if
        'lead_hours' is negative. Raised before anything is written or downloaded.
    KeyError
        If the downloaded zip lacks "data_sfc.nc" or "data_plev.nc".

    Notes
    -----
    - 'area' must be (North, West, South, East) in degrees.
    - If 'global_domain' is True, 'area' is ignored and a global request is made.
    - lead_hours is inclusive (downloads steps 0..lead_hours).
    - 'times' may be a single "HH:MM" string or a sequence of them.
    - Filenames are tagged with 'prefix_tag' if given, else "global", else a tag
      derived from the 'area' bounds, so different regions never share a cache file.
    - Existing files are reused unless 'overwrite' is True; the cache key does not
      include 'variables' or 'pressure_levels', so pass overwrite=True after
      changing those.
    - Downloads and extractions go to a temporary "*.part" file first and are
      renamed on success, so an interrupted run is not mistaken for a valid cache.
    """

    # ----- Validate inputs before touching disk or network -----
    if isinstance(times, str):
        # A bare "12:00" would otherwise be iterated character by character.
        times = (times,)
    times = list(times)
    if not times:
        raise ValueError("times must contain at least one 'HH:MM' string")

    n_lead = int(lead_hours)
    if n_lead < 0:
        raise ValueError("lead_hours must be >= 0")

    subset_area = None
    if not global_domain and area is not None:
        if len(area) != 4:
            raise ValueError("area must be a 4-tuple: (North, West, South, East)")
        subset_area = tuple(area)

    # ----- Defaults -----
    if variables is None:
        variables = [
            # 2D / surface-esque (including total columns)
            "10m_u_component_of_wind", "10m_v_component_of_wind",
            "2m_temperature", "mean_sea_level_pressure",
            "particulate_matter_1um", "particulate_matter_2.5um",
            "particulate_matter_10um",
            "total_column_carbon_monoxide", "total_column_nitrogen_monoxide",
            "total_column_nitrogen_dioxide", "total_column_ozone",
            "total_column_sulphur_dioxide",
            # 3D / pressure-level compatible variable names (CAMS)
            "u_component_of_wind", "v_component_of_wind",
            "temperature", "geopotential", "specific_humidity",
            "carbon_monoxide", "nitrogen_dioxide", "nitrogen_monoxide",
            "ozone", "sulphur_dioxide",
        ]
    if pressure_levels is None:
        pressure_levels = ["50","100","150","200","250","300","400",
                           "500","600","700","850","925","1000"]

    # ----- Paths / filenames -----
    download_path = Path(download_dir).expanduser()
    download_path.mkdir(parents=True, exist_ok=True)

    # Region tag: explicit prefix wins, then "global", then a tag built from the
    # bounds (previously an empty string, which made all subsets collide).
    if prefix_tag:
        region_tag = f"{prefix_tag}"
    elif subset_area is None:
        region_tag = "global"
    else:
        region_tag = _area_region_tag(subset_area)

    time_tag = "-".join(t.replace(":", "") for t in times)
    stem = f"{date}_{time_tag}_cams_{lead_hours}h_forecast_{region_tag}"

    zip_path = download_path / f"{stem}.nc.zip"
    surface_nc = download_path / f"{stem}-surface-level.nc"
    atmos_nc = download_path / f"{stem}-atmospheric.nc"

    # ----- Download Aurora static variables (once) -----
    static_path = download_path / "aurora-0.4-air-pollution-static.pickle"
    if not static_path.exists():
        hf_hub_download(
            repo_id="microsoft/aurora",
            filename="aurora-0.4-air-pollution-static.pickle",
            local_dir=download_path
        )
        print("Static variables downloaded!")
    else:
        print("Static variables already present.")

    # ----- Build CAMS request -----
    # CAMS expects time like "00:00" strings; leadtime_hour steps are inclusive here.
    req = {
        "type": "forecast",
        "format": "netcdf_zip",
        "date": date,
        "time": times,
        "leadtime_hour": [str(h) for h in range(0, n_lead + 1)],
        "variable": variables,
        "pressure_level": pressure_levels,
    }
    if subset_area is not None:
        # CAMS order is [North, West, South, East]
        req["area"] = list(subset_area)

    # ----- Retrieve (if needed / overwrite) -----
    need_zip = overwrite or not zip_path.exists()
    if need_zip:
        print(f"Requesting CAMS forecast for {date} ({'global' if global_domain or area is None else 'area subset'})...")
        # Download next to the final name and rename on success, so an aborted
        # transfer is never picked up as an existing zip on the next run.
        partial_zip = zip_path.with_name(zip_path.name + ".part")
        client = cdsapi.Client()
        try:
            client.retrieve(
                "cams-global-atmospheric-composition-forecasts",
                req,
                str(partial_zip),
            )
            partial_zip.replace(zip_path)
        except BaseException:
            if partial_zip.exists():
                partial_zip.unlink()
            raise
        print(f"ZIP saved: {zip_path}")
    else:
        print(f"ZIP already exists: {zip_path}")

    # ----- Unpack to .nc files -----
    # data_sfc.nc and data_plev.nc are the standard names inside CAMS netcdf_zip responses.
    # A freshly downloaded zip always triggers re-extraction so the .nc files
    # cannot lag behind the archive they came from.
    if need_zip or not surface_nc.exists():
        _extract_zip_member(zip_path, "data_sfc.nc", surface_nc)
        print(f"Surface-level saved: {surface_nc}")
    else:
        print(f"Surface-level already exists: {surface_nc}")

    if need_zip or not atmos_nc.exists():
        _extract_zip_member(zip_path, "data_plev.nc", atmos_nc)
        print(f"Atmospheric (pressure-level) saved: {atmos_nc}")
    else:
        print(f"Atmospheric already exists: {atmos_nc}")

    return {
        "static_path": str(static_path),
        "zip_path": str(zip_path),
        "surface_nc": str(surface_nc),
        "atmos_nc": str(atmos_nc),
    }


In [None]:
# Request the 12:00 CAMS run for 2025-10-20 with lead steps 0..24 h.
# No `area` is passed, so the request covers the global domain.
_cams_call_kwargs = dict(
    date="2025-10-20",
    lead_hours=24,
    times=["12:00"],
    # Regional variant: also pass area=(42.1, -115.0, 37.0, -100.0)  # (N, W, S, E)
    # and prefix_tag="utah" to tag the output filenames.
)
out = download_cams_forecast(**_cams_call_kwargs)

Static variables already present.
Requesting CAMS forecast for 2025-10-20 (area subset)...


2025-10-31 21:44:54,932 INFO Request ID is bc818dd1-5c44-4ad5-adf1-308618aaa840
2025-10-31 21:44:55,115 INFO status has been updated to accepted
2025-10-31 21:45:09,182 INFO status has been updated to running
2025-10-31 21:46:50,151 INFO status has been updated to successful
                                                                                          

ZIP saved: /uufs/chpc.utah.edu/common/home/u1494915/downloads/cams/2025-10-20_0000_cams_24h_forecast_utah.nc.zip
Surface-level saved: /uufs/chpc.utah.edu/common/home/u1494915/downloads/cams/2025-10-20_0000_cams_24h_forecast_utah-surface-level.nc
Atmospheric (pressure-level) saved: /uufs/chpc.utah.edu/common/home/u1494915/downloads/cams/2025-10-20_0000_cams_24h_forecast_utah-atmospheric.nc


