In [1]:
# --- Configuration you will normally edit ---
import calendar

YEAR  = "2014"
MONTH = "05"  # May

# Every day of YEAR-MONTH as zero-padded strings ("01", "02", ...).
# The count comes from the calendar instead of a fixed 1-31 range, so editing
# MONTH or YEAR above never produces a non-existent date (e.g. June 31 or
# February 30) in the per-day requests.
_days_in_month = calendar.monthrange(int(YEAR), int(MONTH))[1]
DAYS = [f"{d:02d}" for d in range(1, _days_in_month + 1)]

# All hours (00:00–23:00)
TIMES = [f"{h:02d}:00" for h in range(24)]

import os

# Root folder that receives every downloaded GRIB file.
OUT_DIR = "/data/mgeorge7/sudhansu_WORK/grb_files/2014era_hrly"

# Optional bounding box given as [North, West, South, East] in degrees.
# Leave it as None to request the whole globe.
AREA = None
# Regional example (rough box around the Phoenix region):
# AREA = [40, -120, 25, -105]  # N, W, S, E in degrees

# Create the output root up front; an already-existing folder is not an error.
os.makedirs(OUT_DIR, exist_ok=True)


In [2]:
import cdsapi
from pathlib import Path

# Single module-level CDS API client; the download functions in this notebook
# call client.retrieve(...) on it.
# NOTE(review): presumably reads its URL/key from the user's cdsapi
# configuration — confirm credentials are set up before running.
client = cdsapi.Client()

def ensure_dir(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    An already-existing directory is left untouched and is not an error.
    """
    directory = Path(path)
    directory.mkdir(parents=True, exist_ok=True)


In [3]:
def download_era5_pressure_levels(
    year: str,
    month: str,
    days: list,
    times: list,
    out_dir: str,
    area=None,
):
    """
    Download ERA5 pressure-level data (one GRIB per day).

    One request is sent per entry in ``days``; each result is stored as
    ``era5_pl_<year><month><day>.grib`` inside ``out_dir``. Days whose file
    already exists are skipped, so the function can be re-run to resume.

    Each download is first written to a ``.part`` file next to the target and
    renamed only after the retrieval returns successfully. An interrupted or
    failed transfer therefore never leaves a truncated ``.grib`` behind that a
    later run would mistake for a finished download.

    Parameters
    ----------
    year : str (e.g., "2014")
    month : str (e.g., "05")
    days : list of str (e.g., ["01","02",...])
    times : list of str (e.g., ["00:00",...,"23:00"])
    out_dir : str, directory for output GRIB files
    area : list [N,W,S,E] or None for global

    Returns
    -------
    None

    Raises
    ------
    Any exception raised by ``client.retrieve`` is propagated after the
    partial file for that day has been removed.
    """
    ensure_dir(out_dir)

    dataset = "reanalysis-era5-pressure-levels"

    variables = [
        "divergence",
        "fraction_of_cloud_cover",
        "geopotential",
        "ozone_mass_mixing_ratio",
        "potential_vorticity",
        "relative_humidity",
        "specific_cloud_ice_water_content",
        "specific_cloud_liquid_water_content",
        "specific_humidity",
        "specific_rain_water_content",
        "specific_snow_water_content",
        "temperature",
        "u_component_of_wind",
        "v_component_of_wind",
        "vertical_velocity",
        "vorticity",
    ]

    pressure_levels = [
        "10", "20", "30", "50", "70",
        "100", "125", "150", "175", "200",
        "225", "250", "300", "350", "400",
        "450", "500", "550", "600", "650",
        "700", "750", "775", "800", "825",
        "850", "875", "900", "925", "950",
        "975", "1000",
    ]

    for day in days:
        target = Path(out_dir) / f"era5_pl_{year}{month}{day}.grib"
        if target.exists():
            print(f"[SKIP] {target} already exists")
            continue

        print(f"[INFO] Downloading pressure levels for {year}-{month}-{day} -> {target}")

        request = {
            "product_type": "reanalysis",
            "format": "grib",
            "variable": variables,
            "pressure_level": pressure_levels,
            "year": year,
            "month": month,
            "day": day,
            "time": times,
        }

        if area is not None:
            request["area"] = area  # [N, W, S, E]

        # Download under a temporary name so that `target` only ever exists
        # once the file is complete (the skip check above relies on that).
        partial = target.with_name(target.name + ".part")
        if partial.exists():
            # Left over from a run that was killed before it could clean up.
            partial.unlink()
        try:
            client.retrieve(dataset, request, str(partial))
            partial.replace(target)
        finally:
            # Runs on errors and on KeyboardInterrupt too; after a successful
            # rename there is nothing left to remove.
            if partial.exists():
                partial.unlink()


In [4]:
def download_era5_single_levels(
    year: str,
    month: str,
    days: list,
    times: list,
    out_dir: str,
    area=None,
):
    """
    Download ERA5 single-level data (one GRIB per day).

    One request is sent per entry in ``days``; each result is stored as
    ``era5_sl_<year><month><day>.grib`` inside ``out_dir``. Days whose file
    already exists are skipped, so the function can be re-run to resume.

    Each download is first written to a ``.part`` file next to the target and
    renamed only after the retrieval returns successfully. An interrupted or
    failed transfer therefore never leaves a truncated ``.grib`` behind that a
    later run would mistake for a finished download.

    Parameters
    ----------
    year : str (e.g., "2014")
    month : str (e.g., "05")
    days : list of str (e.g., ["01","02",...])
    times : list of str (e.g., ["00:00",...,"23:00"])
    out_dir : str, directory for output GRIB files
    area : list [N,W,S,E] or None for global

    Returns
    -------
    None

    Raises
    ------
    Any exception raised by ``client.retrieve`` is propagated after the
    partial file for that day has been removed.
    """
    ensure_dir(out_dir)

    dataset = "reanalysis-era5-single-levels"

    variables = [
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "2m_dewpoint_temperature",
        "2m_temperature",
        "land_sea_mask",
        "mean_sea_level_pressure",
        "sea_ice_cover",
        "sea_surface_temperature",
        "skin_temperature",
        "snow_density",
        "snow_depth",
        "soil_temperature_level_1",
        "soil_temperature_level_2",
        "soil_temperature_level_3",
        "soil_temperature_level_4",
        "surface_pressure",
        "volumetric_soil_water_layer_1",
        "volumetric_soil_water_layer_2",
        "volumetric_soil_water_layer_3",
        "volumetric_soil_water_layer_4",
    ]

    for day in days:
        target = Path(out_dir) / f"era5_sl_{year}{month}{day}.grib"
        if target.exists():
            print(f"[SKIP] {target} already exists")
            continue

        print(f"[INFO] Downloading single levels for {year}-{month}-{day} -> {target}")

        request = {
            "product_type": "reanalysis",
            "format": "grib",
            "variable": variables,
            "year": year,
            "month": month,
            "day": day,
            "time": times,
        }

        if area is not None:
            request["area"] = area  # [N, W, S, E]

        # Download under a temporary name so that `target` only ever exists
        # once the file is complete (the skip check above relies on that).
        partial = target.with_name(target.name + ".part")
        if partial.exists():
            # Left over from a run that was killed before it could clean up.
            partial.unlink()
        try:
            client.retrieve(dataset, request, str(partial))
            partial.replace(target)
        finally:
            # Runs on errors and on KeyboardInterrupt too; after a successful
            # rename there is nothing left to remove.
            if partial.exists():
                partial.unlink()


In [5]:
# Keep the two product families in separate subfolders of OUT_DIR.
pl_out_dir = os.path.join(OUT_DIR, "p_levels")
sl_out_dir = os.path.join(OUT_DIR, "s_levels")

# Both downloads share the same dates, hours and region; only the
# destination folder differs between them.
_shared_selection = {
    "year": YEAR,
    "month": MONTH,
    "days": DAYS,
    "times": TIMES,
    "area": AREA,
}

# Pressure-level files first, then single-level files.
download_era5_pressure_levels(out_dir=pl_out_dir, **_shared_selection)
download_era5_single_levels(out_dir=sl_out_dir, **_shared_selection)


[INFO] Downloading pressure levels for 2014-05-01 -> /data/mgeorge7/sudhansu_WORK/grb_files/2014era_hrly/p_levels/era5_pl_20140501.grib


Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attempt 1 of 500
Retrying in 120 seconds
2025-12-01 13:43:32,963 INFO Request ID is 451fb180-2c00-47ba-bcb4-adda2d27fb55
2025-12-01 13:43:33,141 INFO status has been updated to accepted
2025-12-01 13:43:47,238 INFO status has been updated to running
2025-12-01 13:55:57,163 INFO status has been updated to successful


9f603c3ca7822bf93955d6f334286ce.grib:   0%|          | 0.00/21.0G [00:00<?, ?B/s]

[INFO] Downloading pressure levels for 2014-05-02 -> /data/mgeorge7/sudhansu_WORK/grb_files/2014era_hrly/p_levels/era5_pl_20140502.grib


2025-12-01 14:49:59,923 INFO Request ID is 73f09aaa-dc4f-44ba-9bb3-d56868d92d65
2025-12-01 14:50:00,104 INFO status has been updated to accepted
2025-12-01 14:50:08,960 INFO status has been updated to running


KeyboardInterrupt: 