In [2]:
# save as era5_tp_06to06_daily_2025JAS.py
import os
from pathlib import Path
import cdsapi
import xarray as xr
import numpy as np

# ---- configuration ----
out_dir = Path("/scratch2/mg963/era5_tp_jas2025")
out_dir.mkdir(parents=True, exist_ok=True)

months = ["07", "08", "09"]      # July–September
year = "2025"
variable = "total_precipitation" # ERA5 variable name
product = "reanalysis"           # ERA5 (not ERA5-Land)
times = [f"{h:02d}:00" for h in range(24)]
days_31 = [f"{d:02d}" for d in range(1,32)]
days_30 = [f"{d:02d}" for d in range(1,31)]
days_map = {"07": days_31, "08": days_31, "09": days_30}

# ---- step 1: download hourly ERA5 per month as NetCDF ----
c = cdsapi.Client()
monthly_files = []

for m in months:
    target = out_dir / f"era5_tp_{year}{m}.nc"
    monthly_files.append(target)

    if target.exists():
        print(f"[skip] {target} exists")
        continue

    print(f"[download] ERA5 tp {year}-{m} -> {target}")
    c.retrieve(
        "reanalysis-era5-single-levels",
        {
            "product_type": product,
            "variable": variable,
            "year": year,
            "month": m,
            "day": days_map[m],
            "time": times,
            "format": "netcdf",
        },
        str(target),
    )

# ---- step 2: open and aggregate to 06:00→06:00 daily totals ----
# Concatenate months lazily
ds = xr.open_mfdataset([str(f) for f in monthly_files], combine="by_coords")

# ERA5 total_precipitation in meters per hour, accumulated over the hour ending at timestamp.
# We want sums from 06:00 of day D to 06:00 of day D+1.
# Trick: shift timestamps -6h, then resample to daily sum, then shift labels back +6h.
tp = ds["tp"]  # variable name in file is 'tp'

tp_shift = tp.assign_coords(time=tp.time - np.timedelta64(6, "h"))
# Sum all hours that now fall within each shifted calendar day
tp_daily_shifted = tp_shift.resample(time="1D").sum(keep_attrs=True)

# Put labels back at the period end (06:00 UTC original timeline)
tp_daily = tp_daily_shifted.assign_coords(time=tp_daily_shifted.time + np.timedelta64(6, "h"))

# Convert from meters to mm
tp_daily_mm = (tp_daily * 1000.0).rename("tp_06to06_mm_day")
tp_daily_mm.attrs.update({
    "long_name": "Daily total precipitation (06:00→06:00 UTC)",
    "units": "mm/day",
    "source": "ERA5 hourly on single levels",
    "aggregation": "sum of hourly tp (hour-ending) from 06Z to 06Z",
})

# Build a compact dataset with coords and variable
out = tp_daily_mm.to_dataset()

# ---- step 3: write NetCDF ----
out_file = out_dir / f"era5_tp_daily_06to06_{year}07_{year}09_global.nc"
encoding = { "tp_06to06_mm_day": {"zlib": True, "complevel": 4, "_FillValue": np.nan} }
out.to_netcdf(out_file, encoding=encoding)
print(f"[done] Wrote {out_file}")

# Optional: show the resulting time stamps (each marks the 06:00 UTC period end)
print(out["tp_06to06_mm_day"].time.to_index()[:5])

Recovering from connection error [('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))], attempt 1 of 500
Retrying in 120 seconds


[download] ERA5 tp 2025-07 -> /scratch2/mg963/era5_tp_jas2025/era5_tp_202507.nc


2025-10-21 16:38:13,772 INFO Request ID is 6408ff4d-ce07-4391-b5a5-dd339de58afb
2025-10-21 16:38:13,842 INFO status has been updated to accepted
2025-10-21 16:38:22,244 INFO status has been updated to running
2025-10-21 16:41:10,883 INFO status has been updated to successful
                                                                                                                                                                                                          

[download] ERA5 tp 2025-08 -> /scratch2/mg963/era5_tp_jas2025/era5_tp_202508.nc


2025-10-21 16:41:58,495 INFO Request ID is b2b61f2b-6197-411e-a3d4-b99c9cce3331
2025-10-21 16:41:58,592 INFO status has been updated to accepted
2025-10-21 16:42:06,955 INFO status has been updated to running
2025-10-21 16:44:50,542 INFO status has been updated to successful
                                                                                                                                                                                                          

[download] ERA5 tp 2025-09 -> /scratch2/mg963/era5_tp_jas2025/era5_tp_202509.nc


2025-10-21 16:46:14,512 INFO Request ID is 69bb600c-7a2f-4736-ade3-27890c762b45
2025-10-21 16:46:14,592 INFO status has been updated to accepted
2025-10-21 16:46:28,149 INFO status has been updated to running
2025-10-21 16:49:11,651 INFO status has been updated to successful
                                                                                                                                                                                                          

AttributeError: 'DataArray' object has no attribute 'time'