In [2]:
import xarray as xr
import numpy as np

def aggregate_to_daily_custom_start(ds, var="tp", start_hour=6, units_out="mm/day",
                                    drop_incomplete=False):
    """
    Aggregate an hourly accumulated variable to 24-h totals on a custom window.

    Every hourly value is treated as the accumulation over the hour *ending*
    at its timestamp (the ERA5 convention for accumulated fields such as
    'tp').  With start_hour=6 each total therefore sums the 24 steps stamped
    07:00 ... 06:00 (next day), i.e. the physical period 06:00 -> 06:00, and
    is labelled with the end of that period.

    Parameters
    ----------
    ds : xr.Dataset or xr.DataArray
        Input containing an hourly variable (e.g. ERA5 'tp' in m).  The time
        dimension may be called 'valid_time' or 'time'.
    var : str
        Name of variable in ds.  For a DataArray input it is only used as a
        fallback name when the array itself is unnamed.
    start_hour : int
        Start hour (UTC, 0-23) defining the 24-h accumulation period.
        e.g. start_hour=6 -> 06:00→06:00 daily totals.
    units_out : str
        Desired output units (default 'mm/day').  If the string contains
        'mm' the values are multiplied by 1000 (input assumed to be metres).
    drop_incomplete : bool
        If True, discard windows that do not contain all 24 hourly steps
        (typically the first and last window of a file).  The default (False)
        keeps them, so edge totals may cover fewer than 24 hours.

    Returns
    -------
    xr.DataArray
        Daily totals along a 'time' dimension whose coordinate marks the
        **end** of each 24-h period.

    Raises
    ------
    ValueError
        If start_hour is not an integer in 0..23, if no 'valid_time'/'time'
        dimension is present, or if the time steps are not exactly hourly.
    """
    # Reject bools, floats and out-of-range hours up front: they would either
    # crash later in np.timedelta64 / the ':02d' format or silently produce
    # windows that do not match the labels.
    if (
        isinstance(start_hour, bool)
        or not isinstance(start_hour, (int, np.integer))
        or not 0 <= start_hour <= 23
    ):
        raise ValueError("start_hour must be an integer between 0 and 23")
    start_hour = int(start_hour)

    # Extract DataArray
    da = ds[var] if isinstance(ds, xr.Dataset) else ds

    # Newer CDS NetCDF files call the axis 'valid_time', older ones 'time'.
    if "valid_time" in da.dims:
        time_dim = "valid_time"
    elif "time" in da.dims:
        time_dim = "time"
    else:
        raise ValueError("Input must have a 'valid_time' or 'time' dimension")

    # A non-dimension 'time' coordinate would clash with the output axis name.
    if time_dim != "time" and "time" in da.coords:
        da = da.drop_vars("time")

    # Ensure hourly regular spacing.  Compare the raw differences: casting
    # them to whole hours first would truncate e.g. 90 min down to 1 h.
    steps = np.diff(da[time_dim].values)
    if not np.all(steps == np.timedelta64(1, "h")):
        raise ValueError("Input time steps must be hourly and regular")

    # Shift time by -start_hour so calendar-day bins align with the window.
    offset = np.timedelta64(start_hour, "h")
    shifted = da.assign_coords({time_dim: da[time_dim] - offset})

    # Right-closed bins: a step stamped exactly at the window end belongs to
    # that window (it holds the last hour's accumulation).  Right labels make
    # the timestamp mark the end of the window, as documented.
    window = dict(closed="right", label="right")
    daily = shifted.resample({time_dim: "1D"}, **window).sum(keep_attrs=True)

    if drop_incomplete:
        # Count the hourly steps in each bin independently of the data values
        # (so NaNs in the field do not affect which windows are kept).
        n_steps = xr.DataArray(
            np.ones(shifted.sizes[time_dim], dtype="int64"),
            dims=[time_dim],
            coords={time_dim: shifted[time_dim].values},
        ).resample({time_dim: "1D"}, **window).sum()
        daily = daily.isel({time_dim: n_steps.values == 24})

    # Shift labels back +start_hour so each timestamp is the real end time.
    daily = daily.assign_coords({time_dim: daily[time_dim] + offset})

    # The result is always exposed on a 'time' axis.
    if time_dim != "time":
        daily = daily.rename({time_dim: "time"})

    # Convert from meters to mm.  Arithmetic drops attrs by default, so carry
    # them over explicitly to keep the metadata preserved by the sum.
    if "mm" in units_out:
        kept_attrs = dict(daily.attrs)
        daily = daily * 1000.0
        daily.attrs = kept_attrs

    # Metadata
    base_name = da.name if da.name is not None else var
    daily.name = f"{base_name}_daily_{start_hour:02d}UTC"
    daily.attrs.update({
        "long_name": f"Daily total ({start_hour:02d}–{start_hour:02d} UTC)",
        "aggregation": f"sum of hourly {base_name} shifted by {start_hour}h",
        "units": units_out
    })

    return daily

In [19]:
# Input: downloaded hourly dataset; output: daily totals written next to it.
hourly_path = "/scratch2/mg963/era5_tp_jas2025/era5_tp_202509.nc"
daily_path = "/scratch2/mg963/era5_tp_jas2025/era5_tp_daily_06to06_202509.nc"

# Load the hourly data.
ds = xr.open_dataset(hourly_path)

# Build 06:00→06:00 UTC daily totals of 'tp'.
tp_daily = aggregate_to_daily_custom_start(ds, "tp", 6)

# Write the result to NetCDF.
tp_daily.to_netcdf(daily_path)