In [4]:
# --- Hourly NetCDF -> 30-minute NetCDF (works in Jupyter / Anaconda Cloud) ---
# 1) EDIT THESE THREE LINES:
INFILE = r"D:\Farhan\CLASSIC_Farhan\ERA5 Hourly Data La Romaine\Long wave radiation\Converted_longwave_radiation_Wm2.nc"   # your hourly file
OUTFILE = r"D:\Farhan\CLASSIC_Farhan\ERA5 Hourly Data La Romaine\Long wave radiation\Converted_longwave_radiation_halfhourly_Wm2.nc"                    # where to save the 30-min file
ACCUM_VARS = []  # e.g. ["tp", "pr", "precip"] for hourly accumulations; leave [] if none

In [5]:
# --- Install missing libraries (safe in Jupyter) ---
import sys, subprocess, importlib
def ensure_pkg(pkg):
    try:
        importlib.import_module(pkg)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", pkg])
for pkg in ["xarray", "netCDF4", "cftime", "pandas", "numpy"]:
    ensure_pkg(pkg)

In [6]:
# --- Imports ---
import xarray as xr
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path

In [7]:
# ---- helpers ----
def find_time_name(ds):
    if "time" in ds.dims: return "time"
    for k in ds.coords:
        if "time" in k.lower(): return k
    raise ValueError("No time coordinate found.")

In [8]:
def clean_time(ds, tname):
    t = ds[tname].to_index()
    if not t.is_monotonic_increasing:
        ds = ds.sortby(tname); t = ds[tname].to_index()
    if not t.is_unique:
        ds = ds.sel({tname: ~pd.Index(t).duplicated()})
    return ds

In [9]:
def upsample_accum(da, tname, new_t):
    """
    Convert HOURLY ACCUMULATION to 30-min totals:
    - half at the exact hourly stamp
    - half at +30 min
    Works for any extra dims.
    """

In [10]:
def process_time_data(da, tname='time', new_t=None):
    """
    Process time data by reindexing and combining values at :00 and :30 positions.
    
    Parameters:
    -----------
    da : xarray.DataArray
        The input data array to process
    tname : str, optional
        The name of the time coordinate (default: 'time')
    new_t : array-like, optional
        New time values to reindex to (must be provided)
    
    Returns:
    --------
    xarray.DataArray
        Combined data from :00 and :30 positions
    """
    if new_t is None:
        raise ValueError("new_t parameter must be provided")
        
    # half at :00 positions (align exact hourly -> new_t)
    half0 = da.reindex({tname: new_t}, method=None) * 0.5
    # half at :30 positions (shift time coord by +30 min, then align)
    half30 = da.copy()
    half30 = half30.assign_coords({tname: (da[tname] + np.timedelta64(30, "m"))})
    half30 = half30.reindex({tname: new_t}, method=None) * 0.5

In [22]:
# ---- load ----
# Use proper path format for your environment
# For Unix-like environments (Linux/Mac):
INFILE = r"D:\Farhan\CLASSIC_Farhan\ERA5 Hourly Data La Romaine\specific_humidity_2m_ERA5_Tetens.nc"   # your hourly file
OUTFILE = r"D:\Farhan\CLASSIC_Farhan\ERA5 Hourly Data La Romaine\specific_humidity_2m_ERA5_halfhourly.nc"   
# If you're in Windows but need to avoid backslash issues:
# INFILE = r"D:\data_stream-oper_stepType-instant.nc"  # Use raw string with Windows path
# OUTFILE = r"D:\data_halfhourly.nc"  # Use raw string with Windows path

# If using Path objects, import it first
from pathlib import Path

# Convert to Path objects and then to strings
INFILE = str(Path(INFILE))
OUTFILE = str(Path(OUTFILE))

# Open the dataset
ds = xr.open_dataset(INFILE, decode_times=True)
tname = find_time_name(ds)
ds = clean_time(ds, tname)
tix = ds[tname].to_index()

In [23]:
# ---- new 30-min grid ----
new_time = pd.date_range(start=tix.min(), end=tix.max(), freq="30min")

# ---- instantaneous vars: linear interpolation ----
ds_inst = ds.interp({tname: new_time})

In [24]:
# ---- accumulation vars (override any interp versions) ----
if ACCUM_VARS:
    pieces = []
    for v in ACCUM_VARS:
        if v in ds:
            pieces.append(upsample_accum(ds[v], tname, new_time).to_dataset(name=v))
        else:
            print(f"[warn] '{v}' not found; skipping.")
    if pieces:
        ds_acc = xr.merge(pieces)
        keep = [v for v in ds_inst.data_vars if v not in ds_acc.data_vars]
        ds_inst = ds_inst[keep]
        ds_out = xr.merge([ds_inst, ds_acc])
    else:
        ds_out = ds_inst
else:
    ds_out = ds_inst

In [25]:
# ---- metadata & save ----
attrs = dict(ds.attrs) if ds.attrs else {}
stamp = f"{datetime.utcnow().isoformat()}Z: hourly → 30-min (instant=interp; accum=split 50/50)."
attrs["history"] = (attrs.get("history","").strip() + (" | " if attrs.get("history","").strip() else "") + stamp)
ds_out = ds_out.assign_attrs(attrs)

ds_out.to_netcdf(OUTFILE)
print("✅ Done")
print("Time axis      :", tname)
print("Original       :", tix.min(), "→", tix.max(), "| N =", len(tix), "| freq~", pd.infer_freq(tix))
print("New            :", new_time.min(), "→", new_time.max(), "| N =", len(new_time))
print("Accum handled  :", ACCUM_VARS if ACCUM_VARS else "None")
print("Saved          :", OUTFILE)

  stamp = f"{datetime.utcnow().isoformat()}Z: hourly → 30-min (instant=interp; accum=split 50/50)."


✅ Done
Time axis      : time
Original       : 1984-01-01 00:00:00 → 2024-12-31 23:00:00 | N = 359424 | freq~ h
New            : 1984-01-01 00:00:00 → 2024-12-31 23:00:00 | N = 718847
Accum handled  : None
Saved          : D:\Farhan\CLASSIC_Farhan\ERA5 Hourly Data La Romaine\specific_humidity_2m_ERA5_halfhourly.nc


In [2]:
import xarray as xr
import netCDF4 as nc

In [26]:
ds = xr.open_dataset (2m_temp_merged_half)
ds.load()

SyntaxError: invalid decimal literal (2134579449.py, line 1)