In [1]:
import os
import numpy as np
import xarray as xr
import warnings

# ------------------------------------------------------------
# Input / output locations
# ------------------------------------------------------------
RAW_FILE = "/mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/raw_mem/XRO_input_LE_ALL_ENSEMBLES.nc"
OUT_FILE = "/mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed_mem/XRO_input_LE_ALL_ENSEMBLES_preproc.nc"

# Create the destination folder up front (no error if it is already there)
_out_dir = os.path.dirname(OUT_FILE)
os.makedirs(_out_dir, exist_ok=True)

# ------------------------------------------------------------
# Silence noisy warning categories for the whole session
# ------------------------------------------------------------
for _category in (DeprecationWarning, RuntimeWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)

# ------------------------------------------------------------
# Function: remove climatology & quadratic trend (for one variable)
# ------------------------------------------------------------
def preprocess_and_detrend_var(
    da,
    var_name,
    *,
    clim_start="1980-01-01",
    clim_end="2010-12-31",
    deg=2,
    min_valid=5,
):
    """
    Remove the monthly climatology and a polynomial trend from one variable.

    The climatology is the per-calendar-month mean over
    ``clim_start``–``clim_end`` (default 1980–2010), computed along ``time``
    only, so each ensemble member keeps its own climatology.  The trend is a
    least-squares polynomial (default: quadratic) fitted separately to every
    ensemble member against the integer time-step index, i.e. the time axis
    is treated as evenly sampled.

    Parameters
    ----------
    da : xarray.DataArray
        Input with dimensions ``ens_LE`` and ``time``.
    var_name : str
        Name given to the variable in the returned dataset.
    clim_start, clim_end : str, keyword-only
        Inclusive bounds of the climatology base period, passed to
        ``da.sel(time=slice(...))``.
    deg : int, keyword-only
        Degree of the polynomial trend that is removed.
    min_valid : int, keyword-only
        Minimum number of finite samples a member needs for a trend to be
        fitted.  Members below that threshold (including all-NaN members)
        are returned as plain anomalies, without detrending.

    Returns
    -------
    xarray.Dataset
        Dataset holding the single variable ``var_name`` with the processed
        members concatenated along ``ens_LE``.

    Raises
    ------
    ValueError
        If ``da`` lacks the ``ens_LE`` or ``time`` dimension.
    """
    missing_dims = [dim for dim in ("ens_LE", "time") if dim not in da.dims]
    if missing_dims:
        raise ValueError(
            f"Variable {var_name!r} is missing required dimension(s) "
            f"{missing_dims}; found dims {tuple(da.dims)}"
        )

    # --- Remove monthly climatology over the base period ---
    clim = (
        da.sel(time=slice(clim_start, clim_end))
          .groupby("time.month")
          .mean("time", skipna=True)
    )
    anomalies = da.groupby("time.month") - clim

    # --- Detrend each ensemble member separately ---
    # Members are handled one by one because each can have its own gaps.
    detrended_list = []
    for ens in anomalies.ens_LE.values:
        sub = anomalies.sel(ens_LE=ens)
        trend = _polynomial_trend(sub.squeeze().values, deg, min_valid)
        if trend is None:
            # Too few usable samples: keep the anomalies untouched.
            detrended_list.append(sub)
            continue
        trend_da = xr.DataArray(trend, dims=["time"], coords={"time": sub.time})
        detrended_list.append(sub - trend_da)

    detrended_all = xr.concat(detrended_list, dim="ens_LE")
    return detrended_all.to_dataset(name=var_name)


def _polynomial_trend(y, deg, min_valid):
    """Return the polynomial trend of the 1-D series *y*, or None if it has too few finite samples."""
    y = np.asarray(y, dtype=float)
    t = np.arange(y.size)

    # Use isfinite rather than ~isnan so that +/-inf cannot reach polyfit.
    mask = np.isfinite(y)
    # A degree-`deg` fit needs at least deg + 1 points regardless of min_valid.
    if mask.sum() < max(min_valid, deg + 1):
        return None

    coeffs = np.polyfit(t[mask], y[mask], deg=deg)
    # Evaluate on the full axis so the trend is also defined at gap positions.
    return np.polyval(coeffs, t)

# ------------------------------------------------------------
# Main procedure
# ------------------------------------------------------------
print(f"Loading raw dataset:\n  {RAW_FILE}\n")
# load_dataset reads everything into memory and closes the file handle
# immediately; the detrending step pulls every variable through ``.values``
# anyway, so nothing is gained by keeping the file open lazily.
# open/load already applies CF decoding, so no extra xr.decode_cf pass is needed.
ds = xr.load_dataset(RAW_FILE, decode_times=True, use_cftime=True)

# --- Convert cftime to datetime64 if necessary ---
# Best effort: on failure keep the cftime axis, but say so instead of
# silently swallowing the error.
time_index = ds.indexes.get("time")
if isinstance(time_index, xr.CFTimeIndex):
    try:
        ds["time"] = time_index.to_datetimeindex(time_unit="ns")
    except (ValueError, TypeError) as exc:
        print(f"Keeping cftime time axis (datetime64 conversion failed: {exc})\n")

# --- Identify all data variables ---
var_names = list(ds.data_vars)
print(f"Variables found: {var_names}\n")

# --- Process each variable ---
processed_vars = []

for var in var_names:
    print(f"Processing {var} ...")
    da = ds[var]
    ds_proc = preprocess_and_detrend_var(da, var)
    processed_vars.append(ds_proc)

# --- Merge all processed variables back together ---
# compat="override" skips the equality check between the per-variable datasets.
ds_final = xr.merge(processed_vars, compat="override")

# ------------------------------------------------------------
# Save output
# ------------------------------------------------------------
# Same zlib compression settings for every variable.
comp = dict(zlib=True, complevel=4)
encoding = {v: comp for v in ds_final.data_vars}
ds_final.to_netcdf(OUT_FILE, encoding=encoding)

print(f"\n✅ Preprocessed dataset saved to:\n  {OUT_FILE}")
print("🎉 All variables processed successfully!")


Loading raw dataset:
  /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/raw_mem/XRO_input_LE_ALL_ENSEMBLES.nc

Variables found: ['NINO34', 'NPMM', 'SPMM', 'IOB', 'IOD1', 'IOD2', 'SIOD1', 'SIOD2', 'TNA', 'ATL3', 'SASD1', 'SASD2', 'WWV']

Processing NINO34 ...
Processing NPMM ...
Processing SPMM ...
Processing IOB ...
Processing IOD1 ...
Processing IOD2 ...
Processing SIOD1 ...
Processing SIOD2 ...
Processing TNA ...
Processing ATL3 ...
Processing SASD1 ...
Processing SASD2 ...
Processing WWV ...

✅ Preprocessed dataset saved to:
  /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed_mem/XRO_input_LE_ALL_ENSEMBLES_preproc.nc
🎉 All variables processed successfully!
