In [4]:
import os
import glob
import numpy as np
import xarray as xr
import warnings

# ------------------------------------------------------------
# Paths
# ------------------------------------------------------------
RAW_DIR = "/mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/raw"
OUT_DIR = "/mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed"
# Create the output directory up front; a pre-existing directory is fine.
os.makedirs(OUT_DIR, exist_ok=True)

# ------------------------------------------------------------
# Warnings setup
# ------------------------------------------------------------
# Silence these three warning categories for the rest of the session.
for _category in (DeprecationWarning, RuntimeWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)

# ------------------------------------------------------------
# List all files
# ------------------------------------------------------------
# Collect every raw input file and put the paths in lexicographic order.
file_list = glob.glob(os.path.join(RAW_DIR, "XRO_input_LE_*.nc"))
file_list.sort()
print(f"Found {len(file_list)} files")

# ------------------------------------------------------------
# Function: remove climatology & quadratic trend
# ------------------------------------------------------------
def preprocess_and_detrend(ds, clim_start="1980-01-01", clim_end="2010-12-31", deg=2):
    """
    Remove the monthly climatology and a polynomial trend from a dataset.

    The first data variable of ``ds`` is processed. Its monthly climatology,
    computed over ``clim_start``..``clim_end`` (1980-2010 by default), is
    subtracted; then a polynomial of degree ``deg`` (quadratic by default) is
    fitted by least squares to each member along ``ens_LE`` and removed.
    The fit uses the time-step number (0, 1, 2, ...) as abscissa and ignores
    NaN samples; members that are entirely NaN are passed through unchanged.

    Parameters
    ----------
    ds : xarray.Dataset
        Input with a ``time`` coordinate and an ``ens_LE`` dimension.
    clim_start, clim_end : str, optional
        Inclusive bounds of the climatology base period.
    deg : int, optional
        Degree of the polynomial trend removed from each member.

    Returns
    -------
    xarray.Dataset
        A dataset holding one variable (same name as the input variable)
        with the detrended anomalies, members concatenated along ``ens_LE``.

    Raises
    ------
    ValueError
        If ``ds`` has no usable data variable, or if a member does not
        reduce to a one-dimensional time series.
    """
    # Detect variable name (exclude coordinates stored as data variables)
    candidates = [v for v in ds.data_vars if v not in ("time", "ens_LE")]
    if not candidates:
        raise ValueError("Dataset contains no data variable to preprocess")
    var_name = candidates[0]
    da = ds[var_name]

    # --- Remove monthly climatology ---
    base_period = da.sel(time=slice(clim_start, clim_end))
    clim = base_period.groupby("time.month").mean("time")
    anomalies = da.groupby("time.month") - clim

    # --- Detrend each ensemble member ---
    detrended_list = []

    # Select members by position so duplicated or missing ens_LE labels
    # cannot make a selection return more than one member.
    for member in range(anomalies.sizes["ens_LE"]):
        sub = anomalies.isel(ens_LE=member)
        y = sub.squeeze().values
        if y.ndim != 1:
            raise ValueError(
                f"Expected a 1-D time series per ensemble member, "
                f"got an array with shape {y.shape}"
            )
        t = np.arange(len(y))

        valid = ~np.isnan(y)
        if not valid.any():
            # Nothing to fit: keep the all-NaN member as it is.
            detrended_list.append(sub)
            continue

        # Fit on the valid samples only, evaluate on the full time axis.
        coeffs = np.polyfit(t[valid], y[valid], deg=deg)
        trend = np.polyval(coeffs, t)

        trend_da = xr.DataArray(trend, dims=["time"], coords={"time": sub.time})
        detrended_list.append(sub - trend_da)

    detrended_all = xr.concat(detrended_list, dim="ens_LE")
    return detrended_all.to_dataset(name=var_name)

# ------------------------------------------------------------
# Main loop
# ------------------------------------------------------------
for f in file_list:
    # The index name is whatever follows "_LE_" in the file name.
    var_name = os.path.basename(f).split("_LE_")[-1].replace(".nc", "")
    print(f"Processing {var_name} ...")

    # Open dataset (compatible with older xarray). The context manager
    # releases the file handle even if a later step raises.
    with xr.open_dataset(f, decode_times=True, use_cftime=True) as raw:
        ds = xr.decode_cf(raw)

        # --- Convert time to datetime64[ns] safely ---
        time_index = ds.indexes.get("time")
        if isinstance(time_index, xr.CFTimeIndex):
            try:
                try:
                    converted = time_index.to_datetimeindex(time_unit="ns")
                except TypeError:
                    # Older xarray releases do not accept ``time_unit``.
                    converted = time_index.to_datetimeindex()
                ds["time"] = converted
            except Exception as err:
                # Best effort: the cftime axis is kept and processing goes
                # on, but the failure is reported instead of being hidden.
                print(
                    f"  Warning: could not convert time axis of {var_name} "
                    f"to datetime64 ({err}); keeping cftime"
                )

        # --- Apply preprocessing ---
        ds_proc = preprocess_and_detrend(ds)

        # --- Save output ---
        out_path = os.path.join(OUT_DIR, f"XRO_input_LE_{var_name}_preproc.nc")
        # Each variable gets its own compression settings dict.
        encoding = {v: dict(zlib=True, complevel=4) for v in ds_proc.data_vars}
        ds_proc.to_netcdf(out_path, encoding=encoding)

    print(f"âœ… Saved to {out_path}")

print("\nðŸŽ‰ All preprocessing complete!")


Found 13 files
Processing ATL3 ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_ATL3_preproc.nc
Processing ENSO ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_ENSO_preproc.nc
Processing IOB ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_IOB_preproc.nc
Processing IOD1 ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_IOD1_preproc.nc
Processing IOD2 ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_IOD2_preproc.nc
Processing NPMM ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_NPMM_preproc.nc
Processing SASD1 ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_SASD1_preproc.nc
Processing SASD2 ...
âœ… Saved to /mnt/lustre/proj/kimyy/Model/CESM2/ESP/for_XRO/preprocessed/XRO_input_LE_SASD2_preproc.nc
Process