In [36]:
# Notebook cell: build time from scalar sampling + run length, read ALL ordinate_values

from __future__ import annotations
import pathlib
import numpy as np
import pandas as pd

def _open_cdf(path: str):
    """Return (dataset_handle, backend)."""
    try:
        import netCDF4
        return netCDF4.Dataset(path, "r"), "netcdf4"
    except Exception:
        import xarray as xr
        # Prefer scipy (no cftime); fallback to netcdf4
        try:
            return xr.open_dataset(path, engine="scipy", decode_times=False), "xarray"
        except Exception:
            return xr.open_dataset(path, engine="netcdf4", decode_times=False), "xarray"

def _get_var(ds, name: str):
    return ds.variables[name] if hasattr(ds, "variables") else ds[name]

def _as_float_scalar(obj) -> float:
    # Works for netCDF4 Variable 0-d, xarray DataArray 0-d, numpy scalar
    if hasattr(obj, "values"):
        arr = np.asarray(obj.values)
    else:
        arr = np.asarray(obj[:]) if hasattr(obj, "__getitem__") else np.asarray(obj)
    return float(arr.reshape(()))

def convert_cdf_scalar_timing_to_csv(
    path: str,
    out: str | None = None,
    *,
    prefer_minutes: bool = False,
) -> pd.DataFrame:
    """
    Convert a CDF trace with scalar timing metadata into a (time, intensity) table.

    Reads:
      - ordinate_values: intensities (length N)
      - actual_sampling_interval (seconds): dt
      - actual_run_time_length (seconds): must be present (unchanged contract),
        but the axis length always comes from the data itself
      - actual_delay_time (seconds, optional): t0 (defaults 0)
    Constructs time: t = t0 + dt * arange(N), where N = len(ordinate_values).

    Args:
        path: location of the CDF file to read.
        out: optional CSV destination; names not ending in ".csv" have their
            suffix replaced by ".csv". Nothing is written when falsy.
        prefer_minutes: divide the time axis by 60 before returning/writing.

    Returns:
        DataFrame with columns ``time`` and ``intensity``. Rows whose time or
        intensity is non-finite (including masked intensities) are dropped.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: a required variable is missing, or the sampling interval
            is not a positive, finite number.
    """
    path = str(path)
    if not pathlib.Path(path).exists():
        raise FileNotFoundError(path)
    ds, _backend = _open_cdf(path)
    try:
        # A handle without ``.variables`` is treated as a mapping of names,
        # matching how _get_var indexes it.
        names = list(ds.variables.keys()) if hasattr(ds, "variables") else list(ds)
        if "ordinate_values" not in names:
            raise ValueError("CDF missing 'ordinate_values' (intensity) array.")

        # Read the FULL intensity vector. Masked samples become NaN so the
        # finite filter below removes them instead of keeping raw fill values.
        var_y = _get_var(ds, "ordinate_values")
        raw_y = var_y.values if hasattr(var_y, "values") else var_y[:]
        y = np.ma.filled(np.ma.asarray(raw_y, dtype=float), np.nan).ravel()

        # Required scalar timing
        if "actual_sampling_interval" not in names or "actual_run_time_length" not in names:
            raise ValueError("CDF must contain 'actual_sampling_interval' and 'actual_run_time_length' scalars.")
        dt = _as_float_scalar(_get_var(ds, "actual_sampling_interval"))
        if not (np.isfinite(dt) and dt > 0):
            raise ValueError(
                f"'actual_sampling_interval' must be a positive, finite number of seconds; got {dt!r}."
            )
        t0 = _as_float_scalar(_get_var(ds, "actual_delay_time")) if "actual_delay_time" in names else 0.0

        # The header run length can disagree with the stored sample count
        # (vendor rounding), so the axis is always sized from the data.
        t = t0 + dt * np.arange(y.size, dtype=float)
        if prefer_minutes:
            t = t / 60.0

        # Drop non-finite samples; time and intensity stay aligned because the
        # axis was built before filtering.
        keep = np.isfinite(t) & np.isfinite(y)
        df = pd.DataFrame({"time": t[keep], "intensity": y[keep]})

        if out:
            out_path = out if str(out).lower().endswith(".csv") else str(pathlib.Path(out).with_suffix(".csv"))
            df.to_csv(out_path, index=False)
        return df
    finally:
        # Best-effort close: a failure here must not mask the real result/error.
        try:
            ds.close()
        except Exception:
            pass



In [37]:
# Example: convert one trace (time kept in seconds) and write it to 1AM_1.csv.
df = convert_cdf_scalar_timing_to_csv("1AM_1.cdf", out="1AM_1.csv", prefer_minutes=False)
# Trailing expression so the cell displays the (shape, head) preview on a fresh run;
# an assignment alone produces no cell output.
df.shape, df.head()

((4801, 2),
    time  intensity
 0  0.00    0.02019
 1  0.25    0.02023
 2  0.50    0.02128
 3  0.75    0.02238
 4  1.00    0.02198)