# In [21]:  (notebook cell marker left over from the export)
#!/usr/bin/env python
"""
Unified EIS Training + Inference + Cycles-from-Training (v9.7.1 – temp-agnostic SoC ensemble)
Fixes SoC collapsing by:
  • Making SoC model temperature-agnostic (Temp feature is ignored for SoC)
  • Using an ensemble (GPR + HGB + optional shape-GP) with validation-selected weights
  • Calibrating SoC uncertainty from validation residuals (no artificial 0.95 clamp)
  • Keeping SoH + cycles logic intact
  • Adding debug fields in JSON: used_freq_from_file, freq_range_hz

v9.7.1: BUGFIX — featurize_any() now calls build_shape_normalized(re_i, im_i) (correct kwarg name)
"""

from __future__ import annotations
import re, json, math, random, warnings, joblib, hashlib, uuid, io, sys, os
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.io import loadmat
from scipy import linalg
from scipy.interpolate import interp1d

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
from PIL import Image


# =========================
# Helpers: environment & RNG
# =========================
def _running_in_notebook() -> bool:
    """Tell whether the current IPython shell class is one of the notebook shells.

    Returns:
        True when the shell's class name is "ZMQInteractiveShell" or "Shell";
        False otherwise, including when any exception occurs (for example
        when IPython cannot be imported).
    """
    notebook_shells = ("ZMQInteractiveShell", "Shell")
    try:
        from IPython import get_ipython  # noqa
        shell_name = get_ipython().__class__.__name__
    except Exception:
        return False
    return shell_name in notebook_shells

def set_seed(seed: int):
    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# =========================
# 1. CONFIGURATION
# =========================
@dataclass
class Config:
    """Settings container for the EIS training / inference pipeline.

    Every field has a default, so ``Config()`` is directly usable. The fields
    are grouped by the section comments below. ``config_signature`` hashes all
    fields except ``EIS_TEST_FILES``.
    """
    # Training data directories (update if needed)
    EIS_DIR: Path = Path(r"C:\Users\tgondal0\OneDrive - Edith Cowan University\00 - Megallan Power\NMC Batteries Warwick Station\NMC\DIB_Data\.matfiles\EIS_Test")
    CAP_DIR: Path = Path(r"C:\Users\tgondal0\OneDrive - Edith Cowan University\00 - Megallan Power\NMC Batteries Warwick Station\NMC\DIB_Data\.matfiles\Capacity_Check")
    MODEL_DIR: Path = Path("models_eis_phase2_phys")

    # Test files
    # None is a placeholder (a mutable list cannot be a dataclass default);
    # the module fills in a default list right after instantiating Config.
    EIS_TEST_FILES: Optional[List[Path]] = None  # assigned after instantiation

    # Frequency interpolation grid
    F_MIN: float = 1e-2
    F_MAX: float = 1e4
    N_FREQ: int = 60

    # Uncertainty control
    SOH_STD_MAX_OOD: float = 2.0

    # SoC std caps (kept for compatibility; no hard clamp to 0.95 now)
    SOC_STD_MAX: float = 0.95
    SOC_STD_MAX_OOD: float = 0.95

    # Train / split settings
    TEST_FRAC: float = 0.2
    GROUP_KFOLDS: int = 0
    RANDOM_STATE: int = 42

    # Feature group toggles
    INCLUDE_RAW_RE_IM: bool = True
    INCLUDE_BASICS: bool = True
    INCLUDE_F_FEATS: bool = True
    INCLUDE_PHYSICAL: bool = True
    INCLUDE_DRT: bool = True
    INCLUDE_BAND_STATS: bool = True
    INCLUDE_DIFF_SLOPES: bool = True

    # DRT params
    DRT_POINTS: int = 60
    DRT_TAU_MIN: float = 1e-4
    DRT_TAU_MAX: float = 1e4
    DRT_LAMBDA: float = 1e-2

    # Capacity-based refinement
    REFINE_SOH_WITH_CAPACITY: bool = True

    # SoH modeling
    MAX_GPR_TRAIN_SAMPLES: int = 3500
    INCLUDE_NORMALIZED_SHAPE_MODEL: bool = True
    ENSEMBLE_SOH: bool = True
    NORMALIZE_SHAPE_BY_HF_RE: bool = True

    # SoC modeling (ENSEMBLE, temperature-agnostic)
    SOC_INCLUDE_SHAPE_MODEL: bool = True
    SOC_MAX_GPR_TRAIN_SAMPLES: int = 3500
    SOC_LABEL_JITTER: float = 0.6  # helps if SoC labels are discrete bands

    # OOD / SoC behavior
    OOD_SOC_ENABLE: bool = True
    OOD_SOC_Q: float = 0.995
    OOD_SOC_PRIOR: float = 50.0
    OOD_SOC_SHRINK_SCALE: float = 2.0
    OOD_SOC_W_MIN: float = 0.3
    OOD_SOC_PRIOR_MODE: str = "knn"
    SOC_OOD_USE_KNN: bool = True
    SOC_OOD_K: int = 40
    SOC_CALIBRATE_ON_OOD: bool = True
    OOD_SOC_PRIOR_MAX_WEIGHT: float = 0.45
    OOD_SOC_SHAPE_MAX_WEIGHT: float = 0.3

    # SoC calibration safety controls
    SOC_CALIBRATION_MODE: str = "auto"        # "auto" | "iso" | "linear" | "off"
    SOC_CAL_MIN_RANGE: float = 1e-3
    SOC_CAL_MIN_UNIQUE: int = 4
    SOC_CAL_MIN_R2_IMPROVE: float = -0.01

    # Cycles modeling
    ENABLE_CYCLES_MODEL: bool = True

    # RUL parameters (for plotting)
    DECISION_SOH_PERCENT: float = 50.0
    ILLUSTRATIVE_MIN_SOH: float = 40.0
    CPP_ROLLING_WINDOW: int = 5
    CPP_MIN_POINTS: int = 6
    CPP_FALLBACK: float = 20.0  # used only if no cycles model & no CPP est

    # Inference extras
    TEST_TEMPERATURE_OVERRIDE: Optional[float] = 25.0
    FORCE_RETRAIN: bool = False

    # Saving / logging
    SAVE_FEATURE_TABLE: bool = True
    VERBOSE: bool = True

    # ---- feature signature
    FEATURE_VERSION: int = 97

    # OOD thresholds (SoH)
    MAHAL_THRESHOLD: float = 10.0
    GP_ARD_NORM_THRESHOLD: float = 6.0

    # Projection curve
    PLOT_EXPONENT: float = 1.25

    # Thresholds to report/plot
    TARGET_SOH_THRESHOLDS: Tuple[float, ...] = (80.0, 50.0, 40.0)

    # ---- Cycles scaling / extrapolation ----
    CYCLE_SCALE: float = 1.0
    TARGET_CALIB_CYCLE_AT_80: Optional[float] = 1000.0
    CYCLE_TAIL_POINTS: int = 4

# Module-level configuration instance read (and in places mutated) by the
# functions below.
cfg = Config()
if cfg.EIS_TEST_FILES is None:
    # Default inference input when no test files were supplied.
    cfg.EIS_TEST_FILES = [Path("Mazda-Battery-Cell2.xlsx")]
# Import-time side effects: create the model directory and seed the RNGs.
cfg.MODEL_DIR.mkdir(parents=True, exist_ok=True)
set_seed(cfg.RANDOM_STATE)

def set_paths(eis_dir: str | Path, cap_dir: str | Path, model_dir: str | Path | None = None):
    """Repoint the global ``cfg`` at new data (and optionally model) directories.

    Args:
        eis_dir: New value for ``cfg.EIS_DIR``.
        cap_dir: New value for ``cfg.CAP_DIR``.
        model_dir: When given, becomes ``cfg.MODEL_DIR`` and is created on
            disk (parents included) if it does not exist yet.
    """
    cfg.EIS_DIR = Path(eis_dir)
    cfg.CAP_DIR = Path(cap_dir)
    if model_dir is None:
        return
    cfg.MODEL_DIR = Path(model_dir)
    cfg.MODEL_DIR.mkdir(parents=True, exist_ok=True)

def to_jsonable(x):
    """Recursively convert ``x`` into something ``json.dumps`` can serialise.

    Conversions applied:
      * ``Path``                -> ``str``
      * ``dict``                -> dict with converted values (keys untouched)
      * ``list`` / ``tuple``    -> list with converted items
      * ``numpy.ndarray``       -> nested Python list (via ``tolist``)
      * NumPy scalar            -> the equivalent Python scalar (via ``item``)

    Anything else is returned unchanged.
    """
    if isinstance(x, Path):
        return str(x)
    if isinstance(x, dict):
        return {k: to_jsonable(v) for k, v in x.items()}
    if isinstance(x, (list, tuple)):
        return [to_jsonable(i) for i in x]
    if isinstance(x, np.ndarray):
        # tolist() already yields Python scalars; recurse for object arrays.
        return to_jsonable(x.tolist())
    if isinstance(x, np.generic):
        # e.g. np.int64 is not JSON serialisable, its .item() is.
        return x.item()
    return x

def config_signature(cfg: Config) -> str:
    """Return a SHA-256 hex digest identifying the given configuration.

    The three directory fields are stringified and ``EIS_TEST_FILES`` is
    dropped before the remaining fields are serialised as key-sorted JSON.
    """
    fields = asdict(cfg).copy()
    for key in ("EIS_DIR", "CAP_DIR", "MODEL_DIR"):
        fields[key] = str(fields[key])
    fields.pop("EIS_TEST_FILES", None)
    payload = json.dumps(fields, sort_keys=True).encode("utf-8")
    return hashlib.sha256(payload).hexdigest()

# Canonical frequency grid onto which spectra are interpolated: cfg.N_FREQ
# log-spaced points running from cfg.F_MAX down to cfg.F_MIN. It is computed
# once at import time, so later changes to cfg.F_* do not alter it.
CANON_FREQ = np.geomspace(cfg.F_MAX, cfg.F_MIN, cfg.N_FREQ)

# =========================
# 3. REGEX
# =========================
# Matches EIS file stems of the form
#   Cell<id>_<stage>SOH_<temp>degC_<soc>SOC_<realsoh>
# where <stage> is one of 80/85/90/95/100 and the other groups are digit runs.
EIS_META_PATTERN = re.compile(
    r"Cell(?P<CellID>\d+)_(?P<SOH>80|85|90|95|100)SOH_(?P<Temp>\d+)degC_(?P<SOC>\d+)SOC_(?P<RealSOH>\d+)"
)
# Matches capacity-check file stems of the form
#   Cell<id>_<stage>SOH_Capacity_Check_<temp>degC_<n>cycle
CAP_META_PATTERN = re.compile(
    r"Cell(?P<CellID>\d+)_(?P<SOH>80|85|90|95|100)SOH_Capacity_Check_(?P<Temp>\d+)degC_(?P<Cycle>\d+)cycle"
)

# =========================
# 4. PARSERS
# =========================
def parse_eis_metadata(stem: str) -> Optional[Dict[str, Any]]:
    """Extract cell / SoH stage / SoC / temperature metadata from an EIS file stem.

    Returns:
        A dict with keys ``CellID``, ``SOH_stage``, ``SOC``, ``Temp`` and
        ``RealSOH_file`` (the trailing integer divided by 100), or ``None``
        when ``EIS_META_PATTERN`` does not match.
    """
    match = EIS_META_PATTERN.search(stem)
    if not match:
        return None
    cell, stage, temp, soc, real_soh = match.group("CellID", "SOH", "Temp", "SOC", "RealSOH")
    return {
        "CellID": f"Cell{cell}",
        "SOH_stage": int(stage),
        "SOC": float(soc),
        "Temp": int(temp),
        "RealSOH_file": int(real_soh)/100.0
    }

def parse_cap_metadata(stem: str) -> Optional[Dict[str, Any]]:
    """Extract cell / SoH stage / temperature / cycle index from a capacity-check stem.

    Returns:
        A dict with keys ``CellID``, ``SOH_stage``, ``Temp`` and
        ``CycleIndex``, or ``None`` when ``CAP_META_PATTERN`` does not match.
    """
    match = CAP_META_PATTERN.search(stem)
    if not match:
        return None
    cell, stage, temp, cycle = match.group("CellID", "SOH", "Temp", "Cycle")
    return {
        "CellID": f"Cell{cell}",
        "SOH_stage": int(stage),
        "Temp": int(temp),
        "CycleIndex": int(cycle)
    }

# =========================
# 5. LOADERS / INTERPOLATION
# =========================
def _find_matrix(mat_dict: dict):
    """Return the first dict value that is a 2-D ndarray with >= 10 rows and >= 3 columns.

    Values are scanned in the dict's iteration order; ``None`` is returned
    when no value qualifies.
    """
    def _qualifies(value):
        return (
            isinstance(value, np.ndarray)
            and value.ndim == 2
            and value.shape[1] >= 3
            and value.shape[0] >= 10
        )

    return next((value for value in mat_dict.values() if _qualifies(value)), None)

def _interp_channel(freq_raw, y_raw, freq_target):
    """Linearly interpolate one impedance channel onto ``freq_target``.

    Target frequencies outside the measured range are clamped to the value at
    the nearest measured frequency: the lowest-frequency sample below the
    range and the highest-frequency sample above it.

    Args:
        freq_raw: Measured frequencies (any order; duplicates allowed).
        y_raw: Channel values, one per entry of ``freq_raw``.
        freq_target: Frequencies at which to evaluate the channel.

    Returns:
        Interpolated values, in the order of ``freq_target``.

    Raises:
        ValueError: If fewer than two distinct finite samples are available.
    """
    freq_raw = np.asarray(freq_raw).astype(float)
    y_raw = np.asarray(y_raw).astype(float)

    # Drop samples with NaN/inf in either coordinate; they would otherwise
    # propagate into the interpolated output.
    finite = np.isfinite(freq_raw) & np.isfinite(y_raw)
    freq_raw = freq_raw[finite]
    y_raw = y_raw[finite]
    if freq_raw.size == 0:
        raise ValueError("No finite (frequency, value) samples to interpolate.")

    # Work in descending-frequency order so that, for duplicated frequencies,
    # the sample that comes first in that order is the one kept below.
    if freq_raw[0] < freq_raw[-1]:
        freq_raw = freq_raw[::-1]
        y_raw = y_raw[::-1]

    # np.unique returns the distinct frequencies in ASCENDING order together
    # with the index of each one's first occurrence.
    uniq, first_idx = np.unique(freq_raw, return_index=True)
    if uniq.size < 2:
        raise ValueError("Need at least two distinct frequencies to interpolate.")

    # np.interp clamps to fp[0] below xp[0] and fp[-1] above xp[-1]; with xp
    # ascending this pairs each end of the range with its own sample. The
    # previous interp1d call passed the fill values in descending-frequency
    # order, which swapped the two ends.
    return np.interp(np.asarray(freq_target, dtype=float), uniq, y_raw[first_idx])

# Candidate column headers for the frequency, real-impedance and
# imaginary-impedance columns of a tabular EIS file. _select_column compares
# them case-insensitively, first as exact names and then as substrings, in
# list order.
FREQ_CANDS = ["frequency","freq","f","hz","frequency(hz)","Frequency(Hz)","Frequency","FREQ","Freq","Hz"]
RE_CANDS   = ["zreal","re(z)","re","real","z_re","zreal(ohm)","re (ohm)","re(z) (ohm)","Zreal","Zreal (ohm)","Zreal(ohm)","Re","Re(Z)"]
IM_CANDS   = ["-zimag","zimag","im(z)","im","imag","imaginary","z_im","zimg","z_imag"," -Zimag (ohm)"," -Zimag(ohm)","-Zimag","Zimag","Zimag (ohm)","Im","-Im(Z)","-Zimag"]

def _select_column(df: pd.DataFrame, cands: List[str]) -> Optional[str]:
    """Pick the column of ``df`` that best matches one of the candidate names.

    Matching is case-insensitive and done in two passes over ``cands`` (in
    order): first an exact name match, then a substring match against the
    columns in their DataFrame order.

    Column labels are compared through ``str(label)`` so that non-string
    labels (for example integer headers from a header-less sheet) do not
    raise; the original label object is what gets returned.

    Returns:
        The matching column label, or ``None`` when nothing matches.
    """
    labels = list(df.columns)
    lowered = [str(label).lower() for label in labels]
    # When two labels collide after lower-casing, the later one wins.
    exact = dict(zip(lowered, labels))
    wanted = [cand.lower() for cand in cands]

    for name in wanted:
        if name in exact:
            return exact[name]
    for name in wanted:
        for label, low in zip(labels, lowered):
            if name in low:
                return label
    return None

def load_mat_eis(path: Path):
    """Load a ``.mat`` file and return its first three matrix columns as float arrays.

    The matrix is the first one accepted by ``_find_matrix``. Callers in this
    file treat the columns as (frequency, real part, imaginary part).

    Raises:
        ValueError: If the file contains no suitable matrix.
    """
    matrix = _find_matrix(loadmat(path))
    if matrix is None:
        raise ValueError(f"No valid EIS matrix in {path.name}")
    col0, col1, col2 = (matrix[:, c].astype(float) for c in range(3))
    return col0, col1, col2

def load_table_eis(path: Path):
    """Load an EIS spectrum from a CSV or Excel table.

    Columns are located with ``_select_column``. Rows whose Re or Im value is
    not a finite number are dropped. When a frequency column exists and has at
    least two finite values on the remaining rows, rows with a non-finite
    frequency are dropped too and that column is used. Otherwise a synthetic
    log-spaced grid from ``cfg.F_MAX`` down to ``cfg.F_MIN`` is generated.

    If the mean of the imaginary column is positive, the column is negated.

    Returns:
        ``(freq, re, im, used_freq_from_file, (first_freq, last_freq))`` where
        the last element is ``(None, None)`` when the synthetic grid is used.

    Raises:
        ValueError: If the table is empty, the Re/Im columns cannot be
            identified, or fewer than two usable rows remain.
    """
    if path.suffix.lower() == ".csv":
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)
    if df.empty:
        raise ValueError("Empty table.")
    fcol = _select_column(df, FREQ_CANDS)
    recol = _select_column(df, RE_CANDS)
    imcol = _select_column(df, IM_CANDS)
    if recol is None or imcol is None:
        raise ValueError(f"Missing Re/Im columns in {path.name}")

    re_vals = np.asarray(pd.to_numeric(df[recol], errors="coerce"), dtype=float)
    im_vals = np.asarray(pd.to_numeric(df[imcol], errors="coerce"), dtype=float)
    keep = np.isfinite(re_vals) & np.isfinite(im_vals)

    # Use the file's frequency column only if it is actually usable; a column
    # of unparsable cells must not be reported as "frequency from file".
    freq_vals = None
    if fcol is not None:
        freq_all = np.asarray(pd.to_numeric(df[fcol], errors="coerce"), dtype=float)
        keep_with_freq = keep & np.isfinite(freq_all)
        if keep_with_freq.sum() >= 2:
            keep = keep_with_freq
            freq_vals = freq_all[keep]

    re_vals = re_vals[keep]
    im_vals = im_vals[keep]
    if len(re_vals) < 2:
        raise ValueError(f"Fewer than two usable rows in {path.name}")

    used_freq = freq_vals is not None
    if used_freq:
        first_last = (float(freq_vals[0]), float(freq_vals[-1]))
    else:
        first_last = (None, None)
        freq_vals = np.geomspace(cfg.F_MAX, cfg.F_MIN, len(re_vals))

    # Normalise the sign of the imaginary column so its mean is non-positive.
    if np.nanmean(im_vals) > 0:
        im_vals = -im_vals
    return freq_vals.astype(float), re_vals.astype(float), im_vals.astype(float), used_freq, first_last

def load_any_inference(path: Path):
    """Load an inference spectrum, dispatching on the file extension.

    ``.mat`` goes through ``load_mat_eis``; ``.csv`` / ``.xls`` / ``.xlsx`` go
    through ``load_table_eis``.

    Returns:
        ``(freq, re, im, used_freq_from_file, (first_freq, last_freq))``. For
        ``.mat`` input the last two items are always ``True`` and
        ``(None, None)``.

    Raises:
        ValueError: For any other extension.
    """
    suf = path.suffix.lower()
    if suf == ".mat":
        freq, re_z, im_z = load_mat_eis(path)
        return freq, re_z, im_z, True, (None, None)
    if suf in (".csv", ".xls", ".xlsx"):
        freq, re_z, im_z, used, first_last = load_table_eis(path)
        return freq, re_z, im_z, used, first_last
    raise ValueError(f"Unsupported test file extension: {suf}")

# =========================
# 6. FEATURE ENGINEERING
# =========================
def compute_F_features(freq, re_i, im_i):
    """Compute the seven scalar "F" features of a spectrum.

    Returns ``[F1..F7]``:
      F1  Re at index 0
      F2  Re at the index where -Im is largest
      F3  Re at the last index
      F4  Re linearly interpolated at the first sign change of Im (NaN if none)
      F5  F2 - F1, or NaN when the -Im peak is at index 0
      F6  minimum of Im
      F7  Re at the grid point closest to 10 (in the units of ``freq``)
    """
    peak = int(np.argmax(-im_i))
    re_first = re_i[0]
    re_peak = re_i[peak]
    re_last = re_i[-1]

    signs = np.sign(im_i)
    crossings = np.flatnonzero(signs[:-1] != signs[1:])
    if crossings.size:
        k = crossings[0]
        im_before, im_after = im_i[k], im_i[k+1]
        # Fractional position of the zero crossing between samples k and k+1.
        frac = -im_before/(im_after - im_before + 1e-12)
        re_cross = re_i[k] + frac*(re_i[k+1]-re_i[k])
    else:
        re_cross = np.nan

    peak_offset = (re_i[peak]-re_first) if peak > 0 else np.nan
    im_min = np.min(im_i)
    nearest = int(np.argmin(np.abs(freq-10.0)))
    return [re_first, re_peak, re_last, re_cross, peak_offset, im_min, re_i[nearest]]

# Names of the values returned by physical_features(), in the same order as
# that function's return list.
PHYSICAL_FEATURE_NAMES = [
    "Rs","Rct","tau_peak","warburg_sigma","arc_quality",
    "phase_mean_mid","phase_std_mid","phase_min","lf_slope_negIm","norm_arc"
]

def physical_features(freq, re_i, im_i):
    """Compute ten physically-motivated scalar features of a spectrum.

    The returned list is ordered as ``PHYSICAL_FEATURE_NAMES``:
    ``[Rs, Rct, tau_peak, warburg_sigma, arc_quality, phase_mean_mid,
    phase_std_mid, phase_min, lf_slope_negIm, norm_arc]``. Entries that
    cannot be computed for the given grid are NaN.
    """
    freq, re_i, im_i = np.asarray(freq), np.asarray(re_i), np.asarray(im_i)
    neg_im = -im_i
    peak = int(np.argmax(neg_im))

    # Resistances read off index 0, the -Im peak and the last index.
    Rs = float(re_i[0])
    r_peak = float(re_i[peak])
    r_low = float(re_i[-1])
    Rct = max(r_peak - Rs, 0.0)
    arc_diam = r_low - Rs
    norm_arc = arc_diam / (Rs + 1e-9)

    # Time constant from the frequency at the -Im peak.
    f_peak = float(freq[peak])
    tau_peak = 1.0/(2*math.pi*f_peak) if f_peak>0 else np.nan

    # Slope of Re against omega^-0.5 over the last few grid points.
    warburg_sigma = np.nan
    n_tail = min(10, len(freq)//3)
    if n_tail >= 4:
        inv_sqrt_w = (2*np.pi*freq[-n_tail:])**(-0.5)
        re_tail = re_i[-n_tail:]
        if len(np.unique(inv_sqrt_w)) > 2:
            warburg_sigma = float(np.polyfit(inv_sqrt_w, re_tail, 1)[0])

    # Phase statistics, overall and for 1 <= freq <= 100.
    phase = np.arctan2(-im_i, re_i)
    in_mid = (freq>=1) & (freq<=100)
    if in_mid.sum()>2:
        phase_mid = phase[in_mid]
        phase_mean_mid = float(phase_mid.mean())
        phase_std_mid = float(phase_mid.std())
    else:
        phase_mean_mid = np.nan
        phase_std_mid = np.nan
    phase_min = float(phase.min())

    # Slope of -Im against log10(freq) for freq <= 1.
    in_lf = (freq<=1.0)
    if in_lf.sum() >= 4:
        lf_slope = np.polyfit(np.log10(freq[in_lf]+1e-12), neg_im[in_lf], 1)[0]
    else:
        lf_slope = np.nan

    arc_quality = (neg_im.max() - neg_im.min())/(abs(neg_im.mean())+1e-9)
    return [Rs,Rct,tau_peak,warburg_sigma,arc_quality,
            phase_mean_mid,phase_std_mid,phase_min,lf_slope,norm_arc]

# Frequency bands as (upper, lower) bounds, both inclusive.
BANDS = [(1e4,1e3),(1e3,1e2),(1e2,10),(10,1),(1,1e-1),(1e-1,1e-2)]
def band_stats(freq, re_i, im_i):
    """Mean and standard deviation of |Z| within each band of ``BANDS``.

    Returns a flat list ``[mean_0, std_0, mean_1, std_1, ...]``; a band that
    holds fewer than two grid points contributes ``[nan, nan]``.
    """
    freq = np.asarray(freq)
    feats = []
    for upper, lower in BANDS:
        in_band = (freq<=upper)&(freq>=lower)
        if in_band.sum()>1:
            mag = np.hypot(re_i[in_band], im_i[in_band])
            feats.extend([mag.mean(), mag.std()])
        else:
            feats.extend([np.nan, np.nan])
    return feats

def diff_slopes(freq, re_i, im_i, segments=5):
    """Per-segment linear slopes of Re and -Im against log10(frequency).

    The log-frequency range is cut into ``segments`` equal-width intervals
    (end points inclusive). For each interval the result holds the slope of
    Re followed by the slope of -Im, or two NaNs when the interval contains
    fewer than three points.
    """
    logf = np.log10(freq)
    edges = np.linspace(logf.min(), logf.max(), segments+1)
    neg_im = -im_i
    slopes = []
    for lo, hi in zip(edges[:-1], edges[1:]):
        sel = (logf>=lo)&(logf<=hi)
        if sel.sum() < 3:
            slopes.extend([np.nan, np.nan])
            continue
        x = logf[sel]
        slopes.append(np.polyfit(x, re_i[sel], 1)[0])
        slopes.append(np.polyfit(x, neg_im[sel], 1)[0])
    return slopes

# Names of the seven values returned by drt_features(), in return order.
DRT_FEATURE_NAMES = [
    "drt_sum","drt_mean_logtau","drt_var_logtau","drt_peak_tau",
    "drt_peak_gamma","drt_frac_low_tau","drt_frac_high_tau"
]

def compute_drt(freq,re_i,im_i,tau_min,tau_max,n_tau,lam):
    """Estimate a distribution of relaxation times by ridge-regularised least squares.

    Args:
        freq: Frequencies of the spectrum (multiplied by 2*pi internally).
        re_i, im_i: Real and imaginary parts at ``freq``.
        tau_min, tau_max: Bounds of the log-spaced time-constant grid.
        n_tau: Number of time constants.
        lam: Ridge weight added to the diagonal of the normal equations.

    Returns:
        ``(tau, gamma)`` where ``tau`` runs from ``tau_max`` down to
        ``tau_min`` and ``gamma`` is the solution clipped to be non-negative.
    """
    omega = 2*np.pi*freq
    tau = np.geomspace(tau_max, tau_min, n_tau)
    wt = omega[:,None]*tau[None,:]
    denom = 1+wt**2

    # Stack the real and imaginary kernels / targets into one system.
    kernel = np.vstack([1.0/denom, -wt/denom])
    # Real part is fitted relative to its value at index 0.
    target = np.concatenate([re_i - re_i[0], im_i])

    lhs = kernel.T @ kernel + lam*np.eye(n_tau)
    rhs = kernel.T @ target
    solution = linalg.solve(lhs,rhs,assume_a='pos')
    return tau, np.clip(solution,0,None)

def drt_features(freq,re_i,im_i):
    """Summarise the DRT of a spectrum as seven scalars (see ``DRT_FEATURE_NAMES``).

    The DRT is computed with ``compute_drt`` using the ``cfg.DRT_*`` settings.
    Any exception during the computation yields a list of seven NaNs instead
    of propagating.
    """
    try:
        tau, gamma = compute_drt(
            freq, re_i, im_i,
            cfg.DRT_TAU_MIN, cfg.DRT_TAU_MAX,
            cfg.DRT_POINTS, cfg.DRT_LAMBDA,
        )
        log_tau = np.log10(tau)
        total = gamma.sum()+1e-12  # epsilon keeps the normalisation finite
        share = gamma/total
        centroid = float((share*log_tau).sum())
        spread = float((share*(log_tau-centroid)**2).sum())
        peak = int(np.argmax(gamma))
        low_share = float(share[log_tau<=np.median(log_tau)].sum())
        return [total, centroid, spread,
                float(tau[peak]), float(gamma[peak]),
                low_share, 1-low_share]
    except Exception:
        return [np.nan]*7

def build_feature_vector(re_i, im_i, temp, freq, include_names=False):
    """Assemble the flat feature vector for one interpolated spectrum.

    Feature groups are appended in a fixed order, each gated by its
    ``cfg.INCLUDE_*`` flag; the temperature is always appended last as
    ``Feat_Temp``. NaN and +/-inf entries are replaced by 0.0.

    Returns:
        The feature vector, or ``(vector, names)`` when ``include_names`` is
        truthy.
    """
    chunks = []
    labels = []

    if cfg.INCLUDE_RAW_RE_IM:
        chunks.extend([re_i, im_i])
        labels.extend(f"Re_{i}" for i in range(len(re_i)))
        labels.extend(f"Im_{i}" for i in range(len(im_i)))

    if cfg.INCLUDE_BASICS:
        zmag = np.hypot(re_i, im_i)
        chunks.append(np.array(
            [re_i[0], re_i[-1], re_i[-1]-re_i[0], zmag.max(), zmag.mean(), zmag.std()]
        ))
        labels.extend(["hf_re","lf_re","arc_diam","zmag_max","zmag_mean","zmag_std"])

    if cfg.INCLUDE_F_FEATS:
        chunks.append(np.array(compute_F_features(freq,re_i,im_i)))
        labels.extend(f"F{i}" for i in range(1,8))

    if cfg.INCLUDE_PHYSICAL:
        chunks.append(np.array(physical_features(freq,re_i,im_i)))
        labels.extend(PHYSICAL_FEATURE_NAMES)

    if cfg.INCLUDE_BAND_STATS:
        chunks.append(np.array(band_stats(freq,re_i,im_i)))
        for bi in range(len(BANDS)):
            labels.extend([f"band{bi}_mean", f"band{bi}_std"])

    if cfg.INCLUDE_DIFF_SLOPES:
        slopes = diff_slopes(freq,re_i,im_i)
        chunks.append(np.array(slopes))
        for seg in range(len(slopes)//2):
            labels.extend([f"slope_re_seg{seg}", f"slope_negIm_seg{seg}"])

    if cfg.INCLUDE_DRT:
        chunks.append(np.array(drt_features(freq,re_i,im_i)))
        labels.extend(DRT_FEATURE_NAMES)

    chunks.append(np.array([temp]))
    labels.append("Feat_Temp")

    vec = np.concatenate(chunks).astype(float)
    vec = np.nan_to_num(vec, nan=0.0, posinf=0.0, neginf=0.0)
    return (vec, labels) if include_names else vec

def build_shape_normalized(re_i, im_i, k: int = 5):
    """Scale both channels by the NaN-median of the first ``k`` real values.

    At least one and at most ``len(re_i)`` leading samples are used. When that
    median is not finite or is smaller than 1e-9 in magnitude, the scale
    falls back to 1.0 (the channels are returned unscaled).

    Returns:
        ``(re_i / scale, im_i / scale)``.
    """
    n_head = max(1, min(k, len(re_i)))
    scale = float(np.nanmedian(re_i[:n_head]))
    unusable = (not np.isfinite(scale)) or abs(scale) < 1e-9
    if unusable:
        scale = 1.0
    return re_i / scale, im_i / scale

# =========================
# 7. CAPACITY & CYCLES TARGETS
# =========================
def _capacity_values_to_1d(x):
    """Flatten a loaded MATLAB field into a 1-D float array, one value per element.

    ndarray elements contribute their NaN-max (NaN when empty); other elements
    are converted with ``float`` and become NaN when that fails.
    """
    a = np.array(x, dtype=object).squeeze()
    out = []
    for e in a.flat:
        if isinstance(e, np.ndarray):
            out.append(float(np.nanmax(e.astype(float))) if e.size else np.nan)
        else:
            try:
                out.append(float(e))
            except Exception:
                out.append(np.nan)
    z = np.array(out, dtype=float)
    if z.ndim == 0:
        z = z[None]
    return z


def _capacity_from_mat(fp: Path) -> Optional[float]:
    """Read one capacity-check ``.mat`` file and return its capacity, or None.

    Preference order: a numeric matrix accepted by ``_find_matrix`` (NaN-max
    of the column with the largest mean magnitude over the last 50 rows), then
    ``data.AhAccu``, then ``data.WhAccu`` divided by 3.7.
    """
    mat = loadmat(fp, squeeze_me=True, struct_as_record=False)
    arr = _find_matrix(mat)
    if arr is not None:
        col = np.argmax(np.abs(arr[-50:, :]).mean(axis=0))
        return float(np.nanmax(arr[:, col]))

    d = mat.get("data", None)
    if d is None:
        return None
    cap = None
    if hasattr(d, "AhAccu"):
        v = _capacity_values_to_1d(getattr(d, "AhAccu"))
        if v.size:
            cap = float(np.nanmax(v))
    if cap is None and hasattr(d, "WhAccu"):
        v = _capacity_values_to_1d(getattr(d, "WhAccu"))
        if v.size:
            # NOTE(review): 3.7 presumably is a nominal cell voltage used to
            # turn Wh into Ah; confirm for this cell type.
            cap = float(np.nanmax(v) / 3.7)
    return cap


def load_capacity_info(cap_dir: Path) -> pd.DataFrame:
    """Collect measured capacities from capacity-check ``.mat`` files.

    Files under ``cap_dir`` (recursively) whose stem matches
    ``CAP_META_PATTERN`` are read; files are visited in sorted order so the
    resulting row order is reproducible. A file that cannot be read is
    skipped, and reported when ``cfg.VERBOSE`` is set, instead of being
    silently ignored.

    Returns:
        A DataFrame with the parsed metadata plus ``MeasuredCapacity_Ah``,
        ``NormCapacity`` (capacity divided by the per-cell maximum) and
        ``SoH_percent`` (``NormCapacity * 100``). An empty DataFrame is
        returned when ``cap_dir`` does not exist, when
        ``cfg.REFINE_SOH_WITH_CAPACITY`` is off, or when nothing usable is
        found.
    """
    if not (cap_dir.exists() and cfg.REFINE_SOH_WITH_CAPACITY):
        return pd.DataFrame()
    recs = []
    for fp in sorted(cap_dir.rglob("*.mat")):
        meta = parse_cap_metadata(fp.stem)
        if not meta:
            continue
        try:
            cap = _capacity_from_mat(fp)
        except Exception as e:
            # Best-effort loading: keep going, but say which file was dropped.
            if cfg.VERBOSE:
                print(f"[Skip] {fp.name}: {e}")
            continue
        if cap is None or not np.isfinite(cap):
            continue
        meta["MeasuredCapacity_Ah"] = cap
        recs.append(meta)

    df = pd.DataFrame(recs)
    if df.empty:
        return df
    ref = df.groupby("CellID")["MeasuredCapacity_Ah"].transform("max")
    df["NormCapacity"] = df["MeasuredCapacity_Ah"] / ref
    df["SoH_percent"] = df["NormCapacity"] * 100.0
    return df

def _build_soh_to_cycle_interpolators(cap_df: pd.DataFrame) -> Dict[str, Dict[str, Any]]:
    """Build, per cell, a lookup from SoH level to mean cycle index.

    For every ``CellID`` the distinct ``SoH_percent`` values are returned in
    ascending order under ``"soh"``, with the NaN-mean of the ``CycleIndex``
    values observed at each level under ``"cyc"``. An empty input gives ``{}``.
    """
    maps = {}
    if cap_df.empty:
        return maps
    for cid, grp in cap_df.groupby("CellID"):
        by_cycle = grp.sort_values("CycleIndex")
        soh_vals = np.asarray(by_cycle["SoH_percent"].values, dtype=float)
        cyc_vals = np.asarray(by_cycle["CycleIndex"].values, dtype=float)
        order = np.argsort(soh_vals)
        soh_sorted, cyc_sorted = soh_vals[order], cyc_vals[order]
        levels = np.unique(soh_sorted)
        mean_cyc = [float(np.nanmean(cyc_sorted[soh_sorted==lvl])) for lvl in levels]
        maps[cid] = {"soh": levels, "cyc": np.asarray(mean_cyc, dtype=float)}
    return maps

def _tail_cpp_from_map(soh: np.ndarray, cyc: np.ndarray, k: int) -> Optional[float]:
    """Absolute slope of cycle index vs. SoH over the first ``k`` map entries.

    ``k`` is clamped to the range ``[2, soh.size]``.

    Returns:
        ``abs(slope)`` of a degree-1 fit of ``cyc[:k]`` on ``soh[:k]``, or
        ``None`` when fewer than two points or fewer than two distinct SoH
        values are available.
    """
    n_pts = soh.size
    if n_pts < 2:
        return None
    take = max(2, min(k, n_pts))
    soh_head, cyc_head = soh[:take], cyc[:take]
    if len(np.unique(soh_head)) < 2:
        return None
    return abs(float(np.polyfit(soh_head, cyc_head, 1)[0]))

def _interp_or_extrap_cycle_for_soh(cell_map: Dict[str, Dict[str, Any]],
                                    cell_id: str,
                                    soh_val: float,
                                    k_tail: int,
                                    fallback_cpp: float) -> Optional[float]:
    """Cycle index at which a cell reaches ``soh_val``.

    Inside the cell's observed SoH range the value is linearly interpolated.
    Below the range it is extrapolated from ``cyc[0]`` using the slope from
    ``_tail_cpp_from_map`` (or ``fallback_cpp`` when that is missing or zero).
    Otherwise ``cyc[-1]`` is returned.

    Returns:
        The cycle index as a float, or ``None`` when the cell is unknown or
        its map has fewer than two points.
    """
    entry = cell_map.get(cell_id)
    if not entry:
        return None
    soh, cyc = entry["soh"], entry["cyc"]
    if soh.size < 2:
        return None

    soh_lo, soh_hi = soh.min(), soh.max()
    if soh_lo <= soh_val <= soh_hi:
        return float(np.interp(soh_val, soh, cyc, left=cyc[0], right=cyc[-1]))
    if soh_val < soh_lo:
        cycles_per_point = _tail_cpp_from_map(soh, cyc, k_tail) or float(fallback_cpp)
        shortfall = float(soh_lo - soh_val)
        return float(cyc[0] + cycles_per_point * shortfall)
    return float(cyc[-1])

def estimate_cpp_per_cell(capacity_df: pd.DataFrame,
                          window:int, min_points:int)->Dict[str,float]:
    """Estimate cycles per SoH point for each cell from its latest measurements.

    For every cell with at least ``min_points`` rows, a line is fitted to
    ``SoH_percent`` vs. ``CycleIndex`` over the last ``window`` rows (ordered
    by cycle). Cells whose slope is not below -1e-6, or whose window holds
    fewer than two distinct cycle indices, are left out.

    Returns:
        Mapping ``CellID -> 1 / |slope|``.
    """
    result = {}
    for cid, grp in capacity_df.groupby("CellID"):
        ordered = grp.sort_values("CycleIndex")
        if ordered.shape[0] < min_points:
            continue
        recent = ordered.tail(window)
        cycles = recent["CycleIndex"].values.astype(float)
        soh = recent["SoH_percent"].values.astype(float)
        if len(np.unique(cycles)) < 2:
            continue
        slope = np.polyfit(cycles, soh, 1)[0]
        if slope >= -1e-6:
            # Flat or rising SoH carries no degradation-rate information.
            continue
        result[cid] = 1.0/abs(slope)
    return result

def build_cpp_map(cap_df: pd.DataFrame):
    """Return per-cell cycles-per-point estimates together with a global value.

    Returns:
        ``(cpp_map, global_cpp)``. ``global_cpp`` is the median of the
        per-cell estimates; when ``cap_df`` is empty or no cell yields an
        estimate the result is ``({}, cfg.CPP_FALLBACK)``.
    """
    if cap_df.empty:
        return {}, cfg.CPP_FALLBACK
    per_cell = estimate_cpp_per_cell(
        cap_df[["CellID","CycleIndex","SoH_percent"]],
        cfg.CPP_ROLLING_WINDOW, cfg.CPP_MIN_POINTS
    )
    if per_cell:
        return per_cell, float(np.median(list(per_cell.values())))
    return {}, cfg.CPP_FALLBACK

def _calibrate_cycle_scale(cap_df: pd.DataFrame, target_80: Optional[float]) -> float:
    """Scale factor that maps the median cycle index at 80 % SoH onto ``target_80``.

    The cycle index at SoH 80 is read from each cell's SoH->cycle map
    (clamped to the map's end values); only finite, positive values count.

    Returns:
        ``target_80 / median`` or 1.0 when ``cap_df`` is empty, ``target_80``
        is None, no cell gives a usable value, or the median is not positive.
    """
    if cap_df.empty or target_80 is None:
        return 1.0
    maps = _build_soh_to_cycle_interpolators(cap_df)
    cycles_at_80 = []
    for cid in cap_df["CellID"].unique():
        entry = maps.get(cid)
        if not entry:
            continue
        soh, cyc = entry["soh"], entry["cyc"]
        c80 = np.interp(80.0, soh, cyc, left=cyc[0], right=cyc[-1])
        if np.isfinite(c80) and c80>0:
            cycles_at_80.append(float(c80))
    if not cycles_at_80:
        return 1.0
    median_80 = float(np.median(cycles_at_80))
    if median_80 <= 0:
        return 1.0
    return float(target_80/median_80)

def get_cpp(meta: dict, cpp_map: Dict[str,float], global_cpp: float):
    """Look up the cycles-per-point value for the cell named in ``meta``.

    Falls back to ``global_cpp`` when ``meta`` is empty/None or its
    ``"CellID"`` is not a key of ``cpp_map``.
    """
    if meta:
        return cpp_map.get(meta.get("CellID"), global_cpp)
    return global_cpp

# =========================
# 8. DATASET BUILD
# =========================
def load_single_eis_mat(fp: Path):
    """Load one training ``.mat`` spectrum and turn it into a feature vector.

    The metadata comes from the file stem; both channels are interpolated
    onto ``CANON_FREQ`` before featurisation, and the parsed temperature is
    passed as the temperature feature.

    Returns:
        ``(feature_vector, feature_names, meta, re_interp, im_interp)``.

    Raises:
        ValueError: If the file name does not match the expected pattern.
    """
    meta = parse_eis_metadata(fp.stem)
    if meta is None:
        raise ValueError(f"Bad filename: {fp.name}")
    freq, re_raw, im_raw = load_mat_eis(fp)
    re_i = _interp_channel(freq, re_raw, CANON_FREQ)
    im_i = _interp_channel(freq, im_raw, CANON_FREQ)
    vec, names = build_feature_vector(
        re_i, im_i, meta["Temp"], CANON_FREQ, include_names=True
    )
    return vec, names, meta, re_i, im_i

def _build_cycles_targets(meta_df: pd.DataFrame,
                          cap_df: pd.DataFrame,
                          cycle_scale: float,
                          k_tail: int) -> Tuple[np.ndarray, Dict[float, np.ndarray]]:
    """Derive cycle-index and remaining-cycles targets for every spectrum.

    For each row of ``meta_df`` the cycle index matching its ``SoH_cont`` is
    looked up in the cell's SoH->cycle map; the remaining cycles to each
    threshold in ``cfg.TARGET_SOH_THRESHOLDS`` are the (non-negative)
    difference to the cycle index at that threshold. Rows that cannot be
    resolved stay NaN. All values are multiplied by ``cycle_scale`` when it is
    non-zero and different from 1.0.

    Returns:
        ``(y_cycle_index, y_rem_dict)`` where ``y_rem_dict`` maps each
        threshold to an array aligned with ``meta_df``.
    """
    n_rows = len(meta_df)
    y_cycle_index = np.full(n_rows, np.nan, dtype=float)
    y_rem_dict: Dict[float, np.ndarray] = {
        thr: np.full(n_rows, np.nan, dtype=float) for thr in cfg.TARGET_SOH_THRESHOLDS
    }
    if cap_df.empty:
        return y_cycle_index, y_rem_dict

    maps = _build_soh_to_cycle_interpolators(cap_df)
    cpp_map, cpp_global = build_cpp_map(cap_df)

    def _cycle_at(cell, soh_value):
        # Shared lookup for both the row's own SoH and the threshold SoHs.
        return _interp_or_extrap_cycle_for_soh(
            maps, cell, soh_value, k_tail=k_tail, fallback_cpp=cpp_map.get(cell, cpp_global)
        )

    frame = meta_df.reset_index(drop=True)
    for i, (cid, soh_raw) in enumerate(zip(frame["CellID"], frame["SoH_cont"])):
        cyc_here = _cycle_at(cid, float(soh_raw))
        if cyc_here is None:
            continue
        y_cycle_index[i] = float(max(0.0, cyc_here))

        for thr in cfg.TARGET_SOH_THRESHOLDS:
            cyc_thr = _cycle_at(cid, float(thr))
            if cyc_thr is None:
                y_rem_dict[thr][i] = np.nan
            else:
                y_rem_dict[thr][i] = float(max(0.0, cyc_thr - cyc_here))

    if cycle_scale and cycle_scale != 1.0:
        factor = float(cycle_scale)
        y_cycle_index *= factor
        for remaining in y_rem_dict.values():
            remaining *= factor

    return y_cycle_index, y_rem_dict

def build_dataset(eis_dir: Path, cap_df: Optional[pd.DataFrame], cycle_scale: float):
    """Load all training spectra and assemble features, metadata and targets.

    Every ``.mat`` file under ``eis_dir`` (recursively, in sorted order) is
    featurised. A file that fails at any stage is skipped as a whole, so the
    raw and shape-normalised feature matrices always stay row-aligned, and no
    single unreadable file (including the first one) aborts the build.

    Args:
        eis_dir: Directory containing the training spectra.
        cap_df: Capacity table from ``load_capacity_info`` (may be None/empty).
        cycle_scale: Multiplier applied to the cycle targets.

    Returns:
        ``(meta_df, X, (X_shape, feature_names), y_soc, y_soh, y_cycle_index,
        y_rem_dict)``. ``X_shape`` is None when the shape-normalised branch is
        disabled.

    Raises:
        FileNotFoundError: If ``eis_dir`` holds no ``.mat`` file.
        RuntimeError: If no file could be loaded.
    """
    files = sorted(eis_dir.rglob("*.mat"))
    if not files:
        raise FileNotFoundError(f"No .mat spectra in {eis_dir}")

    want_shape = (
        (cfg.INCLUDE_NORMALIZED_SHAPE_MODEL or cfg.SOC_INCLUDE_SHAPE_MODEL)
        and cfg.NORMALIZE_SHAPE_BY_HF_RE
    )

    feats=[]; rows=[]; shape_feats=[]
    names = None
    for fp in tqdm(files, desc="Loading training spectra"):
        try:
            v, file_names, m, rei, imi = load_single_eis_mat(fp)
            shape_vec = None
            if want_shape:
                rsh, ish = build_shape_normalized(rei, imi)
                shape_vec = build_feature_vector(rsh, ish, m["Temp"], CANON_FREQ)
        except Exception as e:
            if cfg.VERBOSE: print(f"[Skip] {fp.name}: {e}")
            continue
        # Append only after every step succeeded, so feats / rows /
        # shape_feats cannot get out of step with each other.
        feats.append(v); rows.append(m)
        if shape_vec is not None:
            shape_feats.append(shape_vec)
        if names is None:
            names = file_names

    if not rows:
        raise RuntimeError("No valid training spectra after filtering.")

    X = np.vstack(feats)
    X_shape = np.vstack(shape_feats) if shape_feats else None
    meta_df = pd.DataFrame(rows)

    # SoH refinement
    if cap_df is not None and not cap_df.empty and cfg.REFINE_SOH_WITH_CAPACITY:
        # When a (cell, stage) pair occurs more than once in cap_df, the last
        # occurrence is the one kept by to_dict().
        lookup = cap_df.set_index(["CellID","SOH_stage"])["NormCapacity"].to_dict()
        refined=[]
        for cid, stage, fallback in zip(meta_df.CellID, meta_df.SOH_stage, meta_df.RealSOH_file):
            nc = lookup.get((cid, stage))
            refined.append(100.0*nc if nc is not None else fallback)
        meta_df["SoH_cont"]=refined
    else:
        meta_df["SoH_cont"]=meta_df["RealSOH_file"]

    # Targets
    y_soc = meta_df["SOC"].astype(float).values
    y_soh = meta_df["SoH_cont"].values

    # Cycles targets
    if cfg.ENABLE_CYCLES_MODEL and cap_df is not None:
        y_cycle_index, y_rem_dict = _build_cycles_targets(
            meta_df, cap_df, cycle_scale=cycle_scale, k_tail=cfg.CYCLE_TAIL_POINTS
        )
    else:
        y_cycle_index = np.full(len(meta_df), np.nan)
        y_rem_dict = {thr: np.full(len(meta_df), np.nan) for thr in cfg.TARGET_SOH_THRESHOLDS}

    soh_var = float(np.var(y_soh))
    if cfg.VERBOSE:
        print(f"[DATA] SoH range: {y_soh.min():.2f} – {y_soh.max():.2f} (var={soh_var:.3f})")
        if soh_var < 1.0:
            print("[WARN] Low SoH variance → model may output near-constant SoH.")

    if cfg.SAVE_FEATURE_TABLE:
        pd.concat(
            [meta_df.reset_index(drop=True),
             pd.DataFrame(X, columns=names)], axis=1
        ).to_parquet(cfg.MODEL_DIR/"training_features.parquet", index=False)

    return meta_df, X, (X_shape, names), y_soc, y_soh, y_cycle_index, y_rem_dict

# =========================
# 9. SPLITTING
# =========================
def cell_split_mask(meta_df: pd.DataFrame):
    """Boolean mask selecting the rows of the held-out (test) cells.

    ``cfg.TEST_FRAC`` of the distinct cells (at least one) are drawn without
    replacement using a generator seeded with ``cfg.RANDOM_STATE``; every row
    belonging to a drawn cell is marked True.
    """
    unique_cells = meta_df.CellID.unique()
    rng = np.random.default_rng(cfg.RANDOM_STATE)
    n_holdout = max(1, int(len(unique_cells)*cfg.TEST_FRAC))
    held_out = rng.choice(unique_cells, size=n_holdout, replace=False)
    return meta_df.CellID.isin(held_out)

# =========================
# 10. MODELS (helpers)
# =========================
def _fit_gpr(X, y, seed, max_samples):
    """Fit a Gaussian-process regressor with an ARD RBF + white-noise kernel.

    When ``X`` has more than ``max_samples`` rows, a random subset of that
    size (drawn without replacement from a generator seeded with ``seed``) is
    used for fitting.

    Returns:
        The fitted ``GaussianProcessRegressor``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    # One length scale per feature.
    rbf = RBF(length_scale=np.ones(n_features)*3.0,
              length_scale_bounds=(1e-1,1e6))
    noise = WhiteKernel(noise_level=1e-2,
                        noise_level_bounds=(1e-6,1e-1))
    model = GaussianProcessRegressor(
        kernel=rbf + noise, alpha=0.0, normalize_y=True,
        random_state=seed, n_restarts_optimizer=3
    )
    X_fit, y_fit = X, y
    if n_samples > max_samples:
        keep = np.random.default_rng(seed).choice(
            n_samples, size=max_samples, replace=False)
        X_fit, y_fit = X[keep], y[keep]
    model.fit(X_fit, y_fit)
    return model

def _fit_hgb(X, y):
    """Fit and return a ``HistGradientBoostingRegressor`` with fixed hyper-parameters.

    The random state comes from ``cfg.RANDOM_STATE``.
    """
    params = dict(
        learning_rate=0.05,
        max_iter=600,
        l2_regularization=1e-3,
        random_state=cfg.RANDOM_STATE,
    )
    model = HistGradientBoostingRegressor(**params)
    model.fit(X, y)
    return model

def _evaluate(y_true, y_pred):
    """Return ``(R^2, RMSE)`` of ``y_pred`` against ``y_true``."""
    r2 = r2_score(y_true, y_pred)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    return r2, rmse

def _can_calibrate_soc(val_pred: np.ndarray) -> bool:
    """Decide whether validation predictions are varied enough to calibrate on.

    Returns False when there are fewer than three predictions, when their
    peak-to-peak range is below ``cfg.SOC_CAL_MIN_RANGE``, or when fewer than
    ``cfg.SOC_CAL_MIN_UNIQUE`` distinct values remain after rounding to three
    decimals; True otherwise.
    """
    if val_pred is None or len(val_pred) < 3:
        return False
    too_flat = np.ptp(val_pred) < cfg.SOC_CAL_MIN_RANGE
    if too_flat:
        return False
    n_distinct = len(np.unique(np.round(val_pred, 3)))
    return not n_distinct < cfg.SOC_CAL_MIN_UNIQUE

class _IdentityCalibrator:
    """Calibrator stand-in with a fit/predict interface that changes nothing."""

    def fit(self, x, y):
        """Ignore the data and return ``self``."""
        return self

    def predict(self, x):
        """Return ``x`` as a float ndarray."""
        values = np.asarray(x, dtype=float)
        return values

def _ensemble_var(means, stds, weights):
    """Variance of a fixed-weight mixture of experts.

    Each expert contributes ``w * (sd**2 + (mu - mixture_mean)**2)``; a
    non-finite ``sd`` counts as 0. The result is floored at 1e-9.
    """
    mixture_mean = float(np.sum([w*mu for w, mu in zip(weights, means)]))
    total = 0.0
    for w, mu, sd in zip(weights, means, stds):
        spread = sd if np.isfinite(sd) else 0.0
        total += w * (spread**2 + (mu - mixture_mean)**2)
    return float(max(total, 1e-9))

def _grid_weights(n, step=0.1):
    """Yield weight vectors of length ``n`` that sum to 1 on a coarse grid.

    Every weight is a multiple of ``step``. For ``n == 2`` the second weight
    is computed as ``1.0 - first``; for ``n >= 3`` each weight is an integer
    multiple of ``step``, and vectors are produced in lexicographic order of
    those integers. Unlike the earlier version, ``n > 3`` really produces
    ``n``-element vectors instead of 3-element ones.

    Args:
        n: Number of weights per vector (must be >= 1).
        step: Grid spacing; ``round(1 / step)`` grid units are distributed.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be >= 1")
    if n == 1:
        yield [1.0]
        return
    steps = int(round(1.0/step))
    if n == 2:
        for i in range(steps+1):
            yield [i*step, 1.0 - i*step]
        return

    def _compositions(parts, total):
        # All ways to split `total` grid units into `parts` non-negative ints.
        if parts == 1:
            yield (total,)
            return
        for head in range(total+1):
            for rest in _compositions(parts-1, total-head):
                yield (head,) + rest

    for units in _compositions(n, steps):
        yield [u*step for u in units]

# =========================
# 11. TRAINING
# =========================
def train_models(meta_df, X_raw, shape_bundle, y_soc, y_soh, y_cycle_index, y_rem_dict):
    """Train the SoC ensemble, the SoH models and the cycles regressors, then persist them.

    Parameters
    ----------
    meta_df
        Passed to ``cell_split_mask`` to obtain the boolean hold-out mask.
    X_raw
        2-D feature matrix, one row per sample.
    shape_bundle
        Tuple ``(X_shape, feature_names)``. ``X_shape`` may be None;
        ``feature_names`` must contain ``"Feat_Temp"``.
    y_soc, y_soh
        Per-row regression targets.
    y_cycle_index
        Per-row cycle index; non-finite entries are excluded from training.
    y_rem_dict
        Mapping ``threshold -> per-row remaining-cycles array``; non-finite
        entries are excluded from training.

    Returns
    -------
    dict
        The model bundle. It is also written with joblib to
        ``cfg.MODEL_DIR / "eis_soc_soh_cycles_models.joblib"``.
    """
    X_shape, feature_names = shape_bundle
    mask_test = cell_split_mask(meta_df)

    # ===== SoC (temperature-agnostic ensemble) =====
    # The temperature column is zeroed rather than dropped, so the feature
    # layout stays identical to the SoH feature vector.
    temp_idx = feature_names.index("Feat_Temp")
    X_soc_raw = X_raw.copy()
    X_soc_raw[:, temp_idx] = 0.0

    soc_scaler = StandardScaler()
    X_soc_s = soc_scaler.fit_transform(X_soc_raw)

    # Optional normalized-shape branch for SoC
    Xs_shape = None
    soc_shape_scaler = None
    if cfg.SOC_INCLUDE_SHAPE_MODEL and (X_shape is not None):
        X_shape_soc = X_shape.copy()
        X_shape_soc[:, temp_idx] = 0.0
        soc_shape_scaler = StandardScaler()
        Xs_shape = soc_shape_scaler.fit_transform(X_shape_soc)

    # Label jitter (helpful if SoC labels are discrete)
    y_soc_train = y_soc.copy()
    if cfg.SOC_LABEL_JITTER and cfg.SOC_LABEL_JITTER > 0:
        rng = np.random.default_rng(cfg.RANDOM_STATE)
        jitter = rng.normal(0.0, cfg.SOC_LABEL_JITTER, size=y_soc_train.shape)
        y_soc_train = np.clip(y_soc_train + jitter, 0.0, 100.0)

    # GPR
    # NOTE(review): the GPR is fitted on all rows (hold-out rows included)
    # while the HGB below only sees the training rows, so the GPR validation
    # scores, and therefore the selected ensemble weights, are optimistic.
    # Confirm whether this is intended.
    soc_gpr = _fit_gpr(X_soc_s, y_soc_train, cfg.RANDOM_STATE, cfg.SOC_MAX_GPR_TRAIN_SAMPLES)
    gp_val = soc_gpr.predict(X_soc_s[mask_test])
    r2_gp, rmse_gp = _evaluate(y_soc[mask_test], gp_val)

    # HGB
    soc_hgb = _fit_hgb(X_soc_s[~mask_test], y_soc_train[~mask_test])
    hgb_val = soc_hgb.predict(X_soc_s[mask_test])
    r2_hgb, rmse_hgb = _evaluate(y_soc[mask_test], hgb_val)

    # Shape GP (optional)
    soc_shape_model = None; r2_shape = None; rmse_shape = None; shp_val = None
    if Xs_shape is not None:
        soc_shape_model = _fit_gpr(Xs_shape, y_soc_train, cfg.RANDOM_STATE, cfg.SOC_MAX_GPR_TRAIN_SAMPLES)
        shp_val = soc_shape_model.predict(Xs_shape[mask_test])
        r2_shape, rmse_shape = _evaluate(y_soc[mask_test], shp_val)

    preds_on_val = [gp_val, hgb_val] + ([shp_val] if shp_val is not None else [])
    stds_on_val  = []
    try:
        _, gp_std_val = soc_gpr.predict(X_soc_s[mask_test], return_std=True)
    except Exception:
        # Fall back to the validation RMSE as a constant std (an R2 score is
        # not a standard deviation and must not be used here).
        gp_std_val = np.full_like(gp_val, float(rmse_gp))
    stds_on_val.append(gp_std_val.astype(float))
    # HGB gives no predictive std; its validation RMSE stands in for it.
    stds_on_val.append(np.full_like(hgb_val, float(rmse_hgb)))
    if shp_val is not None:
        try:
            _, shp_std_val = soc_shape_model.predict(Xs_shape[mask_test], return_std=True)
        except Exception:
            shp_std_val = np.full_like(shp_val, float(rmse_shape if rmse_shape is not None else rmse_gp))
        stds_on_val.append(shp_std_val.astype(float))

    P = np.vstack(preds_on_val)   # (members, n_val) predictions
    S = np.vstack(stds_on_val)    # (members, n_val) standard deviations
    yv = y_soc[mask_test].astype(float)

    # Coarse grid search for the convex weights minimising validation RMSE.
    names = ["gpr", "hgb"] + (["shape"] if shp_val is not None else [])
    best = None
    for w in _grid_weights(len(names), step=0.1):
        w = np.array(w, dtype=float)
        mix = (w[:,None] * P).sum(0)
        r2, rmse = _evaluate(yv, mix)
        if (best is None) or (rmse < best["rmse"]):
            best = {"weights": w, "rmse": float(rmse), "r2": float(r2)}
    soc_weights = best["weights"]

    # Uncertainty calibration on validation residuals:
    # alpha rescales the mixture std so that it matches the observed RMS error.
    mu_mix = (soc_weights[:,None] * P).sum(0)
    var_mix = np.zeros_like(mu_mix)
    for wi, mu_i, sd_i in zip(soc_weights, P, S):
        var_mix += wi * (sd_i**2 + (mu_i - mu_mix)**2)
    var_mix = np.clip(var_mix, 1e-9, None)
    resid2 = (yv - mu_mix)**2
    alpha = float(np.sqrt(np.mean(resid2) / np.mean(var_mix))) if np.isfinite(var_mix).all() else 1.0
    if not np.isfinite(alpha):
        alpha = 1.0
    alpha = float(np.clip(alpha, 0.3, 3.0))

    # Final SoC calibration (isotonic/linear) — only if helpful
    soc_calibrator = _IdentityCalibrator()
    cal_kind = "identity"
    r2_before, rmse_before = best["r2"], best["rmse"]
    if cfg.SOC_CALIBRATION_MODE != "off" and _can_calibrate_soc(mu_mix):
        tried = []
        if cfg.SOC_CALIBRATION_MODE in ("auto","iso"):
            try:
                iso = IsotonicRegression(y_min=0.0, y_max=100.0, out_of_bounds="clip")
                iso.fit(mu_mix, yv)
                y_cal = iso.predict(mu_mix)
                r2_iso, rmse_iso = _evaluate(yv, y_cal)
                tried.append(("iso", iso, r2_iso, rmse_iso))
            except Exception as exc:
                # Best effort: a failed candidate is skipped, but not silently.
                warnings.warn(f"[SoC-Cal] isotonic calibration skipped: {exc}")
        if cfg.SOC_CALIBRATION_MODE in ("auto","linear"):
            try:
                lin = LinearRegression()
                lin.fit(mu_mix.reshape(-1,1), yv)
                y_lin = np.clip(lin.predict(mu_mix.reshape(-1,1)), 0, 100)
                r2_lin, rmse_lin = _evaluate(yv, y_lin)
                tried.append(("linear", lin, r2_lin, rmse_lin))
            except Exception as exc:
                warnings.warn(f"[SoC-Cal] linear calibration skipped: {exc}")
        if tried:
            # NOTE(review): candidates are scored on the same rows they were
            # fitted on, so the improvement test below is in-sample.
            name, model, r2_b, rmse_b = max(tried, key=lambda t: t[2])
            if r2_b >= r2_before + cfg.SOC_CAL_MIN_R2_IMPROVE:
                soc_calibrator = model
                cal_kind = name
                r2_before, rmse_before = r2_b, rmse_b

    if cfg.VERBOSE:
        print("[SoC] Candidates (R2 | RMSE):")
        print(f"      - soc_gpr_raw   R2={r2_gp:.3f}  RMSE={rmse_gp:.2f}")
        print(f"      - soc_hgb_raw   R2={r2_hgb:.3f}  RMSE={rmse_hgb:.2f}")
        if shp_val is not None:
            print(f"      - shapeGP       R2={r2_shape:.3f}  RMSE={rmse_shape:.2f}")
        print(f"[SoC] Selected base = ensemble({'+'.join(names)}) weights={soc_weights.tolist()}")
        print(f"[SoC-Cal] kind={cal_kind}  R2_val={r2_before:.3f}  RMSE_val={rmse_before:.2f}")

    # SoC-space OOD stats: Mahalanobis distances of all rows to their centroid;
    # the cfg.OOD_SOC_Q quantile becomes the out-of-distribution threshold.
    soc_center = X_soc_s.mean(axis=0)
    try:
        soc_cov_inv = np.linalg.pinv(np.cov(X_soc_s.T))
        dists = [float(np.sqrt((x - soc_center) @ soc_cov_inv @ (x - soc_center).T)) for x in X_soc_s]
        soc_mahal_thresh = float(np.quantile(dists, cfg.OOD_SOC_Q))
    except Exception:
        soc_cov_inv = np.eye(X_soc_s.shape[1]); soc_mahal_thresh = None

    # Save SoC KNN anchor set (SoC space)
    X_soc_train_for_knn = X_soc_s[~mask_test]
    y_soc_train_vals = y_soc[~mask_test]

    soc_bundle = {
        "names": names,
        "weights": soc_weights.tolist(),
        "gpr": soc_gpr,
        "hgb": soc_hgb,
        "shape_scaler": soc_shape_scaler,
        "shape_model": soc_shape_model,
        "calibrator": soc_calibrator,
        "cal_kind": cal_kind,
        "std_alpha": alpha,
        "val_rmse": float(rmse_before),
        "temp_idx": int(temp_idx),
        "soc_scaler": soc_scaler,
        "soc_center": soc_center.tolist(),
        "soc_cov_inv": soc_cov_inv.tolist(),
        "soc_mahal_thresh": None if soc_mahal_thresh is None else float(soc_mahal_thresh),
        "soc_knn_X": X_soc_train_for_knn,
        "soc_knn_y": y_soc_train_vals,
    }

    # ===== SoH =====
    scaler = StandardScaler()
    X_s = scaler.fit_transform(X_raw)

    soh_candidates = {}
    # NOTE(review): as for SoC, the SoH GPRs are fitted on all rows, so their
    # hold-out scores are not out-of-sample.
    soh_gpr = _fit_gpr(X_s, y_soh, cfg.RANDOM_STATE, cfg.MAX_GPR_TRAIN_SAMPLES)
    r2g, rmseg = _evaluate(y_soh[mask_test], soh_gpr.predict(X_s[mask_test]))
    soh_candidates["gpr_raw"] = (soh_gpr, r2g, rmseg)

    soh_hgb = _fit_hgb(X_s[~mask_test], y_soh[~mask_test])
    r2h2, rmseh2 = _evaluate(y_soh[mask_test], soh_hgb.predict(X_s[mask_test]))
    soh_candidates["hgb_raw"] = (soh_hgb, r2h2, rmseh2)

    shape_model=shape_scaler=None
    shape_metrics=None
    if cfg.INCLUDE_NORMALIZED_SHAPE_MODEL and (X_shape is not None):
        shape_scaler = StandardScaler()
        X_shape_s = shape_scaler.fit_transform(X_shape)
        shape_model = _fit_gpr(X_shape_s, y_soh, cfg.RANDOM_STATE, cfg.MAX_GPR_TRAIN_SAMPLES)
        spred = shape_model.predict(X_shape_s[mask_test])
        r2s2, rmses2 = _evaluate(y_soh[mask_test], spred)
        soh_candidates["gpr_shape"] = (shape_model, r2s2, rmses2)
        shape_metrics = {"r2": r2s2, "rmse": rmses2}

    # Only the two raw-feature models compete for "selected"; the shape GP is
    # stored separately in the bundle.
    soh_best_name = max(["gpr_raw","hgb_raw"], key=lambda k: soh_candidates[k][1])
    soh_best_model, soh_best_r2, soh_best_rmse = soh_candidates[soh_best_name]

    if cfg.VERBOSE:
        print(f"[SoH] GPR_raw:  R2={r2g:.3f} RMSE={rmseg:.2f}")
        print(f"[SoH] HGB_raw: R2={r2h2:.3f} RMSE={rmseh2:.2f}")
        if shape_metrics:
            print(f"[SoH] ShapeGP: R2={shape_metrics['r2']:.3f} RMSE={shape_metrics['rmse']:.2f}")
        print(f"[SoH] Selected raw model = {soh_best_name}")

    # SoH OOD stats
    cov = np.cov(X_s.T)
    try:
        cov_inv = np.linalg.pinv(cov)
    except Exception:
        cov_inv = np.eye(cov.shape[0])
    center = X_s.mean(axis=0)

    def _score_cycles(model, target, valid):
        # Score on hold-out rows with a finite target. When there are none,
        # fall back to all finite rows (in-sample) so y_true and y_pred always
        # have the same length.
        eval_mask = mask_test & valid
        if not eval_mask.any():
            eval_mask = valid
        return _evaluate(target[eval_mask], model.predict(X_s[eval_mask]))

    # ----- Cycles models -----
    # NOTE(review): these regressors are fitted on every finite row, including
    # hold-out rows, so the reported metrics are optimistic.
    cycles_models = {}
    cycles_metrics = {}
    if cfg.ENABLE_CYCLES_MODEL:
        m_valid = np.isfinite(y_cycle_index)
        if m_valid.sum() >= 10:
            cyc_hgb = _fit_hgb(X_s[m_valid], y_cycle_index[m_valid])
            r2c, rmsec = _score_cycles(cyc_hgb, y_cycle_index, m_valid)
            cycles_models["absolute"] = {"model": cyc_hgb, "scaler": scaler}
            cycles_metrics["absolute"] = {"r2": float(r2c), "rmse": float(rmsec), "n": int(m_valid.sum())}
            if cfg.VERBOSE:
                print(f"[Cycles] absolute: R2={r2c:.3f} RMSE={rmsec:.2f}  n={m_valid.sum()}")
        for thr, arr in y_rem_dict.items():
            mv = np.isfinite(arr)
            if mv.sum() < 10:
                continue
            rem_model = _fit_hgb(X_s[mv], arr[mv])
            r2r, rmser = _score_cycles(rem_model, arr, mv)
            cycles_models[str(int(thr))] = {"model": rem_model, "scaler": scaler}
            cycles_metrics[str(int(thr))] = {"r2": float(r2r), "rmse": float(rmser), "n": int(mv.sum())}
            if cfg.VERBOSE:
                print(f"[Cycles] remaining→{int(thr)}%: R2={r2r:.3f} RMSE={rmser:.2f}  n={mv.sum()}")

    bundle = {
        # SoC (new)
        "soc_ensemble": soc_bundle,
        # SoH
        "shared_scaler": scaler,
        "soh_model": soh_best_model,
        "soh_model_name": soh_best_name,
        "shape_scaler": shape_scaler,
        "shape_model": shape_model,
        # Cycles
        "cycles_models": cycles_models,
        "cycles_metrics": cycles_metrics,
        # Meta / persistence
        "freq_grid": CANON_FREQ,
        "feature_version": cfg.FEATURE_VERSION,
        "feature_manifest": feature_names,
        "config_signature": config_signature(cfg),
        "config": to_jsonable(asdict(cfg)),
        "cycle_scale": float(CYCLE_SCALE_GLOBAL),
        "metrics": {
            "soc_r2_selected": float(best["r2"]),
            "soc_rmse_selected": float(best["rmse"]),
            "soh_r2_selected": soh_best_r2,
            "soh_rmse_selected": soh_best_rmse
        },
        "soh_candidates_metrics": {
            "gpr_raw": {"r2": r2g, "rmse": rmseg},
            "hgb_raw": {"r2": r2h2, "rmse": rmseh2},
            "gpr_shape": shape_metrics
        },
        "train_mahal": {"center": center.tolist(), "cov_inv": cov_inv.tolist()},
    }
    # Make sure the target directory exists so a finished training run is not
    # lost at the very last step.
    cfg.MODEL_DIR.mkdir(parents=True, exist_ok=True)
    out_path = cfg.MODEL_DIR/"eis_soc_soh_cycles_models.joblib"
    joblib.dump(bundle, out_path)
    if cfg.VERBOSE:
        print(f"[MODEL] Saved bundle → {out_path}")
        print(json.dumps(bundle["metrics"], indent=2))
    return bundle

# =========================
# 12. LOAD
# =========================
def load_bundle():
    """Load the persisted model bundle and verify its mandatory keys.

    Raises
    ------
    FileNotFoundError
        If the bundle file does not exist under ``cfg.MODEL_DIR``.
    KeyError
        If the loaded bundle lacks one of the required keys.
    """
    bundle_file = cfg.MODEL_DIR / "eis_soc_soh_cycles_models.joblib"
    if not bundle_file.exists():
        raise FileNotFoundError(f"Bundle not found: {bundle_file}")

    loaded = joblib.load(bundle_file)

    required = ("soc_ensemble", "soh_model", "freq_grid")
    # Report the first missing key, in the order listed above.
    absent = next((name for name in required if name not in loaded), None)
    if absent is not None:
        raise KeyError(f"Bundle missing required key: {absent}")
    return loaded

# =========================
# 13. INFERENCE FEATURIZATION
# =========================
def featurize_any(file_path: Path, bundle):
    """Turn one EIS file into the feature vectors used at inference time.

    Returns
    -------
    tuple
        ``(vec, norm_vec, meta, checksum, used_freq, first_last)`` where
        ``norm_vec`` is None unless a shape model is enabled in ``cfg`` and
        ``cfg.NORMALIZE_SHAPE_BY_HF_RE`` is set, and ``checksum`` is the SHA-1
        hex digest of the bytes of ``vec``.
    """
    grid = bundle["freq_grid"]
    meta = parse_eis_metadata(file_path.stem)
    freq, re_raw, im_raw, used_freq, first_last = load_any_inference(file_path)
    if not used_freq:
        warnings.warn(f"[{file_path.name}] No frequency column found. Using geometric grid fallback.")

    # Resample both channels onto the frequency grid stored in the bundle.
    re_i = _interp_channel(freq, re_raw, grid)
    im_i = _interp_channel(freq, im_raw, grid)

    # Temperature: parsed metadata first, then the configured override when no
    # metadata could be parsed, otherwise the sentinel -1.
    if meta:
        temp = meta["Temp"]
    elif meta is None and cfg.TEST_TEMPERATURE_OVERRIDE is not None:
        temp = cfg.TEST_TEMPERATURE_OVERRIDE
    else:
        temp = -1

    vec = build_feature_vector(re_i, im_i, temp, grid)

    norm_vec = None
    shape_enabled = cfg.INCLUDE_NORMALIZED_SHAPE_MODEL or cfg.SOC_INCLUDE_SHAPE_MODEL
    if shape_enabled and cfg.NORMALIZE_SHAPE_BY_HF_RE:
        re_shape, im_shape = build_shape_normalized(re_i, im_i)
        norm_vec = build_feature_vector(re_shape, im_shape, temp, grid)

    raw_bytes = np.ascontiguousarray(vec).tobytes()
    checksum = hashlib.sha1(raw_bytes).hexdigest()
    return vec, norm_vec, meta, checksum, used_freq, first_last

# =========================
# 14. OOD UTILITIES (SoH)
# =========================
def mahalanobis_distance(x, center, cov_inv):
    """Return ``sqrt((x - center) @ cov_inv @ (x - center).T)`` as a Python float."""
    offset = x - center
    quad_form = np.dot(np.dot(offset, cov_inv), offset.T)
    return float(np.sqrt(quad_form))

def gp_ard_norm(Xp, model):
    """Euclidean norm of ``Xp`` scaled by the RBF length-scale(s) of a fitted GP.

    Looks for an RBF component directly in ``model.kernel_.k1`` or
    ``model.kernel_.k2``. Returns None when no such component exists or when
    anything fails along the way.
    """
    try:
        fitted_kernel = model.kernel_
        from sklearn.gaussian_process.kernels import RBF

        rbf_part = None
        for attr in ("k1", "k2"):
            component = getattr(fitted_kernel, attr, None)
            if isinstance(component, RBF):
                rbf_part = component
                break
        if rbf_part is None:
            return None

        length_scales = np.atleast_1d(rbf_part.length_scale)
        scaled = (Xp / length_scales).ravel()
        return float(np.linalg.norm(scaled))
    except Exception:
        return None

# =========================
# 15. PROJECTION PLOT
# =========================
def _estimate_cpp_from_predictions(soh_current: float, cycles_to_map: Dict[float, float]) -> float:
    """Estimate cycles-per-percent from predicted remaining cycles.

    Only thresholds below ``soh_current`` with a positive cycle count are
    considered. The lowest such threshold is used, and the result is its
    cycle count divided by the SoH gap to it. Falls back to
    ``cfg.CPP_FALLBACK`` when no threshold qualifies.
    """
    candidates = []
    for threshold, n_cycles in cycles_to_map.items():
        if n_cycles and n_cycles > 0 and soh_current > threshold:
            candidates.append((threshold, n_cycles))

    if not candidates:
        return cfg.CPP_FALLBACK

    lowest_thr, its_cycles = min(candidates, key=lambda pair: pair[0])
    gap = max(1e-6, soh_current - float(lowest_thr))
    return float(its_cycles / gap)

def build_projection(soh_current, cpp, lower, exponent=None, n=160):
    """Build a projected SoH-vs-cycles curve from ``soh_current`` down to ``lower``.

    Returns ``(cycles, soh_curve)`` with ``n`` points each. When
    ``soh_current <= lower`` or ``cpp <= 0`` a single point
    ``([0.0], [soh_current])`` is returned instead. ``exponent`` defaults to
    ``cfg.PLOT_EXPONENT``.
    """
    nothing_to_project = soh_current <= lower or cpp <= 0
    if nothing_to_project:
        return np.array([0.0]), np.array([soh_current])

    span = soh_current - lower
    horizon = span * cpp          # total cycles until `lower` is reached
    x = np.linspace(0, horizon, n)

    power = cfg.PLOT_EXPONENT if exponent is None else exponent
    remaining_fraction = 1 - x / horizon
    y = lower + span * remaining_fraction**power
    return x, y

def plot_projection(file_base, soh_current, soh_std, cycles_to_map, cpp_hint, ood_flag, out_path, thresholds):
    """Draw the projected SoH curve with threshold markers and save it to ``out_path``.

    Nothing is drawn when ``soh_current`` is already at or below the lowest
    threshold. ``thresholds`` defaults to ``(50.0, 40.0)`` when empty or None.
    The cycles-per-percent slope is estimated from ``cycles_to_map``; if that
    estimate is not usable, ``cpp_hint`` (when positive) or
    ``cfg.CPP_FALLBACK`` is used instead.

    The figure is always closed, even when drawing or saving raises, so that
    batch runs do not accumulate open figures in pyplot.
    """
    if not thresholds: thresholds = (50.0, 40.0)
    min_thr = min(thresholds)
    if soh_current <= min_thr:
        return

    cpp = _estimate_cpp_from_predictions(soh_current, cycles_to_map)
    if not np.isfinite(cpp) or cpp <= 0:
        cpp = cpp_hint if (cpp_hint and cpp_hint > 0) else cfg.CPP_FALLBACK

    cycles, curve = build_projection(soh_current, cpp, min_thr)
    fig = plt.figure(figsize=(6.4,4))
    try:
        plt.plot(cycles, curve, lw=2, label="Projected SoH (approx)")

        for thr in thresholds:
            # Thresholds >= 50 are drawn dashed/orange, lower ones dotted/red.
            style = "--" if thr >= 50 else ":"
            color = "orange" if thr >= 50 else "red"
            plt.axhline(thr, color=color, ls=style, label=f"{int(thr)}%")
            x = float(cycles_to_map.get(thr, 0.0) or 0.0)
            if x > 0:
                plt.axvline(x, color=color, ls="-." if thr>=50 else ":")
                plt.scatter([x],[thr], s=45)
                txty = thr + (1.0 if thr>=50 else -2.0)
                plt.text(x, txty, f"{x:.0f} cyc", ha="center", fontsize=8, color=color)

        plt.scatter([0],[soh_current], c="green", s=55, label=f"Current {soh_current:.2f}%")
        plt.text(0, soh_current+0.7, f"±{soh_std:.2f}", color="green", fontsize=8)

        if ood_flag:
            plt.text(0.98,0.05,"OOD", transform=plt.gca().transAxes,
                     ha="right", va="bottom", color="crimson", fontsize=11,
                     bbox=dict(boxstyle="round", fc="w", ec="crimson"))

        plt.xlabel("Remaining Cycles")
        plt.ylabel("SoH (%)")
        plt.title(f"RUL Projection – {file_base}")
        plt.grid(alpha=0.35)
        plt.legend(fontsize=8)
        plt.tight_layout()
        plt.savefig(out_path, dpi=140)
    finally:
        plt.close(fig)

# =========================
# 16. INFERENCE (single file)
# =========================
def predict_file(file_path: Path, bundle, cpp_map, global_cpp):
    """Run SoC, SoH and cycles inference for a single EIS file.

    Parameters
    ----------
    file_path
        File to featurize with ``featurize_any``.
    bundle
        Model bundle as produced by ``train_models`` / ``load_bundle``.
    cpp_map, global_cpp
        Forwarded to ``get_cpp`` for the cycles-per-percent fallback, which is
        used only when no learned cycles model yields a positive prediction.

    Returns
    -------
    tuple
        ``(result, ood_flag, cycles_to)`` where ``result`` is a JSON-friendly
        dict, ``ood_flag`` is the SoH out-of-distribution flag and
        ``cycles_to`` maps each threshold (float) to remaining cycles.
    """
    vec, norm_vec, meta, checksum, used_freq, first_last_freq = featurize_any(file_path, bundle)

    # ----- SoC (temp-agnostic ensemble) -----
    socb = bundle["soc_ensemble"]
    temp_idx = int(socb["temp_idx"])

    # SoC features (zero temperature)
    vec_soc = vec.copy()
    vec_soc[temp_idx] = 0.0
    soc_scaler = socb["soc_scaler"]
    X_soc = soc_scaler.transform(vec_soc.reshape(1,-1))

    # `members` records which ensemble members actually produced a prediction
    # so the stored weights can be aligned with them below.
    means = []; stds = []; members = []

    # GPR
    gp = socb["gpr"]
    if isinstance(gp, GaussianProcessRegressor):
        mu, sd = gp.predict(X_soc, return_std=True)
        means.append(float(mu[0])); stds.append(float(sd[0]))
    else:
        means.append(float(gp.predict(X_soc)[0])); stds.append(float(socb["val_rmse"]))
    members.append("gpr")

    # HGB (no predictive std; the validation RMSE stands in for it)
    hgb = socb["hgb"]
    means.append(float(hgb.predict(X_soc)[0])); stds.append(float(socb["val_rmse"]))
    members.append("hgb")

    # Shape GP (optional)
    if socb.get("shape_model") is not None and norm_vec is not None:
        norm_vec_soc = norm_vec.copy()
        norm_vec_soc[temp_idx] = 0.0
        Xs = socb["shape_scaler"].transform(norm_vec_soc.reshape(1,-1))
        shp = socb["shape_model"]
        if isinstance(shp, GaussianProcessRegressor):
            mu2, sd2 = shp.predict(Xs, return_std=True)
            means.append(float(mu2[0])); stds.append(float(sd2[0]))
        else:
            means.append(float(shp.predict(Xs)[0])); stds.append(float(socb["val_rmse"]))
        members.append("shape")

    w = np.array(socb["weights"], dtype=float)
    if len(w) != len(means):
        # A member weighted at training time is unavailable now (e.g. the shape
        # vector could not be built). Keep the weights of the members that did
        # predict and renormalise them instead of failing on a shape mismatch.
        stored = dict(zip(socb.get("names") or [], w))
        w = np.array([stored.get(name, 0.0) for name in members], dtype=float)
        w_total = float(w.sum())
        w = w / w_total if w_total > 0 else np.full(len(members), 1.0 / len(members))
    mu_mix = float(np.sum(w * np.array(means)))
    var_mix = _ensemble_var(means, stds, w)
    sd_mix = float(np.sqrt(var_mix)) * float(socb.get("std_alpha", 1.0))

    # Calibrate mean
    cal = socb.get("calibrator")
    if cal is not None:
        cal_in = np.array([mu_mix], dtype=float)
        if socb.get("cal_kind") == "linear":
            # The linear calibrator was fitted on a column vector and rejects
            # 1-D input, so give it the same (n, 1) layout.
            cal_in = cal_in.reshape(-1, 1)
        try:
            mu_mix = float(np.clip(np.asarray(cal.predict(cal_in))[0], 0.0, 100.0))
        except Exception as exc:
            # Best effort: keep the uncalibrated mean, but say so.
            warnings.warn(f"[SoC-Cal] calibration skipped for {Path(file_path).name}: {exc}")

    # OOD detection in SoC space
    soc_oob = False; soc_mahal = None
    if socb.get("soc_center") is not None and socb.get("soc_cov_inv") is not None:
        c = np.array(socb["soc_center"]); inv = np.array(socb["soc_cov_inv"])
        diff = (X_soc[0] - c)
        try:
            soc_mahal = float(np.sqrt(diff @ inv @ diff.T))
        except Exception:
            soc_mahal = None
        thr = socb.get("soc_mahal_thresh", None)
        if thr is not None:
            soc_oob = (soc_mahal is not None and soc_mahal > float(thr))

    # If no frequency column in file, force OOD fallback behavior
    if not used_freq:
        soc_oob = True

    mu_final = mu_mix
    sd_final = sd_mix

    if cfg.OOD_SOC_ENABLE and soc_oob:
        # Prior via KNN in SoC-space (inverse-distance weighted mean of the
        # k nearest training labels)
        Xtr = socb.get("soc_knn_X", None)
        ytr = socb.get("soc_knn_y", None)
        prior_val = cfg.OOD_SOC_PRIOR
        if Xtr is not None and ytr is not None and Xtr.shape[0] >= 5:
            d = np.linalg.norm(Xtr - X_soc[0], axis=1)
            k = min(cfg.SOC_OOD_K, Xtr.shape[0])
            idx = np.argpartition(d, k-1)[:k]
            wdist = 1.0 / (d[idx] + 1e-6)
            prior_val = float(np.sum(wdist * ytr[idx]) / np.sum(wdist))

        # The stored threshold may be None (training could not compute it);
        # in that case, or when it is not finite, measure the excess from 0.
        thr_raw = socb.get("soc_mahal_thresh", None)
        thr_base = float(thr_raw) if (thr_raw is not None and np.isfinite(thr_raw)) else 0.0
        delta = max(0.0, (soc_mahal or 0.0) - thr_base)
        s = max(1e-6, cfg.OOD_SOC_SHRINK_SCALE)
        severity = float(max(0.0, min(1.0, delta/(s*6.0))))

        # Shrink towards the prior: at least 15% prior weight, growing with severity.
        w_prior = min(float(cfg.OOD_SOC_PRIOR_MAX_WEIGHT), 0.15 + 0.85*severity)
        w_base  = 1.0 - w_prior
        mu_final = float(w_base*mu_mix + w_prior*prior_val)

        # Slightly inflate uncertainty when OOD
        sd_final = float(min(max(sd_mix*(1.0 + 1.25*severity), sd_mix), 10.0))

    soc_mean = float(np.clip(mu_final, 0.0, 100.0))
    soc_std  = float(min(sd_final, 10.0))  # allow real variation

    # ----- SoH -----
    scaler = bundle["shared_scaler"]
    X = scaler.transform(vec.reshape(1,-1))
    soh_model=bundle["soh_model"]; model_name=bundle.get("soh_model_name","unknown")
    if isinstance(soh_model, GaussianProcessRegressor):
        sm, ss = soh_model.predict(X, return_std=True)
        soh_mean_raw = float(sm[0]); soh_std_raw=float(ss[0])
    else:
        soh_mean_raw = float(soh_model.predict(X)[0])
        soh_std_raw  = float(bundle["metrics"].get("soh_rmse_selected", 5.0))

    shape_model = bundle.get("shape_model"); shape_scaler = bundle.get("shape_scaler")
    shape_soh_mean=None; shape_soh_std=None
    if shape_model is not None and norm_vec is not None:
        X_shape_s = shape_scaler.transform(norm_vec.reshape(1,-1))
        if isinstance(shape_model, GaussianProcessRegressor):
            sm2, ss2 = shape_model.predict(X_shape_s, return_std=True)
            shape_soh_mean=float(sm2[0]); shape_soh_std=float(ss2[0])
        else:
            shape_soh_mean=float(shape_model.predict(X_shape_s)[0])
            shape_soh_std=float(bundle["metrics"].get("soh_rmse_selected", 5.0))

    if cfg.ENSEMBLE_SOH and shape_soh_mean is not None:
        # Plain average of the two means; RMS of the available stds.
        soh_mean = 0.5*(soh_mean_raw + shape_soh_mean)
        soh_stds = [soh_std_raw]
        if shape_soh_std is not None: soh_stds.append(shape_soh_std)
        soh_std = float(np.sqrt(np.mean(np.array(soh_stds)**2)))
    else:
        soh_mean, soh_std = soh_mean_raw, soh_std_raw

    train_mahal = bundle.get("train_mahal")
    mahal_dist=None
    if train_mahal:
        cov_inv = np.array(train_mahal["cov_inv"])
        center = np.array(train_mahal["center"])
        mahal_dist = mahalanobis_distance(X[0], center, cov_inv)
    ard_norm=None
    if "gpr" in model_name:
        ard_norm = gp_ard_norm(X, soh_model)
    ood_flag=False
    if (mahal_dist is not None and mahal_dist > cfg.MAHAL_THRESHOLD) or \
       (ard_norm is not None and ard_norm > cfg.GP_ARD_NORM_THRESHOLD):
        ood_flag=True

    # The reported SoH std is capped: by cfg.SOH_STD_MAX_OOD when OOD,
    # otherwise by the validation RMSE of the selected model.
    soh_val_rmse = float(bundle["metrics"].get("soh_rmse_selected", 5.0))
    if ood_flag:
        soh_std = min(soh_std, cfg.SOH_STD_MAX_OOD)
    else:
        soh_std = min(soh_std, soh_val_rmse)

    # ----- Cycles predictions (learned; already scaled) -----
    cycles_models = bundle.get("cycles_models", {}) or {}
    cycles_to = {}
    cycles_abs = None
    if "absolute" in cycles_models:
        m = cycles_models["absolute"]["model"]
        cyc_pred = float(max(0.0, m.predict(X)[0]))
        cycles_abs = cyc_pred
    for thr in cfg.TARGET_SOH_THRESHOLDS:
        key = str(int(thr))
        if key in cycles_models:
            m = cycles_models[key]["model"]
            rem = float(max(0.0, m.predict(X)[0]))
            cycles_to[thr] = rem
        else:
            cycles_to[thr] = 0.0

    # Fallback if needed: linear cycles-per-percent estimate
    used_cpp = None
    if not any(v > 0 for v in cycles_to.values()):
        cpp = get_cpp(meta, cpp_map, global_cpp)
        used_cpp = float(cpp)
        for thr_val in cfg.TARGET_SOH_THRESHOLDS:
            cycles_to[thr_val] = float((soh_mean - thr_val) * cpp) if soh_mean > thr_val else 0.0

    if cfg.VERBOSE:
        print(f"[SoH] {Path(file_path).name}: mean={soh_mean:.2f} std={soh_std:.2f}  OOD(SoH)={bool(ood_flag)}")
        if cfg.OOD_SOC_ENABLE:
            print(f"[SoC] {Path(file_path).name}: base={mu_mix:.2f}  → final={soc_mean:.2f}  OOD={soc_oob}")

    result={
        "file": str(file_path),
        "feature_checksum": checksum,
        "parsed_metadata": meta,
        # SoC
        "predicted_SoC_percent": float(soc_mean),
        "SoC_std_estimate": float(soc_std),
        "soc_model_chosen": "ensemble_" + "+".join(socb["names"]),
        "soc_model_kind": "ensemble",
        "soc_calibration_kind": socb.get("cal_kind", "identity"),
        # SoH
        "predicted_SoH_percent": float(soh_mean),
        "SoH_std_estimate": float(soh_std),
        "soh_model_chosen": model_name,
        # Cycles
        "predicted_cycle_index": None if cycles_abs is None else float(cycles_abs),
        "predicted_cycles_remaining_to_thresholds": {str(int(k)): float(v) for k,v in cycles_to.items()},
        "cycles_per_percent_est": None if not any(v>0 for v in cycles_to.values()) else float(_estimate_cpp_from_predictions(soh_mean, cycles_to)),
        "fallback_cpp_used": None if used_cpp is None else float(used_cpp),
        # Debug
        "used_freq_from_file": bool(used_freq),
        "freq_range_hz": {
            "first": None if first_last_freq[0] is None else float(first_last_freq[0]),
            "last":  None if first_last_freq[1] is None else float(first_last_freq[1]),
        },
        # thresholds
        "decision_threshold_percent": cfg.DECISION_SOH_PERCENT,
        "lower_threshold_percent": cfg.ILLUSTRATIVE_MIN_SOH,
        # OOD
        "OOD_flag": bool(ood_flag)
    }
    return result, ood_flag, {float(k): float(v) for k,v in cycles_to.items()}

# =========================
# 17. MAIN (batch mode)
# =========================
def main():
    """Run the batch pipeline: cycle scaling, model load-or-train, per-file inference.

    Steps, in order:
      1. Print the active configuration when ``cfg.VERBOSE`` is set.
      2. Load capacity data from ``cfg.CAP_DIR`` and set the module-level
         ``CYCLE_SCALE_GLOBAL`` to ``cfg.CYCLE_SCALE``, multiplied by the
         auto-calibration factor when capacity data is non-empty and
         ``cfg.TARGET_CALIB_CYCLE_AT_80`` is set.
      3. Build the cycles-per-percent map (or fall back to an empty map and
         ``cfg.CPP_FALLBACK``) and multiply it by ``CYCLE_SCALE_GLOBAL``.
      4. Reuse the stored bundle when the bundle file exists, retraining is
         not forced, and both its config signature and feature version match;
         otherwise build the dataset and train new models.
      5. For every entry of ``cfg.EIS_TEST_FILES``: predict, write a
         projection plot and a JSON report under ``<MODEL_DIR>/artifacts``,
         and echo the JSON to stdout. Missing files and failed predictions
         are reported and skipped.

    Side effects:
        Rebinds ``CYCLE_SCALE_GLOBAL``; may set
        ``cfg.REFINE_SOH_WITH_CAPACITY`` to ``False`` when retraining and
        ``cfg.CAP_DIR`` does not exist; creates the artifacts directory.

    Raises:
        FileNotFoundError: retraining is required but ``cfg.EIS_DIR`` does not exist.
    """
    global CYCLE_SCALE_GLOBAL

    if cfg.VERBOSE:
        print("Configuration:\n", json.dumps(to_jsonable(asdict(cfg)), indent=2))

    cap_df = load_capacity_info(cfg.CAP_DIR)

    # ---- Global cycle scale for this run.
    # The base value is stored in the global before the calibration helper is called.
    CYCLE_SCALE_GLOBAL = float(cfg.CYCLE_SCALE)
    if not (cap_df.empty or cfg.TARGET_CALIB_CYCLE_AT_80 is None):
        auto_factor = _calibrate_cycle_scale(cap_df, cfg.TARGET_CALIB_CYCLE_AT_80)
        CYCLE_SCALE_GLOBAL = CYCLE_SCALE_GLOBAL * float(auto_factor)
        if cfg.VERBOSE:
            print(f"[CYCLE-SCALE] auto={auto_factor:.3f}  (target 80% at {cfg.TARGET_CALIB_CYCLE_AT_80})  → total scale={CYCLE_SCALE_GLOBAL:.3f}")

    # ---- Cycles-per-percent map (scaled), used as fallback and for plotting.
    if not cap_df.empty:
        cpp_map, global_cpp = build_cpp_map(cap_df)
    else:
        if cfg.VERBOSE:
            print("[INFO] No / empty capacity data.")
        cpp_map, global_cpp = {}, float(cfg.CPP_FALLBACK)
    cpp_map = {cell: cpp * CYCLE_SCALE_GLOBAL for cell, cpp in cpp_map.items()}
    global_cpp = global_cpp * CYCLE_SCALE_GLOBAL
    if cfg.VERBOSE:
        print(f"[CPP] dynamic cells={len(cpp_map)} global_cpp_median={global_cpp:.2f} (scaled)")

    # ---- Decide whether the stored bundle can be reused.
    bundle = None
    bundle_file = cfg.MODEL_DIR / "eis_soc_soh_cycles_models.joblib"
    if cfg.FORCE_RETRAIN or not bundle_file.exists():
        need_retrain = True
    else:
        try:
            bundle = load_bundle()
            same_sig = (
                bundle.get("config_signature") == config_signature(cfg)
                and bundle.get("feature_version") == cfg.FEATURE_VERSION
            )
            # A bundle trained under a different config/feature version is discarded.
            need_retrain = not same_sig
            if cfg.VERBOSE:
                print(f"[LOAD] Found bundle. Signature match: {same_sig}")
        except Exception as err:
            # Any load/inspection failure falls back to training from scratch.
            print(f"[LOAD] Could not load existing bundle cleanly: {err}")
            need_retrain = True

    # ---- Train, or make sure a bundle is loaded.
    if need_retrain:
        if not cfg.EIS_DIR.exists():
            raise FileNotFoundError(f"EIS_DIR missing: {cfg.EIS_DIR}.")
        if cfg.REFINE_SOH_WITH_CAPACITY and not cfg.CAP_DIR.exists():
            print(f"[WARN] CAP_DIR missing: {cfg.CAP_DIR}. Proceeding without capacity refinement.")
            cfg.REFINE_SOH_WITH_CAPACITY = False
            cap_df = pd.DataFrame()
        if cfg.VERBOSE:
            print("[TRAIN] Building dataset & training models...")
        (
            meta_df,
            X_raw,
            shape_bundle,
            y_soc,
            y_soh,
            y_cycle_index,
            y_rem_dict,
        ) = build_dataset(cfg.EIS_DIR, cap_df, cycle_scale=CYCLE_SCALE_GLOBAL)
        if cfg.VERBOSE:
            print(f"[TRAIN] Samples={X_raw.shape[0]} Features={X_raw.shape[1]} Cells={meta_df.CellID.nunique()}")
        bundle = train_models(meta_df, X_raw, shape_bundle, y_soc, y_soh, y_cycle_index, y_rem_dict)
    elif bundle is None:
        bundle = load_bundle()

    # ---- Inference (batch)
    artifacts = cfg.MODEL_DIR / "artifacts"
    artifacts.mkdir(parents=True, exist_ok=True)

    for test_fp in cfg.EIS_TEST_FILES:
        test_path = Path(test_fp)
        print(f"\n===== TEST: {test_path.name} =====")
        if not test_path.exists():
            print(f"[WARN] Test file not found: {test_fp}")
            continue
        try:
            # Only the result dict is used below; the other two return values are ignored.
            result, _ood_flag, _cycles_to_map = predict_file(test_path, bundle, cpp_map, global_cpp)
        except Exception as err:
            print(f"[ERROR] Prediction failed for {test_path.name}: {err}")
            continue

        stem = test_path.stem
        out_plot = artifacts / f"{stem}_projection.png"
        out_json = artifacts / f"{stem}_prediction.json"

        # The result stores thresholds as string keys; the plot gets float keys.
        remaining_by_threshold = {
            float(thr): float(cycles)
            for thr, cycles in result["predicted_cycles_remaining_to_thresholds"].items()
        }
        plot_projection(
            stem,
            result["predicted_SoH_percent"],
            result["SoH_std_estimate"],
            remaining_by_threshold,
            result.get("fallback_cpp_used"),
            result["OOD_flag"],
            out_plot,
            thresholds=cfg.TARGET_SOH_THRESHOLDS,
        )

        with out_json.open("w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2)

        print(json.dumps(result, indent=2))
        print(f"[PLOT] Saved: {out_plot}")
        print(f"[JSON] Saved: {out_json}")

    print("\nDone.")

# =========================
# 18. ENTRYPOINT
# =========================
if __name__ == "__main__":
    import argparse

    # Command-line options. Each one overrides a field of the module-level
    # `cfg` before main() runs; options left unset keep the configured value.
    parser = argparse.ArgumentParser()

    # Flags whose help text marks them as no-ops; --host/--port are likewise
    # parsed but never read in this block.
    parser.add_argument("--ui", action="store_true", help="(UI removed in this trimmed script)")
    parser.add_argument("--share", action="store_true", help="(no-op)")
    parser.add_argument("--host", default="127.0.0.1", help="Server host (default: 127.0.0.1)")
    parser.add_argument("--port", type=int, default=7860, help="Server port (default: 7860)")
    parser.add_argument("--inbrowser", action="store_true", help="(no-op)")

    # Paths + toggles
    parser.add_argument("--eis_dir", type=str, default=None, help="Path to training EIS .mat directory")
    parser.add_argument("--cap_dir", type=str, default=None, help="Path to capacity .mat directory")
    parser.add_argument("--model_dir", type=str, default=None, help="Path to save/load models and artifacts")
    parser.add_argument("--no-capacity", action="store_true", help="Disable capacity refinement")
    parser.add_argument("--force-retrain", action="store_true", help="Force retrain even if a bundle exists/matches")

    # Cycle scale overrides
    parser.add_argument("--cycle-scale", type=float, default=None, help="Override CYCLE_SCALE (global multiplier)")
    parser.add_argument("--target-80", type=float, default=None, help="Override TARGET_CALIB_CYCLE_AT_80 (auto-scale)")

    # SoC toggles
    parser.add_argument("--soc-ood", action="store_true", help="Enable SoC OOD blending (ON by default here)")
    parser.add_argument("--soc-cal", type=str, default=None, help="SoC calibration mode: auto|iso|linear|off")

    # parse_known_args: unrecognised arguments are ignored rather than rejected.
    args, _ = parser.parse_known_args()

    # Any explicit path triggers set_paths(); paths not given keep their cfg value.
    if any((args.eis_dir, args.cap_dir, args.model_dir)):
        set_paths(
            args.eis_dir or cfg.EIS_DIR,
            args.cap_dir or cfg.CAP_DIR,
            args.model_dir or cfg.MODEL_DIR,
        )

    # Boolean switches only ever move cfg in one direction.
    if args.no_capacity:
        cfg.REFINE_SOH_WITH_CAPACITY = False
    if args.force_retrain:
        cfg.FORCE_RETRAIN = True

    # Numeric overrides; argparse has already converted these to float.
    if args.cycle_scale is not None:
        cfg.CYCLE_SCALE = args.cycle_scale
    if args.target_80 is not None:
        cfg.TARGET_CALIB_CYCLE_AT_80 = args.target_80

    if args.soc_ood:
        cfg.OOD_SOC_ENABLE = True
    if args.soc_cal is not None:
        # Stored lower-cased so the mode is case-insensitive on the command line.
        cfg.SOC_CALIBRATION_MODE = args.soc_cal.lower()

    main()


Configuration:
 {
  "EIS_DIR": "C:\\Users\\tgondal0\\OneDrive - Edith Cowan University\\00 - Megallan Power\\NMC Batteries Warwick Station\\NMC\\DIB_Data\\.matfiles\\EIS_Test",
  "CAP_DIR": "C:\\Users\\tgondal0\\OneDrive - Edith Cowan University\\00 - Megallan Power\\NMC Batteries Warwick Station\\NMC\\DIB_Data\\.matfiles\\Capacity_Check",
  "MODEL_DIR": "models_eis_phase2_phys",
  "EIS_TEST_FILES": [
    "Mazda-Battery-Cell2.xlsx"
  ],
  "F_MIN": 0.01,
  "F_MAX": 10000.0,
  "N_FREQ": 60,
  "SOH_STD_MAX_OOD": 2.0,
  "SOC_STD_MAX": 0.95,
  "SOC_STD_MAX_OOD": 0.95,
  "TEST_FRAC": 0.2,
  "GROUP_KFOLDS": 0,
  "RANDOM_STATE": 42,
  "INCLUDE_RAW_RE_IM": true,
  "INCLUDE_BASICS": true,
  "INCLUDE_F_FEATS": true,
  "INCLUDE_PHYSICAL": true,
  "INCLUDE_DRT": true,
  "INCLUDE_BAND_STATS": true,
  "INCLUDE_DIFF_SLOPES": true,
  "DRT_POINTS": 60,
  "DRT_TAU_MIN": 0.0001,
  "DRT_TAU_MAX": 10000.0,
  "DRT_LAMBDA": 0.01,
  "REFINE_SOH_WITH_CAPACITY": true,
  "MAX_GPR_TRAIN_SAMPLES": 3500,
  "INCLUDE_