# In [None]:
import numpy as np
from sklearn.model_selection import KFold
import pwlf

# --- 1) K-fold CV to choose number of segments ---
def choose_segments_cv(x, y, k_min=1, k_max=20, K=5, seed=0, min_pts_per_seg=3):
    """Pick the number of piecewise-linear segments by K-fold cross-validation.

    For every candidate segment count ``m`` in ``[k_min, k_max]`` a
    ``pwlf.PiecewiseLinFit`` model is fitted with ``fitfast(m)`` on each
    training fold and scored by mean squared error on the held-out fold.
    A candidate is disqualified (score ``inf``) as soon as one fold fails to
    fit or produces a segment containing fewer than ``min_pts_per_seg``
    training points. The candidate with the lowest mean fold MSE is then
    refitted on all of the data.

    Args:
        x: Array-like of predictor values (converted to a float array).
        y: Array-like of responses, same number of elements as ``x``.
        k_min: Smallest segment count to try (inclusive).
        k_max: Largest segment count to try (inclusive).
        K: Number of cross-validation folds.
        seed: Random state for the fold shuffling. It is only passed to
            ``KFold``; the pwlf optimiser is not seeded here.
        min_pts_per_seg: Minimum number of training points every fitted
            segment must contain for a fold to count as valid.

    Returns:
        Tuple ``(best_m, breaks, scores)``: the chosen segment count, the
        ``fit_breaks`` of the model refitted on all data, and a dict mapping
        every candidate ``m`` to its mean CV MSE (``inf`` if disqualified).

    Raises:
        ValueError: If ``x`` and ``y`` differ in size or ``k_min > k_max``.
        RuntimeError: If every candidate was disqualified, chained to the
            last fitting error when there was one.
    """
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    if x.size != y.size:
        raise ValueError(
            f"x and y must have the same number of elements "
            f"(got {x.size} and {y.size})"
        )

    m_cands = range(k_min, k_max + 1)
    if not m_cands:
        raise ValueError(
            f"no candidate segment counts: k_min={k_min} > k_max={k_max}"
        )

    # The folds depend only on (K, seed), so build them once and reuse the
    # same splits for every candidate instead of re-creating them per m.
    folds = list(KFold(n_splits=K, shuffle=True, random_state=seed).split(x))

    def enough_points(xt, bps):
        # Bounds are inclusive on both sides, so a training point lying
        # exactly on a breakpoint counts towards both neighbouring segments.
        return all(
            np.sum((xt >= lo) & (xt <= hi)) >= min_pts_per_seg
            for lo, hi in zip(bps[:-1], bps[1:])
        )

    fit_errors = []  # kept so a total failure can report its cause

    def cv_mse(m):
        errs = []
        for tr, va in folds:
            xt, yt = x[tr], y[tr]
            xv, yv = x[va], y[va]
            try:
                # The pwlf keyword is ``disp_res``; passing ``disp`` raises
                # TypeError, which used to be swallowed below.
                mdl = pwlf.PiecewiseLinFit(xt, yt, disp_res=False)
                mdl.fitfast(m)
                if not enough_points(xt, mdl.fit_breaks):
                    return np.inf
                yhat = mdl.predict(xv)
                errs.append(np.mean((yv - yhat) ** 2))
            except Exception as exc:
                # Best effort: one failing fold disqualifies this candidate,
                # but remember why in case nothing else succeeds.
                fit_errors.append(exc)
                return np.inf
        return float(np.mean(errs))

    scores = {m: cv_mse(m) for m in m_cands}
    best_m = min(scores, key=scores.get)

    # min() would otherwise silently hand back k_min when nothing was usable.
    if np.isinf(scores[best_m]):
        cause = fit_errors[-1] if fit_errors else None
        raise RuntimeError(
            f"no segment count in [{k_min}, {k_max}] produced a valid "
            f"cross-validated fit"
        ) from cause

    final = pwlf.PiecewiseLinFit(x, y, disp_res=False)
    final.fitfast(best_m)
    return best_m, final.fit_breaks, scores

# --- 2) Information-criteria fallback (fast) ---
def choose_segments_ic(x, y, k_min=1, k_max=20, criterion="bic"):
    """Pick the number of piecewise-linear segments by AIC or BIC.

    Each candidate segment count ``m`` in ``[k_min, k_max]`` is fitted once
    on all of the data with ``pwlf.PiecewiseLinFit(...).fitfast(m)`` and
    scored as ``n*log(rss/n) + penalty``, where the penalty is ``2*p`` for
    AIC and ``p*log(n)`` for BIC with ``p = 2*m``. The candidate with the
    lowest score wins; ties keep the smaller ``m``.

    Args:
        x: Array-like of predictor values (converted to a float array).
        y: Array-like of responses, same number of elements as ``x``.
        k_min: Smallest segment count to try (inclusive).
        k_max: Largest segment count to try (inclusive).
        criterion: ``"aic"`` or ``"bic"`` (case-insensitive).

    Returns:
        Tuple ``(best_m, breaks, best_val)``: the chosen segment count, the
        ``fit_breaks`` of that model, and its criterion value.

    Raises:
        ValueError: If ``criterion`` is not ``"aic"``/``"bic"``, the inputs
            are empty or differ in size, or ``k_min > k_max``.
        RuntimeError: If no candidate could be fitted and scored, chained to
            the last fitting error when there was one.
    """
    # Validate up front: previously any unknown criterion silently meant BIC.
    crit = str(criterion).lower()
    if crit not in ("aic", "bic"):
        raise ValueError(
            f"criterion must be 'aic' or 'bic', got {criterion!r}"
        )

    x = np.asarray(x, float)
    y = np.asarray(y, float)
    if y.size == 0 or x.size != y.size:
        raise ValueError(
            f"x and y must be non-empty with the same number of elements "
            f"(got {x.size} and {y.size})"
        )

    m_cands = range(k_min, k_max + 1)
    if not m_cands:
        raise ValueError(
            f"no candidate segment counts: k_min={k_min} > k_max={k_max}"
        )

    n = len(y)
    best_val, best_m, best_model = np.inf, None, None
    last_error = None
    for m in m_cands:
        try:
            # The pwlf keyword is ``disp_res``; passing ``disp`` raises
            # TypeError, which used to be swallowed by the handler below.
            mdl = pwlf.PiecewiseLinFit(x, y, disp_res=False)
            mdl.fitfast(m)
            yhat = mdl.predict(x)
        except Exception as exc:
            # Best effort: skip candidates that cannot be fitted, but keep
            # the reason so a total failure is not silent.
            last_error = exc
            continue

        rss = np.sum((y - yhat) ** 2)
        p = 2 * m  # approximate parameter count: two per segment
        # An exact fit has rss == 0 and scores -inf; that is the intended
        # limit, so only the divide-by-zero warning is suppressed.
        with np.errstate(divide="ignore"):
            fit_term = n * np.log(rss / n)
        penalty = 2 * p if crit == "aic" else p * np.log(n)
        val = fit_term + penalty
        if val < best_val:
            best_val, best_m, best_model = val, m, mdl

    if best_model is None:
        # Previously this crashed with AttributeError on None.fit_breaks.
        raise RuntimeError(
            f"no segment count in [{k_min}, {k_max}] produced a usable fit"
        ) from last_error
    return best_m, best_model.fit_breaks, best_val
