<a href="https://colab.research.google.com/github/korkutanapa/ANOMALY_DETECTION_TDA_YAHOO_DATASET/blob/main/TDA_NAB_SOLUTIONS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Version 11 optimization algorithm

In [None]:
import os, re, shutil, itertools, subprocess
import pandas as pd

# ============================================================
# 1) CLEAN START & CLONE NAB
# ============================================================
print("--- 1. CLEAN START ---")
# All paths below are relative to the Colab content directory.
os.chdir("/content")

# Delete any existing NAB directory before cloning a fresh copy.
if os.path.exists("NAB"):
    shutil.rmtree("NAB")

# Notebook shell escapes: clone the NAB repository and install ripser.
!git clone https://github.com/numenta/NAB.git
!pip install -q ripser

# Everything from here on runs with the NAB checkout as working directory.
os.chdir("/content/NAB")

# Create config/thresholds.json holding an empty JSON object if the file
# is not already present.
os.makedirs("config", exist_ok=True)
thr_path = os.path.join("config", "thresholds.json")
if not os.path.exists(thr_path):
    with open(thr_path, "w") as f:
        f.write("{}")

# Directory that receives one scoring log per grid-search experiment.
os.makedirs("tuning_logs", exist_ok=True)

# ============================================================
# 2) TEMPLATE = YOUR my_algo.py CODE (UNCHANGED EXCEPT WE WILL
#    regex-replace ONLY K_PER_FEATURE and TOP_FINAL)
#    IMPORTANT: DETECTOR_NAME stays "TDA_VEAD_Method"
# ============================================================
TEMPLATE = r"""
import os
import glob
import numpy as np
import pandas as pd
from ripser import ripser
import warnings

warnings.filterwarnings("ignore")

DETECTOR_NAME = "TDA_VEAD_Method"
INPUT_DIR = "data"
OUTPUT_DIR = os.path.join("results", DETECTOR_NAME)

# ----------------------------------------------------------
# Embedding parameters (fixed in NAB for fairness)
# ----------------------------------------------------------
WINDOW_SIZE = 14
TAU         = 1
DIMENSION   = 7
_EPS        = 1e-12
MAXDIM      = 1  # H0 + H1

# ==========================================================
# 0. VEAD CONFIGURATION
# ==========================================================
KV   = 3.5
KA   = 3.5
MODE = "abs_plateau"  # "strict" | "plateau" | "abs_plateau"

def _vead_series(raw_vals, kv=KV, ka=KA, mode=MODE):
    # Velocity/acceleration score for one feature series.
    #
    # The input is coerced to float and interpolated in both directions. Its
    # first difference (velocity) and second difference (acceleration) are
    # each turned into a median/MAD z-score, rectified according to `mode`,
    # scaled by `kv` / `ka` and multiplied together:
    #   "strict"      -> negative z-scores are clipped to 0
    #   "plateau"     -> z-scores that are not above -0.25 are set to 0
    #   "abs_plateau" -> absolute value of the z-scores
    # Any other mode leaves the z-scores unchanged; a falsy mode means
    # "strict". NaN and infinite products are returned as 0.0.
    series = pd.Series(raw_vals, dtype=float)
    series = pd.to_numeric(series, errors="coerce").interpolate(limit_direction="both")

    velocity = series.diff(1)
    acceleration = velocity.diff(1)

    def _robust_z(values):
        # Median-centred values divided by the MAD (plus a tiny constant so
        # a zero MAD does not divide by zero).
        values = np.asarray(values, dtype=float)
        center = np.nanmedian(values)
        spread = np.nanmedian(np.abs(values - center)) + 1e-12
        return (values - center) / spread

    z_vel = _robust_z(velocity.values)
    z_acc = _robust_z(acceleration.values)

    rectifiers = {
        "strict": lambda z: np.maximum(0.0, z),
        "plateau": lambda z: np.where(z > -0.25, z, 0.0),
        "abs_plateau": np.abs,
    }
    rectify = rectifiers.get((mode or "strict").lower())
    if rectify is not None:
        z_vel = rectify(z_vel)
        z_acc = rectify(z_acc)

    product = (kv * z_vel) * (ka * z_acc)
    return np.nan_to_num(product, nan=0.0, posinf=0.0, neginf=0.0)

# ==========================================================
# 1. TAKENS EMBEDDING
# ==========================================================
def takens_embed(window, time_delay, dimension):
    # Delay-embed a 1-D window. Row i of the result is
    #   (window[i], window[i + time_delay], ..., window[i + (dimension - 1) * time_delay])
    # so the result has shape (m, dimension) with
    #   m = len(window) - (dimension - 1) * time_delay.
    # Raises ValueError when time_delay or dimension is below 1, or when the
    # window is too short to produce at least one embedded point.
    if time_delay < 1 or dimension < 1:
        raise ValueError("Takens time_delay and dimension must be >= 1.")
    m = len(window) - (dimension - 1) * time_delay
    if m <= 0:
        raise ValueError("Takens parameters too large for this window.")
    # Column j is the window shifted by j * time_delay. Starting column j at
    # offset j and sub-sampling with step time_delay only equals a delay
    # embedding for time_delay == 1 and can give columns of unequal length
    # for larger delays.
    return np.stack(
        [window[j * time_delay:j * time_delay + m] for j in range(dimension)],
        axis=1,
    )

# ==========================================================
# 2. PERSISTENCE DIAGRAM UTILITIES + FEATURE FUNCTIONS
# ==========================================================
def _clean_diag(diag):
    # Normalise a persistence diagram to a float array of shape (k, 2) that
    # keeps only finite (birth, death) pairs with death > birth. Anything
    # that is None, empty, or not an (n, 2) array yields an empty (0, 2) array.
    nothing = np.empty((0, 2), dtype=float)
    if diag is None:
        return nothing
    pairs = np.asarray(diag, dtype=float)
    well_formed = pairs.ndim == 2 and pairs.shape[1] == 2 and pairs.size != 0
    if not well_formed:
        return nothing
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    keep = np.isfinite(births) & np.isfinite(deaths) & (deaths > births)
    if not keep.any():
        return nothing
    return np.stack([births[keep], deaths[keep]], axis=1)

def _lifetimes(diag):
    # Lifetimes (death - birth, floored at 0) of the cleaned diagram's pairs;
    # an empty float array when no valid pair remains.
    pairs = _clean_diag(diag)
    if pairs.size:
        spans = pairs[:, 1] - pairs[:, 0]
        return np.maximum(spans, 0.0)
    return np.empty(0, dtype=float)

def _safe_div(a, b):
    # a / (b + _EPS) as a Python float; the _EPS offset keeps a zero
    # denominator from dividing by zero.
    numerator = float(a)
    denominator = float(b + _EPS)
    return numerator / denominator

# Trapezoidal integration: use np.trapezoid when this NumPy provides it,
# otherwise fall back to np.trapz.
try:
    _trapz = np.trapezoid
except AttributeError:
    _trapz = np.trapz

def _auc_tri_max(diag):
    # Area under the pointwise maximum of one triangle per (birth, death)
    # pair, sampled on 64 evenly spaced points between the smallest birth and
    # the largest death. Each triangle rises from 0 at birth to
    # (death - birth) / 2 at the midpoint and falls back to 0 at death.
    # Returns 0.0 for an empty diagram or a zero-width grid.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    lowest_birth = pairs[:, 0].min()
    highest_death = pairs[:, 1].max()
    if lowest_birth == highest_death:
        return 0.0

    grid = np.linspace(lowest_birth, highest_death, 64)
    envelope = np.zeros_like(grid)

    for birth, death in pairs:
        mid = 0.5 * (birth + death)
        half = 0.5 * (death - birth)
        if half <= 0:
            continue

        rising = (grid >= birth) & (grid <= mid)
        falling = (grid >= mid) & (grid <= death)
        up_slope = half / max(mid - birth, _EPS)
        down_slope = half / max(death - mid, _EPS)

        envelope[rising] = np.maximum(envelope[rising], (grid[rising] - birth) * up_slope)
        envelope[falling] = np.maximum(envelope[falling], (death - grid[falling]) * down_slope)

    return float(_trapz(envelope, grid))

def _persistence_entropy(diag):
    # Entropy (natural log) of the lifetimes after dividing them by their
    # sum; 0.0 when there are no lifetimes or their sum is not positive.
    lifetimes = _lifetimes(diag)
    total = lifetimes.sum() if lifetimes.size else 0.0
    if total <= 0:
        return 0.0
    weights = lifetimes / (total + _EPS)
    log_weights = np.log(weights + _EPS)
    return float(-np.sum(weights * log_weights))

def _gini_from_lifetimes(L):
    # Gini coefficient of the lifetimes in L, computed from the cumulative
    # sums of the sorted values; 0.0 for empty input or a non-positive total.
    ordered = np.sort(L)
    count = len(ordered)
    if count == 0:
        return 0.0
    total = ordered.sum()
    if total <= 0:
        return 0.0
    running = np.cumsum(ordered)
    lorenz_sum = np.sum(running / (count * total))
    return float(1 + 1 / count - 2 * lorenz_sum)

def _tail_share_q(diag, q):
    # Share of the total lifetime carried by lifetimes at or above the
    # q-quantile of the lifetimes; 0.0 for an empty diagram.
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        return 0.0
    cutoff = np.quantile(lifetimes, q)
    in_tail = lifetimes >= cutoff
    return _safe_div(lifetimes[in_tail].sum(), lifetimes.sum())

def _birth_death_stats(diag):
    # Mean and population standard deviation (ddof=0) of the births and of
    # the deaths in the cleaned diagram; all zeros for an empty diagram.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return dict.fromkeys(("mean_birth", "mean_death", "std_birth", "std_death"), 0.0)
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    stats = {}
    stats["mean_birth"] = float(np.mean(births))
    stats["mean_death"] = float(np.mean(deaths))
    stats["std_birth"] = float(np.std(births, ddof=0))
    stats["std_death"] = float(np.std(deaths, ddof=0))
    return stats

def _diag_distance_stats(diag):
    # Mean, maximum and sum of (death - birth) / sqrt(2) over the cleaned
    # diagram's pairs; all zeros for an empty diagram.
    reducers = (
        ("mean_diag_dist", np.mean),
        ("max_diag_dist", np.max),
        ("sum_diag_dist", np.sum),
    )
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {name: 0.0 for name, _ in reducers}
    offsets = (pairs[:, 1] - pairs[:, 0]) / np.sqrt(2.0)
    return {name: float(reduce(offsets)) for name, reduce in reducers}

def _centroid_xy(diag):
    # Lifetime-weighted mean birth ("centroid_x") and mean death
    # ("centroid_y"); both 0.0 for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {"centroid_x": 0.0, "centroid_y": 0.0}
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    total = weights.sum()
    if total <= 0:
        return {"centroid_x": 0.0, "centroid_y": 0.0}
    denominator = total + _EPS
    weighted_birth = np.sum(births * weights)
    weighted_death = np.sum(deaths * weights)
    return {
        "centroid_x": float(weighted_birth / denominator),
        "centroid_y": float(weighted_death / denominator),
    }

def _lifetimes_stats(diag):
    # Summary statistics of the lifetimes: count, sum, mean, median,
    # population std, min, max and the L1 / L2 / Linf norms. An empty
    # diagram gives count 0 and 0.0 for everything else.
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        zero_fields = ("sum", "mean", "median", "std", "min", "max", "L1", "L2", "Linf")
        return {"count": 0, **dict.fromkeys(zero_fields, 0.0)}

    magnitudes = np.abs(lifetimes)
    stats = {"count": int(lifetimes.size)}
    stats["sum"] = float(lifetimes.sum())
    stats["mean"] = float(lifetimes.mean())
    stats["median"] = float(np.median(lifetimes))
    stats["std"] = float(lifetimes.std(ddof=0))
    stats["min"] = float(lifetimes.min())
    stats["max"] = float(lifetimes.max())
    stats["L1"] = float(np.sum(magnitudes))
    stats["L2"] = float(np.sqrt(np.sum(lifetimes**2)))
    stats["Linf"] = float(np.max(magnitudes))
    return stats

def _lifetimes_quantiles(diag):
    # The 50/75/90/95/99 % quantiles of the lifetimes, keyed "q50" .. "q99";
    # all zeros for an empty diagram.
    levels = (("q50", 0.50), ("q75", 0.75), ("q90", 0.90), ("q95", 0.95), ("q99", 0.99))
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        return {name: 0.0 for name, _ in levels}
    return {name: float(np.quantile(lifetimes, level)) for name, level in levels}

def _carlsson_coordinates(diag):
    # Five lifetime-weighted sums over the diagram's pairs (L = lifetime):
    #   f1 = sum(L), f2 = sum(b * L), f3 = sum(d * L),
    #   f4 = sum(b**2 * L), f5 = sum(d**2 * L).
    # All zeros for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {f"f{k}": 0.0 for k in range(1, 6)}
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    if weights.sum() <= 0:
        return {f"f{k}": 0.0 for k in range(1, 6)}
    coords = {}
    coords["f1"] = float(weights.sum())
    coords["f2"] = float(np.sum(births * weights))
    coords["f3"] = float(np.sum(deaths * weights))
    coords["f4"] = float(np.sum(births**2 * weights))
    coords["f5"] = float(np.sum(deaths**2 * weights))
    return coords

def _sum_centroid_radial(diag):
    # Lifetime-weighted average of |birth + death| / sqrt(2) over the
    # diagram's pairs; 0.0 for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    total = weights.sum()
    if total <= 0:
        return 0.0
    distance = np.abs((births + deaths) / np.sqrt(2.0))
    return _safe_div(np.sum(distance * weights), total)

def _pete(diag, p=1.6, q=0.5):
    # sum(L**p * |r|**q) / sum(L) over the diagram's pairs, where L is the
    # lifetime and r = (birth + death) / sqrt(2); 0.0 for an empty diagram
    # or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    lifetimes = np.maximum(deaths - births, 0.0)
    total = lifetimes.sum()
    if total <= 0:
        return 0.0
    radial = (births + deaths) / np.sqrt(2.0)
    terms = (lifetimes**p) * (np.abs(radial)**q)
    return _safe_div(np.sum(terms), total)

def compute_features_for_diag(diag, prefix):
    # Build the feature dict for one persistence diagram. Every generic key
    # is `prefix` followed by the feature name. When prefix is "H0_" a set of
    # H0-only features is added as well (the AUC ratios, "H0_bottleneck",
    # "H0_sum_centroid", "H0_centroid_to_energy" and the un-prefixed
    # "PETE_p1.6_q0.5"). All values are Python floats.
    stats = _lifetimes_stats(diag)
    feats = {}

    # Lifetime statistics; the three norms are published under both a
    # "*_lifetime" and a "*_norm" name, and the count also as "betti".
    for suffix, key in (
        ("count_lifetime", "count"),
        ("sum_lifetime", "sum"),
        ("mean_lifetime", "mean"),
        ("median_lifetime", "median"),
        ("std_lifetime", "std"),
        ("min_lifetime", "min"),
        ("max_lifetime", "max"),
        ("L1_lifetime", "L1"),
        ("L2_lifetime", "L2"),
        ("Linf_lifetime", "Linf"),
        ("L1_norm", "L1"),
        ("L2_norm", "L2"),
        ("Linf_norm", "Linf"),
        ("betti", "count"),
    ):
        feats[f"{prefix}{suffix}"] = float(stats[key])

    feats[f"{prefix}energy_concentration"] = _safe_div(stats["L2"], stats["L1"])
    feats[f"{prefix}dominance_share"] = _safe_div(stats["Linf"], stats["L1"])
    feats[f"{prefix}persistence_entropy"] = _persistence_entropy(diag)

    # Helper dicts are copied over key by key, in this order.
    for group in (
        _birth_death_stats(diag),
        _diag_distance_stats(diag),
        _centroid_xy(diag),
        _lifetimes_quantiles(diag),
    ):
        for name, value in group.items():
            feats[f"{prefix}{name}"] = float(value)

    tail80 = _tail_share_q(diag, 0.80)
    tail90 = _tail_share_q(diag, 0.90)
    tail95 = _tail_share_q(diag, 0.95)
    curvature = float(tail90 - tail80)
    feats[f"{prefix}tail_share_q80"] = float(tail80)
    feats[f"{prefix}tail_share_q90"] = float(tail90)
    feats[f"{prefix}tail_share_q95"] = float(tail95)
    feats[f"{prefix}tail_curvature_80_90"] = curvature

    feats[f"{prefix}gini"] = float(_gini_from_lifetimes(_lifetimes(diag)))

    carlsson = _carlsson_coordinates(diag)
    for coord in ("f1", "f2", "f3", "f4", "f5"):
        feats[f"{prefix}Carlsson_{coord}"] = float(carlsson[coord])

    if prefix != "H0_":
        return feats

    # H0-only extras. Some keys below already exist from the generic part
    # and are assigned again here, this time dividing by the lifetime sum.
    area = _auc_tri_max(diag)
    radial_centroid = float(_sum_centroid_radial(diag))
    feats.update({
        "H0_ratio_auc_L1_to_sum": _safe_div(area, stats["sum"]),
        "H0_ratio_auc_to_max": _safe_div(area, stats["max"]),
        "H0_ratio_auc_to_l2": _safe_div(area, stats["L2"]),
        "H0_bottleneck": float(stats["max"]),
        "H0_sum_centroid": radial_centroid,
        "PETE_p1.6_q0.5": float(_pete(diag, p=1.6, q=0.5)),
        "H0_energy_concentration": _safe_div(stats["L2"], stats["sum"]),
        "H0_dominance_share": _safe_div(stats["Linf"], stats["sum"]),
        "H0_tail_curvature_80_90": curvature,
        "H0_centroid_to_energy": _safe_div(radial_centroid, stats["L2"]),
        "H0_gini": float(feats["H0_gini"]),
    })
    return feats

def compute_cross_dim_features(feats_H0, feats_H1):
    # Ratios of an H1 feature to the matching H0 feature (Betti count and
    # persistence entropy). A key missing from either dict counts as 0.0.
    def ratio(h1_key, h0_key):
        top = float(feats_H1.get(h1_key, 0.0))
        bottom = float(feats_H0.get(h0_key, 0.0))
        return _safe_div(top, bottom)

    return {
        "H1_to_H0_betti_ratio": ratio("H1_betti", "H0_betti"),
        "H1_to_H0_entropy_ratio": ratio("H1_persistence_entropy", "H0_persistence_entropy"),
    }

# Features that take part in the vote; run() looks each name up as a column
# of the per-file feature table.
# NOTE(review): several entries are aliases of a single value in
# compute_features_for_diag (for example "H0_L1_lifetime" / "H0_L1_norm" and
# "H1_betti" / "H1_count_lifetime"), so that quantity casts one vote per
# alias. Confirm this weighting is intended.
FEATURE_NAMES = [
    "H0_Carlsson_f1","H0_Carlsson_f3","H0_Carlsson_f5",
    "H0_L1_lifetime","H0_L1_norm","H0_L2_lifetime","H0_L2_norm",
    "H0_Linf_lifetime","H0_Linf_norm","H0_bottleneck","H0_centroid_to_energy",
    "H0_centroid_y","H0_dominance_share","H0_energy_concentration","H0_gini",
    "H0_max_diag_dist","H0_max_lifetime","H0_mean_death","H0_mean_diag_dist",
    "H0_mean_lifetime","H0_median_lifetime","H0_min_lifetime","H0_persistence_entropy",
    "H0_q50","H0_q75","H0_q90","H0_q95","H0_q99","H0_ratio_auc_L1_to_sum",
    "H0_ratio_auc_to_l2","H0_ratio_auc_to_max","H0_std_death","H0_std_lifetime",
    "H0_sum_centroid","H0_sum_diag_dist","H0_sum_lifetime","H0_tail_curvature_80_90",
    "H0_tail_share_q80","H0_tail_share_q90","H0_tail_share_q95",
    "H1_Carlsson_f1","H1_Carlsson_f2","H1_Carlsson_f3",
    "H1_L1_lifetime","H1_L1_norm","H1_L2_lifetime","H1_L2_norm",
    "H1_Linf_lifetime","H1_Linf_norm","H1_betti","H1_count_lifetime",
    "H1_dominance_share","H1_energy_concentration","H1_gini",
    "H1_max_diag_dist","H1_max_lifetime","H1_mean_diag_dist","H1_mean_lifetime",
    "H1_median_lifetime","H1_min_lifetime","H1_persistence_entropy",
    "H1_q50","H1_q75","H1_q90","H1_q95","H1_q99",
    "H1_std_birth","H1_std_death","H1_std_lifetime",
    "H1_sum_diag_dist","H1_sum_lifetime",
    "H1_tail_share_q80","H1_tail_share_q90","H1_tail_share_q95",
    "H1_to_H0_betti_ratio","H1_to_H0_entropy_ratio",
    "PETE_p1.6_q0.5"
]

def run():
    # Run the detector over every CSV below INPUT_DIR and write one result
    # CSV ("timestamp", "anomaly_score") per input file to
    # OUTPUT_DIR/<category>/<DETECTOR_NAME>_<file name>.
    #
    # Per file: slide a WINDOW_SIZE window over the "value" column,
    # Takens-embed each window, compute H0/H1 persistence features, score
    # every feature series with _vead_series, let each feature vote for its
    # highest-scoring indices, and give the most-voted indices an anomaly
    # score of 1.0 (all others 0.0). An error in one file is printed and the
    # run continues with the next file.
    files = glob.glob(os.path.join(INPUT_DIR, "**", "*.csv"), recursive=True)
    print(f"Found {len(files)} data files in '{INPUT_DIR}'")

    # Voting config. The tuning driver rewrites these two assignments with a
    # regex, so keep each one as a plain integer-literal assignment.
    K_PER_FEATURE = 4  # number of top-scoring indices each feature votes for
    TOP_FINAL     = 10  # number of most-voted indices flagged per file

    for filepath in files:
        if ".ipynb_checkpoints" in filepath:
            continue

        try:
            df = pd.read_csv(filepath)
            df.columns = [c.strip().lower() for c in df.columns]
            if "value" not in df.columns or "timestamp" not in df.columns:
                continue

            vals = pd.to_numeric(df["value"], errors="coerce").astype(float).to_numpy()
            n = len(vals)

            rows = []
            for i in range(WINDOW_SIZE - 1, n):
                w = vals[i - WINDOW_SIZE + 1 : i + 1]
                try:
                    emb = takens_embed(w, TAU, DIMENSION)
                    dgms = ripser(emb, maxdim=MAXDIM)["dgms"]
                except Exception:
                    # Best effort: a window that cannot be embedded or
                    # processed contributes empty diagrams (all-zero features).
                    dgms = [np.empty((0, 2)), np.empty((0, 2))]

                D0 = dgms[0] if len(dgms) > 0 else np.empty((0, 2))
                D1 = dgms[1] if (MAXDIM >= 1 and len(dgms) > 1) else np.empty((0, 2))

                feats_H0 = compute_features_for_diag(D0, "H0_")
                feats_H1 = compute_features_for_diag(D1, "H1_")
                cross    = compute_cross_dim_features(feats_H0, feats_H1)

                merged = {}
                merged.update(feats_H0)
                merged.update(feats_H1)
                merged.update(cross)
                merged["index"] = i
                rows.append(merged)

            # Re-index the features onto all n samples; the first
            # WINDOW_SIZE - 1 samples have no window and are filled with 0.0.
            # NOTE(review): this zero padding makes every feature step from
            # 0.0 to its first real value at index WINDOW_SIZE - 1, which
            # _vead_series sees as a velocity/acceleration spike. Confirm
            # that votes at the warm-up boundary are intended.
            feat_df = pd.DataFrame(rows)
            full = pd.DataFrame(index=np.arange(n))
            if not feat_df.empty:
                feat_df = feat_df.set_index("index")
                full = full.join(feat_df, how="left")

            full = full.replace([np.inf, -np.inf], np.nan).fillna(0.0)

            votes = np.zeros(n, dtype=int)

            for feat_name in FEATURE_NAMES:
                # A feature without a column (e.g. a file shorter than one
                # window) has nothing to vote with; skip it instead of
                # failing on a scalar default.
                if feat_name not in full.columns:
                    continue
                series = pd.to_numeric(full[feat_name], errors="coerce").astype(float).to_numpy()
                series = np.nan_to_num(series, nan=0.0, posinf=0.0, neginf=0.0)

                vead_scores = _vead_series(series, kv=KV, ka=KA, mode=MODE)

                mx = float(np.max(vead_scores)) if len(vead_scores) else 0.0
                if (not np.isfinite(mx)) or mx <= 0:
                    continue

                scores01 = np.clip(vead_scores / mx, 0.0, 1.0)

                k_eff = min(K_PER_FEATURE, n)
                if k_eff <= 0:
                    continue
                topk_idx = np.argpartition(scores01, -k_eff)[-k_eff:]
                # Only indices with a positive score earn a vote; zero-score
                # entries in the top-k are arbitrary tie fill.
                topk_idx = topk_idx[scores01[topk_idx] > 0]
                votes[topk_idx] += 1

            final_scores = np.zeros(n, dtype=float)
            if n > 0 and votes.max() > 0:
                top_final_eff = min(TOP_FINAL, n)
                # Most votes first; ties resolved by the lower index.
                order = np.lexsort((np.arange(n), -votes))
                chosen = order[:top_final_eff]
                # Never flag an index that received no vote at all.
                chosen = chosen[votes[chosen] > 0]
                final_scores[chosen] = 1.0

            rel = os.path.relpath(filepath, INPUT_DIR)
            category = os.path.dirname(rel)
            base_name = os.path.basename(rel)

            out_dir = os.path.join(OUTPUT_DIR, category)
            os.makedirs(out_dir, exist_ok=True)
            out_name = f"{DETECTOR_NAME}_" + base_name
            out_path = os.path.join(out_dir, out_name)

            out_df = pd.DataFrame({
                "timestamp": df["timestamp"],
                "anomaly_score": final_scores
            })
            out_df.to_csv(out_path, index=False)

        except Exception as e:
            print(f"!! Error processing {filepath}: {e}")
            continue

if __name__ == "__main__":
    run()
"""

# ============================================================
# 3) Helpers
# ============================================================
def run_cmd(cmd_list):
    """Run a command and return ``(returncode, output)``.

    ``output`` is the command's stdout with stderr merged into it, decoded
    as text. The call blocks until the command has finished.
    """
    completed = subprocess.run(
        cmd_list,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    return completed.returncode, completed.stdout

def parse_final_scores(text):
    """Extract the final scores from a NAB scoring log.

    Looks for lines of the form
    ``Final score for '<detector>' detector on '<profile>' profile = <number>``
    and works for any detector label and profile name.

    Returns:
        dict mapping ``(detector_label, profile_name)`` to the score as float.
        Empty when nothing matches (or ``text`` is empty/None).
    """
    # Match one well-formed number (optional sign, decimals, exponent). A
    # loose character class would also capture trailing punctuation such as
    # "33.85.", which float() rejects, and would cut "1e-05" down to "1".
    number = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
    pat = r"Final score for '([^']+)' detector on '([^']+)' profile = (" + number + r")"
    found = re.findall(pat, text or "")
    return {(det, prof): float(val) for det, prof, val in found}

def make_variant_code(template, K, TOP):
    """Return ``template`` with its voting parameters set to ``K`` and ``TOP``.

    Only the first integer assignment to ``K_PER_FEATURE`` and the first to
    ``TOP_FINAL`` are rewritten; the rest of the source is left untouched.

    Raises:
        ValueError: if either assignment is missing from ``template`` (the
            variant would otherwise silently equal the template), or if
            ``K`` / ``TOP`` cannot be converted to int.
    """
    code = template
    for name, value in (("K_PER_FEATURE", K), ("TOP_FINAL", TOP)):
        pattern = r"\b" + name + r"\s*=\s*\d+"
        code, hits = re.subn(pattern, f"{name} = {int(value)}", code, count=1)
        if hits == 0:
            raise ValueError(f"template has no integer assignment to {name}")
    return code

# ============================================================
# 4) GRID SEARCH
# ============================================================
K_LIST   = [2, 3, 4, 5, 6]
TOP_LIST = [3, 5, 7, 9, 11, 13]

results = []

for K, TOP in itertools.product(K_LIST, TOP_LIST):
    exp_id = f"K{K}_TOP{TOP}"
    print("\n" + "="*80)
    print(f"EXPERIMENT: K_PER_FEATURE={K}, TOP_FINAL={TOP}  ->  TDA_VEAD_Method")
    print("="*80)

    # Write detector code (only K/TOP changed)
    code = make_variant_code(TEMPLATE, K, TOP)
    with open("my_algo.py", "w") as f:
        f.write(code)

    # Start every experiment from an empty results folder. The detector
    # skips files it cannot process, and a result file left over from the
    # previous experiment would otherwise be scored as if it were new.
    shutil.rmtree(os.path.join("results", "TDA_VEAD_Method"), ignore_errors=True)

    # Run detector (generates results/TDA_VEAD_Method/*)
    rc1, out1 = run_cmd(["python", "my_algo.py"])
    if rc1 != 0:
        # Keep the detector's output so the failure can be diagnosed.
        detector_log = f"/content/NAB/tuning_logs/detector_{exp_id}.log"
        with open(detector_log, "w") as f:
            f.write(out1)
        print("Detector failed.")
        print("Detector log:", detector_log)
        results.append({
            "K_PER_FEATURE": K, "TOP_FINAL": TOP,
            "standard": None, "lowFP": None, "lowFN": None,
            "rc_score": None,
            "log_file": detector_log,
            "status": "detector_failed"
        })
        continue

    # Run NAB scoring (IMPORTANT: --skipConfirmation)
    rc2, out2 = run_cmd([
        "python", "run.py",
        "--optimize", "--score",
        "--detectors", "TDA_VEAD_Method",
        "--normalize",
        "--skipConfirmation"
    ])

    # Save the full scoring log for this experiment.
    log_path = f"/content/NAB/tuning_logs/runpy_{exp_id}.log"
    with open(log_path, "w") as f:
        f.write(out2)

    scores = parse_final_scores(out2)

    # Profiles are matched by name only, across any detector label in the
    # log; a profile that was not reported stays None.
    def pick(profile_name):
        for (det_label, prof), val in scores.items():
            if prof == profile_name:
                return val
        return None

    def pick_first(*profile_names):
        # First profile name that has a score. Compared against None rather
        # than by truthiness so that a genuine score of 0.0 is kept.
        for name in profile_names:
            val = pick(name)
            if val is not None:
                return val
        return None

    row = {
        "K_PER_FEATURE": K,
        "TOP_FINAL": TOP,
        "standard": pick_first("standard", "VEAD_Method_standard"),
        "lowFP":    pick_first("reward_low_FP_rate", "VEAD_Method_reward_low_FP_rate"),
        "lowFN":    pick_first("reward_low_FN_rate", "VEAD_Method_reward_low_FN_rate"),
        "rc_score": rc2,
        "log_file": log_path,
        "status": "ok" if (rc2 == 0 and len(scores) > 0) else "score_parse_failed"
    }
    results.append(row)
    print("Extracted:", row)

# ============================================================
# 5) SAVE RESULTS
# ============================================================
# One row per experiment, written to CSV before any reporting.
res_df = pd.DataFrame(results)
out_csv = "/content/NAB/parameter_tuning_results.csv"
res_df.to_csv(out_csv, index=False)
print("\n✅ Saved:", out_csv)

# Rank the experiments that produced a "standard" score.
best = res_df.dropna(subset=["standard"]).sort_values("standard", ascending=False).head(10)
print("\nTop 10 by standard score:")
# reindex instead of plain column selection: when every experiment failed
# before scoring, columns such as "log_file" may be absent, and selecting
# them directly would raise KeyError.
report_cols = ["K_PER_FEATURE","TOP_FINAL","standard","lowFP","lowFN","status","log_file"]
print(best.reindex(columns=report_cols))


--- 1. CLEAN START ---
Cloning into 'NAB'...
remote: Enumerating objects: 7119, done.[K
remote: Counting objects: 100% (699/699), done.[K
remote: Compressing objects: 100% (204/204), done.[K
remote: Total 7119 (delta 552), reused 495 (delta 495), pack-reused 6420 (from 1)[K
Receiving objects: 100% (7119/7119), 86.13 MiB | 22.70 MiB/s, done.
Resolving deltas: 100% (5001/5001), done.
Updating files: 100% (1186/1186), done.

EXPERIMENT: K_PER_FEATURE=2, TOP_FINAL=3  ->  TDA_VEAD_Method
Extracted: {'K_PER_FEATURE': 2, 'TOP_FINAL': 3, 'standard': 33.85, 'lowFP': 31.14, 'lowFN': 36.07, 'rc_score': 0, 'log_file': '/content/NAB/tuning_logs/runpy_K2_TOP3.log', 'status': 'ok'}

EXPERIMENT: K_PER_FEATURE=2, TOP_FINAL=5  ->  TDA_VEAD_Method
Extracted: {'K_PER_FEATURE': 2, 'TOP_FINAL': 5, 'standard': 38.11, 'lowFP': 33.02, 'lowFN': 41.5, 'rc_score': 0, 'log_file': '/content/NAB/tuning_logs/runpy_K2_TOP5.log', 'status': 'ok'}

EXPERIMENT: K_PER_FEATURE=2, TOP_FINAL=7  ->  TDA_VEAD_Method
Extract

In [None]:
import os, re, shutil, itertools, subprocess
import pandas as pd

# ============================================================
# 1) CLEAN START & CLONE NAB
# ============================================================
print("--- 1. CLEAN START ---")
# All paths below are relative to the Colab content directory.
os.chdir("/content")

# Delete any existing NAB directory before cloning a fresh copy.
if os.path.exists("NAB"):
    shutil.rmtree("NAB")

# Notebook shell escapes: clone the NAB repository and install ripser.
!git clone https://github.com/numenta/NAB.git
!pip install -q ripser

# Everything from here on runs with the NAB checkout as working directory.
os.chdir("/content/NAB")

# Create config/thresholds.json holding an empty JSON object if the file
# is not already present.
os.makedirs("config", exist_ok=True)
thr_path = os.path.join("config", "thresholds.json")
if not os.path.exists(thr_path):
    with open(thr_path, "w") as f:
        f.write("{}")

# Directory that receives one scoring log per grid-search experiment.
os.makedirs("tuning_logs", exist_ok=True)

# ============================================================
# 2) TEMPLATE = YOUR my_algo.py CODE (UNCHANGED EXCEPT WE WILL
#    regex-replace ONLY K_PER_FEATURE and TOP_FINAL)
#    IMPORTANT: DETECTOR_NAME stays "TDA_VEAD_Method"
# ============================================================
TEMPLATE = r"""
import os
import glob
import numpy as np
import pandas as pd
from ripser import ripser
import warnings

warnings.filterwarnings("ignore")

DETECTOR_NAME = "TDA_VEAD_Method"
INPUT_DIR = "data"
OUTPUT_DIR = os.path.join("results", DETECTOR_NAME)

# ----------------------------------------------------------
# Embedding parameters (fixed in NAB for fairness)
# ----------------------------------------------------------
WINDOW_SIZE = 14
TAU         = 1
DIMENSION   = 7
_EPS        = 1e-12
MAXDIM      = 1  # H0 + H1

# ==========================================================
# 0. VEAD CONFIGURATION
# ==========================================================
KV   = 3.5
KA   = 3.5
MODE = "abs_plateau"  # "strict" | "plateau" | "abs_plateau"

def _vead_series(raw_vals, kv=KV, ka=KA, mode=MODE):
    # Velocity/acceleration score for one feature series.
    #
    # The input is coerced to float and interpolated in both directions. Its
    # first difference (velocity) and second difference (acceleration) are
    # each turned into a median/MAD z-score, rectified according to `mode`,
    # scaled by `kv` / `ka` and multiplied together:
    #   "strict"      -> negative z-scores are clipped to 0
    #   "plateau"     -> z-scores that are not above -0.25 are set to 0
    #   "abs_plateau" -> absolute value of the z-scores
    # Any other mode leaves the z-scores unchanged; a falsy mode means
    # "strict". NaN and infinite products are returned as 0.0.
    series = pd.Series(raw_vals, dtype=float)
    series = pd.to_numeric(series, errors="coerce").interpolate(limit_direction="both")

    velocity = series.diff(1)
    acceleration = velocity.diff(1)

    def _robust_z(values):
        # Median-centred values divided by the MAD (plus a tiny constant so
        # a zero MAD does not divide by zero).
        values = np.asarray(values, dtype=float)
        center = np.nanmedian(values)
        spread = np.nanmedian(np.abs(values - center)) + 1e-12
        return (values - center) / spread

    z_vel = _robust_z(velocity.values)
    z_acc = _robust_z(acceleration.values)

    rectifiers = {
        "strict": lambda z: np.maximum(0.0, z),
        "plateau": lambda z: np.where(z > -0.25, z, 0.0),
        "abs_plateau": np.abs,
    }
    rectify = rectifiers.get((mode or "strict").lower())
    if rectify is not None:
        z_vel = rectify(z_vel)
        z_acc = rectify(z_acc)

    product = (kv * z_vel) * (ka * z_acc)
    return np.nan_to_num(product, nan=0.0, posinf=0.0, neginf=0.0)

# ==========================================================
# 1. TAKENS EMBEDDING
# ==========================================================
def takens_embed(window, time_delay, dimension):
    # Delay-embed a 1-D window. Row i of the result is
    #   (window[i], window[i + time_delay], ..., window[i + (dimension - 1) * time_delay])
    # so the result has shape (m, dimension) with
    #   m = len(window) - (dimension - 1) * time_delay.
    # Raises ValueError when time_delay or dimension is below 1, or when the
    # window is too short to produce at least one embedded point.
    if time_delay < 1 or dimension < 1:
        raise ValueError("Takens time_delay and dimension must be >= 1.")
    m = len(window) - (dimension - 1) * time_delay
    if m <= 0:
        raise ValueError("Takens parameters too large for this window.")
    # Column j is the window shifted by j * time_delay. Starting column j at
    # offset j and sub-sampling with step time_delay only equals a delay
    # embedding for time_delay == 1 and can give columns of unequal length
    # for larger delays.
    return np.stack(
        [window[j * time_delay:j * time_delay + m] for j in range(dimension)],
        axis=1,
    )

# ==========================================================
# 2. PERSISTENCE DIAGRAM UTILITIES + FEATURE FUNCTIONS
# ==========================================================
def _clean_diag(diag):
    # Normalise a persistence diagram to a float array of shape (k, 2) that
    # keeps only finite (birth, death) pairs with death > birth. Anything
    # that is None, empty, or not an (n, 2) array yields an empty (0, 2) array.
    nothing = np.empty((0, 2), dtype=float)
    if diag is None:
        return nothing
    pairs = np.asarray(diag, dtype=float)
    well_formed = pairs.ndim == 2 and pairs.shape[1] == 2 and pairs.size != 0
    if not well_formed:
        return nothing
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    keep = np.isfinite(births) & np.isfinite(deaths) & (deaths > births)
    if not keep.any():
        return nothing
    return np.stack([births[keep], deaths[keep]], axis=1)

def _lifetimes(diag):
    # Lifetimes (death - birth, floored at 0) of the cleaned diagram's pairs;
    # an empty float array when no valid pair remains.
    pairs = _clean_diag(diag)
    if pairs.size:
        spans = pairs[:, 1] - pairs[:, 0]
        return np.maximum(spans, 0.0)
    return np.empty(0, dtype=float)

def _safe_div(a, b):
    # a / (b + _EPS) as a Python float; the _EPS offset keeps a zero
    # denominator from dividing by zero.
    numerator = float(a)
    denominator = float(b + _EPS)
    return numerator / denominator

# Trapezoidal integration: use np.trapezoid when this NumPy provides it,
# otherwise fall back to np.trapz.
try:
    _trapz = np.trapezoid
except AttributeError:
    _trapz = np.trapz

def _auc_tri_max(diag):
    # Area under the pointwise maximum of one triangle per (birth, death)
    # pair, sampled on 64 evenly spaced points between the smallest birth and
    # the largest death. Each triangle rises from 0 at birth to
    # (death - birth) / 2 at the midpoint and falls back to 0 at death.
    # Returns 0.0 for an empty diagram or a zero-width grid.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    lowest_birth = pairs[:, 0].min()
    highest_death = pairs[:, 1].max()
    if lowest_birth == highest_death:
        return 0.0

    grid = np.linspace(lowest_birth, highest_death, 64)
    envelope = np.zeros_like(grid)

    for birth, death in pairs:
        mid = 0.5 * (birth + death)
        half = 0.5 * (death - birth)
        if half <= 0:
            continue

        rising = (grid >= birth) & (grid <= mid)
        falling = (grid >= mid) & (grid <= death)
        up_slope = half / max(mid - birth, _EPS)
        down_slope = half / max(death - mid, _EPS)

        envelope[rising] = np.maximum(envelope[rising], (grid[rising] - birth) * up_slope)
        envelope[falling] = np.maximum(envelope[falling], (death - grid[falling]) * down_slope)

    return float(_trapz(envelope, grid))

def _persistence_entropy(diag):
    # Entropy (natural log) of the lifetimes after dividing them by their
    # sum; 0.0 when there are no lifetimes or their sum is not positive.
    lifetimes = _lifetimes(diag)
    total = lifetimes.sum() if lifetimes.size else 0.0
    if total <= 0:
        return 0.0
    weights = lifetimes / (total + _EPS)
    log_weights = np.log(weights + _EPS)
    return float(-np.sum(weights * log_weights))

def _gini_from_lifetimes(L):
    # Gini coefficient of the lifetimes in L, computed from the cumulative
    # sums of the sorted values; 0.0 for empty input or a non-positive total.
    ordered = np.sort(L)
    count = len(ordered)
    if count == 0:
        return 0.0
    total = ordered.sum()
    if total <= 0:
        return 0.0
    running = np.cumsum(ordered)
    lorenz_sum = np.sum(running / (count * total))
    return float(1 + 1 / count - 2 * lorenz_sum)

def _tail_share_q(diag, q):
    # Share of the total lifetime carried by lifetimes at or above the
    # q-quantile of the lifetimes; 0.0 for an empty diagram.
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        return 0.0
    cutoff = np.quantile(lifetimes, q)
    in_tail = lifetimes >= cutoff
    return _safe_div(lifetimes[in_tail].sum(), lifetimes.sum())

def _birth_death_stats(diag):
    # Mean and population standard deviation (ddof=0) of the births and of
    # the deaths in the cleaned diagram; all zeros for an empty diagram.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return dict.fromkeys(("mean_birth", "mean_death", "std_birth", "std_death"), 0.0)
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    stats = {}
    stats["mean_birth"] = float(np.mean(births))
    stats["mean_death"] = float(np.mean(deaths))
    stats["std_birth"] = float(np.std(births, ddof=0))
    stats["std_death"] = float(np.std(deaths, ddof=0))
    return stats

def _diag_distance_stats(diag):
    # Mean, maximum and sum of (death - birth) / sqrt(2) over the cleaned
    # diagram's pairs; all zeros for an empty diagram.
    reducers = (
        ("mean_diag_dist", np.mean),
        ("max_diag_dist", np.max),
        ("sum_diag_dist", np.sum),
    )
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {name: 0.0 for name, _ in reducers}
    offsets = (pairs[:, 1] - pairs[:, 0]) / np.sqrt(2.0)
    return {name: float(reduce(offsets)) for name, reduce in reducers}

def _centroid_xy(diag):
    # Lifetime-weighted mean birth ("centroid_x") and mean death
    # ("centroid_y"); both 0.0 for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {"centroid_x": 0.0, "centroid_y": 0.0}
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    total = weights.sum()
    if total <= 0:
        return {"centroid_x": 0.0, "centroid_y": 0.0}
    denominator = total + _EPS
    weighted_birth = np.sum(births * weights)
    weighted_death = np.sum(deaths * weights)
    return {
        "centroid_x": float(weighted_birth / denominator),
        "centroid_y": float(weighted_death / denominator),
    }

def _lifetimes_stats(diag):
    # Summary statistics of the lifetimes: count, sum, mean, median,
    # population std, min, max and the L1 / L2 / Linf norms. An empty
    # diagram gives count 0 and 0.0 for everything else.
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        zero_fields = ("sum", "mean", "median", "std", "min", "max", "L1", "L2", "Linf")
        return {"count": 0, **dict.fromkeys(zero_fields, 0.0)}

    magnitudes = np.abs(lifetimes)
    stats = {"count": int(lifetimes.size)}
    stats["sum"] = float(lifetimes.sum())
    stats["mean"] = float(lifetimes.mean())
    stats["median"] = float(np.median(lifetimes))
    stats["std"] = float(lifetimes.std(ddof=0))
    stats["min"] = float(lifetimes.min())
    stats["max"] = float(lifetimes.max())
    stats["L1"] = float(np.sum(magnitudes))
    stats["L2"] = float(np.sqrt(np.sum(lifetimes**2)))
    stats["Linf"] = float(np.max(magnitudes))
    return stats

def _lifetimes_quantiles(diag):
    # The 50/75/90/95/99 % quantiles of the lifetimes, keyed "q50" .. "q99";
    # all zeros for an empty diagram.
    levels = (("q50", 0.50), ("q75", 0.75), ("q90", 0.90), ("q95", 0.95), ("q99", 0.99))
    lifetimes = _lifetimes(diag)
    if lifetimes.size == 0:
        return {name: 0.0 for name, _ in levels}
    return {name: float(np.quantile(lifetimes, level)) for name, level in levels}

def _carlsson_coordinates(diag):
    # Five lifetime-weighted sums over the diagram's pairs (L = lifetime):
    #   f1 = sum(L), f2 = sum(b * L), f3 = sum(d * L),
    #   f4 = sum(b**2 * L), f5 = sum(d**2 * L).
    # All zeros for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return {f"f{k}": 0.0 for k in range(1, 6)}
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    if weights.sum() <= 0:
        return {f"f{k}": 0.0 for k in range(1, 6)}
    coords = {}
    coords["f1"] = float(weights.sum())
    coords["f2"] = float(np.sum(births * weights))
    coords["f3"] = float(np.sum(deaths * weights))
    coords["f4"] = float(np.sum(births**2 * weights))
    coords["f5"] = float(np.sum(deaths**2 * weights))
    return coords

def _sum_centroid_radial(diag):
    # Lifetime-weighted average of |birth + death| / sqrt(2) over the
    # diagram's pairs; 0.0 for an empty diagram or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    weights = np.maximum(deaths - births, 0.0)
    total = weights.sum()
    if total <= 0:
        return 0.0
    distance = np.abs((births + deaths) / np.sqrt(2.0))
    return _safe_div(np.sum(distance * weights), total)

def _pete(diag, p=1.6, q=0.5):
    # sum(L**p * |r|**q) / sum(L) over the diagram's pairs, where L is the
    # lifetime and r = (birth + death) / sqrt(2); 0.0 for an empty diagram
    # or zero total lifetime.
    pairs = _clean_diag(diag)
    if pairs.size == 0:
        return 0.0
    births = pairs[:, 0]
    deaths = pairs[:, 1]
    lifetimes = np.maximum(deaths - births, 0.0)
    total = lifetimes.sum()
    if total <= 0:
        return 0.0
    radial = (births + deaths) / np.sqrt(2.0)
    terms = (lifetimes**p) * (np.abs(radial)**q)
    return _safe_div(np.sum(terms), total)

def compute_features_for_diag(diag, prefix):
    # Build the flat feature dict for one persistence diagram.
    #
    # diag   : persistence diagram handed unchanged to the helper functions.
    # prefix : string prepended to every generic feature name (the callers in
    #          this file use "H0_" and "H1_").
    #
    # Returns a dict mapping feature name -> value. When prefix is exactly
    # "H0_" a set of extra H0-only features is appended, and a few of the
    # generic H0 entries are re-assigned at the end.
    stats = _lifetimes_stats(diag)
    out = {}

    # Basic lifetime statistics.
    for stat_key in ("count", "sum", "mean", "median", "std", "min", "max"):
        out[f"{prefix}{stat_key}_lifetime"] = float(stats[stat_key])

    # The norms are published twice, under "*_lifetime" and under "*_norm".
    for suffix in ("lifetime", "norm"):
        for norm_key in ("L1", "L2", "Linf"):
            out[f"{prefix}{norm_key}_{suffix}"] = float(stats[norm_key])

    out[f"{prefix}betti"] = float(stats["count"])
    out[f"{prefix}energy_concentration"] = _safe_div(stats["L2"], stats["L1"])
    out[f"{prefix}dominance_share"] = _safe_div(stats["Linf"], stats["L1"])
    out[f"{prefix}persistence_entropy"] = _persistence_entropy(diag)

    # Helper dicts are copied over key by key with the prefix applied.
    for key, value in _birth_death_stats(diag).items():
        out[f"{prefix}{key}"] = float(value)
    for key, value in _diag_distance_stats(diag).items():
        out[f"{prefix}{key}"] = float(value)

    centroid = _centroid_xy(diag)
    for axis_key in ("centroid_x", "centroid_y"):
        out[f"{prefix}{axis_key}"] = float(centroid[axis_key])

    for key, value in _lifetimes_quantiles(diag).items():
        out[f"{prefix}{key}"] = float(value)

    # Tail shares at three quantile levels plus the 80 -> 90 difference.
    tails = {
        pct: _tail_share_q(diag, level)
        for pct, level in ((80, 0.80), (90, 0.90), (95, 0.95))
    }
    for pct, share in tails.items():
        out[f"{prefix}tail_share_q{pct}"] = float(share)
    out[f"{prefix}tail_curvature_80_90"] = float(tails[90] - tails[80])

    lifetimes = _lifetimes(diag)
    out[f"{prefix}gini"] = float(_gini_from_lifetimes(lifetimes))

    carlsson = _carlsson_coordinates(diag)
    for idx in range(1, 6):
        out[f"{prefix}Carlsson_f{idx}"] = float(carlsson[f"f{idx}"])

    if prefix != "H0_":
        return out

    # H0-only extras (these names are fixed and do not use the prefix argument).
    auc = _auc_tri_max(diag)
    out["H0_ratio_auc_L1_to_sum"] = _safe_div(auc, stats["sum"])
    out["H0_ratio_auc_to_max"] = _safe_div(auc, stats["max"])
    out["H0_ratio_auc_to_l2"] = _safe_div(auc, stats["L2"])
    out["H0_bottleneck"] = float(stats["max"])
    out["H0_sum_centroid"] = float(_sum_centroid_radial(diag))
    out["PETE_p1.6_q0.5"] = float(_pete(diag, p=1.6, q=0.5))
    # The next entries overwrite generic values already stored above.
    out["H0_energy_concentration"] = _safe_div(stats["L2"], stats["sum"])
    out["H0_dominance_share"] = _safe_div(stats["Linf"], stats["sum"])
    out["H0_tail_curvature_80_90"] = float(tails[90] - tails[80])
    out["H0_centroid_to_energy"] = _safe_div(out["H0_sum_centroid"], stats["L2"])
    out["H0_gini"] = float(out["H0_gini"])
    return out

def compute_cross_dim_features(feats_H0, feats_H1):
    # Ratios that relate the H1 feature dict to the H0 feature dict.
    #
    # Missing inputs are read as 0.0; the division itself is delegated to
    # _safe_div. Returns a dict with the two ratio features.
    def _as_float(features, key):
        return float(features.get(key, 0.0))

    betti_ratio = _safe_div(
        _as_float(feats_H1, "H1_betti"),
        _as_float(feats_H0, "H0_betti"),
    )
    entropy_ratio = _safe_div(
        _as_float(feats_H1, "H1_persistence_entropy"),
        _as_float(feats_H0, "H0_persistence_entropy"),
    )
    return {
        "H1_to_H0_betti_ratio": betti_ratio,
        "H1_to_H0_entropy_ratio": entropy_ratio,
    }

# Names of the feature columns that take part in the voting step of run().
# run() iterates over this list in order and looks each name up in the
# per-file feature table, so every entry has to match a key produced by
# compute_features_for_diag (H0_* / H1_* / PETE_*) or by
# compute_cross_dim_features (H1_to_H0_*).
FEATURE_NAMES = [
    "H0_Carlsson_f1","H0_Carlsson_f3","H0_Carlsson_f5",
    "H0_L1_lifetime","H0_L1_norm","H0_L2_lifetime","H0_L2_norm",
    "H0_Linf_lifetime","H0_Linf_norm","H0_bottleneck","H0_centroid_to_energy",
    "H0_centroid_y","H0_dominance_share","H0_energy_concentration","H0_gini",
    "H0_max_diag_dist","H0_max_lifetime","H0_mean_death","H0_mean_diag_dist",
    "H0_mean_lifetime","H0_median_lifetime","H0_min_lifetime","H0_persistence_entropy",
    "H0_q50","H0_q75","H0_q90","H0_q95","H0_q99","H0_ratio_auc_L1_to_sum",
    "H0_ratio_auc_to_l2","H0_ratio_auc_to_max","H0_std_death","H0_std_lifetime",
    "H0_sum_centroid","H0_sum_diag_dist","H0_sum_lifetime","H0_tail_curvature_80_90",
    "H0_tail_share_q80","H0_tail_share_q90","H0_tail_share_q95",
    "H1_Carlsson_f1","H1_Carlsson_f2","H1_Carlsson_f3",
    "H1_L1_lifetime","H1_L1_norm","H1_L2_lifetime","H1_L2_norm",
    "H1_Linf_lifetime","H1_Linf_norm","H1_betti","H1_count_lifetime",
    "H1_dominance_share","H1_energy_concentration","H1_gini",
    "H1_max_diag_dist","H1_max_lifetime","H1_mean_diag_dist","H1_mean_lifetime",
    "H1_median_lifetime","H1_min_lifetime","H1_persistence_entropy",
    "H1_q50","H1_q75","H1_q90","H1_q95","H1_q99",
    "H1_std_birth","H1_std_death","H1_std_lifetime",
    "H1_sum_diag_dist","H1_sum_lifetime",
    "H1_tail_share_q80","H1_tail_share_q90","H1_tail_share_q95",
    "H1_to_H0_betti_ratio","H1_to_H0_entropy_ratio",
    "PETE_p1.6_q0.5"
]

def run():
    # Score every data file under INPUT_DIR and write one result CSV per file.
    #
    # For each CSV that has "timestamp" and "value" columns:
    #   1. slide a WINDOW_SIZE window over the values, Takens-embed each window
    #      and compute H0/H1 persistence-diagram features with ripser;
    #   2. turn every feature series listed in FEATURE_NAMES into VEAD scores
    #      and let the feature vote for its highest-scoring indices;
    #   3. give the most-voted indices anomaly_score 1.0 (0.0 everywhere else);
    #   4. write timestamp / anomaly_score to
    #      OUTPUT_DIR/<category>/<DETECTOR_NAME>_<file name>.
    # A failure in one file is reported on stdout and the remaining files are
    # still processed. Nothing is returned.
    files = glob.glob(os.path.join(INPUT_DIR, "**", "*.csv"), recursive=True)
    print(f"Found {len(files)} data files in '{INPUT_DIR}'")

    # Voting config. The tuning driver rewrites the two integer literals below
    # with a regex, so each must stay a plain integer assignment and must be
    # the first place in this file where the name is followed by "= <digits>".
    K_PER_FEATURE = 4  # each feature votes for at most this many top-scoring indices
    TOP_FINAL     = 10  # final anomalies = at most this many most-voted indices

    for filepath in files:
        if ".ipynb_checkpoints" in filepath:
            continue

        try:
            df = pd.read_csv(filepath)
            df.columns = [c.strip().lower() for c in df.columns]
            if "value" not in df.columns or "timestamp" not in df.columns:
                continue

            vals = pd.to_numeric(df["value"], errors="coerce").astype(float).to_numpy()
            n = len(vals)

            # One feature row per window; row i describes vals[i-WINDOW_SIZE+1 .. i].
            rows = []
            for i in range(WINDOW_SIZE - 1, n):
                w = vals[i - WINDOW_SIZE + 1 : i + 1]
                try:
                    emb = takens_embed(w, TAU, DIMENSION)
                    dgms = ripser(emb, maxdim=MAXDIM)["dgms"]
                except Exception:
                    # Deliberate best effort: a window that cannot be embedded
                    # or reduced contributes empty diagrams (all-zero features).
                    dgms = [np.empty((0, 2)), np.empty((0, 2))]

                D0 = dgms[0] if len(dgms) > 0 else np.empty((0, 2))
                D1 = dgms[1] if (MAXDIM >= 1 and len(dgms) > 1) else np.empty((0, 2))

                feats_H0 = compute_features_for_diag(D0, "H0_")
                feats_H1 = compute_features_for_diag(D1, "H1_")
                cross    = compute_cross_dim_features(feats_H0, feats_H1)

                merged = {}
                merged.update(feats_H0)
                merged.update(feats_H1)
                merged.update(cross)
                merged["index"] = i
                rows.append(merged)

            # Align the features to the full 0..n-1 index; positions without a
            # complete window (and any inf/NaN) become 0.0.
            feat_df = pd.DataFrame(rows)
            full = pd.DataFrame(index=np.arange(n))
            if not feat_df.empty:
                feat_df = feat_df.set_index("index")
                full = full.join(feat_df, how="left")

            full = full.replace([np.inf, -np.inf], np.nan).fillna(0.0)

            votes = np.zeros(n, dtype=int)

            for feat_name in FEATURE_NAMES:
                # A feature that was never produced (for example when the
                # series is shorter than one window) simply does not vote.
                # Falling back to a scalar here would break the array code.
                if feat_name not in full.columns:
                    continue

                series = pd.to_numeric(full[feat_name], errors="coerce").astype(float).to_numpy()
                series = np.nan_to_num(series, nan=0.0, posinf=0.0, neginf=0.0)

                vead_scores = _vead_series(series, kv=KV, ka=KA, mode=MODE)

                mx = float(np.max(vead_scores)) if len(vead_scores) else 0.0
                if (not np.isfinite(mx)) or mx <= 0:
                    continue

                scores01 = np.clip(vead_scores / mx, 0.0, 1.0)
                if np.max(scores01) <= 0:
                    continue

                k_eff = min(K_PER_FEATURE, n)
                topk_idx = np.argpartition(scores01, -k_eff)[-k_eff:]
                # When fewer than k_eff indices scored, argpartition pads the
                # selection with arbitrary zero-score indices; those must not
                # receive a vote.
                topk_idx = topk_idx[scores01[topk_idx] > 0]
                votes[topk_idx] += 1

            final_scores = np.zeros(n, dtype=float)
            if votes.any():  # also safe when n == 0
                top_final_eff = min(TOP_FINAL, n)
                # Most votes first; ties are broken by the smaller index.
                order = np.lexsort((np.arange(n), -votes))
                chosen = order[:top_final_eff]
                # Never flag an index that nobody voted for.
                chosen = chosen[votes[chosen] > 0]
                final_scores[chosen] = 1.0

            # Mirror the input's sub-directory below OUTPUT_DIR.
            rel = os.path.relpath(filepath, INPUT_DIR)
            category = os.path.dirname(rel)
            base_name = os.path.basename(rel)

            out_dir = os.path.join(OUTPUT_DIR, category)
            os.makedirs(out_dir, exist_ok=True)
            out_name = f"{DETECTOR_NAME}_" + base_name
            out_path = os.path.join(out_dir, out_name)

            out_df = pd.DataFrame({
                "timestamp": df["timestamp"],
                "anomaly_score": final_scores
            })
            out_df.to_csv(out_path, index=False)

        except Exception as e:
            print(f"!! Error processing {filepath}: {e}")
            continue

# Script entry point: process all data files when this module is executed
# directly (it is not triggered on import).
if __name__ == "__main__":
    run()
"""

# ============================================================
# 3) Helpers
# ============================================================
def run_cmd(cmd_list):
    """Run a command and capture everything it prints.

    Args:
        cmd_list: Program and arguments as a list (no shell is involved).

    Returns:
        A ``(returncode, output)`` tuple where ``output`` is the decoded
        text of stdout with stderr merged into it.
    """
    completed = subprocess.run(
        cmd_list,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    return completed.returncode, completed.stdout

def parse_final_scores(text):
    """Extract the final scores that NAB prints, for any profile name.

    Recognises lines of the form
    ``Final score for '<detector>' detector on '<profile>' profile = <number>``.

    The number is matched as a real numeric literal (optional sign, decimal
    point and exponent). Trailing punctuation after the score therefore no
    longer ends up inside the captured text, where it made ``float()`` raise,
    and scores printed in scientific notation are read in full.

    Args:
        text: Captured output of the NAB scoring run.

    Returns:
        Dict mapping ``(detector_label, profile_name)`` to the score as a
        float. Empty when no score line is present.
    """
    number = r"-?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    pattern = (
        r"Final score for '([^']+)' detector on '([^']+)' profile = "
        rf"({number})"
    )
    return {
        (detector, profile): float(value)
        for detector, profile, value in re.findall(pattern, text)
    }

def make_variant_code(template, K, TOP):
    """Return ``template`` with the two voting parameters replaced.

    Only the first ``K_PER_FEATURE = <int>`` and the first
    ``TOP_FINAL = <int>`` occurrence are rewritten; everything else is left
    untouched.

    Args:
        template: Detector source code containing both assignments.
        K: New value for ``K_PER_FEATURE``.
        TOP: New value for ``TOP_FINAL``.

    Returns:
        The modified source code.

    Raises:
        ValueError: If either assignment cannot be found. Without this check
            a template change would silently make every experiment run with
            identical parameters.
    """
    code = template
    for name, value in (("K_PER_FEATURE", K), ("TOP_FINAL", TOP)):
        replacement = f"{name} = {value}"
        # A callable replacement inserts the text literally (no backslash or
        # group-reference processing).
        code, n_replaced = re.subn(
            rf"{name}\s*=\s*\d+", lambda _m: replacement, code, count=1
        )
        if n_replaced == 0:
            raise ValueError(f"template has no '{name} = <int>' assignment to replace")
    return code

# ============================================================
# 4) GRID SEARCH
# ============================================================
# Parameter grid: every (K_PER_FEATURE, TOP_FINAL) pair is one experiment.
K_LIST   = [6,7,8,9,10]
TOP_LIST = [11,13,15,17,19]

# One summary dict per experiment, whether it succeeded or not.
results = []

for K, TOP in itertools.product(K_LIST, TOP_LIST):
    exp_id = f"K{K}_TOP{TOP}"
    print("\n" + "="*80)
    print(f"EXPERIMENT: K_PER_FEATURE={K}, TOP_FINAL={TOP}  ->  TDA_VEAD_Method")
    print("="*80)

    # Write detector code (only K/TOP changed)
    code = make_variant_code(TEMPLATE, K, TOP)
    with open("my_algo.py", "w") as f:
        f.write(code)

    # Run detector (generates results/TDA_VEAD_Method/*)
    rc1, out1 = run_cmd(["python", "my_algo.py"])
    if rc1 != 0:
        # Keep the detector output; otherwise the reason for the failure is lost.
        detector_log = f"/content/NAB/tuning_logs/detector_{exp_id}.log"
        with open(detector_log, "w") as f:
            f.write(out1)
        print("Detector failed.")
        results.append({
            "K_PER_FEATURE": K, "TOP_FINAL": TOP,
            "standard": None, "lowFP": None, "lowFN": None,
            "rc_score": None,
            "log_file": detector_log,
            "status": "detector_failed"
        })
        continue

    # Run NAB scoring (IMPORTANT: --skipConfirmation)
    rc2, out2 = run_cmd([
        "python", "run.py",
        "--optimize", "--score",
        "--detectors", "TDA_VEAD_Method",
        "--normalize",
        "--skipConfirmation"
    ])

    # Save full log for this experiment (super useful)
    log_path = f"/content/NAB/tuning_logs/runpy_{exp_id}.log"
    with open(log_path, "w") as f:
        f.write(out2)

    scores = parse_final_scores(out2)

    # Try to pick the usual 3 profiles if they exist; otherwise leave None
    # Note: detector label in NAB output is often 'TDA' even if detector module is custom.
    # So we search across any detector label.
    def pick(profile_name):
        for (det_label, prof), val in scores.items():
            if prof == profile_name:
                return val
        return None

    def pick_first(*profile_names):
        # First profile name that has a score. Compared against None on
        # purpose: a legitimate score of 0.0 is falsy and must not be
        # mistaken for "missing".
        for profile_name in profile_names:
            val = pick(profile_name)
            if val is not None:
                return val
        return None

    row = {
        "K_PER_FEATURE": K,
        "TOP_FINAL": TOP,
        "standard": pick_first("standard", "VEAD_Method_standard"),
        "lowFP":    pick_first("reward_low_FP_rate", "VEAD_Method_reward_low_FP_rate"),
        "lowFN":    pick_first("reward_low_FN_rate", "VEAD_Method_reward_low_FN_rate"),
        "rc_score": rc2,
        "log_file": log_path,
        "status": "ok" if (rc2 == 0 and len(scores) > 0) else "score_parse_failed"
    }
    results.append(row)
    print("Extracted:", row)

# ============================================================
# 5) SAVE RESULTS
# ============================================================
# Persist the raw experiment table first, exactly as collected.
res_df = pd.DataFrame(results)
out_csv = "/content/NAB/parameter_tuning_results.csv"
res_df.to_csv(out_csv, index=False)
print("\n✅ Saved:", out_csv)

# Columns shown in the ranking. Some of them can be absent from res_df (for
# instance when every experiment failed before scoring), so add the missing
# ones as empty columns instead of failing with a KeyError.
summary_cols = ["K_PER_FEATURE","TOP_FINAL","standard","lowFP","lowFN","status","log_file"]
ranking_table = res_df.reindex(columns=list(dict.fromkeys([*res_df.columns, *summary_cols])))

best = ranking_table.dropna(subset=["standard"]).sort_values("standard", ascending=False).head(10)
print("\nTop 10 by standard score:")
print(best[summary_cols])


--- 1. CLEAN START ---
Cloning into 'NAB'...
remote: Enumerating objects: 7119, done.
remote: Counting objects: 100% (713/713), done.
remote: Compressing objects: 100% (168/168), done.
remote: Total 7119 (delta 601), reused 545 (delta 545), pack-reused 6406 (from 1)
Receiving objects: 100% (7119/7119), 86.73 MiB | 21.82 MiB/s, done.
Resolving deltas: 100% (5015/5015), done.
Updating files: 100% (1186/1186), done.

EXPERIMENT: K_PER_FEATURE=6, TOP_FINAL=11  ->  TDA_VEAD_Method
Extracted: {'K_PER_FEATURE': 6, 'TOP_FINAL': 11, 'standard': 48.57, 'lowFP': 34.99, 'lowFN': 55.37, 'rc_score': 0, 'log_file': '/content/NAB/tuning_logs/runpy_K6_TOP11.log', 'status': 'ok'}

EXPERIMENT: K_PER_FEATURE=6, TOP_FINAL=13  ->  TDA_VEAD_Method
Extracted: {'K_PER_FEATURE': 6, 'TOP_FINAL': 13, 'standard': 50.56, 'lowFP': 34.53, 'lowFN': 58.42, 'rc_score': 0, 'log_file': '/content/NAB/tuning_logs/runpy_K6_TOP13.log', 'status': 'ok'}

EXPERIMENT: K_PER_FEATURE=6, TOP_FINAL=15  ->  TDA_VEAD_Method