In [1]:
from pipeline_80_20 import run_once

# Untuned grid: 3 lag settings x 2 windows x 2 zero policies = 12 runs.
lags = ["no_lag", "precip_lags", "both_lags"]
wins = [("2015-07-01","2015-10-21"), ("2015-07-01","2015-10-31")]
zps  = ["standard", "train_keeps_zeros"]

# Flatten the grid up front; lag varies slowest, then window, then zero policy.
combos = [(lag, ws, we, zp) for lag in lags for (ws, we) in wins for zp in zps]

for lag, ws, we, zp in combos:
    run_once(
        file="Final_2015_Data_Site3.csv",
        window_start=ws,
        window_end=we,
        lag_setting=lag,
        zero_policy=zp,
        tuned=False,
    )
            

[80/20] Overnight t0 stats; Precip same-day (precip_t0) | Zero: TRAIN & TEST: 0→NaN | Window: 2015-07-01..2015-10-21
Tuned: False | cv_folds=5 | imputer_trials=48 | stage2_trials=96
Adults_3_Col: known=23 | MAE=18.043478260869566 | RMSE=25.948946531351837
Adults_3_Gam: known=14 | MAE=3.4285714285714284 | RMSE=3.946064947695181
Saved: results/8020__no_lag__standard__2015-07-01_2015-10-21__untuned__summary__20251009_012845.csv
[80/20] Overnight t0 stats; Precip same-day (precip_t0) | Zero: TRAIN keeps zeros; TEST 0→NaN | Window: 2015-07-01..2015-10-21
Tuned: False | cv_folds=5 | imputer_trials=48 | stage2_trials=96
Adults_3_Col: known=23 | MAE=20.782608695652176 | RMSE=27.473307203459363
Adults_3_Gam: known=14 | MAE=3.357142857142857 | RMSE=3.826598638851107
Saved: results/8020__no_lag__train_keeps_zeros__2015-07-01_2015-10-21__untuned__summary__20251009_012846.csv
[80/20] Overnight t0 stats; Precip same-day (precip_t0) | Zero: TRAIN & TEST: 0→NaN | Window: 2015-07-01..2015-10-31
Tuned: 

In [2]:
# The following is scripts/make_8020_untuned_bywindow_tables.py
# Build ONE LaTeX file with two sections (one per window) for 80/20 UNTUNED results
# Output: results/_8020_untuned_bywindow.tex

import pandas as pd
import numpy as np
from pathlib import Path

# Directory that is searched recursively for summary CSVs; the generated .tex file is written here too.
root = Path("results")

def latex_escape(s: str) -> str:
    """Escape LaTeX special characters so ``s`` is typeset as literal text.

    The input is converted with ``str()`` first, so non-string values
    (numbers, NaN, None) are accepted.

    Each character is translated exactly once. Chained ``str.replace`` calls
    would re-escape the braces of an already inserted ``\\textbackslash{}``
    and produce ``\\textbackslash\\{\\}``.

    Escaped characters: ``\\ & % # _ { } $ ~ ^``. Everything else, including
    non-ASCII characters, is passed through unchanged.
    """
    replacements = {
        "\\": r"\textbackslash{}",
        "&": r"\&",
        "%": r"\%",
        "#": r"\#",
        "_": r"\_",
        "{": r"\{",
        "}": r"\}",
        "$": r"\$",
        "~": r"\textasciitilde{}",
        "^": r"\textasciicircum{}",
    }
    return "".join(replacements.get(ch, ch) for ch in str(s))

def parse_meta_from_filename(p: Path):
    """
    Read run metadata from the positional, double-underscore-separated tokens
    of a summary file's stem:
      <protocol>__<lag>__<zero>__<window>__<tuned/untuned>__summary__<ts>.csv

    Returns a dict with keys protocol, lag, zero_policy, window, tuned, run_tag
    (in that order). A field whose token is absent is None; run_tag is always
    the full stem. A window token is normalised to '<start>_to_<end>' when it
    contains '_to_' or '_', otherwise kept as-is. tuned is True only when the
    fifth token equals 'tuned' ignoring case.
    """
    stem = p.stem
    parts = stem.split("__")
    n_parts = len(parts)

    def token(i):
        # Positional lookup that yields None past the end of the token list.
        return parts[i] if i < n_parts else None

    window_tok = token(3)  # 'YYYY-MM-DD_YYYY-MM-DD' or 'YYYY-MM-DD_to_YYYY-MM-DD'
    if window_tok is None:
        window = None
    else:
        start, sep, end = window_tok.partition("_to_")
        if not sep:
            # No explicit '_to_': fall back to the first plain underscore.
            start, sep, end = window_tok.partition("_")
        window = f"{start}_to_{end}" if sep else window_tok

    tuned_tok = token(4)
    tuned = None if tuned_tok is None else (tuned_tok.lower() == "tuned")

    return {
        "protocol": token(0),     # '8020' or '80_20' or 'loo'
        "lag": token(1),          # 'no_lag' | 'precip_lags' | 'both_lags'
        "zero_policy": token(2),  # 'standard' | 'train_keeps_zeros'
        "window": window,
        "tuned": tuned,
        "run_tag": stem,
    }

def is_8020_untuned_filename(p: Path) -> bool:
    """Decide from the filename tokens alone whether ``p`` is an 80/20 untuned summary."""
    parts = p.stem.split("__")
    # At least six tokens are needed to reach ... __untuned__summary__<ts>
    if len(parts) >= 6:
        is_8020 = parts[0].lower() in {"8020", "80_20"}
        is_untuned = parts[4].lower() == "untuned"
        return is_8020 and is_untuned
    return False

# Walk `root` recursively and keep only the summary CSVs whose filename tokens
# identify an 80/20 untuned run.
summary_files = list(filter(is_8020_untuned_filename, root.rglob("*__summary__*.csv")))
if len(summary_files) == 0:
    raise FileNotFoundError("No 80/20 untuned summary files found (by filename).")

# Load every summary CSV, stamp it with the metadata parsed from its filename,
# and normalise column names so files from different generator versions line up.
# A file that cannot be read, or that lacks a required column, is reported and
# skipped instead of failing later with a bare KeyError.
rows = []

# canonical column -> alternate spellings accepted, in priority order
column_aliases = {
    "target": ("Target",),
    "MAE": ("mae",),
    "RMSE": ("rmse",),
    "n_known": ("known_rows", "n_rows_known"),
}
required_columns = ("target", "MAE", "RMSE")

for p in summary_files:
    try:
        run_df = pd.read_csv(p)
        meta = parse_meta_from_filename(p)
        # Filename metadata overrides any same-named CSV column (the filename is the source of truth).
        for k, v in meta.items():
            run_df[k] = v

        # Copy the first available alternate spelling into each missing canonical column.
        for canonical, alternates in column_aliases.items():
            if canonical in run_df.columns:
                continue
            found = next((alt for alt in alternates if alt in run_df.columns), None)
            if found is not None:
                run_df[canonical] = run_df[found]

        # The known-row count is optional; leave it missing when no variant exists.
        if "n_known" not in run_df.columns:
            run_df["n_known"] = np.nan

        missing = [c for c in required_columns if c not in run_df.columns]
        if missing:
            raise ValueError(f"missing required column(s): {', '.join(missing)}")

        # Display string for the window. Only the '_to_' separator becomes an en dash;
        # replacing every "to" substring would also mangle other text in the label.
        run_df["WindowTxt"] = (run_df["window"].astype(str)
                               .str.replace("_to_", " – ", regex=False)
                               .str.replace("_", " ", regex=False))

        rows.append(run_df)
    except Exception as e:
        # Deliberate best effort: one bad file must not abort the whole table build.
        print(f"[skip] {p}: {e}")

if not rows:
    raise RuntimeError("Found files, but none could be parsed. Inspect CSV formats.")

# Stack the per-file frames into one table.
df = pd.concat(rows, ignore_index=True)

# Force numeric dtypes: unparseable metrics become NaN, unparseable/missing counts become 0.
for metric_col in ("MAE", "RMSE"):
    df[metric_col] = pd.to_numeric(df[metric_col], errors="coerce")
known_counts = pd.to_numeric(df["n_known"], errors="coerce")
df["n_known"] = known_counts.fillna(0).astype(int)

# Rows lacking either metric cannot be tabulated.
df = df.dropna(subset=["MAE","RMSE"]).copy()
if df.empty:
    raise RuntimeError("After coercion, no rows have valid MAE/RMSE. Check the CSV content.")

# Human-readable labels; codes that are not in a map fall back to their raw string.
lag_map  = {"no_lag":"No-lag", "precip_lags":"Precip-lags", "both_lags":"Both-lags"}
zero_map = {"standard":"0→NaN train+test", "train_keeps_zeros":"train keeps 0; test 0→NaN"}

for label_col, code_col, code_map in (("Lag", "lag", lag_map),
                                      ("Zero policy", "zero_policy", zero_map)):
    codes = df[code_col]
    df[label_col] = codes.map(code_map).fillna(codes.astype(str))

# Distinct window labels present in the data, in sorted order.
windows = sorted(df["WindowTxt"].dropna().unique())

out_tex = root / "_8020_untuned_bywindow.tex"


def _tex_text(value) -> str:
    """Escape ``value`` for LaTeX, then render the arrow used in the labels as math.

    A raw U+2192 in the .tex file depends on the engine/encoding setup of the
    consuming document; ``$\\rightarrow$`` typesets on every engine. The
    replacement runs after escaping so the inserted ``$`` and ``\\`` survive.
    """
    return latex_escape(value).replace("→", r"$\rightarrow$")


# Assemble the whole document in memory and write it in one call, so an error
# while rendering a row cannot leave a truncated .tex file on disk.
# The emitted markup uses the tabularx environment and \toprule/\midrule/\bottomrule,
# so the consuming document needs the tabularx and booktabs packages.
tex_parts = [
    "\\begin{table}[htbp]\n\\centering\n\\small\n",
    "\\setlength{\\tabcolsep}{4pt}\n\\renewcommand{\\arraystretch}{1.12}\n",
    "\\caption{80/20 (untuned) results, presented separately by analysis window. "
    "Metrics computed on originally known test rows only.}\n",
    "\\label{tab:8020-untuned-bywindow}\n",
]

for wi, wtxt in enumerate(windows):
    block = df[df["WindowTxt"] == wtxt]
    tex_parts.append(f"\\subsubsection*{{Window: {_tex_text(wtxt)}}}\n")
    tex_parts.append("\\begin{tabularx}{\\textwidth}{l l X r r r}\n\\toprule\n")
    tex_parts.append("Target & Lag & Zero policy & $n_{\\mathrm{known}}$ & MAE & RMSE\\\\\n\\midrule\n")

    if block.empty:
        tex_parts.append("\\multicolumn{6}{c}{No runs found for this window}\\\\\n")
    else:
        # One row per (target, configuration); within a target, best RMSE first.
        block = (block[["target","Lag","Zero policy","n_known","MAE","RMSE"]]
                 .rename(columns={"target":"Target"})
                 .sort_values(["Target","RMSE","MAE"])
                 .reset_index(drop=True))

        # Plain tuples come back in the column order selected above.
        for target, lag_label, zero_label, n_known, mae, rmse in block.itertuples(index=False, name=None):
            tex_parts.append(
                f"{_tex_text(target)} & "
                f"{_tex_text(lag_label)} & "
                f"{_tex_text(zero_label)} & "
                f"{int(n_known)} & "
                f"{mae:.3f} & {rmse:.3f}\\\\\n"
            )

    tex_parts.append("\\bottomrule\n\\end{tabularx}\n")
    # Vertical gap between consecutive window tables, but not after the last one.
    if wi < len(windows) - 1:
        tex_parts.append("\\vspace{0.75em}\n")

tex_parts.append("\\end{table}\n")

out_tex.write_text("".join(tex_parts), encoding="utf-8")

print(f"[OK] wrote {out_tex}")


[OK] wrote results/_8020_untuned_bywindow.tex
