In [13]:
import os
from pathlib import Path
import pandas as pd
import numpy as np

# Rows-per-year constant. It is used three ways further down:
#   * divisor that turns a row count into years when annualizing CAGR,
#   * sqrt() scaling factor for the Sharpe ratio,
#   * minimum number of training rows a config needs in a walk-forward window.
# NOTE(review): the sample file inspected later in this notebook has between
# 248 and 253 rows per calendar year, so 248 is the smallest observed year.
TRADING_DAYS = 248

# =========================
# INPUTS
# =========================


# Sweep summary CSV (one row per swept configuration) read with pd.read_csv.
SWEEP_RESULTS_CSV = "./13a-trading_output_sweep_performance_daily_returns/sweep_summary_20260105-142026.csv"
# Folder holding one daily-returns file (.parquet or .csv) per configuration.
DAILY_RETURNS_DIR = "./13a-trading_output_sweep_performance_daily_returns/daily_returns/20260105-142026"

# How many rows to take from the top of the sweep summary (sorted by strat_cagr).
TOP_N = 48

# Window lengths, in calendar years, passed to pd.DateOffset(years=...).
TRAIN_YEARS = 1
TEST_YEARS  = 1

# Bounds for the yearly test-start anchors; windows reaching outside are dropped.
START_DATE = pd.Timestamp("1999-01-01")
END_DATE   = pd.Timestamp("2025-12-24")

# Training-slice statistic used to pick the config for each window.
SELECTION_METRIC = "cagr"  # "cagr" | "sharpe" | "calmar"

# Toggles for writing the per-window table and the stitched equity curve.
SAVE_WF_RESULTS_CSV = True
SAVE_EQUITY_CURVE_CSV = True

# =========================
# OUTPUT DIRECTORY
# =========================
OUTPUT_DIR = "./13b-wfo"
# Created eagerly so the save step at the end cannot fail on a missing folder.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Filename prefix shared by both output CSVs.
OUT_PREFIX = "walkforward"

# =========================
# CONFIG ID + KEYS (NOW INCLUDE ALL SWEPT PARAMS)
# =========================
def make_config_id(row: dict) -> str:
    """
    Build the daily-returns file stem for one sweep configuration.

    Must MATCH the sweep daily-returns writer naming. The files on disk look
    like ``lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0200``, i.e.
    the position cap is tagged ``cap`` and, together with drift and
    min-new-weight, is expressed in basis points zero-padded to four digits.
    The previous implementation used an ``mpw`` tag, a x1000 scale and no
    padding, so its ids never corresponded to a real file.

    Parameters
    ----------
    row : dict
        Mapping with the keys ``lookback_days``, ``atr_days``,
        ``top_percentile``, ``min_trade_value``, ``max_position_weight``,
        ``min_cash_reserve``, ``drift_threshold`` and
        ``min_new_position_weight``. Extra keys are ignored.

    Returns
    -------
    str
        File stem (no extension) that is unique per swept parameter set.

    Raises
    ------
    KeyError
        If one of the required keys is absent.
    """
    lb   = int(row["lookback_days"])
    atr  = int(row["atr_days"])
    tp   = int(round(float(row["top_percentile"]) * 1000))             # 0.95  -> 950  -> tp0950
    mtv  = int(round(float(row["min_trade_value"])))                   # 10000 -> mtv10000

    cap  = int(round(float(row["max_position_weight"]) * 10000))       # 0.12  -> 1200 -> cap1200
    cash = int(round(float(row["min_cash_reserve"])))                  # 5000  -> cash5000
    dr   = int(round(float(row["drift_threshold"]) * 10000))           # 0.02  -> 200  -> dr0200
    mnw  = int(round(float(row["min_new_position_weight"]) * 10000))   # 0.005 -> 50   -> mnw0050

    return (
        f"lb{lb}_atr{atr}_tp{tp:04d}_mtv{mtv}"
        f"_cap{cap:04d}_cash{cash}_dr{dr:04d}_mnw{mnw:04d}"
    )

def config_key(c):
    """
    Return a hashable tuple identifying a full parameter set.

    The two window lengths are coerced to ``int`` and the six remaining
    parameters to ``float``, always in the same fixed order, so the tuple can
    be used as a dictionary key and for de-duplication.
    """
    whole_number_fields = ("lookback_days", "atr_days")
    real_number_fields = (
        "top_percentile",
        "min_trade_value",
        "max_position_weight",
        "min_cash_reserve",
        "drift_threshold",
        "min_new_position_weight",
    )
    head = tuple(int(c[name]) for name in whole_number_fields)
    tail = tuple(float(c[name]) for name in real_number_fields)
    return head + tail

def config_id_from_row(lookback, atr, top_percentile, min_trade_value,
                       max_position_weight, min_cash_reserve,
                       drift_threshold, min_new_position_weight):
    """
    Compose the file stem for one configuration from its individual parameters.

    Fractions are scaled to integers before formatting: ``top_percentile`` by
    1000, the three weight/threshold fractions by 10000. The scaled percentile,
    cap, drift and min-new-weight fields are zero-padded to four digits.

    Example: ``(90, 20, 0.95, 10000, 0.12, 5000, 0.02, 0.005)`` gives
    ``lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0050``.
    """
    def _to_int(value, scale=1):
        # Round to nearest after scaling so 0.12 * 10000 lands on 1200 exactly.
        return int(round(float(value) * scale))

    percentile_tag = _to_int(top_percentile, 1000)
    trade_value_tag = _to_int(min_trade_value)

    cap_tag = _to_int(max_position_weight, 10000)
    cash_tag = _to_int(min_cash_reserve)
    drift_tag = _to_int(drift_threshold, 10000)
    new_weight_tag = _to_int(min_new_position_weight, 10000)

    pieces = [
        f"lb{int(lookback)}",
        f"atr{int(atr)}",
        f"tp{percentile_tag:04d}",
        f"mtv{trade_value_tag}",
        f"cap{cap_tag:04d}",
        f"cash{cash_tag}",
        f"dr{drift_tag:04d}",
        f"mnw{new_weight_tag:04d}",
    ]
    return "_".join(pieces)

from pathlib import Path
# Quick visual diagnostic: show where DAILY_RETURNS_DIR resolves to, whether it
# exists, and the names of up to 20 entries inside it. Nothing is raised here.
p = Path(DAILY_RETURNS_DIR)
print("DAILY_RETURNS_DIR resolved:", p.resolve())
print("Exists?", p.exists())
print("Sample files:", [x.name for x in list(p.glob("*"))[:20]])

def load_config_returns(base_dir: str | Path, cfg) -> pd.DataFrame:
    """
    Read the daily-returns file for one configuration.

    The file stem comes from ``config_id_from_row``; a ``.parquet`` file is
    preferred and a ``.csv`` with the same stem is the fallback.

    Returns a frame sorted by ``date`` with exactly the columns
    ``date`` (datetime), ``strat_ret`` and ``spy_ret``. ``spy_ret`` is filled
    with NaN when the file does not carry it.

    Raises
    ------
    FileNotFoundError
        Neither a parquet nor a csv file exists for the configuration.
    ValueError
        The file has no ``strat_ret`` column.
    """
    folder = Path(base_dir)

    ordered_params = (
        "lookback_days", "atr_days", "top_percentile", "min_trade_value",
        "max_position_weight", "min_cash_reserve", "drift_threshold", "min_new_position_weight",
    )
    cfg_id = config_id_from_row(*[cfg[name] for name in ordered_params])

    parquet_path = folder / f"{cfg_id}.parquet"
    csv_path = folder / f"{cfg_id}.csv"

    if parquet_path.exists():
        raw = pd.read_parquet(parquet_path)
    elif csv_path.exists():
        raw = pd.read_csv(csv_path)
    else:
        raise FileNotFoundError(f"Missing daily returns file for {cfg_id} in {folder}")

    # assign() returns a new frame, so the object handed back by the reader is untouched.
    frame = raw.assign(date=pd.to_datetime(raw["date"])).sort_values("date")

    if "strat_ret" not in frame.columns:
        raise ValueError(f"{cfg_id} missing 'strat_ret'")
    if "spy_ret" not in frame.columns:
        frame = frame.assign(spy_ret=np.nan)

    return frame[["date", "strat_ret", "spy_ret"]]


# =========================
# METRICS
# =========================
def annualized_cagr_from_returns(rets: pd.Series, periods_per_year=None) -> float:
    """
    Compound annual growth rate implied by a series of periodic simple returns.

    Parameters
    ----------
    rets : pd.Series
        Periodic simple returns (0.01 means +1%).
    periods_per_year : number, optional
        How many rows make up one year. Defaults to the module-level
        ``TRADING_DAYS`` so existing one-argument callers are unaffected.

    Returns
    -------
    float
        ``total_growth ** (1 / years) - 1``. ``NaN`` for an empty series, a
        non-positive ``periods_per_year`` or an undefined total growth.
        ``-1.0`` when compounded growth is zero or negative (capital wiped out).
    """
    if rets.empty:
        return np.nan
    if periods_per_year is None:
        periods_per_year = TRADING_DAYS
    if periods_per_year <= 0:
        return np.nan

    total = float((1.0 + rets).prod())
    if np.isnan(total):
        return np.nan
    if total <= 0.0:
        # A negative base with a fractional exponent yields a complex number in
        # Python 3; a wiped-out account is reported as a -100% growth rate instead.
        return -1.0

    years = len(rets) / periods_per_year
    return total ** (1.0 / years) - 1.0

def annualized_sharpe(rets: pd.Series, periods_per_year=None) -> float:
    """
    Annualized Sharpe ratio (zero risk-free rate) of periodic simple returns.

    Parameters
    ----------
    rets : pd.Series
        Periodic simple returns.
    periods_per_year : number, optional
        How many rows make up one year. Defaults to the module-level
        ``TRADING_DAYS`` so existing one-argument callers are unaffected.

    Returns
    -------
    float
        ``sqrt(periods_per_year) * mean / std`` using the population standard
        deviation (``ddof=0``). ``NaN`` for an empty series; ``0.0`` when the
        standard deviation is zero, negative or not finite.
    """
    if rets.empty:
        return np.nan
    if periods_per_year is None:
        periods_per_year = TRADING_DAYS

    sd = float(rets.std(ddof=0))
    # A flat (or all-NaN) series has no usable dispersion; report a neutral score.
    if not np.isfinite(sd) or sd <= 0:
        return 0.0
    return np.sqrt(periods_per_year) * float(rets.mean()) / sd

def max_drawdown_from_returns(rets: pd.Series) -> float:
    """
    Largest peak-to-trough decline of the equity curve built from ``rets``.

    The curve starts from an implicit equity of 1.0 *before* the first return,
    and that starting value counts as a peak. Without it, a series that loses
    money from the very first row (e.g. ``[-0.10, 0.0]``) would be measured
    against its own already-reduced first value and report no drawdown.

    Parameters
    ----------
    rets : pd.Series
        Periodic simple returns.

    Returns
    -------
    float
        Drawdown as a non-positive fraction (``-0.25`` means -25%), or ``NaN``
        for an empty series.
    """
    if rets.empty:
        return np.nan
    equity = (1.0 + rets).cumprod()
    # Floor the running peak at the starting equity so early losses are counted.
    running_peak = equity.cummax().clip(lower=1.0)
    dd = equity / running_peak - 1.0
    return float(dd.min())

def score_training_slice(train_rets: pd.Series, metric: str):
    """
    Score one training slice and return ``(score, stats)``.

    ``stats`` always holds ``cagr``, ``sharpe``, ``dd`` and ``calmar``;
    ``score`` is the entry selected by ``metric``, which must be one of
    ``"cagr"``, ``"sharpe"`` or ``"calmar"``. Calmar is ``cagr / |dd|`` and is
    NaN whenever the drawdown is missing, NaN or exactly zero.

    Raises
    ------
    ValueError
        ``metric`` is not one of the three supported names.
    """
    growth = annualized_cagr_from_returns(train_rets)
    sharpe = annualized_sharpe(train_rets)
    drawdown = max_drawdown_from_returns(train_rets)

    if drawdown is None or np.isnan(drawdown) or drawdown == 0:
        calmar = np.nan
    else:
        calmar = growth / abs(drawdown)

    # "dd" is reported in the stats but is deliberately not selectable as a score.
    if metric not in ("cagr", "sharpe", "calmar"):
        raise ValueError(f"Unknown metric: {metric}")

    summary = {"cagr": growth, "sharpe": sharpe, "dd": drawdown, "calmar": calmar}
    return summary[metric], summary

# =========================
# 0) SANITY CHECK DIRECTORY
# =========================
# Fail fast if the daily-returns folder is missing or holds no usable files.
daily_dir = Path(DAILY_RETURNS_DIR)
if not daily_dir.exists():
    raise FileNotFoundError(f"DAILY_RETURNS_DIR not found: {daily_dir}")

parquets = sorted(daily_dir.glob("*.parquet"))
csvs = sorted(daily_dir.glob("*.csv"))
print(f"Daily returns dir: {daily_dir}")
print(f"Found {len(parquets)} parquet, {len(csvs)} csv files.")

# Either format is acceptable; only a folder with neither is an error.
if not (parquets or csvs):
    no_files_message = (
        "No daily return files found in DAILY_RETURNS_DIR. "
        "Either WRITE_DAILY_RETURNS was off, the RUN_ID folder is wrong, "
        "or files were written to a different directory."
    )
    raise RuntimeError(no_files_message)

# =========================
# 1) LOAD SWEEP SUMMARY + PICK TOP_N UNIQUE CONFIGS
# =========================
# Read the sweep summary: one row per swept configuration.
df = pd.read_csv(SWEEP_RESULTS_CSV)

# Every swept parameter is needed to rebuild the file name; strat_cagr ranks the rows.
required_cols = [
    "lookback_days","atr_days","top_percentile","min_trade_value",
    "max_position_weight","min_cash_reserve","drift_threshold","min_new_position_weight",
    "strat_cagr"
]
missing_cols = [c for c in required_cols if c not in df.columns]
if missing_cols:
    raise ValueError(f"Sweep summary missing columns needed for expanded WFO: {missing_cols}")

# Sort best-to-worst and de-dupe by FULL config definition
df_sorted = df.sort_values("strat_cagr", ascending=False).copy()
df_sorted["_cfgkey"] = df_sorted.apply(lambda r: config_key(r.to_dict()), axis=1)
df_unique = df_sorted.drop_duplicates("_cfgkey", keep="first").drop(columns=["_cfgkey"])

# Take the top rows from the DE-DUPLICATED frame. Previously the raw frame was
# re-sorted here, so a configuration appearing twice in the summary could occupy
# several of the TOP_N slots while the printout still claimed "after de-dupe".
topN = df_unique.head(TOP_N)[required_cols].to_dict("records")


print(f"\nTop {TOP_N} UNIQUE configs by full-sample CAGR (after de-dupe): {len(topN)}")

# =========================
# 2) LOAD DAILY RETURNS FOR THOSE CONFIGS
# =========================
# returns_data maps config_key(row) -> daily-returns frame for that config.
returns_data = {}
# (file stem, error text) for every config whose file could not be loaded.
missing = []

# Parameter columns in the positional order config_id_from_row() expects.
_id_fields = (
    "lookback_days", "atr_days", "top_percentile", "min_trade_value",
    "max_position_weight", "min_cash_reserve", "drift_threshold", "min_new_position_weight",
)

for row in topN:
    k = config_key(row)
    try:
        returns_data[k] = load_config_returns(DAILY_RETURNS_DIR, row)
    except Exception as e:
        # Best effort: one unreadable file should not abort the whole run.
        # Label the failure with the stem the loader actually looked for
        # (config_id_from_row), not a differently formatted id.
        looked_for = config_id_from_row(*[row[name] for name in _id_fields])
        missing.append((looked_for, str(e)))

if missing:
    print("\nWARNING: Some top configs are missing daily return files:")
    for cfg_id, msg in missing[:25]:
        print(" ", cfg_id, "->", msg)
    if len(missing) > 25:
        print(f" ... and {len(missing)-25} more")

# Keep only the configs that have data; the walk-forward loop iterates over these.
top_loaded = [row for row in topN if config_key(row) in returns_data]
if not top_loaded:
    raise RuntimeError(
        "Could not load daily returns for ANY of the top configs.\n"
        "Most likely: your sweep writer didn't create daily return files, "
        "or the filename scheme doesn't match config_id_from_row()."
    )

print(f"\nLoaded daily returns for {len(top_loaded)}/{len(topN)} configs.")

# =========================
# 3) BUILD WALK-FORWARD WINDOWS (YEAR-START ANCHORS)
# =========================
# One candidate window per January 1st between START_DATE and END_DATE.
test_starts = pd.date_range(start=START_DATE, end=END_DATE, freq="YS")
windows = []
for test_start in test_starts:
    train_start = test_start - pd.DateOffset(years=TRAIN_YEARS)
    test_end    = test_start + pd.DateOffset(years=TEST_YEARS)
    # Windows whose training span would begin before START_DATE are passed over.
    if train_start >= START_DATE:
        # Anchors are ascending, so the first test span that overruns END_DATE
        # means every later one does too.
        if test_end > END_DATE:
            break
        windows.append((train_start, test_start, test_end))

print(f"\n{len(windows)} walk-forward windows ({TRAIN_YEARS}y train / {TEST_YEARS}y trade).")
if len(windows) == 0:
    raise RuntimeError("No valid windows. Check START_DATE / END_DATE / TRAIN_YEARS.")

# =========================
# 4) WALK-FORWARD LOOP
# =========================
# Per-window result records, per-window equity slices, and the running equity
# that is carried from the end of one test span into the start of the next.
wf_rows = []
equity_parts = []
equity_level = 1.0

for train_start, test_start, test_end in windows:
    # --- rank every loaded config on the training span ---
    scores = []
    for row in top_loaded:
        k = config_key(row)
        r = returns_data[k]

        in_train = (r["date"] >= train_start) & (r["date"] < test_start)
        train = r.loc[in_train]

        # Configs with fewer than TRADING_DAYS training rows are not scored.
        if len(train) >= TRADING_DAYS:
            score, stats = score_training_slice(train["strat_ret"], SELECTION_METRIC)
            if not np.isnan(score):
                scores.append((k, score, stats))

    if len(scores) == 0:
        print(f"Skipping window {train_start.date()} → {test_end.date()} (no configs had enough training data).")
        continue

    # Highest score first; the sort is stable, so ties keep top_loaded order.
    scores.sort(key=lambda entry: entry[1], reverse=True)
    best_key, best_score, best_train_stats = scores[0]

    # --- trade the winner over the test span ---
    best_returns = returns_data[best_key]
    in_test = (best_returns["date"] >= test_start) & (best_returns["date"] < test_end)
    test = best_returns.loc[in_test].copy()

    if test.empty:
        print(f"Skipping OOS period {test_start.date()} → {test_end.date()} (no test data).")
        continue

    test = test.sort_values("date")
    growth = (1.0 + test["strat_ret"]).cumprod()
    test["equity"] = equity_level * growth
    equity_level = float(test["equity"].iloc[-1])

    test_cagr   = annualized_cagr_from_returns(test["strat_ret"])
    test_sharpe = annualized_sharpe(test["strat_ret"])
    test_dd     = max_drawdown_from_returns(test["strat_ret"])
    if test_dd is None or np.isnan(test_dd) or test_dd == 0:
        test_calmar = np.nan
    else:
        test_calmar = test_cagr / abs(test_dd)

    # The f-string key repeats one of the fixed train_* keys; the later entry
    # supplies the stored value while the column keeps its first position.
    window_record = {
        "train_start": train_start,
        "test_start": test_start,
        "test_end": test_end,
        "chosen_config_key": str(best_key),
        f"train_{SELECTION_METRIC}": best_score,
        "train_cagr": best_train_stats["cagr"],
        "train_sharpe": best_train_stats["sharpe"],
        "train_dd": best_train_stats["dd"],
        "train_calmar": best_train_stats["calmar"],
        "test_cagr": test_cagr,
        "test_sharpe": test_sharpe,
        "test_dd": test_dd,
        "test_calmar": test_calmar,
        "n_test_days": int(len(test)),
    }
    wf_rows.append(window_record)

    equity_parts.append(test[["date", "equity"]])

# Bail out BEFORE building the table: with no windows, pd.DataFrame([]) has no
# "test_start" column and sort_values() would raise an unhelpful KeyError.
if not wf_rows or not equity_parts:
    raise RuntimeError("No equity curve produced (all windows skipped).")

wf_df = pd.DataFrame(wf_rows).sort_values("test_start").reset_index(drop=True)
print("\n=== Walk-forward window results ===")
print(wf_df)

# Stitch the per-window equity slices into one date-indexed curve.
equity_curve = (
    pd.concat(equity_parts, ignore_index=True)
      .drop_duplicates("date")
      .sort_values("date")
      .set_index("date")
)

# Recover daily returns from the curve. The curve starts from an equity of 1.0
# before its first row, so the first day's return is measured against 1.0;
# pct_change().fillna(0.0) would silently discard that day's P&L.
previous_equity = equity_curve["equity"].shift(1).fillna(1.0)
overall_rets = equity_curve["equity"] / previous_equity - 1.0

overall_cagr = annualized_cagr_from_returns(overall_rets)
overall_sharpe = annualized_sharpe(overall_rets)
overall_dd = max_drawdown_from_returns(overall_rets)
# Calmar is undefined without a (non-zero, non-NaN) drawdown.
overall_calmar = (
    overall_cagr / abs(overall_dd)
    if (not np.isnan(overall_dd) and overall_dd != 0)
    else np.nan
)

print("\n=== Overall walk-forward performance ===")
print(f"WFO CAGR   : {overall_cagr:.3%}")
print(f"WFO Sharpe : {overall_sharpe:.2f}")
print(f"WFO MaxDD  : {overall_dd:.2%}")
print(f"WFO Calmar : {overall_calmar:.2f}")

# =========================
# 5) SAVE OUTPUTS TO 13b-wfo
# =========================
# Both files share the "<prefix>_top<N>_" stem and land in OUTPUT_DIR.
if SAVE_WF_RESULTS_CSV:
    windows_filename = f"{OUT_PREFIX}_top{TOP_N}_windows.csv"
    out1 = os.path.join(OUTPUT_DIR, windows_filename)
    wf_df.to_csv(out1, index=False)
    print(f"Saved: {out1}")

if SAVE_EQUITY_CURVE_CSV:
    equity_filename = f"{OUT_PREFIX}_top{TOP_N}_equity_curve.csv"
    out2 = os.path.join(OUTPUT_DIR, equity_filename)
    # The curve is date-indexed; reset so "date" is written as a regular column.
    equity_curve.reset_index().to_csv(out2, index=False)
    print(f"Saved: {out2}")

print("\n=== COMPLETE ===")
print(f"All outputs saved to: {OUTPUT_DIR}")

DAILY_RETURNS_DIR resolved: C:\TWS API\source\pythonclient\TradingIdeas\MomentumSystem\13a-trading_output_sweep_performance_daily_returns\daily_returns\20260105-142026
Exists? True
Sample files: ['lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0050.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0100.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0200.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0300.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0050.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0100.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0200.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0300.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0050.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0100.parquet', 'lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0200.parquet', 'lb90_atr20_tp0950_mtv1

In [5]:
import pandas as pd
from pathlib import Path

DAILY_RETURNS_DIR = "./13a-trading_output_sweep_performance_daily_returns/daily_returns/20260105-142026"

# Inspect the first parquet file found: date span, row count, rows per calendar year.
files = list(Path(DAILY_RETURNS_DIR).glob("*.parquet"))
if len(files) > 0:
    first_file = files[0]
    df = pd.read_parquet(first_file)
    df["date"] = pd.to_datetime(df["date"])
    df = df.sort_values("date")

    earliest, latest = df["date"].min(), df["date"].max()
    print(f"File: {first_file.name}")
    print(f"Date range: {earliest} to {latest}")
    print(f"Total rows: {len(df)}")
    print("\nRows per year:")
    rows_per_year = df.groupby(df["date"].dt.year).size()
    print(rows_per_year)

File: lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0050.parquet
Date range: 1999-01-04 00:00:00 to 2025-12-30 00:00:00
Total rows: 6790

Rows per year:
date
1999    252
2000    252
2001    248
2002    252
2003    252
2004    252
2005    252
2006    251
2007    251
2008    253
2009    252
2010    252
2011    252
2012    250
2013    252
2014    252
2015    252
2016    252
2017    251
2018    251
2019    252
2020    253
2021    252
2022    251
2023    250
2024    252
2025    249
dtype: int64


In [6]:
# Check what config_id_from_row generates vs actual filenames
from pathlib import Path

# Compare the id generated for the best-ranked config with real file names.
topN_sample = topN[0]  # First config
id_fields = (
    "lookback_days",
    "atr_days",
    "top_percentile",
    "min_trade_value",
    "max_position_weight",
    "min_cash_reserve",
    "drift_threshold",
    "min_new_position_weight",
)
generated_id = config_id_from_row(*[topN_sample[name] for name in id_fields])

print(f"Top config values: {topN_sample}")
print(f"Generated config_id: {generated_id}")
print("\nActual files in directory:")
first_five = list(Path(DAILY_RETURNS_DIR).glob("*.parquet"))[:5]
for f in first_five:
    print(f"  {f.name}")

Top config values: {'lookback_days': 90, 'atr_days': 20, 'top_percentile': 0.95, 'min_trade_value': 10000.0, 'max_position_weight': 0.12, 'min_cash_reserve': 5000.0, 'drift_threshold': 0.02, 'min_new_position_weight': 0.02, 'strat_cagr': 0.1965560535444952}
Generated config_id: lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0200

Actual files in directory:
  lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0050.parquet
  lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0100.parquet
  lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0200.parquet
  lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0300.parquet
  lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0050.parquet


In [7]:
from pathlib import Path

# Get all available config IDs from actual files
# Stems (names without extension) of every parquet file actually on disk.
available_files = {path.stem for path in Path(DAILY_RETURNS_DIR).glob("*.parquet")}
print(f"Available files: {len(available_files)}")

# Count how many of the selected configs map onto an existing file.
matched = 0
unmatched = []
for row in topN:
    cfg_id = config_id_from_row(
        lookback=row["lookback_days"],
        atr=row["atr_days"],
        top_percentile=row["top_percentile"],
        min_trade_value=row["min_trade_value"],
        max_position_weight=row["max_position_weight"],
        min_cash_reserve=row["min_cash_reserve"],
        drift_threshold=row["drift_threshold"],
        min_new_position_weight=row["min_new_position_weight"],
    )
    if cfg_id not in available_files:
        unmatched.append((cfg_id, row))
    else:
        matched += 1

print(f"Matched: {matched}/{len(topN)}")
print("\nFirst 10 unmatched config IDs:")
for cfg_id, row in unmatched[:10]:
    print(f"  {cfg_id}")
    print(f"    cash_reserve={row['min_cash_reserve']}, mnw={row['min_new_position_weight']}")

Available files: 48
Matched: 48/48

First 10 unmatched config IDs:


In [8]:
from pathlib import Path

# List every parquet stem in the daily-returns folder, alphabetically.
returns_folder = Path(DAILY_RETURNS_DIR)
files = sorted(returns_folder.glob("*.parquet"))
print("Total files: {}\n".format(len(files)))
for f in files:
    print(f.stem)

Total files: 48

lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0050
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0100
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0200
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0200_mnw0300
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0050
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0100
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0200
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr0500_mnw0300
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0050
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0100
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0200
lb90_atr20_tp0950_mtv10000_cap1200_cash10000_dr1000_mnw0300
lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0050
lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0100
lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0200
lb90_atr20_tp0950_mtv10000_cap1200_cash5000_dr0200_mnw0300
lb90_atr20_tp0950_mtv10000_

In [9]:
import pandas as pd

# Pick one config and load its data
# Take the first loaded config and look at how many rows fall in one window.
k = list(returns_data)[0]
r = returns_data[k]

first_date, last_date = r["date"].min(), r["date"].max()
print(f"Config: {k}")
print(f"Total rows: {len(r)}")
print(f"Date range: {first_date} to {last_date}")

# Check a skipped window - 2005-01-01 training for 2006-01-01 test
train_start = pd.Timestamp("2005-01-01")
test_start = pd.Timestamp("2006-01-01")

in_window = (r["date"] >= train_start) & (r["date"] < test_start)
train = r.loc[in_window]
print(f"\nTraining window: {train_start.date()} to {test_start.date()}")
print(f"Rows found: {len(train)}")
print(f"Required (TRADING_DAYS): {TRADING_DAYS}")

if len(train) == 0:
    print("NO DATA in this range!")
else:
    print(f"Actual date range: {train['date'].min()} to {train['date'].max()}")

# Check what dates ARE in the data around that period
print("\nDates in 2005:")
dates_2005 = r.loc[r["date"].dt.year == 2005]
print(f"Count: {len(dates_2005)}")
if len(dates_2005) > 0:
    print(f"Range: {dates_2005['date'].min()} to {dates_2005['date'].max()}")

Config: (90, 20, 0.95, 10000.0, 0.12, 5000.0, 0.02, 0.02)
Total rows: 6790
Date range: 1999-01-04 00:00:00 to 2025-12-30 00:00:00

Training window: 2005-01-01 to 2006-01-01
Rows found: 252
Required (TRADING_DAYS): 252
Actual date range: 2005-01-03 00:00:00 to 2005-12-30 00:00:00

Dates in 2005:
Count: 252
Range: 2005-01-03 00:00:00 to 2005-12-30 00:00:00


In [10]:
import pandas as pd
import numpy as np

# Re-run the selection filter for a single window and report which configs
# pass and, for those that fail, why.
train_start = pd.Timestamp("2005-01-01")
test_start = pd.Timestamp("2006-01-01")

print(f"Checking all {len(top_loaded)} configs for window {train_start.date()} → {test_start.date()}")
print("=" * 80)

passing = []
failing = []

for row in top_loaded:
    k = config_key(row)
    r = returns_data[k]

    in_window = (r["date"] >= train_start) & (r["date"] < test_start)
    train = r.loc[in_window]
    n_rows = len(train)

    if n_rows < TRADING_DAYS:
        failing.append((k, n_rows, "not enough days"))
        continue

    score, stats = score_training_slice(train["strat_ret"], SELECTION_METRIC)

    if np.isnan(score):
        failing.append((k, n_rows, f"score is NaN, cagr={stats['cagr']:.4f}"))
    else:
        passing.append((k, n_rows, score))

print(f"\nPassing: {len(passing)}")
print(f"Failing: {len(failing)}")

if len(failing) > 0:
    print("\nFirst 10 failing configs:")
    for k, n_days, reason in failing[:10]:
        print(f"  {k}: {n_days} days, reason: {reason}")

if len(passing) > 0:
    print("\nFirst 5 passing configs:")
    for k, n_days, score in passing[:5]:
        print(f"  {k}: {n_days} days, score={score:.4f}")

Checking all 48 configs for window 2005-01-01 → 2006-01-01

Passing: 48
Failing: 0

First 5 passing configs:
  (90, 20, 0.95, 10000.0, 0.12, 5000.0, 0.02, 0.02): 252 days, score=0.1651
  (90, 20, 0.95, 10000.0, 0.12, 5000.0, 0.02, 0.01): 252 days, score=0.1651
  (90, 20, 0.95, 10000.0, 0.12, 5000.0, 0.02, 0.005): 252 days, score=0.1651
  (90, 20, 0.95, 10000.0, 0.12, 10000.0, 0.02, 0.02): 252 days, score=0.1645
  (90, 20, 0.95, 10000.0, 0.12, 10000.0, 0.02, 0.01): 252 days, score=0.1645


In [11]:
import pandas as pd

# Rebuild and list the walk-forward windows for a 2000-01-01 start.
# Note: this cell reassigns the START_DATE / END_DATE / TRAIN_YEARS / TEST_YEARS
# globals, so cells executed afterwards see these values.
START_DATE = pd.Timestamp("2000-01-01")
END_DATE   = pd.Timestamp("2025-12-24")
TRAIN_YEARS = 1
TEST_YEARS  = 1

test_starts = pd.date_range(start=START_DATE, end=END_DATE, freq="YS")
windows = []
for test_start in test_starts:
    train_start = test_start - pd.DateOffset(years=TRAIN_YEARS)
    test_end    = test_start + pd.DateOffset(years=TEST_YEARS)
    # Only keep windows whose training span starts on or after START_DATE.
    if train_start >= START_DATE:
        # Anchors ascend, so stop at the first test span that overruns END_DATE.
        if test_end > END_DATE:
            break
        windows.append((train_start, test_start, test_end))

print(f"Total windows: {len(windows)}\n")
for i, (train_start, test_start, test_end) in enumerate(windows):
    print(f"{i+1}: train={train_start.date()} → test={test_start.date()} → {test_end.date()}")
    

Total windows: 24

1: train=2000-01-01 → test=2001-01-01 → 2002-01-01
2: train=2001-01-01 → test=2002-01-01 → 2003-01-01
3: train=2002-01-01 → test=2003-01-01 → 2004-01-01
4: train=2003-01-01 → test=2004-01-01 → 2005-01-01
5: train=2004-01-01 → test=2005-01-01 → 2006-01-01
6: train=2005-01-01 → test=2006-01-01 → 2007-01-01
7: train=2006-01-01 → test=2007-01-01 → 2008-01-01
8: train=2007-01-01 → test=2008-01-01 → 2009-01-01
9: train=2008-01-01 → test=2009-01-01 → 2010-01-01
10: train=2009-01-01 → test=2010-01-01 → 2011-01-01
11: train=2010-01-01 → test=2011-01-01 → 2012-01-01
12: train=2011-01-01 → test=2012-01-01 → 2013-01-01
13: train=2012-01-01 → test=2013-01-01 → 2014-01-01
14: train=2013-01-01 → test=2014-01-01 → 2015-01-01
15: train=2014-01-01 → test=2015-01-01 → 2016-01-01
16: train=2015-01-01 → test=2016-01-01 → 2017-01-01
17: train=2016-01-01 → test=2017-01-01 → 2018-01-01
18: train=2017-01-01 → test=2018-01-01 → 2019-01-01
19: train=2018-01-01 → test=2019-01-01 → 2020-01-01
20