# 0.0 — Data Split
Split each trader's engineered dataset into train / validation / test sets (70/15/15).

In [None]:
import pandas as pd
import numpy as np
import os

# Root folder holding one sub-directory per trader; the notebook is meant to
# be run from the trading_datasets root.
BASE = '.'

# Trader profiles to process; each name doubles as a directory name and a
# file-name prefix.
TRADERS = [
    'calm_trader',
    'loss_averse_trader',
    'overtrader',
    'revenge_trader',
]

# Train and validation fractions (the notebook header describes a 70/15/15
# split, so the remaining rows form the test set).
TRAIN_RATIO = 0.70
VAL_RATIO = 0.15


## Load & Inspect

In [None]:
# Load every trader's engineered dataset into a dict keyed by trader name.
dfs = {}
for t in TRADERS:
    # Expected layout: <BASE>/<trader>/data/engineered/<trader>_engineered.csv
    path = os.path.join(BASE, t, 'data', 'engineered', f'{t}_engineered.csv')
    dfs[t] = pd.read_csv(path, parse_dates=['timestamp'])
    # The train/val/test split in this notebook is purely positional, so row
    # order must be chronological. Enforce that here rather than trusting the
    # CSV; the stable sort keeps the original order of rows sharing a
    # timestamp and is skipped entirely when the data is already ordered.
    if not dfs[t]['timestamp'].is_monotonic_increasing:
        dfs[t] = dfs[t].sort_values('timestamp', kind='stable').reset_index(drop=True)
    print(f"{t}: {dfs[t].shape}")


## Chronological Split
We split by row order, which is assumed to be chronological. Rows are never shuffled, so the time-series ordering is preserved and no future observations leak into the training set.

In [None]:
def split_df(df, train_r=0.70, val_r=0.15):
    """Split *df* by row position into train, validation and test parts.

    Rows are taken in their existing order (no shuffling): the first
    ``train_r`` fraction becomes the training set, the next ``val_r``
    fraction the validation set, and whatever remains the test set.

    Args:
        df: DataFrame to split; it is sliced with ``iloc`` and not modified.
        train_r: Fraction of rows assigned to the training set.
        val_r: Fraction of rows assigned to the validation set.

    Returns:
        A ``(train, val, test)`` tuple of positional slices of *df*.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1, which would otherwise silently yield wrong or empty
            partitions.
    """
    # Small tolerance so sums that land a hair above 1.0 because of float
    # rounding are not rejected.
    if train_r < 0 or val_r < 0 or train_r + val_r > 1 + 1e-9:
        raise ValueError(
            f"train_r and val_r must be non-negative and sum to at most 1 "
            f"(got train_r={train_r}, val_r={val_r})"
        )

    n = len(df)
    # Boundaries are truncated toward zero, so any leftover rows from
    # rounding end up in the later partitions.
    i_train = int(n * train_r)
    i_val = int(n * (train_r + val_r))
    return df.iloc[:i_train], df.iloc[i_train:i_val], df.iloc[i_val:]

# Split every trader's dataset and keep the three parts under their names.
splits = {}
for t in TRADERS:
    # Pass the notebook-level ratios explicitly so that editing TRAIN_RATIO /
    # VAL_RATIO actually changes the split instead of being silently ignored
    # in favour of split_df's own defaults.
    train, val, test = split_df(dfs[t], train_r=TRAIN_RATIO, val_r=VAL_RATIO)
    splits[t] = {'train': train, 'val': val, 'test': test}
    print(f"{t}: train={len(train)}, val={len(val)}, test={len(test)}")


## Save Splits

In [None]:
# Write each trader's partitions to <BASE>/<trader>/data/processed/ as
# <trader>_<split>.csv, without the index column.
for trader in TRADERS:
    processed_dir = os.path.join(BASE, trader, 'data', 'processed')
    # Create the target folder on first run; harmless if it already exists.
    os.makedirs(processed_dir, exist_ok=True)

    for part, frame in splits[trader].items():
        csv_name = f'{trader}_{part}.csv'
        csv_path = os.path.join(processed_dir, csv_name)
        frame.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")


## Verify Class Balance per Split

In [None]:
# Report the share of rows with a truthy 'win' value in every partition so
# the class balance of train / val / test can be compared per trader.
for trader in TRADERS:
    print(f"\n── {trader} ──")
    for part, frame in splits[trader].items():
        # Mean of the 'win' column expressed as a percentage.
        win_pct = frame['win'].mean() * 100
        print(f"  {part}: win_rate={win_pct:.1f}%")
