In [4]:
from __future__ import annotations
import numpy as np
import pandas as pd
from typing import Dict, Any
from itertools import product
from collections import Counter
from tqdm import tqdm
from scipy.stats import chi2_contingency
from statsmodels.sandbox.stats.runs import runstest_1samp
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from pprint import pprint
from scipy.interpolate import interp1d
from scipy.stats import  gaussian_kde
from scipy.stats import ks_2samp

In [5]:
# Load the cleaned Apple price history (DATE / weekday / OPEN / CLOSE / VOL
# columns, per the preview below) and show the first rows as a sanity check.
apple = pd.read_csv(
    filepath_or_buffer=r"D:\data\notebooks\week-4\cleaned_apple.csv",
)

apple.head()

Unnamed: 0,DATE,weekday,OPEN,CLOSE,VOL
0,1984-09-07,Friday,0.10122,0.10122,97236149.0
1,1984-09-10,Monday,0.10122,0.10062,75471114.0
2,1984-09-11,Tuesday,0.10153,0.10246,177965367.0
3,1984-09-12,Wednesday,0.10246,0.09938,155467926.0
4,1984-09-13,Thursday,0.1049,0.1049,242135546.0


In [None]:

def full_strategy_pipeline(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the weekly Tue→Thu trading pipeline end to end and return a report card.

    Steps:
      - Builds a weekly dataset with the first Tuesday/Thursday OPEN of each
        week and the ``thu/tue`` multiplier (label ``week_type`` = 1 when > 1).
      - Rolling train / validate / test with a DecisionTreeClassifier: for every
        test week ``t`` the model is fitted on all weeks before the validation
        window, and (depth, leaf, threshold) is picked on the ``VALID_WEEKS``
        weeks immediately preceding ``t``.
      - Computes confusion counts, precision, chattiness and correctness.
      - Runs test for randomness of the correctness sequence.
      - Chi-square uniformity test of correctness across time bins.
      - Historical Monte Carlo per subset, sampling TP/FP multipliers from
        their empirical distributions.
      - Future Monte Carlo from the last subset that was usable for the
        historical Monte Carlo.
      - Baselines: always trade, random trader, alternate-week trader,
        weighted-coin trader.

    Parameters
    ----------
    params : dict
        Required keys:
          ``df`` (DataFrame with ``DATE`` and ``OPEN`` columns),
          ``VALID_WEEKS``, ``depth_grid``, ``leaf_grid``, ``thresholds_tested``,
          ``FIXED`` (extra DecisionTreeClassifier keyword arguments),
          ``alpha_p``, ``alpha_c``, ``p_min``, ``c_min`` (score weights),
          ``n_subsets``, ``n_trajectories``, ``n_weeks``, ``initial_bank``,
          ``upper_thresh``, ``lower_thresh``, ``rng_seed`` (Monte Carlo),
          ``uniformity_binsize`` (weeks per chi-square bin).

    Returns
    -------
    dict
        Report card with the sections ``historical_mc``, ``future_mc``,
        ``internal_metrics``, ``baseline_comparison``, ``uniformity_test`` and
        ``randomness_test``. The report is also pretty-printed.

    Raises
    ------
    KeyError
        If a required key is missing from ``params``.
    ValueError
        If there are not enough usable weeks for a single rolling step, if no
        week could be evaluated, or if no subset contains at least two TP and
        two FP weeks for the Monte Carlo.

    Notes
    -----
    The caller's DataFrame is never modified; all derived columns live on an
    internal copy.
    """

    # ============================================================
    # --------------------------- INPUTS --------------------------
    # ============================================================

    df = params["df"]

    # Rolling / model config
    VALID_WEEKS       = params["VALID_WEEKS"]
    depth_grid        = params["depth_grid"]
    leaf_grid         = params["leaf_grid"]
    thresholds_tested = params["thresholds_tested"]
    FIXED             = params["FIXED"]

    # Scoring weights
    alpha_p = params["alpha_p"]
    alpha_c = params["alpha_c"]
    p_min   = params["p_min"]
    c_min   = params["c_min"]

    # Monte Carlo settings
    n_subsets      = params["n_subsets"]
    n_trajectories = params["n_trajectories"]
    n_weeks        = params["n_weeks"]
    initial_bank   = params["initial_bank"]
    upper_thresh   = params["upper_thresh"]
    lower_thresh   = params["lower_thresh"]
    rng_seed       = params["rng_seed"]

    # Uniformity
    uniformity_binsize = params["uniformity_binsize"]

    rng = np.random.default_rng(rng_seed)

    # ============================================================
    # ---------- PART I: CLEANING + WEEKLY DATASET ---------------
    # ============================================================

    # Parse dates BEFORE sorting so that non-ISO date strings are ordered
    # chronologically rather than lexically. ``assign`` returns a new frame,
    # so the caller's DataFrame stays untouched.
    df = (
        df.assign(DATE=pd.to_datetime(df["DATE"]))
          .sort_values("DATE")
          .reset_index(drop=True)
    )

    # Expanding z-score of OPEN using only strictly earlier rows (shift(1)).
    open_history = df["OPEN"].expanding()
    df["normalized_open"] = (
        (df["OPEN"] - open_history.mean().shift(1))
        / open_history.std(ddof=0).shift(1)
    )

    df["weekday"] = df["DATE"].dt.weekday          # Monday=0 ... Sunday=6
    df["week"]    = df["DATE"].dt.to_period("W-SUN")

    def first_open_on(day, name):
        """First OPEN per week among the rows that fall on weekday ``day``."""
        return (
            df.loc[df["weekday"] == day]
              .groupby("week")["OPEN"]
              .first()
              .rename(name)
        )

    def norm_open_on(day, name):
        """Week-indexed normalized OPEN of the rows that fall on weekday ``day``."""
        return (
            df.loc[df["weekday"] == day]
              .set_index("week")["normalized_open"]
              .rename(name)
        )

    weekly = pd.concat(
        [first_open_on(1, "tue_open"), first_open_on(3, "thu_open")],
        axis=1,
    )

    # tue → thu multiplier, its percentage form, and the binary label
    weekly["thu/tue"]   = weekly["thu_open"] / weekly["tue_open"]
    weekly["net%"]      = (weekly["thu/tue"] - 1.0) * 100.0
    weekly["week_type"] = (weekly["thu/tue"] > 1.0).astype(int)

    # NOTE: shift(1) moves each value down by one ROW of the Thursday-only /
    # Friday-only series, i.e. a week receives the previous *available*
    # Thursday/Friday value, and only weeks that themselves contain that
    # weekday get a value at all.
    weekly_full_norm = (
        weekly
        .join(norm_open_on(1, "Norm_Tue_Open"), how="left")
        .join(norm_open_on(3, "Norm_PrevThu_Open").shift(1), how="left")
        .join(norm_open_on(4, "Norm_PrevFri_Open").shift(1), how="left")
        # A zero expanding std produces +/-inf, which dropna() would keep and
        # the tree fit would later reject; treat it as missing instead.
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    features = ["Norm_PrevThu_Open", "Norm_PrevFri_Open", "Norm_Tue_Open"]
    target   = "week_type"

    # ============================================================
    # ---------- PART II: ROLLING TRAIN-VAL-TEST -----------------
    # ============================================================

    def precision(tp, fp):
        """TP / (TP + FP), or 0.0 when nothing was predicted positive."""
        denom = tp + fp
        return tp / denom if denom > 0 else 0.0

    def chattiness(tp, fp, fn):
        """Predicted positives per actual positive, or 0.0 with no positives."""
        denom = tp + fn
        return (tp + fp) / denom if denom > 0 else 0.0

    def model_score(tp, fp, fn):
        """Exponential score rewarding precision and chattiness above their floors."""
        s = np.exp(
            alpha_p * (precision(tp, fp) - p_min)
            + alpha_c * (chattiness(tp, fp, fn) - c_min)
        )
        return float(s) if np.isfinite(s) else 0.0

    n_rows     = len(weekly_full_norm)
    first_test = VALID_WEEKS + 1

    # Fail fast: with this few rows the rolling loop would not run at all.
    if n_rows <= first_test:
        raise ValueError(
            f"Only {n_rows} usable weeks for VALID_WEEKS={VALID_WEEKS}; "
            "df_final would be empty. Check VALID_WEEKS and data length."
        )

    # Pull labels / multipliers out as NumPy arrays once; the threshold search
    # below is executed thousands of times per week and pandas reductions on
    # tiny Series dominate the runtime otherwise.
    X_all    = weekly_full_norm[features]
    y_all    = weekly_full_norm[target].to_numpy()
    mult_all = weekly_full_norm["thu/tue"].to_numpy()

    outcome_of = {(1, 1): "TP", (0, 0): "TN", (1, 0): "FP", (0, 1): "FN"}
    confusion  = dict.fromkeys(("TP", "TN", "FP", "FN"), 0)
    weekly_best = []

    for t in tqdm(range(first_test, n_rows), desc="Rolling simulation"):
        # t >= VALID_WEEKS + 1, so val_start is always >= 1.
        val_start = t - VALID_WEEKS
        train_y   = y_all[:val_start]

        # need both classes in training
        if np.unique(train_y).size < 2:
            continue

        train_X = X_all.iloc[:val_start]
        val_X   = X_all.iloc[val_start:t]
        test_X  = X_all.iloc[[t]]

        val_labels = y_all[val_start:t]
        val_pos    = val_labels == 1
        val_neg    = val_labels == 0

        best_score  = -np.inf
        best_params = None
        best_model  = None

        for depth, leaf in product(depth_grid, leaf_grid):
            model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=leaf, **FIXED)
            model.fit(train_X, train_y)
            probs_val = model.predict_proba(val_X)[:, 1]

            for thr in thresholds_tested:
                flagged = probs_val > thr
                tp = int(np.count_nonzero(flagged & val_pos))
                fp = int(np.count_nonzero(flagged & val_neg))
                fn = int(np.count_nonzero(~flagged & val_pos))
                sc = model_score(tp, fp, fn)
                # Strict ">" keeps the first combination on ties.
                if sc > best_score:
                    best_score  = sc
                    best_params = (depth, leaf, thr)
                    best_model  = model

        best_depth, best_leaf, best_thr = best_params
        p_hat = best_model.predict_proba(test_X)[0, 1]
        pred  = int(p_hat > best_thr)
        true  = int(y_all[t])

        outcome = outcome_of[(pred, true)]
        confusion[outcome] += 1

        weekly_best.append(dict(
            Week=t,
            Best_Depth=best_depth,
            Best_Leaf=best_leaf,
            Best_Threshold=float(best_thr),
            Best_Score=best_score,
            True_Label=true,
            Pred_Label=pred,
            Outcome=outcome,
            thu_tue=float(mult_all[t]),
        ))

    df_final = pd.DataFrame(weekly_best)
    if df_final.empty:
        raise ValueError("df_final is empty; check VALID_WEEKS and data length.")

    # ============================================================
    # ---------------- BASIC METRICS + TESTS ---------------------
    # ============================================================

    TP, TN, FP, FN = (confusion[k] for k in ("TP", "TN", "FP", "FN"))

    total = TP + TN + FP + FN
    correctness_rate = (TP + TN) / total if total > 0 else 0.0
    prec_overall     = precision(TP, FP)
    chat_overall     = chattiness(TP, FP, FN)

    denom_pos = TP + FP
    pct_fp_when_pred_pos = FP / denom_pos if denom_pos > 0 else 0.0

    # Runs test on correctness
    df_final["correct"] = (df_final["True_Label"] == df_final["Pred_Label"]).astype(int)
    z_runs, p_runs = runstest_1samp(df_final["correct"], correction=False)
    randomness_test = {
        "H0": "Correctness sequence is random over time.",
        "z_statistic": float(z_runs),
        "p_value": float(p_runs),
    }

    # Uniformity test: consecutive bins of `uniformity_binsize` evaluated weeks
    df_final["chunk"] = df_final.index // uniformity_binsize
    tab = pd.crosstab(df_final["chunk"], df_final["correct"])
    chi2, p_chi, dof, _ = chi2_contingency(tab)
    uniformity_test = {
        "H0": f"Correctness rate is uniform across time (binsize={uniformity_binsize} weeks).",
        "chi2": float(chi2),
        "p_value": float(p_chi),
        "degrees_of_freedom": int(dof),
        "binsize": int(uniformity_binsize),
    }

    def longest_streak(seq, label):
        """Length of the longest run of consecutive items equal to ``label``."""
        best = 0
        cur  = 0
        for x in seq:
            cur = cur + 1 if x == label else 0
            best = max(best, cur)
        return best

    longest_tp = longest_streak(df_final["Outcome"], "TP")
    longest_fp = longest_streak(df_final["Outcome"], "FP")

    # ============================================================
    # ----------------- PART III: HISTORICAL MC ------------------
    # ============================================================

    # Order matters: index 0 = TP and 1 = FP are the trading outcomes.
    outcomes_arr = np.array(["TP", "FP", "FN", "TN"])

    def make_empirical_sampler(values):
        """Return a function drawing from the empirical distribution of ``values``."""
        ordered = np.sort(values)
        n = len(ordered)
        cdf = np.arange(1, n + 1) / n

        def draw(size=1):
            u   = rng.uniform(0.0, 1.0, size)
            idx = np.searchsorted(cdf, u, side="right")
            return ordered[np.clip(idx, 0, n - 1)]

        return draw

    def build_samplers(tp_vals, fp_vals):
        """Empirical samplers for the TP and FP multipliers (in that order)."""
        return make_empirical_sampler(tp_vals), make_empirical_sampler(fp_vals)

    def outcome_probabilities(sub):
        """Relative frequency of each outcome in ``sub``, ordered like outcomes_arr."""
        freq = sub["Outcome"].value_counts(normalize=True)
        return np.array([freq.get(k, 0.0) for k in outcomes_arr])

    def run_mc_block(p, sample_tp, sample_fp):
        """Simulate ``n_trajectories`` bank paths of at most ``n_weeks`` weeks."""
        cdf = np.cumsum(p)

        # Floating-point rounding can leave cdf[-1] slightly below 1.0, in
        # which case searchsorted returns len(p) and indexing would fail.
        # Clamp to the last outcome that actually has positive probability.
        live = np.flatnonzero(np.asarray(p) > 0)
        last_live = int(live[-1]) if live.size else len(cdf) - 1

        final_banks = np.empty(n_trajectories)

        for i in range(n_trajectories):
            bank = initial_bank
            for _ in range(n_weeks):
                idx = min(int(np.searchsorted(cdf, rng.random(), side="right")), last_live)
                if idx == 0:        # TP: trade, winning multiplier
                    bank *= float(sample_tp(1)[0])
                elif idx == 1:      # FP: trade, losing multiplier
                    bank *= float(sample_fp(1)[0])
                # FN / TN: no trade, bank unchanged
                if bank >= upper_thresh or bank <= lower_thresh:
                    break
            final_banks[i] = bank

        return final_banks

    def run_actual(sub: pd.DataFrame) -> float:
        """Replay the model's real trades over ``sub`` with the same stop rules."""
        bank = initial_bank
        for outcome, mult in zip(sub["Outcome"], sub["thu_tue"]):
            if outcome in ("TP", "FP"):
                bank *= mult
            if bank >= upper_thresh or bank <= lower_thresh:
                break
        return bank

    # Split df_final into approx equal, contiguous subsets. Splitting the row
    # positions (instead of the DataFrame itself) yields the same partition
    # without going through pandas' deprecated array_split code path.
    position_groups = np.array_split(np.arange(len(df_final)), n_subsets)
    subsets = [df_final.iloc[pos] for pos in position_groups if len(pos) > 0]

    all_sims = []
    actual_balances = []
    null_percentiles = []
    valid_mc_subsets = []

    for sub in subsets:
        tp_vals = sub.loc[sub["Outcome"] == "TP", "thu_tue"].values
        fp_vals = sub.loc[sub["Outcome"] == "FP", "thu_tue"].values

        # Need at least two observations of each kind to sample from.
        if len(tp_vals) < 2 or len(fp_vals) < 2:
            continue

        sample_tp, sample_fp = build_samplers(tp_vals, fp_vals)
        sim = run_mc_block(outcome_probabilities(sub), sample_tp, sample_fp)
        all_sims.append(sim)

        actual = run_actual(sub)
        actual_balances.append(actual)

        # Fraction of simulated paths ending at or below the realised balance.
        null_percentiles.append(float(np.mean(sim <= actual)))

        valid_mc_subsets.append(sub)

    if not all_sims:
        raise ValueError("No valid subsets for Monte Carlo (need TP and FP in some windows).")

    sim_all = np.concatenate(all_sims)
    actual_balances = np.array(actual_balances)

    simulated_mean_balance   = float(np.mean(sim_all))
    simulated_median_balance = float(np.median(sim_all))

    ks_distance, ks_p_value = ks_2samp(actual_balances, sim_all)
    average_null_percentile = float(np.mean(null_percentiles))

    # ============================================================
    # --------------------- PART IV: FUTURE MC -------------------
    # ============================================================

    last = valid_mc_subsets[-1]
    tp_vals_last = last.loc[last["Outcome"] == "TP", "thu_tue"].values
    fp_vals_last = last.loc[last["Outcome"] == "FP", "thu_tue"].values

    sample_tp_last, sample_fp_last = build_samplers(tp_vals_last, fp_vals_last)
    fut = run_mc_block(outcome_probabilities(last), sample_tp_last, sample_fp_last)

    future_mean_balance   = float(np.mean(fut))
    future_median_balance = float(np.median(fut))

    prob_success   = float(np.mean(fut >= upper_thresh))
    prob_failure   = float(np.mean(fut <= lower_thresh))
    prob_uncertain = float(1.0 - prob_success - prob_failure)

    # ============================================================
    # --------------------- INTERNAL METRICS ---------------------
    # ============================================================

    TP_ret = df_final.loc[df_final["Outcome"] == "TP", "thu_tue"].values
    FP_ret = df_final.loc[df_final["Outcome"] == "FP", "thu_tue"].values

    denom_trades = len(TP_ret) + len(FP_ret)
    precision_on_trades = float(len(TP_ret) / denom_trades) if denom_trades > 0 else 0.0
    trade_frequency     = float(denom_trades / len(df_final))

    mistake_asymmetry = float(np.mean(TP_ret) - np.mean(FP_ret)) if len(TP_ret) and len(FP_ret) else np.nan

    gains  = df_final.loc[df_final["thu_tue"] > 1.0, "thu_tue"].values
    losses = df_final.loc[df_final["thu_tue"] < 1.0, "thu_tue"].values

    if len(gains) > 0 and len(losses) > 0:
        macro_return_ratio = float(np.mean(gains) / abs(np.mean(losses)))
        # Pairs the i-th gain week with the i-th loss week (chronological order).
        micro_pairs = min(len(gains), len(losses))
        micro_return_ratio = float(np.mean(gains[:micro_pairs] / losses[:micro_pairs]))
        return_ratio_gap   = float(macro_return_ratio - micro_return_ratio)
    else:
        macro_return_ratio = np.nan
        micro_return_ratio = np.nan
        return_ratio_gap   = np.nan

    # ============================================================
    # -------------------- BASELINE COMPARISONS ------------------
    # ============================================================

    def simulate_gated(multipliers, should_trade):
        """Bank after walking ``multipliers``, trading week i iff should_trade(i)."""
        bank = initial_bank
        for i, mult in enumerate(multipliers):
            if should_trade(i):
                bank *= mult
            if bank >= upper_thresh or bank <= lower_thresh:
                break
        return bank

    def ratio_to(model_bal, baseline_bal):
        """Model balance relative to a baseline balance (NaN if baseline is 0)."""
        return model_bal / baseline_bal if baseline_bal != 0 else np.nan

    ratio_always   = []
    ratio_random   = []
    ratio_alt      = []
    ratio_weighted = []

    for sub in subsets:
        model_bal = run_actual(sub)
        weekly_mults = sub["thu_tue"]

        # Always trade
        ratio_always.append(ratio_to(
            model_bal, simulate_gated(weekly_mults, lambda i: True)))

        # Random trader with the model's own trade frequency in this subset
        trade_prob = int(sub["Outcome"].isin(["TP", "FP"]).sum()) / len(sub)
        ratio_random.append(ratio_to(
            model_bal, simulate_gated(weekly_mults, lambda i: rng.random() < trade_prob)))

        # Alternate-week trader (trades weeks 0, 2, 4, ... of the subset)
        ratio_alt.append(ratio_to(
            model_bal, simulate_gated(weekly_mults, lambda i: i % 2 == 0)))

        # Weighted-coin trader (trade prob = fraction of >1 weeks)
        good_rate = float(np.mean(sub["thu_tue"] > 1.0))
        ratio_weighted.append(ratio_to(
            model_bal, simulate_gated(weekly_mults, lambda i: rng.random() < good_rate)))

    # ============================================================
    # ------------------------ FINAL REPORT ----------------------
    # ============================================================

    report = {
        "historical_mc": {
            "simulated_mean_balance": simulated_mean_balance,
            "simulated_median_balance": simulated_median_balance,
            "ks_distance": float(ks_distance),
            "ks_p_value": float(ks_p_value),
            "average_null_percentile": average_null_percentile,
        },
        "future_mc": {
            "future_mean_balance": future_mean_balance,
            "future_median_balance": future_median_balance,
            "prob_success": prob_success,
            "prob_failure": prob_failure,
            "prob_uncertain": prob_uncertain,
        },
        "internal_metrics": {
            "precision_overall": float(prec_overall),
            "chattiness_overall": float(chat_overall),
            "correctness_rate": float(correctness_rate),
            "precision_on_trades": precision_on_trades,
            "trade_frequency": trade_frequency,
            "mistake_asymmetry_TP_minus_FP": mistake_asymmetry,
            "macro_return_ratio": macro_return_ratio,
            "micro_return_ratio": micro_return_ratio,
            "return_ratio_gap": return_ratio_gap,
            "longest_TP_streak": int(longest_tp),
            "longest_FP_streak": int(longest_fp),
            "%FP_when_predicted_positive": float(pct_fp_when_pred_pos),
        },
        "baseline_comparison": {
            "ratio_vs_always_trade": float(np.nanmean(ratio_always)),
            "ratio_vs_random_trader": float(np.nanmean(ratio_random)),
            "ratio_vs_alternate_week_trader": float(np.nanmean(ratio_alt)),
            "ratio_vs_weighted_coin_trader": float(np.nanmean(ratio_weighted)),
        },
        "uniformity_test": uniformity_test,
        "randomness_test": randomness_test,
    }

    print("\n===== MODEL REPORT CARD =====")
    pprint(report)

    return report


In [11]:
# Configuration for one end-to-end run of full_strategy_pipeline on the
# Apple price data loaded above.
params = dict(
    # --- core data ---
    df=apple,   # cleaned OHLC dataframe with DATE, OPEN, CLOSE

    # --- rolling window + model search ---
    VALID_WEEKS=52,
    depth_grid=list(range(2, 7)),
    leaf_grid=list(range(2, 7)),
    thresholds_tested=np.linspace(0.01, 0.99, 99),

    # keyword arguments shared by every DecisionTreeClassifier in the grid
    FIXED=dict(
        criterion="entropy",
        min_samples_split=6,
        class_weight="balanced",
        random_state=42,
    ),

    # --- scoring weights ---
    alpha_p=1.0,
    alpha_c=0.01,
    p_min=0.55,
    c_min=0.10,

    # --- Monte Carlo (historical + future) ---
    n_subsets=18,
    n_trajectories=100_000,
    n_weeks=100,
    initial_bank=100.0,
    upper_thresh=200.0,
    lower_thresh=60.0,
    rng_seed=42,

    # --- uniformity test ---
    uniformity_binsize=104,
)

results = full_strategy_pipeline(params)

Rolling simulation: 100%|██████████| 1909/1909 [53:40<00:00,  1.69s/it]



===== MODEL REPORT CARD =====
{'baseline_comparison': {'ratio_vs_alternate_week_trader': 1.188748656120587,
                         'ratio_vs_always_trade': 1.1551936421319584,
                         'ratio_vs_random_trader': 1.1987025526240644,
                         'ratio_vs_weighted_coin_trader': 1.1314085477320683},
 'future_mc': {'future_mean_balance': 130.23050665682405,
               'future_median_balance': 128.22582303621516,
               'prob_failure': 0.00039,
               'prob_success': 0.01272,
               'prob_uncertain': 0.98689},
 'historical_mc': {'average_null_percentile': 0.5226894444444444,
                   'ks_distance': 0.09601944444444446,
                   'ks_p_value': 0.9905969629687478,
                   'simulated_mean_balance': 128.24714802704327,
                   'simulated_median_balance': 121.1748874333227},
 'internal_metrics': {'%FP_when_predicted_positive': 0.4537987679671458,
                      'chattiness_overall': 0.99387

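# Revised version of full_strategy_pipeline with a few changes relative to the
# one above (e.g. percentage-based mistake asymmetry, added prob_above_initial).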

In [None]:
def full_strategy_pipeline(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Weekly trading pipeline:
      - Builds weekly Tue→Thu dataset with thu/tue multipliers.
      - Rolling train/validate/test Decision Tree with threshold tuning.
      - Computes confusion counts, precision, chattiness, correctness.
      - Runs test for randomness of correctness.
      - Uniformity (chi-square) across time with a chosen bin size.
      - Historical Monte Carlo using empirical TP/FP multipliers.
      - Future Monte Carlo using last subset.
      - Baseline comparisons.
      - Returns a report-card dictionary + prints it.
    """

    # ============================================================
    # --------------------------- INPUTS --------------------------
    # ============================================================

    df = params["df"]

    VALID_WEEKS       = params["VALID_WEEKS"]
    depth_grid        = params["depth_grid"]
    leaf_grid         = params["leaf_grid"]
    thresholds_tested = params["thresholds_tested"]
    FIXED             = params["FIXED"]

    alpha_p = params["alpha_p"]
    alpha_c = params["alpha_c"]
    p_min   = params["p_min"]
    c_min   = params["c_min"]

    n_subsets      = params["n_subsets"]
    n_trajectories = params["n_trajectories"]
    n_weeks        = params["n_weeks"]
    initial_bank   = params["initial_bank"]
    upper_thresh   = params["upper_thresh"]
    lower_thresh   = params["lower_thresh"]
    rng_seed       = params["rng_seed"]

    uniformity_binsize = params["uniformity_binsize"]

    rng = np.random.default_rng(rng_seed)

    # ============================================================
    # ---------- PART I: CLEANING + WEEKLY DATASET ---------------
    # ============================================================

    df = df.sort_values("DATE").reset_index(drop=True)
    df["DATE"] = pd.to_datetime(df["DATE"])

    df["normalized_close"] = (
        (df["CLOSE"] - df["CLOSE"].expanding().mean().shift(1))
        / df["CLOSE"].expanding().std(ddof=0).shift(1)
    )
    df["normalized_open"] = (
        (df["OPEN"] - df["OPEN"].expanding().mean().shift(1))
        / df["OPEN"].expanding().std(ddof=0).shift(1)
    )

    df["weekday"] = df["DATE"].dt.weekday
    df["week"]    = df["DATE"].dt.to_period("W-SUN")

    tue_open = (
        df[df["weekday"] == 1]
        .groupby("week")["OPEN"]
        .first()
        .rename("tue_open")
    )
    thu_open = (
        df[df["weekday"] == 3]
        .groupby("week")["OPEN"]
        .first()
        .rename("thu_open")
    )

    weekly = pd.concat([tue_open, thu_open], axis=1)

    weekly["thu/tue"] = weekly["thu_open"] / weekly["tue_open"]
    weekly["net%"] = (weekly["thu/tue"] - 1.0) * 100.0
    weekly["week_type"] = (weekly["thu/tue"] > 1.0).astype(int)

    norm_tue_open = (
        df[df["weekday"] == 1]
        .set_index("week")["normalized_open"]
        .rename("Norm_Tue_Open")
    )
    norm_prev_thu_open = (
        df[df["weekday"] == 3]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevThu_Open")
        .shift(1)
    )
    norm_prev_fri_open = (
        df[df["weekday"] == 4]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevFri_Open")
        .shift(1)
    )

    weekly_full_norm = (
        weekly.copy()
        .join(norm_tue_open, how="left")
        .join(norm_prev_thu_open, how="left")
        .join(norm_prev_fri_open, how="left")
        .dropna()
    )

    features = ["Norm_PrevThu_Open", "Norm_PrevFri_Open", "Norm_Tue_Open"]
    target   = "week_type"

    # ============================================================
    # ---------- PART II: ROLLING TRAIN-VAL-TEST -----------------
    # ============================================================

    def precision(tp, fp):
        return tp / (tp + fp) if (tp + fp) > 0 else 0.0

    def chattiness(tp, fp, fn):
        return (tp + fp) / (tp + fn) if (tp + fn) > 0 else 0.0

    def model_score(tp, fp, fn):
        P = precision(tp, fp)
        C = chattiness(tp, fp, fn)
        s = np.exp(alpha_p * (P - p_min) + alpha_c * (C - c_min))
        return 0.0 if np.isnan(s) or np.isinf(s) else float(s)

    from sklearn.tree import DecisionTreeClassifier

    TP = TN = FP = FN = 0
    weekly_best = []

    for t in tqdm(range(VALID_WEEKS + 1, len(weekly_full_norm)), desc="Rolling simulation"):

        val_start = max(0, t - VALID_WEEKS)
        training   = weekly_full_norm.iloc[:val_start]
        validation = weekly_full_norm.iloc[val_start:t]
        test       = weekly_full_norm.iloc[[t]]

        if len(training[target].unique()) < 2:
            continue

        train_X, train_y = training[features], training[target]
        val_X, val_y     = validation[features], validation[target]
        test_X, test_y   = test[features], test[target]

        best_score  = -np.inf
        best_params = None
        best_model  = None

        for depth, leaf in product(depth_grid, leaf_grid):
            model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=leaf, **FIXED)
            model.fit(train_X, train_y)
            probs_val = model.predict_proba(val_X)[:, 1]

            for thr in thresholds_tested:
                preds_val = (probs_val > thr).astype(int)
                tp = ((preds_val == 1) & (val_y == 1)).sum()
                fp = ((preds_val == 1) & (val_y == 0)).sum()
                fn = ((preds_val == 0) & (val_y == 1)).sum()
                sc = model_score(tp, fp, fn)
                if sc > best_score:
                    best_score  = sc
                    best_params = (depth, leaf, thr)
                    best_model  = model

        best_depth, best_leaf, best_thr = best_params

        p_hat = best_model.predict_proba(test_X)[0, 1]
        pred  = int(p_hat > best_thr)
        true  = int(test_y.iloc[0])

        if pred == 1 and true == 1:
            TP += 1; outcome = "TP"
        elif pred == 0 and true == 0:
            TN += 1; outcome = "TN"
        elif pred == 1 and true == 0:
            FP += 1; outcome = "FP"
        else:
            FN += 1; outcome = "FN"

        weekly_best.append({
            "Week": t,
            "True_Label": true,
            "Pred_Label": pred,
            "Outcome": outcome,
            "thu_tue": float(test["thu/tue"].iloc[0]),
        })

    df_final = pd.DataFrame(weekly_best)

    # ============================================================
    # ---------------- BASIC METRICS + TESTS ---------------------
    # ============================================================

    total = TP + TN + FP + FN
    prec_overall = precision(TP, FP)
    chat_overall = chattiness(TP, FP, FN)
    correctness_rate = (TP + TN) / total

    pct_fp_positive = FP / (TP + FP) if (TP + FP) > 0 else 0.0

    df_final["correct"] = (df_final["True_Label"] == df_final["Pred_Label"]).astype(int)
    z_runs, p_runs = runstest_1samp(df_final["correct"], correction=False)

    randomness_test = {
        "H0": "Correctness is random in time.",
        "z": float(z_runs),
        "p": float(p_runs)
    }

    df_final["chunk"] = df_final.index // uniformity_binsize
    chi2, p_chi, dof, _ = chi2_contingency(pd.crosstab(df_final["chunk"], df_final["correct"]))

    uniformity_test = {
        "chi2": float(chi2),
        "p": float(p_chi),
        "dof": int(dof),
        "binsize": uniformity_binsize,
    }

    # ---------- longest streaks (Option 2) ----------
    def longest_streak(seq, label):
        best = 0
        cur = 0
        for x in seq:
            if x == label:
                cur += 1
                best = max(best, cur)
            else:
                cur = 0
        return best

    longest_tp = longest_streak(df_final["Outcome"], "TP")
    longest_fp = longest_streak(df_final["Outcome"], "FP")

    # ============================================================
    # ----------------- PART III: HISTORICAL MC ------------------
    # ============================================================

    outcomes_arr = np.array(["TP", "FP", "FN", "TN"])

    def build_samplers(tp_vals, fp_vals):
        tp_sorted = np.sort(tp_vals)
        fp_sorted = np.sort(fp_vals)

        tp_cdf = np.arange(1, len(tp_sorted)+1)/len(tp_sorted)
        fp_cdf = np.arange(1, len(fp_sorted)+1)/len(fp_sorted)

        def sample_tp():
            u = rng.random()
            return tp_sorted[np.searchsorted(tp_cdf, u)]

        def sample_fp():
            u = rng.random()
            return fp_sorted[np.searchsorted(fp_cdf, u)]

        return sample_tp, sample_fp

    def run_mc_block(p, sample_tp, sample_fp):
        cdf = np.cumsum(p)
        final = np.empty(n_trajectories)

        for i in range(n_trajectories):
            bank = initial_bank
            for _ in range(n_weeks):
                r = rng.random()
                idx = np.searchsorted(cdf, r)
                outcome = outcomes_arr[idx]

                if outcome == "TP":
                    bank *= sample_tp()
                elif outcome == "FP":
                    bank *= sample_fp()

                if bank >= upper_thresh or bank <= lower_thresh:
                    break

            final[i] = bank
        return final

    def run_actual(sub):
        bank = initial_bank
        for _, row in sub.iterrows():
            if row["Outcome"] in ("TP", "FP"):
                bank *= row["thu_tue"]
            if bank >= upper_thresh or bank <= lower_thresh:
                break
        return bank

    raw_subsets = np.array_split(df_final, n_subsets)
    subsets = [s for s in raw_subsets if len(s) > 0]

    all_sims = []
    actual_balances = []
    null_percentiles = []
    valid_mc_subsets = []

    for sub in subsets:
        tp_vals = sub.loc[sub["Outcome"]=="TP", "thu_tue"].values
        fp_vals = sub.loc[sub["Outcome"]=="FP", "thu_tue"].values

        if len(tp_vals) < 2 or len(fp_vals) < 2:
            continue

        sample_tp, sample_fp = build_samplers(tp_vals, fp_vals)
        p = sub["Outcome"].value_counts(normalize=True).reindex(outcomes_arr, fill_value=0).values

        sims = run_mc_block(p, sample_tp, sample_fp)
        all_sims.append(sims)

        actual = run_actual(sub)
        actual_balances.append(actual)

        null_percentiles.append(np.mean(sims <= actual))
        valid_mc_subsets.append(sub)

    sim_all = np.concatenate(all_sims)
    actual_balances = np.array(actual_balances)

    simulated_mean = float(sim_all.mean())
    simulated_median = float(np.median(sim_all))
    ks_d, ks_p = ks_2samp(actual_balances, sim_all)
    avg_null = float(np.mean(null_percentiles))

    # ============================================================
    # --------------------- PART IV: FUTURE MC -------------------
    # ============================================================

    last = valid_mc_subsets[-1]

    tp_last = last.loc[last["Outcome"]=="TP", "thu_tue"].values
    fp_last = last.loc[last["Outcome"]=="FP", "thu_tue"].values

    sample_tp_f, sample_fp_f = build_samplers(tp_last, fp_last)
    p_last = last["Outcome"].value_counts(normalize=True).reindex(outcomes_arr, fill_value=0).values

    fut = run_mc_block(p_last, sample_tp_f, sample_fp_f)

    future_mean = float(fut.mean())
    future_median = float(np.median(fut))
    prob_above_initial = float(np.mean(fut > initial_bank))
    prob_success = float(np.mean(fut >= upper_thresh))
    prob_failure = float(np.mean(fut <= lower_thresh))
    prob_uncertain = float(1 - prob_success - prob_failure)

    # ============================================================
    # --------------------- INTERNAL METRICS ---------------------
    # ============================================================

    TP_vals = df_final.loc[df_final["Outcome"]=="TP", "thu_tue"].values
    FP_vals = df_final.loc[df_final["Outcome"]=="FP", "thu_tue"].values

    precision_on_trades = float(len(TP_vals)/(len(TP_vals)+len(FP_vals))) if len(TP_vals)+len(FP_vals)>0 else 0.0
    trade_frequency = float((len(TP_vals)+len(FP_vals))/len(df_final))

    # Asymmetry using percentage return
    tp_pct = (TP_vals - 1) * 100
    fp_pct = (FP_vals - 1) * 100
    mistake_asymmetry = float(tp_pct.mean() - fp_pct.mean()) if len(tp_pct)>0 and len(fp_pct)>0 else np.nan

    gains  = df_final.loc[df_final["thu_tue"]>1, "thu_tue"].values
    losses = df_final.loc[df_final["thu_tue"]<1, "thu_tue"].values

    if len(gains) > 0 and len(losses) > 0:
        macro = float(gains.mean() / abs(losses.mean()))
        micro = float((gains[:len(losses)] / losses[:len(gains)]).mean())
        gap   = float(macro - micro)
    else:
        macro = micro = gap = np.nan

    # ============================================================
    # -------------------- BASELINE COMPARISON -------------------
    # ============================================================

    ratio_always, ratio_random, ratio_alt, ratio_weighted = [], [], [], []

    for sub in subsets:

        model_bal = run_actual(sub)

        # always trade
        b = initial_bank
        for r in sub["thu_tue"]:
            b *= r
            if b >= upper_thresh or b <= lower_thresh:
                break
        ratio_always.append(model_bal/b if b!=0 else np.nan)

        # random with same chattiness
        ch_prob = len(sub.loc[sub["Outcome"].isin(["TP","FP"])])/len(sub)
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random()<ch_prob:
                b*=r
        ratio_random.append(model_bal/b if b!=0 else np.nan)

        # alternate week
        b = initial_bank
        for i,r in enumerate(sub["thu_tue"]):
            if i%2==0:
                b*=r
        ratio_alt.append(model_bal/b if b!=0 else np.nan)

        # weighted coin
        good_rate = float((sub["thu_tue"]>1).mean())
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random()<good_rate:
                b*=r
        ratio_weighted.append(model_bal/b if b!=0 else np.nan)

    # ============================================================
    # ------------------------ FINAL REPORT ----------------------
    # ============================================================

    report = {
        "historical_mc": {
            "simulated_mean": simulated_mean,
            "simulated_median": simulated_median,
            "ks_distance": float(ks_d),
            "ks_p_value": float(ks_p),
            "average_null_percentile": avg_null,
        },
        "future_mc": {
            "future_mean": future_mean,
            "future_median": future_median,
            "prob_above_initial": prob_above_initial,
            "prob_success": prob_success,
            "prob_failure": prob_failure,
            "prob_uncertain": prob_uncertain,
        },
        "internal_metrics": {
            "precision_overall": prec_overall,
            "chattiness_overall": chat_overall,
            "correctness_rate": correctness_rate,
            "precision_on_trades": precision_on_trades,
            "trade_frequency": trade_frequency,
            "mistake_asymmetry_%": mistake_asymmetry,
            "macro_return_ratio": macro,
            "micro_return_ratio": micro,
            "return_ratio_gap": gap,
            "longest_TP_streak": longest_tp,
            "longest_FP_streak": longest_fp,
            "%FP_when_predicted_positive": pct_fp_positive,
        },
        "baseline_comparison": {
            "vs_always_trade": float(np.nanmean(ratio_always)),
            "vs_random_trader": float(np.nanmean(ratio_random)),
            "vs_alternate_trader": float(np.nanmean(ratio_alt)),
            "vs_weighted_coin": float(np.nanmean(ratio_weighted)),
        },
        "uniformity_test": uniformity_test,
        "randomness_test": randomness_test,
    }

    pprint(report)
    return report


In [15]:
params = {
    # Input data: the cleaned daily price frame (DATE, OPEN and CLOSE are read).
    "df": apple,

    # Rolling validation window and decision-tree search grid.
    "VALID_WEEKS": 52,
    "depth_grid": list(range(2, 7)),
    "leaf_grid": list(range(2, 7)),
    "thresholds_tested": np.linspace(0.01, 0.99, num=99),

    # Tree settings held constant for every (depth, leaf) combination.
    "FIXED": {
        "criterion": "entropy",
        "min_samples_split": 6,
        "class_weight": "balanced",
        "random_state": 42,
    },

    # Weights and reference levels of the precision/chattiness validation score.
    "alpha_p": 1.0,
    "alpha_c": 0.01,
    "p_min": 0.55,
    "c_min": 0.10,

    # Monte Carlo settings shared by the historical and the future simulation.
    "n_subsets": 18,
    "n_trajectories": 100_000,
    "n_weeks": 100,
    "initial_bank": 100.0,
    "upper_thresh": 200.0,
    "lower_thresh": 60.0,
    "rng_seed": 42,

    # Number of consecutive weeks per bin in the chi-square uniformity test.
    "uniformity_binsize": 104,
}

# Run the whole pipeline; the report card is printed and kept in `results`.
results = full_strategy_pipeline(params)

Rolling simulation:   0%|          | 0/1909 [00:00<?, ?it/s]

Rolling simulation: 100%|██████████| 1909/1909 [46:13<00:00,  1.45s/it]


{'baseline_comparison': {'vs_alternate_trader': 1.1636995183050196,
                         'vs_always_trade': 1.1551936421319584,
                         'vs_random_trader': 1.2102404399011697,
                         'vs_weighted_coin': 1.1866106547622692},
 'future_mc': {'future_mean': 130.23050665682405,
               'future_median': 128.22582303621516,
               'prob_above_initial': 0.89096,
               'prob_failure': 0.00039,
               'prob_success': 0.01272,
               'prob_uncertain': 0.98689},
 'historical_mc': {'average_null_percentile': 0.5226894444444444,
                   'ks_distance': 0.09601944444444446,
                   'ks_p_value': 0.9905969629687478,
                   'simulated_mean': 128.24714802704327,
                   'simulated_median': 121.1748874333227},
 'internal_metrics': {'%FP_when_predicted_positive': 0.4537987679671458,
                      'chattiness_overall': 0.9938775510204082,
                      'correctness_rate

# Final code

In [None]:
def full_strategy_pipeline(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Weekly trading pipeline:
      - Builds weekly Tue→Thu dataset with thu/tue multipliers.
      - Rolling train/validate/test Decision Tree with threshold tuning.
      - Computes confusion counts, precision, chattiness, correctness.
      - Runs test for randomness of correctness.
      - Uniformity (chi-square) across time with a chosen bin size.
      - Historical Monte Carlo using empirical TP/FP multipliers.
      - Future Monte Carlo using last subset.
      - Baseline comparisons.
      - Returns a report-card dictionary + prints it.

    Parameters
    ----------
    params : dict
        Configuration; every key below is required.

        df : DataFrame with DATE, OPEN and CLOSE columns (one row per day).
        VALID_WEEKS : number of weeks in the rolling validation window.
        depth_grid, leaf_grid : candidate ``max_depth`` / ``min_samples_leaf``
            values for the decision tree.
        thresholds_tested : candidate probability cut-offs.
        FIXED : extra keyword arguments passed to ``DecisionTreeClassifier``.
        alpha_p, alpha_c, p_min, c_min : weights and reference levels used
            by the validation score.
        n_subsets, n_trajectories, n_weeks : Monte Carlo sizes.
        initial_bank, upper_thresh, lower_thresh : starting balance and the
            two stopping barriers.
        rng_seed : seed for the NumPy random generator.
        uniformity_binsize : number of weeks per bin in the chi-square test.

    Returns
    -------
    dict
        Report with the sections ``historical_mc``, ``future_mc``,
        ``internal_metrics``, ``baseline_comparison``, ``uniformity_test``
        and ``randomness_test``. The same dictionary is pretty-printed.

    Raises
    ------
    ValueError
        If no subset contains at least two TP and two FP weeks, so that no
        Monte Carlo simulation can be built.
    """

    # ============================================================
    # --------------------------- INPUTS --------------------------
    # ============================================================

    df = params["df"]

    VALID_WEEKS       = params["VALID_WEEKS"]
    depth_grid        = params["depth_grid"]
    leaf_grid         = params["leaf_grid"]
    thresholds_tested = params["thresholds_tested"]
    FIXED             = params["FIXED"]

    alpha_p = params["alpha_p"]
    alpha_c = params["alpha_c"]
    p_min   = params["p_min"]
    c_min   = params["c_min"]

    n_subsets      = params["n_subsets"]
    n_trajectories = params["n_trajectories"]
    n_weeks        = params["n_weeks"]
    initial_bank   = params["initial_bank"]
    upper_thresh   = params["upper_thresh"]
    lower_thresh   = params["lower_thresh"]
    rng_seed       = params["rng_seed"]

    uniformity_binsize = params["uniformity_binsize"]

    # One generator drives every random draw below, so a fixed seed
    # reproduces the whole report.
    rng = np.random.default_rng(rng_seed)

    # ============================================================
    # ---------- PART I: CLEANING + WEEKLY DATASET ---------------
    # ============================================================

    # sort_values returns a new frame, so the caller's DataFrame is not modified.
    df = df.sort_values("DATE").reset_index(drop=True)
    df["DATE"] = pd.to_datetime(df["DATE"])

    # Expanding z-scores; shift(1) makes each row use only statistics of
    # earlier rows (no look-ahead).
    df["normalized_close"] = (
        (df["CLOSE"] - df["CLOSE"].expanding().mean().shift(1))
        / df["CLOSE"].expanding().std(ddof=0).shift(1)
    )
    df["normalized_open"] = (
        (df["OPEN"] - df["OPEN"].expanding().mean().shift(1))
        / df["OPEN"].expanding().std(ddof=0).shift(1)
    )

    # Numeric weekday (Monday == 0) and a weekly period ending on Sunday.
    df["weekday"] = df["DATE"].dt.weekday
    df["week"]    = df["DATE"].dt.to_period("W-SUN")

    tue_open = (
        df[df["weekday"] == 1]
        .groupby("week")["OPEN"]
        .first()
        .rename("tue_open")
    )
    thu_open = (
        df[df["weekday"] == 3]
        .groupby("week")["OPEN"]
        .first()
        .rename("thu_open")
    )

    weekly = pd.concat([tue_open, thu_open], axis=1)

    # Multiplier earned by buying at Tuesday's open and selling at Thursday's open.
    weekly["thu/tue"] = weekly["thu_open"] / weekly["tue_open"]
    weekly["net%"] = (weekly["thu/tue"] - 1.0) * 100.0
    weekly["week_type"] = (weekly["thu/tue"] > 1.0).astype(int)

    norm_tue_open = (
        df[df["weekday"] == 1]
        .set_index("week")["normalized_open"]
        .rename("Norm_Tue_Open")
    )
    # NOTE(review): shift(1) moves values by one row, i.e. to the next week
    # that has this weekday present. After a week without a Thursday/Friday
    # session the "previous" value therefore comes from two weeks earlier.
    norm_prev_thu_open = (
        df[df["weekday"] == 3]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevThu_Open")
        .shift(1)
    )
    norm_prev_fri_open = (
        df[df["weekday"] == 4]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevFri_Open")
        .shift(1)
    )

    # Weeks missing any price or feature are dropped.
    weekly_full_norm = (
        weekly.copy()
        .join(norm_tue_open, how="left")
        .join(norm_prev_thu_open, how="left")
        .join(norm_prev_fri_open, how="left")
        .dropna()
    )

    features = ["Norm_PrevThu_Open", "Norm_PrevFri_Open", "Norm_Tue_Open"]
    target   = "week_type"

    # ============================================================
    # ---------- PART II: ROLLING TRAIN-VAL-TEST -----------------
    # ============================================================

    def precision(tp, fp):
        # Share of predicted-positive weeks that were truly positive.
        return tp / (tp + fp) if (tp + fp) > 0 else 0.0

    def chattiness(tp, fp, fn):
        # Predicted positives divided by actual positives.
        return (tp + fp) / (tp + fn) if (tp + fn) > 0 else 0.0

    def model_score(tp, fp, fn):
        # Exponential score rewarding precision above p_min and chattiness
        # above c_min; non-finite scores are mapped to 0.0.
        P = precision(tp, fp)
        C = chattiness(tp, fp, fn)
        s = np.exp(alpha_p * (P - p_min) + alpha_c * (C - c_min))
        return 0.0 if np.isnan(s) or np.isinf(s) else float(s)

    from sklearn.tree import DecisionTreeClassifier

    TP = TN = FP = FN = 0
    weekly_best = []

    for t in tqdm(range(VALID_WEEKS + 1, len(weekly_full_norm)), desc="Rolling simulation"):

        # Layout per step: [0, val_start) train, [val_start, t) validate, t test.
        val_start = max(0, t - VALID_WEEKS)
        training   = weekly_full_norm.iloc[:val_start]
        validation = weekly_full_norm.iloc[val_start:t]
        test       = weekly_full_norm.iloc[[t]]

        # A classifier needs both classes in the training window.
        if len(training[target].unique()) < 2:
            continue

        train_X, train_y = training[features], training[target]
        val_X, val_y     = validation[features], validation[target]
        test_X, test_y   = test[features], test[target]

        best_score  = -np.inf
        best_params = None
        best_model  = None

        for depth, leaf in product(depth_grid, leaf_grid):
            model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=leaf, **FIXED)
            model.fit(train_X, train_y)
            probs_val = model.predict_proba(val_X)[:, 1]

            for thr in thresholds_tested:
                preds_val = (probs_val > thr).astype(int)
                tp = ((preds_val == 1) & (val_y == 1)).sum()
                fp = ((preds_val == 1) & (val_y == 0)).sum()
                fn = ((preds_val == 0) & (val_y == 1)).sum()
                sc = model_score(tp, fp, fn)
                # Strict '>' keeps the first combination reaching the top score.
                if sc > best_score:
                    best_score  = sc
                    best_params = (depth, leaf, thr)
                    best_model  = model

        best_depth, best_leaf, best_thr = best_params

        p_hat = best_model.predict_proba(test_X)[0, 1]
        pred  = int(p_hat > best_thr)
        true  = int(test_y.iloc[0])

        if pred == 1 and true == 1:
            TP += 1; outcome = "TP"
        elif pred == 0 and true == 0:
            TN += 1; outcome = "TN"
        elif pred == 1 and true == 0:
            FP += 1; outcome = "FP"
        else:
            FN += 1; outcome = "FN"

        weekly_best.append({
            "Week": t,
            "True_Label": true,
            "Pred_Label": pred,
            "Outcome": outcome,
            "thu_tue": float(test["thu/tue"].iloc[0]),
        })

    df_final = pd.DataFrame(weekly_best)

    # ============================================================
    # ---------------- BASIC METRICS + TESTS ---------------------
    # ============================================================

    total = TP + TN + FP + FN
    prec_overall = precision(TP, FP)
    chat_overall = chattiness(TP, FP, FN)
    correctness_rate = (TP + TN) / total

    pct_fp_positive = FP / (TP + FP) if (TP + FP) > 0 else 0.0

    df_final["correct"] = (df_final["True_Label"] == df_final["Pred_Label"]).astype(int)
    z_runs, p_runs = runstest_1samp(df_final["correct"], correction=False)

    randomness_test = {
        "H0": "Correctness is random in time.",
        "z": float(z_runs),
        "p": float(p_runs)
    }

    # Consecutive bins of `uniformity_binsize` weeks; the chi-square test
    # compares the correct/incorrect counts across bins.
    df_final["chunk"] = df_final.index // uniformity_binsize
    chi2, p_chi, dof, _ = chi2_contingency(pd.crosstab(df_final["chunk"], df_final["correct"]))

    uniformity_test = {
        "chi2": float(chi2),
        "p": float(p_chi),
        "dof": int(dof),
        "binsize": uniformity_binsize,
    }

    # ---------- longest streaks (Option 2) ----------
    def longest_streak(seq, label):
        # Length of the longest run of consecutive items equal to `label`.
        best = 0
        cur = 0
        for x in seq:
            if x == label:
                cur += 1
                best = max(best, cur)
            else:
                cur = 0
        return best

    longest_tp = longest_streak(df_final["Outcome"], "TP")
    longest_fp = longest_streak(df_final["Outcome"], "FP")

    # ============================================================
    # ----------------- PART III: HISTORICAL MC ------------------
    # ============================================================

    outcomes_arr = np.array(["TP", "FP", "FN", "TN"])

    def build_samplers(tp_vals, fp_vals):
        # Return two zero-argument callables that draw one multiplier each
        # from the observed TP / FP values via their empirical CDFs.
        tp_sorted = np.sort(tp_vals)
        fp_sorted = np.sort(fp_vals)

        tp_cdf = np.arange(1, len(tp_sorted)+1)/len(tp_sorted)
        fp_cdf = np.arange(1, len(fp_sorted)+1)/len(fp_sorted)

        def sample_tp():
            u = rng.random()
            return tp_sorted[np.searchsorted(tp_cdf, u)]

        def sample_fp():
            u = rng.random()
            return fp_sorted[np.searchsorted(fp_cdf, u)]

        return sample_tp, sample_fp

    def run_mc_block(p, sample_tp, sample_fp):
        # Simulate n_trajectories balances of up to n_weeks each; `p` holds
        # the outcome probabilities in the order of `outcomes_arr`.
        cdf = np.cumsum(p)
        # Pin the last edge to exactly 1.0: round-off in the cumulative sum
        # could otherwise leave it slightly below 1 and let a draw index
        # past the end of `outcomes_arr`.
        cdf[-1] = 1.0
        final = np.empty(n_trajectories)

        for i in range(n_trajectories):
            bank = initial_bank
            for _ in range(n_weeks):
                r = rng.random()
                idx = np.searchsorted(cdf, r)
                outcome = outcomes_arr[idx]

                # Only predicted-positive weeks (TP/FP) are traded.
                if outcome == "TP":
                    bank *= sample_tp()
                elif outcome == "FP":
                    bank *= sample_fp()

                if bank >= upper_thresh or bank <= lower_thresh:
                    break

            final[i] = bank
        return final

    def run_actual(sub):
        # Replay the model's real trades over `sub` with the same barriers.
        bank = initial_bank
        for _, row in sub.iterrows():
            if row["Outcome"] in ("TP", "FP"):
                bank *= row["thu_tue"]
            if bank >= upper_thresh or bank <= lower_thresh:
                break
        return bank

    # Split row positions rather than the DataFrame itself: np.array_split on
    # a DataFrame goes through DataFrame.swapaxes, which pandas has deprecated.
    # The resulting contiguous partition is the same.
    split_positions = np.array_split(np.arange(len(df_final)), n_subsets)
    subsets = [df_final.iloc[pos] for pos in split_positions if len(pos) > 0]

    all_sims = []
    actual_balances = []
    null_percentiles = []
    valid_mc_subsets = []

    for sub in subsets:
        tp_vals = sub.loc[sub["Outcome"]=="TP", "thu_tue"].values
        fp_vals = sub.loc[sub["Outcome"]=="FP", "thu_tue"].values

        # Subsets with fewer than two TP or two FP weeks are skipped.
        if len(tp_vals) < 2 or len(fp_vals) < 2:
            continue

        sample_tp, sample_fp = build_samplers(tp_vals, fp_vals)
        p = sub["Outcome"].value_counts(normalize=True).reindex(outcomes_arr, fill_value=0).values

        sims = run_mc_block(p, sample_tp, sample_fp)
        all_sims.append(sims)

        actual = run_actual(sub)
        actual_balances.append(actual)

        # Fraction of simulated balances at or below the realised balance.
        null_percentiles.append(np.mean(sims <= actual))
        valid_mc_subsets.append(sub)

    if not all_sims:
        raise ValueError(
            "No subset has at least two TP and two FP weeks; "
            "cannot run the Monte Carlo simulations (try a smaller n_subsets)."
        )

    sim_all = np.concatenate(all_sims)
    actual_balances = np.array(actual_balances)

    simulated_mean = float(sim_all.mean())
    simulated_median = float(np.median(sim_all))
    ks_d, ks_p = ks_2samp(actual_balances, sim_all)
    avg_null = float(np.mean(null_percentiles))

    # ============================================================
    # --------------------- PART IV: FUTURE MC -------------------
    # ============================================================

    # The most recent subset that passed the TP/FP size check.
    last = valid_mc_subsets[-1]

    tp_last = last.loc[last["Outcome"]=="TP", "thu_tue"].values
    fp_last = last.loc[last["Outcome"]=="FP", "thu_tue"].values

    sample_tp_f, sample_fp_f = build_samplers(tp_last, fp_last)
    p_last = last["Outcome"].value_counts(normalize=True).reindex(outcomes_arr, fill_value=0).values

    fut = run_mc_block(p_last, sample_tp_f, sample_fp_f)

    future_mean = float(fut.mean())
    future_median = float(np.median(fut))
    prob_above_initial = float(np.mean(fut > initial_bank))
    prob_success = float(np.mean(fut >= upper_thresh))
    prob_failure = float(np.mean(fut <= lower_thresh))
    prob_uncertain = float(1 - prob_success - prob_failure)

    # ============================================================
    # --------------------- INTERNAL METRICS ---------------------
    # ============================================================

    TP_vals = df_final.loc[df_final["Outcome"]=="TP", "thu_tue"].values
    FP_vals = df_final.loc[df_final["Outcome"]=="FP", "thu_tue"].values

    precision_on_trades = float(len(TP_vals)/(len(TP_vals)+len(FP_vals))) if len(TP_vals)+len(FP_vals)>0 else 0.0
    trade_frequency = float((len(TP_vals)+len(FP_vals))/len(df_final))

    # Asymmetry using percentage return
    tp_pct = (TP_vals - 1) * 100
    fp_pct = (FP_vals - 1) * 100
    # Sum of the mean TP return and the mean FP return (both in percent).
    mistake_asymmetry = float(tp_pct.mean() + fp_pct.mean()) if len(tp_pct)>0 and len(fp_pct)>0 else np.nan

    # Gains/losses are taken over all evaluated weeks, traded or not.
    gains  = df_final.loc[df_final["thu_tue"]>1, "thu_tue"].values
    losses = df_final.loc[df_final["thu_tue"]<1, "thu_tue"].values

    if len(gains) > 0 and len(losses) > 0:
        macro = float(gains.mean() / abs(losses.mean()))
        # Element-wise ratio; both arrays are cut to the shorter length.
        micro = float((gains[:len(losses)] / losses[:len(gains)]).mean())
        gap   = float(macro - micro)
    else:
        macro = micro = gap = np.nan

    # ============================================================
    # -------------------- BASELINE COMPARISON -------------------
    # ============================================================

    # NOTE(review): the model replay and the always-trade baseline stop at the
    # barriers, while the random, alternate-week and weighted-coin baselines
    # run through the whole subset. Confirm this asymmetry is intended.
    ratio_always, ratio_random, ratio_alt, ratio_weighted = [], [], [], []

    for sub in subsets:

        model_bal = run_actual(sub)

        # always trade
        b = initial_bank
        for r in sub["thu_tue"]:
            b *= r
            if b >= upper_thresh or b <= lower_thresh:
                break
        ratio_always.append(model_bal/b if b!=0 else np.nan)

        # random with same chattiness
        ch_prob = len(sub.loc[sub["Outcome"].isin(["TP","FP"])])/len(sub)
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random()<ch_prob:
                b*=r
        ratio_random.append(model_bal/b if b!=0 else np.nan)

        # alternate week
        b = initial_bank
        for i,r in enumerate(sub["thu_tue"]):
            if i%2==0:
                b*=r
        ratio_alt.append(model_bal/b if b!=0 else np.nan)

        # weighted coin
        good_rate = float((sub["thu_tue"]>1).mean())
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random()<good_rate:
                b*=r
        ratio_weighted.append(model_bal/b if b!=0 else np.nan)

    # ============================================================
    # ------------------------ FINAL REPORT ----------------------
    # ============================================================

    report = {
        "historical_mc": {
            "simulated_mean": simulated_mean,
            "simulated_median": simulated_median,
            "ks_distance": float(ks_d),
            "ks_p_value": float(ks_p),
            "average_null_percentile": avg_null,
        },
        "future_mc": {
            "future_mean": future_mean,
            "future_median": future_median,
            "prob_above_initial": prob_above_initial,
            "prob_success": prob_success,
            "prob_failure": prob_failure,
            "prob_uncertain": prob_uncertain,
        },
        "internal_metrics": {
            "precision_overall": prec_overall,
            "chattiness_overall": chat_overall,
            "correctness_rate": correctness_rate,
            "precision_on_trades": precision_on_trades,
            "trade_frequency": trade_frequency,
            "mistake_asymmetry_%": mistake_asymmetry,
            "macro_return_ratio": macro,
            "micro_return_ratio": micro,
            "return_ratio_gap": gap,
            "longest_TP_streak": longest_tp,
            "longest_FP_streak": longest_fp,
            "%FP_when_predicted_positive": pct_fp_positive,
        },
        "baseline_comparison": {
            "vs_always_trade": float(np.nanmean(ratio_always)),
            "vs_random_trader": float(np.nanmean(ratio_random)),
            "vs_alternate_trader": float(np.nanmean(ratio_alt)),
            "vs_weighted_coin": float(np.nanmean(ratio_weighted)),
        },
        "uniformity_test": uniformity_test,
        "randomness_test": randomness_test,
    }

    pprint(report)
    return report
