In [2]:
from __future__ import annotations
import numpy as np
import pandas as pd
from typing import Dict, Any
from itertools import product
from collections import Counter
from tqdm import tqdm
from scipy.stats import chi2_contingency
from statsmodels.sandbox.stats.runs import runstest_1samp
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from pprint import pprint
from scipy.interpolate import interp1d
from scipy.stats import  gaussian_kde
from scipy.stats import ks_2samp

In [3]:
# Load the cleaned daily AAPL bars, parse DATE (unparseable values become NaT),
# and order the rows chronologically. The trailing expression previews the frame.
apple = (
    pd.read_csv(r"D:\data\notebooks\week-10\cleaned_apple_high_low.csv")
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"], errors="coerce"))
    .sort_values("DATE")
)
apple.head()

Unnamed: 0,DATE,weekday,OPEN,CLOSE,VOL,HIGH,LOW
0,1984-09-07,Friday,0.10122,0.10122,97236149.0,0.10246,0.1
1,1984-09-10,Monday,0.10122,0.10062,75471114.0,0.10153,0.09878
2,1984-09-11,Tuesday,0.10153,0.10246,177965367.0,0.10428,0.10153
3,1984-09-12,Wednesday,0.10246,0.09938,155467926.0,0.10306,0.09938
4,1984-09-13,Thursday,0.1049,0.1049,242135546.0,0.1052,0.1049


In [9]:
# ============================================================
# FULL PIPELINE — ONE FUNCTION — RETURNS REPORT (NOT df_final)
# Includes: inlined Support/Resistance + SR_Position feature
# Removes: net% (not needed)
# Keeps: subset logic + historical MC + future MC + report card EXACTLY like base
# ============================================================

from typing import Dict, Any
import numpy as np
import pandas as pd
from pprint import pprint


def full_strategy_pipeline(params: Dict[str, Any]) -> Dict[str, Any]:
    """Run the weekly Tuesday-open -> Thursday-open strategy pipeline and return its report.

    Stages, in order:

    1. Build a weekly table from the daily bars: first Tuesday/Thursday ``OPEN`` per
       week, their ratio ``thu/tue`` and the binary target ``week_type``
       (1 when the ratio is > 1).
    2. Fit asymmetric weighted least-squares support / resistance lines on smoothed
       Monday mid-prices and derive ``SR_Position``.
    3. Walk forward one week at a time: fit decision trees on every week before the
       validation window, choose (depth, leaf, threshold) on the last
       ``VALID_WEEKS`` weeks, then predict the next week.
    4. Compute internal metrics, a runs test and a chi-square uniformity test on
       per-week correctness.
    5. Run the historical and future Monte Carlo bank simulations and the baseline
       comparisons, and assemble the report card.

    Parameters
    ----------
    params : dict
        Required keys: ``df`` (daily frame with DATE, OPEN, HIGH, LOW columns),
        ``VALID_WEEKS``, ``depth_grid``, ``leaf_grid``, ``thresholds_tested``,
        ``FIXED`` (extra ``DecisionTreeClassifier`` kwargs), ``alpha_p``,
        ``alpha_c``, ``p_min``, ``c_min``, ``n_trajectories``, ``n_weeks``,
        ``initial_bank``, ``upper_thresh``, ``lower_thresh``, ``rng_seed``,
        ``uniformity_binsize``.
        Optional keys (defaults in parentheses): ``cutoff_date`` (None),
        ``subset_start_date`` (None), ``num_subsets`` (3), ``SR_SMOOTH_WINDOW`` (5),
        ``SR_WINDOW_WEEKS`` (52), ``ENVELOPE_A`` (1.0), ``ENVELOPE_B`` (100.0),
        ``SR_MAX_ITER`` (60), ``SR_TOL`` (1e-9).

    Returns
    -------
    dict
        Report with the sections ``historical_mc``, ``future_mc``,
        ``internal_metrics``, ``baseline_comparison``, ``uniformity_test`` and
        ``randomness_test``. Every float is rounded to 2 decimals. The report is
        also pretty-printed as a side effect.

    Raises
    ------
    KeyError
        If a required key is missing from ``params``.
    ValueError
        If a parameter is invalid (checked before any heavy computation), if the
        walk-forward loop produced no predictions, or if there are fewer than
        ``n_weeks`` predictions after ``subset_start_date``.
    """
    # --------------------------------------------------------
    # Parameters
    # --------------------------------------------------------
    df = params["df"]

    VALID_WEEKS       = params["VALID_WEEKS"]
    # Materialise the grids: they are re-iterated for every walk-forward week.
    depth_grid        = list(params["depth_grid"])
    leaf_grid         = list(params["leaf_grid"])
    thresholds_tested = list(params["thresholds_tested"])
    FIXED             = params["FIXED"]

    alpha_p = params["alpha_p"]
    alpha_c = params["alpha_c"]
    p_min   = params["p_min"]
    c_min   = params["c_min"]

    n_trajectories = params["n_trajectories"]
    n_weeks        = params["n_weeks"]
    initial_bank   = params["initial_bank"]
    upper_thresh   = params["upper_thresh"]
    lower_thresh   = params["lower_thresh"]
    rng_seed       = params["rng_seed"]

    uniformity_binsize = params["uniformity_binsize"]

    cutoff_date       = params.get("cutoff_date", None)
    subset_start_date = params.get("subset_start_date", None)
    num_subsets       = params.get("num_subsets", 3)

    # Support/Resistance parameters
    SR_SMOOTH_WINDOW = params.get("SR_SMOOTH_WINDOW", 5)
    SR_WINDOW_WEEKS  = params.get("SR_WINDOW_WEEKS", 52)
    ENVELOPE_A       = params.get("ENVELOPE_A", 1.0)
    ENVELOPE_B       = params.get("ENVELOPE_B", 100.0)
    SR_MAX_ITER      = params.get("SR_MAX_ITER", 60)
    SR_TOL           = params.get("SR_TOL", 1e-9)

    # --------------------------------------------------------
    # Fail fast: validate inputs and resolve heavy dependencies BEFORE the
    # long-running walk-forward loop, so a bad argument or a missing package
    # does not cost a full run.
    # --------------------------------------------------------
    if num_subsets < 0:
        raise ValueError("num_subsets must be >= 0")
    if n_weeks <= 0:
        raise ValueError("n_weeks must be a positive integer")
    if n_trajectories <= 0:
        raise ValueError("n_trajectories must be a positive integer")
    if SR_WINDOW_WEEKS <= 0:
        raise ValueError("SR_WINDOW_WEEKS must be positive")
    if not depth_grid or not leaf_grid or not thresholds_tested:
        raise ValueError(
            "depth_grid, leaf_grid and thresholds_tested must all be non-empty"
        )

    from itertools import product

    from scipy.stats import chi2_contingency, ks_2samp
    from sklearn.tree import DecisionTreeClassifier
    from statsmodels.sandbox.stats.runs import runstest_1samp
    from tqdm import tqdm

    rng = np.random.default_rng(rng_seed)

    # --------------------------------------------------------
    # Cleaning and weekly dataset
    # --------------------------------------------------------
    # Convert DATE first so the sort is chronological even for string dates;
    # assign() returns a new frame, so the caller's DataFrame is not modified.
    df = (
        df.assign(DATE=pd.to_datetime(df["DATE"]))
        .sort_values("DATE")
        .reset_index(drop=True)
    )

    if cutoff_date is not None:
        cutoff_dt = pd.to_datetime(cutoff_date)
        df = df[df["DATE"] >= cutoff_dt].reset_index(drop=True)

    # Expanding z-score of OPEN using only strictly earlier rows (shift(1)).
    df["normalized_open"] = (
        (df["OPEN"] - df["OPEN"].expanding().mean().shift(1))
        / df["OPEN"].expanding().std(ddof=0).shift(1)
    )

    df["weekday"] = df["DATE"].dt.weekday          # Monday == 0
    df["week"]    = df["DATE"].dt.to_period("W-SUN")

    tue_open = (
        df[df["weekday"] == 1]
        .groupby("week")["OPEN"]
        .first()
        .rename("tue_open")
    )
    thu_open = (
        df[df["weekday"] == 3]
        .groupby("week")["OPEN"]
        .first()
        .rename("thu_open")
    )

    weekly = pd.concat([tue_open, thu_open], axis=1)
    weekly["thu/tue"] = weekly["thu_open"] / weekly["tue_open"]
    # Weeks with a missing ratio become 0 here but are removed by dropna() below.
    weekly["week_type"] = (weekly["thu/tue"] > 1.0).astype(int)

    # --------------------------------------------------------
    # Support / resistance envelopes + SR_Position
    # --------------------------------------------------------
    mon = (
        df.loc[df["weekday"] == 0, ["DATE", "HIGH", "LOW"]]
        .copy()
        .reset_index(drop=True)
    )
    mon["mid"] = (mon["HIGH"] + mon["LOW"]) / 2.0

    mon["mid_smooth"] = (
        mon["mid"]
        .rolling(SR_SMOOTH_WINDOW, min_periods=SR_SMOOTH_WINDOW)
        .mean()
    )
    mon = mon.dropna(subset=["mid_smooth"]).reset_index(drop=True)

    mon["t"] = np.arange(len(mon), dtype=float)
    t_all = mon["t"].to_numpy()
    y_all = mon["mid_smooth"].to_numpy()

    def fit_line(t, d, g, a_, b_):
        """Iteratively re-weighted line fit of ``d`` on ``t``.

        Points above the current line (positive residual) get weight ``g * a_``,
        points below get ``g * b_``. Starts from the ordinary least-squares line
        and stops on convergence, a singular system, or ``SR_MAX_ITER``.
        Returns ``(slope, intercept)``.
        """
        X = np.column_stack([t, np.ones_like(t)])
        m, c = np.linalg.lstsq(X, d, rcond=None)[0]

        for _ in range(SR_MAX_ITER):
            r = d - (m * t + c)
            k = np.where(r > 0, a_, np.where(r < 0, b_, 0.0))
            w = g * k

            S_tt = np.sum(w * t * t)
            S_t  = np.sum(w * t)
            S_1  = np.sum(w)
            R_t  = np.sum(w * t * d)
            R_1  = np.sum(w * d)

            # Weighted normal equations for (slope, intercept).
            A = np.array([[S_tt, S_t],
                          [S_t,  S_1]])
            B = np.array([R_t, R_1])

            if abs(np.linalg.det(A)) < 1e-12:
                break

            m_new, c_new = np.linalg.solve(A, B)
            if abs(m_new - m) + abs(c_new - c) < SR_TOL:
                break

            m, c = m_new, c_new

        return float(m), float(c)

    support_vals    = np.full(len(mon), np.nan)
    resistance_vals = np.full(len(mon), np.nan)

    for i in range(len(mon)):
        T = t_all[i]
        start = max(0, i - SR_WINDOW_WEEKS)
        idx = slice(start, i + 1)

        t_win = t_all[idx]
        y_win = y_all[idx]

        # Linear recency weight: 0 at the oldest edge of a full window, 1 at T.
        g = (t_win - (T - SR_WINDOW_WEEKS)) / SR_WINDOW_WEEKS

        # Support uses (a, b); resistance swaps the two penalties.
        m_s, c_s = fit_line(t_win, y_win, g, ENVELOPE_A, ENVELOPE_B)
        m_r, c_r = fit_line(t_win, y_win, g, ENVELOPE_B, ENVELOPE_A)

        support_vals[i]    = m_s * T + c_s
        resistance_vals[i] = m_r * T + c_r

    mon["support_envelope"]    = support_vals
    mon["resistance_envelope"] = resistance_vals
    mon["week"] = mon["DATE"].dt.to_period("W-SUN")

    sr = mon.set_index("week")[["support_envelope", "resistance_envelope"]]
    weekly = weekly.join(sr, how="left")

    weekly["SR_Position"] = (
        (weekly["tue_open"] - weekly["support_envelope"])
        / (weekly["resistance_envelope"] - weekly["support_envelope"])
    )

    # A non-positive band width makes the position meaningless.
    weekly.loc[
        (weekly["resistance_envelope"] - weekly["support_envelope"]) <= 0,
        "SR_Position"
    ] = np.nan

    # --------------------------------------------------------
    # Feature matrix
    # --------------------------------------------------------
    norm_tue_open = (
        df[df["weekday"] == 1]
        .set_index("week")["normalized_open"]
        .rename("Norm_Tue_Open")
    )
    # NOTE(review): shift(1) moves by one ROW among weeks that contain that weekday,
    # so after a week with no Thursday/Friday bar the "previous" value comes from
    # an earlier week than the calendar-previous one. Confirm this is intended.
    norm_prev_thu_open = (
        df[df["weekday"] == 3]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevThu_Open")
        .shift(1)
    )
    norm_prev_fri_open = (
        df[df["weekday"] == 4]
        .set_index("week")["normalized_open"]
        .rename("Norm_PrevFri_Open")
        .shift(1)
    )

    weekly_full = (
        weekly.copy()
        .join(norm_tue_open, how="left")
        .join(norm_prev_thu_open, how="left")
        .join(norm_prev_fri_open, how="left")
        .dropna()
    )

    features = ["Norm_PrevThu_Open", "Norm_PrevFri_Open", "Norm_Tue_Open", "SR_Position"]
    target   = "week_type"

    # --------------------------------------------------------
    # Rolling train / validate / test
    # --------------------------------------------------------
    def precision(tp, fp):
        """TP / (TP + FP); 0.0 when nothing was predicted positive."""
        return tp / (tp + fp) if (tp + fp) > 0 else 0.0

    def chattiness(tp, fp, fn):
        """Predicted positives divided by actual positives; 0.0 with no positives."""
        return (tp + fp) / (tp + fn) if (tp + fn) > 0 else 0.0

    def model_score(tp, fp, fn):
        """exp(alpha_p*(P - p_min) + alpha_c*(C - c_min)); 0.0 if not finite."""
        P = precision(tp, fp)
        C = chattiness(tp, fp, fn)
        s = np.exp(alpha_p * (P - p_min) + alpha_c * (C - c_min))
        return 0.0 if np.isnan(s) or np.isinf(s) else float(s)

    TP = TN = FP = FN = 0
    weekly_best = []

    for t in tqdm(range(VALID_WEEKS + 1, len(weekly_full)), desc="Rolling simulation"):

        val_start = max(0, t - VALID_WEEKS)
        training   = weekly_full.iloc[:val_start]
        validation = weekly_full.iloc[val_start:t]
        test       = weekly_full.iloc[[t]]

        # predict_proba()[:, 1] below needs both classes in the training data.
        if len(training[target].unique()) < 2:
            continue

        train_X, train_y = training[features], training[target]
        val_X, val_y     = validation[features], validation[target]
        test_X, test_y   = test[features], test[target]

        best_score  = -np.inf
        best_params = None
        best_model  = None

        for depth, leaf in product(depth_grid, leaf_grid):
            model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=leaf, **FIXED)
            model.fit(train_X, train_y)
            probs_val = model.predict_proba(val_X)[:, 1]

            for thr in thresholds_tested:
                preds_val = (probs_val > thr).astype(int)
                tp = ((preds_val == 1) & (val_y == 1)).sum()
                fp = ((preds_val == 1) & (val_y == 0)).sum()
                fn = ((preds_val == 0) & (val_y == 1)).sum()
                sc = model_score(tp, fp, fn)
                # Strict '>' keeps the first combination on ties.
                if sc > best_score:
                    best_score  = sc
                    best_params = (depth, leaf, thr)
                    best_model  = model

        best_depth, best_leaf, best_thr = best_params

        p_hat = best_model.predict_proba(test_X)[0, 1]
        pred  = int(p_hat > best_thr)
        true  = int(test_y.iloc[0])

        if pred == 1 and true == 1:
            TP += 1
            outcome = "TP"
        elif pred == 0 and true == 0:
            TN += 1
            outcome = "TN"
        elif pred == 1 and true == 0:
            FP += 1
            outcome = "FP"
        else:
            FN += 1
            outcome = "FN"

        weekly_best.append({
            "Week": t,
            "True_Label": true,
            "Pred_Label": pred,
            "Outcome": outcome,
            "thu_tue": float(test["thu/tue"].iloc[0]),
        })

    if not weekly_best:
        raise ValueError(
            "Rolling simulation produced no predictions: not enough usable weeks "
            "for the given VALID_WEEKS / cutoff_date."
        )

    df_final = pd.DataFrame(weekly_best)

    df_final["week_period"]     = weekly_full.index[df_final["Week"].to_numpy()]
    df_final["week_start_date"] = df_final["week_period"].dt.start_time

    # --------------------------------------------------------
    # Internal metrics
    # --------------------------------------------------------
    total = TP + TN + FP + FN
    prec_overall = precision(TP, FP)
    chat_overall = chattiness(TP, FP, FN)
    correctness_rate = (TP + TN) / total if total > 0 else 0.0
    pct_fp_positive = FP / (TP + FP) if (TP + FP) > 0 else 0.0

    df_final["correct"] = (df_final["True_Label"] == df_final["Pred_Label"]).astype(int)

    z_runs, p_runs = runstest_1samp(df_final["correct"], correction=False)

    randomness_test = {
        "H0": "Correctness is random in time.",
        "z": float(z_runs),
        "p": float(p_runs)
    }

    # Chi-square test of correctness across consecutive blocks of predictions.
    df_final["chunk"] = df_final.index // uniformity_binsize
    chi2, p_chi, dof, _ = chi2_contingency(pd.crosstab(df_final["chunk"], df_final["correct"]))

    uniformity_test = {
        "chi2": float(chi2),
        "p": float(p_chi),
        "dof": int(dof),
        "binsize": uniformity_binsize,
    }

    def longest_streak(seq, label):
        """Length of the longest run of consecutive ``label`` values in ``seq``."""
        best = cur = 0
        for x in seq:
            if x == label:
                cur += 1
                best = max(best, cur)
            else:
                cur = 0
        return best

    longest_tp = longest_streak(df_final["Outcome"], "TP")
    longest_fp = longest_streak(df_final["Outcome"], "FP")

    TP_vals = df_final.loc[df_final["Outcome"] == "TP", "thu_tue"].values
    FP_vals = df_final.loc[df_final["Outcome"] == "FP", "thu_tue"].values
    tp_pct = (TP_vals - 1) * 100
    fp_pct = (FP_vals - 1) * 100
    mistake_asymmetry = (
        float(tp_pct.mean() + fp_pct.mean())
        if len(tp_pct) > 0 and len(fp_pct) > 0
        else np.nan
    )
    trade_frequency = (
        float((len(TP_vals) + len(FP_vals)) / len(df_final))
        if len(df_final) > 0
        else 0.0
    )

    # --------------------------------------------------------
    # Subset construction
    # --------------------------------------------------------
    if subset_start_date is not None:
        ss_dt = pd.to_datetime(subset_start_date)
        mask = df_final["week_start_date"] >= ss_dt
        # df_final has a default RangeIndex, so the label from idxmax() is a position.
        base_start = mask.idxmax() if mask.any() else len(df_final)
    else:
        base_start = 0

    last_start = len(df_final) - n_weeks
    if last_start < base_start:
        raise ValueError(
            f"Not enough data to produce even the final {n_weeks}-week subset."
        )

    if num_subsets > 0:
        # num_subsets + 2 evenly spaced offsets; only the first num_subsets are used
        # here and the final window (last_start) is appended separately below.
        offsets = np.linspace(0, last_start - base_start, num_subsets + 2, dtype=int)
        start_points = offsets[0:num_subsets] + base_start
    else:
        start_points = []

    subsets = [(s, df_final.iloc[s:s + n_weeks]) for s in start_points]
    subsets.append((last_start, df_final.iloc[last_start:last_start + n_weeks]))

    # --------------------------------------------------------
    # Historical Monte Carlo
    # --------------------------------------------------------
    outcomes_arr = np.array(["TP", "FP", "FN", "TN"])
    last_outcome_idx = len(outcomes_arr) - 1

    def build_sampler(vals):
        """Return (sorted values, empirical CDF), or (None, None) for empty input."""
        vals = np.sort(vals)
        if len(vals) == 0:
            return None, None
        cdf = np.arange(1, len(vals) + 1) / len(vals)
        return vals, cdf

    def sample(vals, cdf):
        """Draw one value from the empirical distribution by inverse-CDF lookup."""
        u = rng.random()
        idx = np.searchsorted(cdf, u)
        return vals[min(idx, len(vals) - 1)]

    def run_actual(sub):
        """Compound the bank over the weeks the model traded (TP/FP), with stops."""
        bank = initial_bank
        for outcome, ratio in zip(sub["Outcome"], sub["thu_tue"]):
            if outcome in ("TP", "FP"):
                bank *= ratio
            if bank >= upper_thresh or bank <= lower_thresh:
                break
        return bank

    def run_mc_block(p, tp_vals, tp_cdf, fp_vals, fp_cdf):
        """Simulate ``n_trajectories`` banks of up to ``n_weeks`` weeks each.

        Each week an outcome is drawn with probabilities ``p`` (ordered as
        ``outcomes_arr``); TP/FP weeks multiply the bank by a ratio sampled from
        the matching empirical distribution. A trajectory stops early once the
        bank crosses either threshold.
        """
        cdf = np.cumsum(p)
        final = np.empty(n_trajectories)
        for i in range(n_trajectories):
            bank = initial_bank
            for _ in range(n_weeks):
                r = rng.random()
                # Clamp: round-off can leave cdf[-1] slightly below 1, in which
                # case searchsorted would return an out-of-range index.
                idx = min(int(np.searchsorted(cdf, r)), last_outcome_idx)
                outcome = outcomes_arr[idx]
                if outcome == "TP" and tp_vals is not None:
                    bank *= sample(tp_vals, tp_cdf)
                elif outcome == "FP" and fp_vals is not None:
                    bank *= sample(fp_vals, fp_cdf)
                if bank >= upper_thresh or bank <= lower_thresh:
                    break
            final[i] = bank
        return final

    all_sims = []
    actual_balances = []
    null_percentiles = []

    for i, (s, sub) in enumerate(subsets):

        # The first subset is skipped for the historical comparison.
        if i == 0:
            continue

        # Only predictions strictly before the subset start feed the simulation.
        history = df_final.iloc[:s]

        tp_hist = history.loc[history["Outcome"] == "TP", "thu_tue"].values
        fp_hist = history.loc[history["Outcome"] == "FP", "thu_tue"].values

        if len(tp_hist) < 2 or len(fp_hist) < 2:
            continue

        tp_vals_hist, tp_cdf_hist = build_sampler(tp_hist)
        fp_vals_hist, fp_cdf_hist = build_sampler(fp_hist)

        p_hist = (
            history["Outcome"]
            .value_counts(normalize=True)
            .reindex(outcomes_arr, fill_value=0)
            .values
        )

        sims = run_mc_block(p_hist, tp_vals_hist, tp_cdf_hist, fp_vals_hist, fp_cdf_hist)
        all_sims.append(sims)

        actual = run_actual(sub)
        actual_balances.append(actual)

        null_percentiles.append(float(np.mean(sims <= actual)))

    # Neutral fallbacks when no subset had enough history to simulate.
    if len(all_sims) > 0:
        sim_all = np.concatenate(all_sims)
    else:
        sim_all = np.array([initial_bank])

    actual_balances = np.array(actual_balances) if len(actual_balances) > 0 else np.array([initial_bank])
    null_percentiles = null_percentiles if len(null_percentiles) > 0 else [0.5]

    ks_d, ks_p = ks_2samp(actual_balances, sim_all)

    simulated_mean   = float(sim_all.mean())
    simulated_median = float(np.median(sim_all))
    avg_null         = float(np.mean(null_percentiles))

    # --------------------------------------------------------
    # Future Monte Carlo — uses ALL df_final history
    # --------------------------------------------------------
    tp_all = df_final.loc[df_final["Outcome"] == "TP", "thu_tue"].values
    fp_all = df_final.loc[df_final["Outcome"] == "FP", "thu_tue"].values

    if len(tp_all) > 1:
        tp_vals_all, tp_cdf_all = build_sampler(tp_all)
    else:
        tp_vals_all, tp_cdf_all = (None, None)

    if len(fp_all) > 1:
        fp_vals_all, fp_cdf_all = build_sampler(fp_all)
    else:
        fp_vals_all, fp_cdf_all = (None, None)

    p_all = (
        df_final["Outcome"]
        .value_counts(normalize=True)
        .reindex(outcomes_arr, fill_value=0)
        .values
    )

    fut = run_mc_block(p_all, tp_vals_all, tp_cdf_all, fp_vals_all, fp_cdf_all)

    future_mean     = float(fut.mean())
    future_median   = float(np.median(fut))
    prob_above_init = float(np.mean(fut > initial_bank))
    prob_success    = float(np.mean(fut >= upper_thresh))
    prob_failure    = float(np.mean(fut <= lower_thresh))
    prob_uncertain  = float(1 - prob_success - prob_failure)

    # --------------------------------------------------------
    # Baseline comparison — all subsets
    # --------------------------------------------------------
    # NOTE(review): the model balance and the always-trade baseline stop at the
    # upper/lower thresholds, while the random, alternate and weighted-coin
    # baselines run the full subset without stops. Kept as-is to preserve the
    # reported numbers; confirm whether the asymmetry is intended.
    ratio_always   = []
    ratio_random   = []
    ratio_alt      = []
    ratio_weighted = []

    for s, sub in subsets:

        model_bal = run_actual(sub)

        # Always trade (with stops).
        b = initial_bank
        for r in sub["thu_tue"]:
            b *= r
            if b >= upper_thresh or b <= lower_thresh:
                break
        ratio_always.append(model_bal / b if b != 0 else np.nan)

        # Random trader with the model's own trade frequency on this subset.
        ch_prob = len(sub.loc[sub["Outcome"].isin(["TP", "FP"])]) / len(sub)
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random() < ch_prob:
                b *= r
        ratio_random.append(model_bal / b if b != 0 else np.nan)

        # Trade every other week, starting with the first.
        b = initial_bank
        for i_idx, r in enumerate(sub["thu_tue"]):
            if i_idx % 2 == 0:
                b *= r
        ratio_alt.append(model_bal / b if b != 0 else np.nan)

        # Coin weighted by the share of weeks with thu/tue > 1 in this subset.
        good_rate = float((sub["thu_tue"] > 1).mean())
        b = initial_bank
        for r in sub["thu_tue"]:
            if rng.random() < good_rate:
                b *= r
        ratio_weighted.append(model_bal / b if b != 0 else np.nan)

    # --------------------------------------------------------
    # Report card
    # --------------------------------------------------------
    report = {
        "historical_mc": {
            "simulated_mean": simulated_mean,
            "simulated_median": simulated_median,
            "ks_distance": float(ks_d),
            "ks_p_value": float(ks_p),
            "average_null_percentile": avg_null
        },
        "future_mc": {
            "future_mean": future_mean,
            "future_median": future_median,
            "prob_above_initial": prob_above_init,
            "prob_success": prob_success,
            "prob_failure": prob_failure,
            "prob_uncertain": prob_uncertain
        },
        "internal_metrics": {
            "precision_overall": prec_overall,
            "chattiness_overall": chat_overall,
            "correctness_rate": correctness_rate,
            "trade_frequency": trade_frequency,
            "mistake_asymmetry_%": mistake_asymmetry,
            "longest_TP_streak": longest_tp,
            "longest_FP_streak": longest_fp,
            "%FP_when_predicted_positive": pct_fp_positive
        },
        "baseline_comparison": {
            "vs_always_trade": float(np.nanmean(ratio_always)),
            "vs_random_trader": float(np.nanmean(ratio_random)),
            "vs_alternate_trader": float(np.nanmean(ratio_alt)),
            "vs_weighted_coin": float(np.nanmean(ratio_weighted))
        },
        "uniformity_test": uniformity_test,
        "randomness_test": randomness_test,
    }

    def round_2(obj):
        """Recursively round floats inside nested dicts/lists to 2 decimals."""
        if isinstance(obj, float):
            return round(obj, 2)
        if isinstance(obj, dict):
            return {k: round_2(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [round_2(x) for x in obj]
        return obj

    report = round_2(report)
    pprint(report)
    return report


In [None]:
# Run configuration: daily bars from 2000 onward, evaluation subsets from 2010 onward.
params = {"df": apple}

# Walk-forward model search: validation window length, tree grid, decision thresholds.
params.update({
    "VALID_WEEKS": 52,
    "depth_grid": [2, 3, 4, 5, 6],
    "leaf_grid": [2, 3, 4, 5, 6],
    "thresholds_tested": np.linspace(0.01, 0.99, 99),
    "FIXED": {
        "criterion": "entropy",
        "min_samples_split": 6,
        "class_weight": "balanced",
        "random_state": 42,
    },
})

# Validation score: exp(alpha_p * (precision - p_min) + alpha_c * (chattiness - c_min)).
params.update({
    "alpha_p": 1.0,
    "alpha_c": 0.01,
    "p_min": 0.55,
    "c_min": 0.10,
})

# Monte Carlo bank simulation and stop thresholds.
params.update({
    "n_trajectories": 100000,
    "n_weeks": 100,
    "initial_bank": 100.0,
    "upper_thresh": 200.0,
    "lower_thresh": 60.0,
    "rng_seed": 42,
})

# Chunk size for the chi-square uniformity test, data cutoff, and subset layout.
params.update({
    "uniformity_binsize": 104,
    "cutoff_date": "2000-01-01",
    "subset_start_date": "2010-01-01",
    "num_subsets": 5,
})

# Support / resistance envelope settings.
params.update({
    "SR_SMOOTH_WINDOW": 5,     # rolling-mean window over Monday mid-prices
    "SR_WINDOW_WEEKS": 52,     # look-back length (in Mondays) for each line fit
    "ENVELOPE_A": 1.0,         # support fit: weight on points above the line
    "ENVELOPE_B": 100.0,       # support fit: weight on points below the line (swapped for resistance)
    "SR_MAX_ITER": 60,
    "SR_TOL": 1e-9,
})

report = full_strategy_pipeline(params)

Rolling simulation: 100%|██████████| 1098/1098 [27:41<00:00,  1.51s/it]


{'baseline_comparison': {'vs_alternate_trader': 1.0812967396195128,
                         'vs_always_trade': 0.9388219549148124,
                         'vs_random_trader': 0.8841579281248835,
                         'vs_weighted_coin': 0.9716101514990833},
 'future_mc': {'future_mean': 119.11756261808581,
               'future_median': 117.4032108927795,
               'prob_above_initial': 0.80719,
               'prob_failure': 0.00048,
               'prob_success': 0.0021,
               'prob_uncertain': 0.99742},
 'historical_mc': {'average_null_percentile': 0.7017659999999999,
                   'ks_distance': 0.435628,
                   'ks_p_value': 0.22359943919701197,
                   'simulated_mean': 113.43315550283613,
                   'simulated_median': 110.02676761827134},
 'internal_metrics': {'%FP_when_predicted_positive': 0.45436507936507936,
                      'chattiness_overall': 0.8330578512396695,
                      'correctness_rate': 0.49042

In [17]:
# Run configuration: daily bars from 1990 onward, evaluation subsets from 2000 onward,
# with a reduced 4x4 depth/leaf grid.
params = {"df": apple}

# Walk-forward model search: validation window length, tree grid, decision thresholds.
params.update({
    "VALID_WEEKS": 52,
    "depth_grid": [2, 3, 4, 6],
    "leaf_grid": [2, 3, 4, 6],
    "thresholds_tested": np.linspace(0.01, 0.99, 99),
    "FIXED": {
        "criterion": "entropy",
        "min_samples_split": 6,
        "class_weight": "balanced",
        "random_state": 42,
    },
})

# Validation score: exp(alpha_p * (precision - p_min) + alpha_c * (chattiness - c_min)).
params.update({
    "alpha_p": 1.0,
    "alpha_c": 0.01,
    "p_min": 0.55,
    "c_min": 0.10,
})

# Monte Carlo bank simulation and stop thresholds.
params.update({
    "n_trajectories": 100000,
    "n_weeks": 100,
    "initial_bank": 100.0,
    "upper_thresh": 200.0,
    "lower_thresh": 60.0,
    "rng_seed": 42,
})

# Chunk size for the chi-square uniformity test, data cutoff, and subset layout.
params.update({
    "uniformity_binsize": 104,
    "cutoff_date": "1990-01-01",
    "subset_start_date": "2000-01-01",
    "num_subsets": 5,
})

# Support / resistance envelope settings.
params.update({
    "SR_SMOOTH_WINDOW": 5,     # rolling-mean window over Monday mid-prices
    "SR_WINDOW_WEEKS": 52,     # look-back length (in Mondays) for each line fit
    "ENVELOPE_A": 1.0,         # support fit: weight on points above the line
    "ENVELOPE_B": 100.0,       # support fit: weight on points below the line (swapped for resistance)
    "SR_MAX_ITER": 60,
    "SR_TOL": 1e-9,
})

report = full_strategy_pipeline(params)

Rolling simulation: 100%|██████████| 1620/1620 [30:02<00:00,  1.11s/it]


{'baseline_comparison': {'vs_alternate_trader': 1.4981777312586806,
                         'vs_always_trade': 1.0098909176616684,
                         'vs_random_trader': 0.8198168571372062,
                         'vs_weighted_coin': 0.9088535106921745},
 'future_mc': {'future_mean': 124.50319307456655,
               'future_median': 122.4758497008616,
               'prob_above_initial': 0.85868,
               'prob_failure': 0.0003,
               'prob_success': 0.0054,
               'prob_uncertain': 0.9943000000000001},
 'historical_mc': {'average_null_percentile': 0.692486,
                   'ks_distance': 0.563114,
                   'ks_p_value': 0.0501086989626232,
                   'simulated_mean': 118.6478354838397,
                   'simulated_median': 112.5971241425859},
 'internal_metrics': {'%FP_when_predicted_positive': 0.46797385620915033,
                      'chattiness_overall': 0.9150717703349283,
                      'correctness_rate': 0.51389746

# Re-run with the changed code: previously the historical MC used the full subset instead of just the 100-week window.

In [10]:
# Run configuration: daily bars from 1990 onward, evaluation subsets from 2000 onward,
# with a 4x4 depth/leaf grid.
params = {"df": apple}

# Walk-forward model search: validation window length, tree grid, decision thresholds.
params.update({
    "VALID_WEEKS": 52,
    "depth_grid": [2, 3, 4, 6],
    "leaf_grid": [2, 3, 4, 6],
    "thresholds_tested": np.linspace(0.01, 0.99, 99),
    "FIXED": {
        "criterion": "entropy",
        "min_samples_split": 6,
        "class_weight": "balanced",
        "random_state": 42,
    },
})

# Validation score: exp(alpha_p * (precision - p_min) + alpha_c * (chattiness - c_min)).
params.update({
    "alpha_p": 1.0,
    "alpha_c": 0.01,
    "p_min": 0.55,
    "c_min": 0.10,
})

# Monte Carlo bank simulation and stop thresholds.
params.update({
    "n_trajectories": 100000,
    "n_weeks": 100,
    "initial_bank": 100.0,
    "upper_thresh": 200.0,
    "lower_thresh": 60.0,
    "rng_seed": 42,
})

# Chunk size for the chi-square uniformity test, data cutoff, and subset layout.
params.update({
    "uniformity_binsize": 104,
    "cutoff_date": "1990-01-01",
    "subset_start_date": "2000-01-01",
    "num_subsets": 5,
})

# Support / resistance envelope settings.
params.update({
    "SR_SMOOTH_WINDOW": 5,     # rolling-mean window over Monday mid-prices
    "SR_WINDOW_WEEKS": 52,     # look-back length (in Mondays) for each line fit
    "ENVELOPE_A": 1.0,         # support fit: weight on points above the line
    "ENVELOPE_B": 100.0,       # support fit: weight on points below the line (swapped for resistance)
    "SR_MAX_ITER": 60,
    "SR_TOL": 1e-9,
})

report = full_strategy_pipeline(params)

Rolling simulation: 100%|██████████| 1620/1620 [28:37<00:00,  1.06s/it]


{'baseline_comparison': {'vs_alternate_trader': 1.36,
                         'vs_always_trade': 1.05,
                         'vs_random_trader': 1.15,
                         'vs_weighted_coin': 1.2},
 'future_mc': {'future_mean': 117.41,
               'future_median': 114.07,
               'prob_above_initial': 0.7,
               'prob_failure': 0.01,
               'prob_success': 0.02,
               'prob_uncertain': 0.97},
 'historical_mc': {'average_null_percentile': 0.61,
                   'ks_distance': 0.33,
                   'ks_p_value': 0.54,
                   'simulated_mean': 117.9,
                   'simulated_median': 113.88},
 'internal_metrics': {'%FP_when_predicted_positive': 0.47,
                      'chattiness_overall': 0.92,
                      'correctness_rate': 0.51,
                      'longest_FP_streak': 6,
                      'longest_TP_streak': 9,
                      'mistake_asymmetry_%': 0.36,
                      'precision_over