In [1]:
import jupyter

# Comparação por scaler
| Scaler               | Train (↓ melhor) | Test (↓ melhor) | Comentário                     |
| -------------------- | ---------------- | --------------- | ------------------------------ |
| **PowerTransformer** | **0.0058**       | **0.0354**      | 🔥 Melhor separabilidade geral |
| QuantileTransformer  | 0.0112           | 0.0380          | Bom em geral                   |
| StandardScaler       | 0.0115           | 0.0375          | Similar ao Quantile            |
| RobustScaler         | 0.0119           | 0.0464          | Um pouco pior em teste         |
| **MinMaxScaler**     | 0.0260           | **0.0664**      | 🚫 Fraca generalização         |
| **MaxAbsScaler**     | **0.0444**       | 0.0524          | 🚫 Muito fraco                 |


In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt

from tqdm import tqdm
from src.utils.system import boot
from src.defaults import TOP2_STOCK_BY_SECTOR, RANDOM_SEEDS
from src.data.feature_pipeline import load_base_dataframe

DEVICE = boot()
OHLCV_DF = load_base_dataframe()

  from pandas.core import (


In [3]:
from hurst import compute_Hc
from pyts.image import RecurrencePlot
import datetime

# PROJECT SETUP ==================================
TICKERS         = TOP2_STOCK_BY_SECTOR
#TICKERS = OHLCV_DF['symbol'].unique()
#TICKERS = TOP2_STOCK_BY_SECTOR 

CHAOS_THRESHOLD = 0.45
WINDOW_SIZE     = 120
ENTROPY_BINS    = 10
N_WEEK_DAYS     = 5
STEP_SIZE       = 20
MAX_LEN         = 64
LOOKBACK_BUFFER = 40
ROLLING_WINDOW  = 5
START_DATE      = "2023-01-01",
END_DATE        = "2025-05-01"#pd.Timestamp.today().replace(day=1)
TRAIN_MONTHS    = 2
TEST_MONTHS     = 1
MIN_WINDOW_DAYS = 20
NUM_EPOCHS      = 1
SEEDS = RANDOM_SEEDS[:5]

""" 
DEVELOPER NOTES:
@todo : onehot encode day_of_week so the Model wont use the data as a sized sequence 
"""
BASIC_FEATURES=[
    "close"	,          # Core price for reward and trend awareness
    "volume",          # Volume for activity level
    "candle_body",     # Price strength (close-open)
    "upper_shadow",    # Wick size = volatility / exhaustion
    "lower_shadow",    # Same as above
    "order_flow",      # Flow = pressure indicator (buy/sell imbalance)
    "price_change",    # Price momentum short term
    "volatility",      # Recent price dispersion
    "momentum",        # Rolling price trend
    "vix_norm",        # Implied market risk normalized
    "market_return_1d",# Market regime alignment
    "day_of_week"      # heard it's important                
]

# That’s 12 columns, enough to:
# * See price movement
# * Detect regime shifts
# * Respond to risk

# Second Round:
# * overnight_price_change → if overnight gaps matter to your strategy
# * trade_count_change → intraday activity shifts
# * sp500_norm → macro regime normalization

DEFAULT_INTERNAL_FEATURES = [
    "position",
    "holding_period",
    "cumulative_reward",
    "pct_time",
    "drawdown",
    "unrealized_pnl",
    "entry_price",
    "time_in_position",
    "prev_action"
]



# Data preparation

In [4]:
# STEP 1 - GET DATA =======================
# POC

EXPERIENCE_NAME="regime_detection_and_alignment__v2"

results_path = f"data/experiments/{EXPERIENCE_NAME}.csv"

TICKERS = TOP2_STOCK_BY_SECTOR
FEATURES =  ["volatility", "momentum", "kurtosis", "entropy", "adf_pvalue", "hurst"]
df = OHLCV_DF[OHLCV_DF['symbol'].isin(TICKERS)].copy()


#df = OHLCV_DF[OHLCV_DF['symbol'] == ticker].copy()

df = df.sort_values("date").reset_index(drop=True)




In [25]:
import os
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from sklearn.cluster import KMeans
from sklearn.preprocessing import QuantileTransformer, PowerTransformer, MaxAbsScaler, MinMaxScaler, StandardScaler, RobustScaler
from scipy.stats import f_oneway
from statsmodels.tsa.stattools import adfuller
from hurst import compute_Hc
import scipy.stats as stats
import ace_tools_open as tools

import random
import numpy as np

def set_global_seed(seed):
    np.random.seed(seed)
    random.seed(seed)
    
# ------------------------
# Regime Indicators (Causal)
# ------------------------
def compute_regime_indicators(df, window=30, hurst_window=128):
    df = df.copy()

    df["volatility"] = df["close"].shift(1).rolling(window).std()
    df["momentum"] = df["close"].pct_change(periods=window).shift(1)
    df["kurtosis"] = df["close"].shift(1).rolling(window).apply(lambda x: stats.kurtosis(x), raw=True)
    df["entropy"] = df["close"].shift(1).rolling(window).apply(
        lambda x: stats.entropy(np.histogram(x, bins=10)[0] + 1), raw=True
    )
    df["adf_pvalue"] = df["close"].shift(1).rolling(window).apply(
        lambda x: adfuller(x)[1] if len(x.dropna()) == window else np.nan
    )

    hurst_vals = []
    for i in range(len(df)):
        if i >= hurst_window:
            window_data = df["close"].iloc[i - hurst_window:i].dropna()
            try:
                hurst_val = compute_Hc(window_data.values, kind="price", simplified=True)[0]
            except:
                hurst_val = np.nan
        else:
            hurst_val = np.nan
        hurst_vals.append(hurst_val)
    df["hurst"] = hurst_vals

    return df

# ------------------------
# Regime Classifier
# ------------------------
def regime_classifier(df_ind, ticker, features, start_date, split_date, end_date, train_months, test_months, n_clusters,  scaler_class, seed=42,verbose=False, plot=False):
    df_clean = df_ind.dropna().copy()
    df_train = df_clean[(df_clean["date"] >= start_date) & (df_clean["date"] < split_date)].copy()
    df_test = df_clean[(df_clean["date"] >= split_date) & (df_clean["date"] <= end_date)].copy()

    if len(df_train) < 10 or len(df_test) < 10:
        return None

    if scaler_class.__name__ == "QuantileTransformer":
        scaler = QuantileTransformer(output_distribution="normal",random_state=seed)
    elif scaler_class.__name__ == "PowerTransformer":
        scaler = PowerTransformer(method="yeo-johnson")
    else:
        scaler = scaler_class()

    X_train = scaler.fit_transform(df_train[features])
    X_test = scaler.transform(df_test[features])

    kmeans = KMeans(n_clusters=n_clusters, random_state=seed)
    df_train["regime"] = kmeans.fit_predict(X_train)
    df_test["regime"] = kmeans.predict(X_test)
    df_test["cluster_dist"] = kmeans.transform(X_test).min(axis=1)

    pvals_test = {}
    significant_count = 0
    for feat in features:
        groups = [df_test[df_test["regime"] == i][feat] for i in range(n_clusters) if (df_test["regime"] == i).shape[0] > 1]
        if len(groups) >= 2:
            _, pval = f_oneway(*groups)
            pvals_test[feat] = pval
            if pval < 0.05:
                significant_count += 1
        else:
            pvals_test[feat] = None

    result = {
        "ticker": ticker,
        "start_date": start_date,
        "split_date": split_date,
        "end_date": end_date,
        "n_clusters": n_clusters,
        "scaler_name": scaler_class.__name__,
        "train_size": len(df_train),
        "test_size": len(df_test),
        "train_months": train_months,
        "test_months": test_months,
        "seed":seed,
        "regime_train_dist": json.dumps(df_train["regime"].value_counts().to_dict(), sort_keys=True),
        "regime_test_dist": json.dumps(df_test["regime"].value_counts().to_dict(), sort_keys=True),
        "anova_significant_features_test": significant_count
    }

    avg_test_pvals = []
    for feat in features:
        result[f"pval_{feat}_test"] = pvals_test.get(feat)
        avg_test_pvals.append(pvals_test.get(feat))
    result["avg_pval_test"] = np.nanmean(avg_test_pvals)

    pvals_train = {}
    significant_count = 0
    for feat in features:
        groups = [df_train[df_train["regime"] == i][feat] for i in range(n_clusters) if (df_train["regime"] == i).shape[0] > 1]
        if len(groups) >= 2:
            _, pval = f_oneway(*groups)
            pvals_train[feat] = pval
            if pval < 0.05:
                significant_count += 1
        else:
            pvals_train[feat] = None

    avg_train_pvals = []
    for feat in features:
        result[f"pval_{feat}_train"] = pvals_train.get(feat)
        avg_train_pvals.append(pvals_train.get(feat))
    result["avg_pval_train"] = np.nanmean(avg_train_pvals)

    return result

# ------------------------
# Discretizer
# ------------------------
def discretize_result(result, thresholds={"anova_significant_features_test": [0, 2, 4, 6], "test_size": [0, 25, 50, 100]}):
    sig = result["anova_significant_features_test"]
    ts = result["test_size"]

    result["anova_strength"] = (
        "low" if sig < thresholds["anova_significant_features_test"][1] else
        "medium" if sig < thresholds["anova_significant_features_test"][2] else "high"
    )

    result["test_size_cat"] = (
        "tiny" if ts < thresholds["test_size"][1] else
        "small" if ts < thresholds["test_size"][2] else
        "medium" if ts < thresholds["test_size"][3] else "large"
    )

    return result

# ------------------------
# Walkforward Regime Classifier
# ------------------------
def walkforward_refime_classifier(
    df, tickers, results_path, features,
    start_date="2023-01-01",
    train_months=5, test_months=5,
    n_clusters=3,
    seeds=[42],
    scalers=[QuantileTransformer, PowerTransformer, MaxAbsScaler, MinMaxScaler, StandardScaler, RobustScaler]
):
    for ticker in tqdm(tickers):
        df_ticker = df[df["symbol"] == ticker].copy().sort_values("date").reset_index(drop=True)
        df_ticker = compute_regime_indicators(df_ticker)

        start = pd.to_datetime(start_date)
        end = df_ticker["date"].max() - relativedelta(months=train_months + test_months)

        if os.path.exists(results_path):
            cached_results = pd.read_csv(results_path)
            cached_results = cached_results.fillna("None")
            cached_results["start_date"] = pd.to_datetime(cached_results["start_date"]).dt.date.astype(str)
            cached_results["split_date"] = pd.to_datetime(cached_results["split_date"]).dt.date.astype(str)
            cached_results["end_date"] = pd.to_datetime(cached_results["end_date"]).dt.date.astype(str)
            monthly_results = cached_results.to_dict("records")
        else:
            monthly_results = []

        while start < end:
            split = start + relativedelta(months=train_months)
            test_end = split + relativedelta(months=test_months)

            for scaler in scalers:
                for seed in seeds:
                    set_global_seed(seed)

                    # Avoid recomputation
                    if any(
                        r["ticker"] == ticker and
                        r["start_date"] == str(start.date()) and
                        r["split_date"] == str(split.date()) and
                        r["end_date"] == str(test_end.date()) and
                        r["n_clusters"] == n_clusters and
                        r["scaler_name"] == scaler.__name__ and
                        int(r.get("seed", -1)) == seed
                        for r in monthly_results
                    ):
                        continue

                    # Run classifier
                    result = regime_classifier(
                        df_ticker, ticker, features,
                        start_date=str(start.date()),
                        split_date=str(split.date()),
                        end_date=str(test_end.date()),
                        train_months=train_months,
                        test_months=test_months,
                        n_clusters=n_clusters,
                        seed=seed,
                        scaler_class=scaler
                    )

                    if result:
                        result = discretize_result(result)
                        monthly_results.append(result)

            start += relativedelta(months=1)

    results_df = pd.DataFrame(monthly_results)
    results_df["avg_pval_test"] = pd.to_numeric(results_df["avg_pval_test"], errors="coerce")

    # Drop rows where avg_pval_test is NaN
    results_df_clean = results_df.dropna(subset=["avg_pval_test"]).copy()
    results_df_clean.to_csv(results_path, index=False)
    tools.display_dataframe_to_user(name="Regime Classification Results", dataframe=results_df_clean)
    return results_df_clean


In [None]:
results_df = walkforward_refime_classifier(df,TOP2_STOCK_BY_SECTOR,results_path,FEATURES,seeds=RANDOM_SEEDS)



  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["avg_pval_test"] = np.nanmean(avg_test_pvals)
  result["

  result["avg_pval_test"] = np.nanmean(avg_test_pvals)


In [None]:
results_df[['scaler_name','avg_pval_train','avg_pval_test']].groupby("scaler_name").mean()

# Checkpoint

### What do we have?
We’ve already constructed a powerful modular pipeline:

1. Causal Regime Feature Extractor

    * Uses only past data (shift(1), rolling windows) to avoid leakage.
    * Extracts volatility, kurtosis, entropy, Hurst, ADF, momentum.


2. Walkforward Regime Classifier
    * Tests all scalers and clusterings in a robust out-of-sample way.
    * Tracks ANOVA p-values to check regime separability.
    * Repeats across seeds, months, and scalers.
    * Stores results in CSV for resumability and comparison.

3. Regime Evaluation Metrics
    * Sharpe per regime: Performance of each regime historically.
    * Persistence: How long each regime usually lasts.
    * Predictive power: Average next-day return for each regime (can be expanded).


### Summary:

* BLOCK 1: "Descriptive regimes" — What does this regime mean?
    * Uses: Sharpe ratio, volatility, return std, etc.
    * Helps answer: What has this regime done in the past?

* BLOCK 2: "Forecastable regimes" — Will this regime persist?
    * Uses: Regime duration/persistence stats.
    * Helps answer: Is this regime stable enough to trade or learn on?

* BLOCK 3: "Predictive regimes" — Can this regime predict what comes next?
    * Uses: Future returns, future Sharpe, forward volatility.
    * Helps answer: Does today’s regime tell us about tomorrow’s market?

### Next steps
| Direction                      | Question it answers                                     |  Tools needed                                          |  Outcome                                                         |
| --------------------------------- | ---------------------------------------------------------- | -------------------------------------------------------- | ----------------------------------------------------------------- |
| **1. Meta-Forecast Model**        | “Given the current regime, what is likely to happen next?” | Historical regime stats + ML (regression/classification) | Predict returns, Sharpe, or volatility                            |
| **2. Transition Modeling**        | “What regime is likely to come next?”                      | Markov models or transformer-based transition learning   | Anticipate shifts and prepare trades                              |
| **3. Regime-specific Agents**     | “How should we act in this regime?”                        | Separate RL agents per regime, or conditioned policy     | Specialized strategies that exploit regime behavior               |
| **4. Regime-weighted RL Rewards** | “Should this episode count more?”                          | Modify reward based on regime predictability             | Efficient learning focus on favorable zones                       |
| **5. Meta-feature integration**   | “Can the agent learn what regimes mean on its own?”        | Feed regime labels or embeddings into agent              | Learn policies that adapt to regime without explicit switch logic |
