In [None]:
# --- Standard library -------------------------------------------------------
import datetime
import os
import sys
import warnings
from pathlib import Path

# --- Third party ------------------------------------------------------------
import pandas as pd

# The project root is taken to be the parent of the current working
# directory; it is appended to sys.path so that the `scripts` and `utils`
# packages imported below can be resolved.
project_root = os.path.abspath(Path("../."))
sys.path.append(str(project_root))
print(project_root)

# --- Project-local (must come after the sys.path tweak above) ---------------
from scripts import s5_EAF_eval
from utils.shap_ks_test_regimes import run_shap_regime_pipeline
from utils.shap_utils import (
    compute_shap_values_xgb,
    compute_global_shap,
    compute_temporal_shap,
    build_crisis_windows,
    summarize_crisis_vs_normal,
    save_global_shap,
    save_temporal_shap,
    save_crisis_summary,
    plot_global_shap_bar,
    plot_temporal_shap,
)

# Silence the three warning categories below for the rest of the session.
for _category in (RuntimeWarning, FutureWarning, UserWarning):
    warnings.simplefilter(action='ignore', category=_category)

# ============================ Run configuration ============================
# Trade directions; both flags are forwarded to s5_EAF_eval.run_adpative.
Long_allowed = True
Short_allowed = False

# When True, the features file name gets an extra '_mt5' suffix.
load_mt5_data = False

# Feature-set switches. NOTE(review): none of the four names below is read
# anywhere in the visible part of this cell -- presumably they are consumed
# by other cells/modules; confirm before removing.
tsfresh_recompute = True
extra_symbols = []
base_features = [
    'Close',
    'High',
    'Log_Return',
    'Log_Return_change1',
    'Log_Return_change2',
]
tsfresh_features = []
# Recorded setting (looks like extra_symbols, base_features,
# tsfresh_features, tsfresh_recompute -- TODO confirm):
# [] ['Close', 'High', 'Log_Return', 'Log_Return_change1', 'Log_Return_change2'] [] True
# ===========================================================================

# Driver: for every target type, seed and ADWIN configuration, run the EAF
# evaluation, then compute/save/plot SHAP tables for the returned model and
# run the KS regime tests on the temporal SHAP output.
list_target_isClose = [False]
# Defined up front so the final display still works if a target is skipped.
list_results = []
for target_isClose in list_target_isClose:
    # # Feature Engineering
    target_tag = 'Close' if target_isClose else 'LogReturn'
    source_tag = '_mt5' if load_mt5_data else ''
    features_file = f"gold_price_features_{target_tag}{source_tag}.csv"
    features_filepath = str(project_root) + f'/data/features/{features_file}'

    # Building the path can never raise FileNotFoundError; reading the file
    # can. Guard the read itself so the warning is actually reachable, and
    # skip this target because nothing below can run without the file.
    try:
        df_OHLCV = pd.read_csv(
            features_filepath,
            index_col=0, parse_dates=True)
    except FileNotFoundError:
        print(f"[WARNING] File not found: {features_filepath}")
        continue

    df_OHLCV.index.rename('Date', inplace=True)
    # If the CSV also carries an explicit 'Date' column, it replaces the index.
    if 'Date' in df_OHLCV.columns:
        df_OHLCV['Date'] = pd.to_datetime(df_OHLCV['Date'])
        df_OHLCV.set_index('Date', inplace=True)

    ### Run ####
    # pre-computed file is saved: str(project_root) + f'/data/features/{tfresh_features_file}',
    results_folder = str(project_root) + '/results/performance2/'
    list_results = []

    # The output location does not depend on seed or ADWIN config, so it is
    # created once here rather than on every inner iteration.
    output_dir = Path(project_root) / "results" / "SHAP_EAF_RQ3"
    output_dir.mkdir(parents=True, exist_ok=True)
    # Single source of truth for the KS results file: used both when writing
    # and when reporting, so the printed path always matches the real one.
    ks_results_csv = output_dir / "ks_RQ3_test_results.csv"

    for GLOBAL_SEED in [10]:
        print("GLOBAL_SEED = ", GLOBAL_SEED)
        list_adwin_config = ['adwin_default']  # ['adwin_Config_B','adwin_Config_C','adwin_Config_D']
        for adwin_config in list_adwin_config:
            (EAF_metrics,
             final_model,
             drift_dates_test,
             X_test_eaf,
             y_test_eaf) = s5_EAF_eval.run_adpative(
                GLOBAL_SEED, target_isClose, features_filepath,
                Long_allowed, Short_allowed, adwin_config)
            print(EAF_metrics)
            list_results.append(EAF_metrics)

            # ======================================================================
            # 3. Compute SHAP values for the final adaptive model
            # ======================================================================
            shap_values, base_value = compute_shap_values_xgb(
                model=final_model,
                X=X_test_eaf,
                y=y_test_eaf,
                approximate=False
            )

            feature_names = list(X_test_eaf.columns)

            # ======================================================================
            # 4. Compute GLOBAL SHAP importance
            # ======================================================================
            df_global = compute_global_shap(
                shap_values=shap_values,
                feature_names=feature_names,
                top_k=30
            )

            # ======================================================================
            # 5. Compute TEMPORAL SHAP (rolling)
            # ======================================================================
            df_temporal, top_features = compute_temporal_shap(
                shap_values=shap_values,
                index=X_test_eaf.index,
                feature_names=feature_names,
                window=30,
                top_k=10
            )

            # ======================================================================
            # 6. Build crisis windows from drift dates
            # ======================================================================
            crisis_windows = build_crisis_windows(
                drift_dates=drift_dates_test,
                window_days=30
            )

            # ======================================================================
            # 7. Crisis vs normal summary
            # ======================================================================
            df_crisis = summarize_crisis_vs_normal(
                df_temporal=df_temporal,
                crisis_windows=crisis_windows
            )

            # ======================================================================
            # 8. Save SHAP tables
            # ======================================================================
            path_global_csv = save_global_shap(df_global, output_dir)
            path_temporal_csv = save_temporal_shap(df_temporal, output_dir)
            path_crisis_csv = save_crisis_summary(df_crisis, output_dir)

            # The temporal SHAP file path is now:
            shap_file = path_temporal_csv  # <-- used for KS tests

            # ======================================================================
            # 9. Plot SHAP figures
            # ======================================================================
            path_global_fig = plot_global_shap_bar(df_global, output_dir, top_k=10)
            path_temporal_fig = plot_temporal_shap(df_temporal, output_dir, top_k=5)

            # ======================================================================
            # 10. Run automated KS regime tests (RQ3 statistical evidence)
            # ======================================================================
            ks_results = run_shap_regime_pipeline(
                shap_temporal_path=shap_file,
                top_features=top_features,
                crisis_windows=crisis_windows,
                output_csv=str(ks_results_csv)
            )

            print("\n==== KS Test Results (RQ3 Regime Dependence) ====\n")
            print(ks_results)

            # Confirm saved outputs
            print("\nGlobal SHAP saved to:", path_global_csv)
            print("Temporal SHAP saved to:", path_temporal_csv)
            print("Crisis summary saved to:", path_crisis_csv)
            print("KS test results saved:", ks_results_csv)
            print("Global SHAP figure:", path_global_fig)
            print("Temporal SHAP figure:", path_temporal_fig)

# Notebook display of the collected metrics (last expression of the cell).
list_results


g:\My Drive\0Study_programs\1Msc_ML_AI\EAF_python_project\EAF_implementation_thesis
GLOBAL_SEED =  10
Run EAF evaluation | GLOBAL_SEED = 10...
drift_config = adwin_default 
 {'delta': 0.1, 'clock': 16, 'retrain_window': '126D'}
Loading feature data from g:\My Drive\0Study_programs\1Msc_ML_AI\EAF_python_project\EAF_implementation_thesis/data/features/gold_price_features_LogReturn.csv
Preparing time series data with historical context
Loading model from g:\My Drive\0Study_programs\1Msc_ML_AI\EAF_python_project\EAF_implementation_thesis\models\trained\xgboost_model_LogReturn10.json
Initializing ADWIN with delta=0.1, clock=16
Preparing retraining data up to 2007-03-06 00:00:00 with window 126D
Retraining model using data up to 2007-03-06 00:00:00
Preparing retraining data up to 2008-10-27 00:00:00 with window 126D
Retraining model using data up to 2008-10-27 00:00:00
Preparing retraining data up to 2009-09-17 00:00:00 with window 126D
Retraining model using data up to 2009-09-17 00:00:00
P

[[{'GLOBAL_SEED': 10,
   'baseline_modelName': 'baseline_EAF',
   'target_isClose': False,
   'period': 'training',
   'rmse': 1.5833252078530498,
   'mae': 1.1627906104938475,
   'r2': -0.7725214590922684,
   'strategy_name': 'EAF',
   'strategy_type': 'moc',
   'n_days': 3262,
   'trading_days': 1445,
   'skipped_trades': 0,
   'gap_skips': 0,
   'edge_skips': 0,
   'trade_rate': 0.4429797670141018,
   'win_days': 770,
   'win_rate': 0.532871972318339,
   'compound_gross_return': 2.5668954784465385,
   'total_return': 1.3657835171790025,
   'annualized_return': 0.10323037297053883,
   'cagr': 0.10302351590826042,
   'sharpe_ratio': 0.875752135826851,
   'sortino_ratio': 0.5907768938987287,
   'omega_ratio': 1.2592376802046148,
   'max_drawdown': 0.30676005855946753,
   'calmar_ratio': 0.3365182985532874,
   'profit_factor': 1.2592376802046148,
   'volatility': 0.1204806345134786,
   'risk_free_rate': 0.0,
   'execution_threshold': 0.0,
   'max_gap': 0.02,
   'at': '2025-12-14 00:50:4