# In [None]:
# Combined Script: Load CSV -> Feature Engineering -> Rolling Origin XGB Modeling

import pandas as pd
import numpy as np
import time
import os
import warnings
import traceback
from datetime import datetime

# Feature Engineering Imports
import pandas_ta as ta  # Technical indicators

# Modeling Imports
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier, early_stopping, log_evaluation # <--- IMPORT CALLBACKS
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler # Needed for SVM
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import ParameterGrid, StratifiedKFold # StratifiedKFold for stacking
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.pipeline import Pipeline # Optional: useful for SVM with scaling
from sklearn.impute import SimpleImputer # Better imputation strategy for pipeline


# --- Suppress Warnings ---
# Register one "ignore" filter per noisy category, then a blanket filter that
# silences every remaining warning as well.
for _ignored_category in (
    UndefinedMetricWarning,
    FutureWarning,
    UserWarning,
    pd.errors.PerformanceWarning,
):
    warnings.filterwarnings('ignore', category=_ignored_category)
del _ignored_category  # keep the module namespace free of the loop variable
warnings.filterwarnings('ignore')  # General suppression

# --- Configuration ---

# Data Loading
# The CSV location can be overridden with the BTCUSD_CSV_PATH environment
# variable so the script runs on machines other than the author's; the original
# Windows path (raw string, so backslashes are literal) stays the default.
CSV_FILE_PATH = os.environ.get('BTCUSD_CSV_PATH', r'C:\Users\mason\AVP\BTCUSD.csv')
SYMBOL_NAME = 'BTCUSD'  # Symbol represented in the CSV

# Feature Selection: the 123 column names the feature-engineering step must
# return. The list order is the column order of the resulting frame.
SELECTED_FEATURE_NAMES = [
    # Raw volume and volume x volatility
    'Volume BTC_x_rolling_std_168h',
    'Volume BTC_x_std12_div_std72',
    'Volume USD',
    # CCI(20) transforms and interactions
    'cci_20h_sq',
    'cci_20h_x_Volume BTC',
    'cci_20h_x_cmf_20h',
    'cci_20h_x_lag_24h_volume_return',
    'cci_20h_x_lag_3h_volume_return',
    'cci_20h_x_rolling_kurt_24h',
    'cci_20h_x_rolling_std_168h',
    'cci_20h_x_rolling_std_48h',
    'cci_20h_x_std12_div_std72',
    'cci_20h_x_volume_div_ma_24h',
    'cci_20h_x_volume_ma_168h',
    # Close position in the bar range and CMF(20)
    'close_pos_in_range',
    'cmf_20h',
    'cmf_20h_x_bband_width_20h',
    'cmf_20h_x_rolling_kurt_24h',
    'cmf_20h_x_rolling_std_168h',
    'cmf_20h_x_rolling_std_6h',
    'cmf_20h_x_std12_div_std72',
    # Day-of-week indicators (day_3 is not selected)
    'day_0', 'day_1', 'day_2', 'day_4', 'day_5', 'day_6',
    # Hour-of-day indicators (hour_7 is not selected)
    'hour_0', 'hour_1',
    'hour_10', 'hour_11', 'hour_12', 'hour_13', 'hour_14',
    'hour_15', 'hour_16', 'hour_17', 'hour_18', 'hour_19',
    'hour_2',
    'hour_20', 'hour_21', 'hour_22', 'hour_23',
    'hour_3', 'hour_4', 'hour_5', 'hour_6', 'hour_8', 'hour_9',
    # 12h lags
    'lag_12h_price_return_sq',
    'lag_12h_price_return_x_cmf_20h',
    'lag_12h_price_return_x_rolling_kurt_24h',
    'lag_12h_price_return_x_rolling_std_168h',
    'lag_12h_price_return_x_volume_ma_168h',
    'lag_12h_volume_return',
    # 168h lags
    'lag_168h_price_return_sq',
    'lag_168h_price_return_x_Volume BTC',
    'lag_168h_price_return_x_bband_width_20h',
    'lag_168h_price_return_x_cmf_20h',
    'lag_168h_price_return_x_rolling_kurt_24h',
    'lag_168h_price_return_x_rolling_std_168h',
    'lag_168h_price_return_x_rolling_std_6h',
    'lag_168h_price_return_x_std12_div_std72',
    'lag_168h_price_return_x_volume_div_ma_24h',
    'lag_168h_price_return_x_volume_ma_168h',
    # 24h lags
    'lag_24h_price_return_sq',
    'lag_24h_price_return_x_lag_12h_volume_return',
    'lag_24h_price_return_x_lag_6h_volume_return',
    'lag_24h_price_return_x_rolling_std_168h',
    'lag_24h_price_return_x_volume_ma_168h',
    'lag_24h_volume_return_x_rolling_std_168h',
    'lag_24h_volume_return_x_std12_div_std72',
    # 3h lag
    'lag_3h_volume_return_x_rolling_kurt_24h',
    # 48h lags
    'lag_48h_price_return_sq',
    'lag_48h_price_return_x_lag_24h_volume_return',
    'lag_48h_price_return_x_rolling_std_168h',
    'lag_48h_price_return_x_volume_div_ma_24h',
    'lag_48h_price_return_x_volume_ma_168h',
    # 6h lag
    'lag_6h_volume_return_x_rolling_kurt_24h',
    # 72h lags
    'lag_72h_price_return_sq',
    'lag_72h_price_return_x_cmf_20h',
    'lag_72h_price_return_x_lag_12h_volume_return',
    'lag_72h_price_return_x_lag_3h_volume_return',
    'lag_72h_price_return_x_lag_6h_volume_return',
    'lag_72h_price_return_x_rolling_kurt_24h',
    'lag_72h_price_return_x_rolling_std_168h',
    'lag_72h_price_return_x_volume_ma_168h',
    # MACD histogram
    'macd_hist_sq',
    'macd_hist_x_Volume BTC',
    'macd_hist_x_cmf_20h',
    'macd_hist_x_rolling_kurt_24h',
    'macd_hist_x_rolling_std_6h',
    'macd_hist_x_volume_div_ma_24h',
    'macd_hist_x_volume_ma_12h',
    # MACD signal line
    'macd_signal_sq',
    'macd_signal_x_cmf_20h',
    'macd_signal_x_lag_24h_volume_return',
    'macd_signal_x_rolling_kurt_24h',
    'macd_signal_x_rolling_std_48h',
    'macd_signal_x_rolling_std_6h',
    'macd_signal_x_std12_div_std72',
    'macd_signal_x_volume_div_ma_24h',
    'macd_signal_x_volume_ma_12h',
    # Rolling distribution statistics
    'rolling_kurt_24h',
    'rolling_skew_24h',
    'rolling_std_168h',
    'rolling_std_3h_sq',
    'rolling_std_6h_div_rolling_std_48h',
    'std12_div_std72',
    # Volume-derived features
    'volume_btc_x_range',
    'volume_btc_x_range_log1p',
    'volume_div_ma_24h_sq',
    'volume_div_ma_24h_x_rolling_kurt_24h',
    'volume_div_ma_24h_x_rolling_std_168h',
    'volume_ma_12h_x_bband_width_20h',
    'volume_ma_12h_x_rolling_kurt_24h',
    'volume_ma_12h_x_rolling_std_6h',
    'volume_ma_168h_x_rolling_kurt_24h',
    'volume_ma_168h_x_rolling_std_48h',
    'volume_ma_168h_x_std12_div_std72',
    'volume_return_1h',
    'volume_return_1h_x_rolling_kurt_24h',
]


# Modeling & Walk-Forward
# Minimum future return, expressed in percent, for a row to be labelled 1.
TARGET_THRESHOLD_PCT = 0.1

# Prediction horizon used when building the target (kept separate from the
# evaluation window below).
PREDICTION_WINDOW_HOURS = 24

# Walk-forward window sizes, in hours.
TRAIN_WINDOW_HOURS = int(1.5 * 30 * 24)  # ~1.5 months of training data (1080 hours)
TEST_WINDOW_HOURS = 7 * 24               # evaluation window: the next 7 days (168 hours)
STEP_HOURS = 24                          # retrain and predict once per day

# Row counts mirror the hour counts one-to-one.
# NOTE(review): this assumes one row per hour in the input data - confirm.
PREDICTION_WINDOW_ROWS = PREDICTION_WINDOW_HOURS
TRAIN_WINDOW_ROWS, TEST_WINDOW_ROWS, STEP_ROWS = (
    TRAIN_WINDOW_HOURS,
    TEST_WINDOW_HOURS,  # size of the EVALUATION window
    STEP_HOURS,
)

# Stacking Configuration
N_STACKING_FOLDS = 5  # number of folds used to generate Level 0 predictions

# --- Base Model Static Hyperparameters ---
# Fixed keyword settings for the three Level 0 learners.

# XGBoost base model
XGB_BASE_PARAMS = {
    # learning task
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    # tree growth
    'eta': 0.05,
    'max_depth': 3,
    'n_estimators': 100,
    # row / column sampling and split constraints
    'subsample': 0.8,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    # regularisation
    'gamma': 0.1,
    'lambda': 1.5,
    'alpha': 0.1,
    # runtime
    'random_state': 42,
    'n_jobs': -1,
    'tree_method': 'hist',  # author's note: hist chosen for NaN handling
    'use_label_encoder': False,
}

# LightGBM base model
LGBM_BASE_PARAMS = {
    # learning task
    'objective': 'binary',
    'metric': 'logloss',
    # tree growth
    'learning_rate': 0.05,
    'n_estimators': 100,
    'max_depth': 3,
    'num_leaves': 8,
    # row / column sampling and leaf constraints
    'subsample': 0.8,
    'colsample_bytree': 0.7,
    'min_child_samples': 5,
    # regularisation
    'reg_alpha': 0.1,
    'reg_lambda': 1.5,
    # runtime
    'random_state': 42,
    'n_jobs': -1,
    'boosting_type': 'gbdt',
    'verbose': -1,
}

# SVM base model (SVC with a linear kernel)
SVM_BASE_PARAMS = {
    'kernel': 'linear',
    'C': 0.1,
    'probability': True,
    'max_iter': 5000,
    'random_state': 42,
    'class_weight': 'balanced',
}

# --- Meta Learner Configuration ---
# Flag selecting an XGBoost meta learner, the hyperparameter values to search
# over, and the settings that stay fixed for every candidate.
META_LEARNER_IS_XGB = True

META_XGB_PARAM_GRID = {
    'max_depth': [2, 3],
    'n_estimators': [50, 100],
    'eta': [0.05, 0.1],
    'lambda': [1.0, 2.0],
    'min_child_weight': [1, 3],
}

META_XGB_FIXED_PARAMS = {
    # learning task
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    # sampling
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    # regularisation terms that are not searched
    'gamma': 0.0,
    'alpha': 0.0,
    # runtime
    'random_state': 123,
    'n_jobs': -1,
    'tree_method': 'hist',
    'use_label_encoder': False,
}

# --- Probability Threshold Tuning Configuration ---
# Candidate probability thresholds 0.10, 0.15, ..., 0.85 (16 values).
# The grid is generated from an integer range and then scaled: np.arange with a
# fractional step is subject to floating-point rounding, which can change the
# number of elements and produces values such as 0.15000000000000002.
THRESHOLD_SEARCH_RANGE = np.arange(10, 90, 5) / 100.0
# NOTE(review): presumably the fraction of data held out for threshold tuning;
# its use is not visible in this part of the file - confirm.
META_VALIDATION_PCT = 0.25

# --- Feature Engineering Function (Using 123 features) ---
# Computes the prerequisite indicators and the derived columns listed in SELECTED_FEATURE_NAMES.
# Prerequisite columns that are exported unchanged when they are requested.
_DIRECT_FEATURE_COLUMNS = (
    'Volume USD', 'close_pos_in_range', 'cmf_20h', 'lag_12h_volume_return',
    'rolling_kurt_24h', 'rolling_skew_24h', 'rolling_std_168h', 'std12_div_std72',
    'volume_btc_x_range', 'volume_ma_12h', 'volume_ma_168h', 'volume_return_1h',
    'macd_signal', 'macd_hist',
)

# Left operand -> right operands. Each pair is published as "<left>_x_<right>"
# and holds the element-wise product of the two columns. The name 'Volume BTC'
# is resolved to the internal 'volume_btc' column when the product is computed.
_PRODUCT_FEATURE_PARTNERS = {
    'Volume BTC': ('rolling_std_168h', 'std12_div_std72'),
    'cci_20h': (
        'Volume BTC', 'cmf_20h', 'lag_24h_volume_return', 'lag_3h_volume_return',
        'rolling_kurt_24h', 'rolling_std_168h', 'rolling_std_48h',
        'std12_div_std72', 'volume_div_ma_24h', 'volume_ma_168h',
    ),
    'cmf_20h': (
        'bband_width_20h', 'rolling_kurt_24h', 'rolling_std_168h',
        'rolling_std_6h', 'std12_div_std72',
    ),
    'lag_12h_price_return': (
        'cmf_20h', 'rolling_kurt_24h', 'rolling_std_168h', 'volume_ma_168h',
    ),
    'lag_168h_price_return': (
        'Volume BTC', 'bband_width_20h', 'cmf_20h', 'rolling_kurt_24h',
        'rolling_std_168h', 'rolling_std_6h', 'std12_div_std72',
        'volume_div_ma_24h', 'volume_ma_168h',
    ),
    'lag_24h_price_return': (
        'lag_12h_volume_return', 'lag_6h_volume_return', 'rolling_std_168h',
        'volume_ma_168h',
    ),
    'lag_24h_volume_return': ('rolling_std_168h', 'std12_div_std72'),
    'lag_3h_volume_return': ('rolling_kurt_24h',),
    'lag_48h_price_return': (
        'lag_24h_volume_return', 'rolling_std_168h', 'volume_div_ma_24h',
        'volume_ma_168h',
    ),
    'lag_6h_volume_return': ('rolling_kurt_24h',),
    'lag_72h_price_return': (
        'cmf_20h', 'lag_12h_volume_return', 'lag_3h_volume_return',
        'lag_6h_volume_return', 'rolling_kurt_24h', 'rolling_std_168h',
        'volume_ma_168h',
    ),
    'macd_hist': (
        'Volume BTC', 'cmf_20h', 'rolling_kurt_24h', 'rolling_std_6h',
        'volume_div_ma_24h', 'volume_ma_12h',
    ),
    'macd_signal': (
        'cmf_20h', 'lag_24h_volume_return', 'rolling_kurt_24h',
        'rolling_std_48h', 'rolling_std_6h', 'std12_div_std72',
        'volume_div_ma_24h', 'volume_ma_12h',
    ),
    'volume_div_ma_24h': ('rolling_kurt_24h', 'rolling_std_168h'),
    'volume_ma_12h': ('bband_width_20h', 'rolling_kurt_24h', 'rolling_std_6h'),
    'volume_ma_168h': ('rolling_kurt_24h', 'rolling_std_48h', 'std12_div_std72'),
    'volume_return_1h': ('rolling_kurt_24h',),
}

# Output name -> prerequisite column that is squared to produce it.
_SQUARED_FEATURE_SOURCES = {
    'cci_20h_sq': 'cci_20h',
    'lag_12h_price_return_sq': 'lag_12h_price_return',
    'lag_168h_price_return_sq': 'lag_168h_price_return',
    'lag_24h_price_return_sq': 'lag_24h_price_return',
    'lag_48h_price_return_sq': 'lag_48h_price_return',
    'lag_72h_price_return_sq': 'lag_72h_price_return',
    'macd_hist_sq': 'macd_hist',
    'macd_signal_sq': 'macd_signal',
    'rolling_std_3h_sq': 'rolling_std_3h_temp',
    'volume_div_ma_24h_sq': 'volume_div_ma_24h',
}


def _rolling_stat_or_nan(series, window, stat, min_periods):
    """Return the rolling ``stat`` of ``series``, or NaN if it has fewer than ``window`` rows."""
    if len(series) < window:
        return np.nan
    return getattr(series.rolling(window=window, min_periods=min_periods), stat)()


def _extract_bband_width(bbands_df):
    """Pick the Bollinger bandwidth ("BBB_...") column from a pandas_ta bbands result."""
    if bbands_df is None:
        return np.nan
    if 'BBB_20_2.0' in bbands_df.columns:
        return bbands_df['BBB_20_2.0']
    # Fall back to any bandwidth column so a differently suffixed column name
    # does not silently turn the feature into all-NaN.
    # NOTE(review): the exact suffix is believed to vary across pandas_ta
    # releases - confirm against the installed version.
    width_columns = [c for c in bbands_df.columns if str(c).startswith('BBB_')]
    return bbands_df[width_columns[0]] if width_columns else np.nan


def _prepare_price_frame(df, symbol):
    """Validate and normalise the raw frame; return a timestamp-indexed copy or None on failure."""
    if df is None or len(df) < 3:
        print("Error: Input DataFrame is None or too small.")
        return None
    df = df.copy()
    df['symbol'] = symbol

    # --- Timestamp and Index ---
    if 'timestamp' not in df.columns:
        print("Error: 'timestamp' column not found.")
        return None
    try:
        df['timestamp'] = pd.to_datetime(df['timestamp'])
    except Exception as e:
        # Deliberately broad: any parsing failure is reported and turned into
        # the "empty result" contract instead of propagating.
        print(f"Error converting timestamp to datetime: {e}")
        return None
    df = df.sort_values('timestamp').dropna(subset=['timestamp'])
    df = df.set_index('timestamp', drop=False)

    # --- Volume Columns ---
    # 'volume_btc' is the internal working name; the CSV-style names are kept
    # because the selected feature list refers to them.
    if 'Volume BTC' in df.columns:
        df['volume_btc'] = df['Volume BTC']
    elif 'volume_btc' in df.columns:
        df['Volume BTC'] = df['volume_btc']
    else:
        df['volume_btc'] = 0
        df['Volume BTC'] = 0
    if 'Volume USD' not in df.columns:
        df['Volume USD'] = df['volume_usd'] if 'volume_usd' in df.columns else 0
    # Coerce volumes the same way as OHLC so text-typed input cannot break the
    # arithmetic further down; unparseable entries become NaN.
    for col in ('volume_btc', 'Volume BTC', 'Volume USD'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # --- Basic Checks (OHLC) ---
    required_ohlc = ['open', 'high', 'low', 'close']
    all_ohlc_present = True
    for col in required_ohlc:
        if col not in df.columns:
            print(f"Error: Required column '{col}' not found.")
            all_ohlc_present = False
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    if not all_ohlc_present:
        return None

    if df[required_ohlc].isnull().any().any():
        print(f"Warning: NaNs found in required OHLC columns. Rows before drop: {len(df)}")
        df = df.dropna(subset=required_ohlc)
        print(f"Rows after dropping NaNs in OHLC: {len(df)}")
    if df.empty:
        print("Error: DataFrame empty after dropping OHLC NaNs.")
        return None
    return df


def _add_prerequisite_columns(df):
    """Add, in place, every base indicator the selected features are derived from."""
    min_periods_base = 2
    close = df['close']
    volume = df['volume_btc']

    # Basic returns / changes
    df['price_return_1h_temp'] = close.pct_change()  # input to skew / kurtosis
    df['volume_return_1h'] = volume.pct_change()
    with np.errstate(divide='ignore', invalid='ignore'):
        df['price_range_pct_temp'] = (df['high'] - df['low']) / df['low'].replace(0, np.nan)

    # Lagged returns
    for hours in (12, 24, 48, 72, 168):
        df[f'lag_{hours}h_price_return'] = close.pct_change(periods=hours)
    for hours in (3, 6, 12, 24):
        df[f'lag_{hours}h_volume_return'] = volume.pct_change(periods=hours)

    # Rolling volatility of the close; a window longer than the frame gives NaN
    for hours in (6, 48, 168):
        df[f'rolling_std_{hours}h'] = _rolling_stat_or_nan(close, hours, 'std', min_periods_base)
    for hours in (12, 72, 3):
        df[f'rolling_std_{hours}h_temp'] = _rolling_stat_or_nan(close, hours, 'std', min_periods_base)

    # Skew / kurtosis of hourly returns need a full 24-row window
    hourly_return = df['price_return_1h_temp']
    df['rolling_skew_24h'] = _rolling_stat_or_nan(hourly_return, 24, 'skew', 24)
    df['rolling_kurt_24h'] = _rolling_stat_or_nan(hourly_return, 24, 'kurt', 24)

    # Volume moving averages and the volume / MA24 ratio
    for hours in (12, 168):
        df[f'volume_ma_{hours}h'] = _rolling_stat_or_nan(volume, hours, 'mean', min_periods_base)
    df['volume_ma_24h_temp'] = _rolling_stat_or_nan(volume, 24, 'mean', min_periods_base)
    with np.errstate(divide='ignore', invalid='ignore'):
        df['volume_div_ma_24h'] = volume / df['volume_ma_24h_temp'].replace(0, np.nan)

    # MACD components (26 rows for the slow EMA + 9 for the signal line)
    if len(df) >= 35:
        ema_12 = close.ewm(span=12, adjust=False, min_periods=12).mean()
        ema_26 = close.ewm(span=26, adjust=False, min_periods=26).mean()
        df['macd_temp'] = ema_12 - ema_26
        df['macd_signal'] = df['macd_temp'].ewm(span=9, adjust=False, min_periods=9).mean()
        df['macd_hist'] = df['macd_temp'] - df['macd_signal']
    else:
        df['macd_temp'] = np.nan
        df['macd_signal'] = np.nan
        df['macd_hist'] = np.nan

    # pandas_ta indicators; the accessor expects a column named 'volume'.
    # Each indicator degrades to NaN with a warning if its calculation fails.
    ta_df = df.rename(columns={'volume_btc': 'volume'}, errors='ignore')
    try:
        df['cci_20h'] = ta_df.ta.cci(length=20)
    except Exception as e:
        print(f" Warning: CCI calc failed: {e}")
        df['cci_20h'] = np.nan
    try:
        df['cmf_20h'] = ta_df.ta.cmf(length=20)
    except Exception as e:
        print(f" Warning: CMF calc failed: {e}")
        df['cmf_20h'] = np.nan
    try:
        df['bband_width_20h'] = _extract_bband_width(ta_df.ta.bbands(length=20, std=2))
    except Exception as e:
        print(f" Warning: BBand Width calc failed: {e}")
        df['bband_width_20h'] = np.nan

    # Position of the close inside the bar; zero-width bars map to the midpoint
    range_hl = df['high'] - df['low']
    with np.errstate(divide='ignore', invalid='ignore'):
        df['close_pos_in_range'] = (
            ((close - df['low']) / range_hl.replace(0, np.nan))
            .fillna(0.5)
            .replace([np.inf, -np.inf], 0.5)
        )

    # Short / long volatility ratio and volume x relative range
    with np.errstate(divide='ignore', invalid='ignore'):
        df['std12_div_std72'] = df['rolling_std_12h_temp'] / df['rolling_std_72h_temp'].replace(0, np.nan)
    df['volume_btc_x_range'] = volume * df['price_range_pct_temp']


def _collect_requested_features(df, requested):
    """Return ``{feature name: Series}`` for every known feature whose name is in ``requested``."""

    def source(name):
        # 'Volume BTC' in a feature name refers to the internal volume column.
        actual = 'volume_btc' if name == 'Volume BTC' else name
        return df[actual] if actual in df.columns else None

    def all_nan():
        return pd.Series(np.nan, index=df.index)

    features = {}

    # Base columns exported as-is
    for name in _DIRECT_FEATURE_COLUMNS:
        if name in requested and name in df.columns:
            features[name] = df[name]

    # One-hot time features, built only for the requested hours / weekdays
    hour_of_day = df.index.hour
    day_of_week = df.index.dayofweek
    for hour in range(24):
        name = f'hour_{hour}'
        if name in requested:
            features[name] = pd.Series((hour_of_day == hour).astype(int), index=df.index)
    for day in range(7):
        name = f'day_{day}'
        if name in requested:
            features[name] = pd.Series((day_of_week == day).astype(int), index=df.index)

    # Pairwise products
    for left, partners in _PRODUCT_FEATURE_PARTNERS.items():
        for right in partners:
            name = f'{left}_x_{right}'
            if name not in requested:
                continue
            left_col, right_col = source(left), source(right)
            if left_col is None or right_col is None:
                features[name] = all_nan()
            else:
                features[name] = left_col * right_col

    # Squares
    for name, column in _SQUARED_FEATURE_SOURCES.items():
        if name in requested:
            base = source(column)
            features[name] = all_nan() if base is None else base ** 2

    # Single ratio feature; a zero denominator yields NaN instead of inf
    if 'rolling_std_6h_div_rolling_std_48h' in requested:
        numerator, denominator = source('rolling_std_6h'), source('rolling_std_48h')
        if numerator is None or denominator is None:
            features['rolling_std_6h_div_rolling_std_48h'] = all_nan()
        else:
            with np.errstate(divide='ignore', invalid='ignore'):
                features['rolling_std_6h_div_rolling_std_48h'] = numerator / denominator.replace(0, np.nan)

    # Single log feature; negatives are clipped to 0 before log1p
    if 'volume_btc_x_range_log1p' in requested:
        base = source('volume_btc_x_range')
        features['volume_btc_x_range_log1p'] = (
            all_nan() if base is None else np.log1p(base.clip(lower=0))
        )

    return features


def calculate_selected_features(df, symbol):
    """Calculates only the 123 pre-selected features and their prerequisites.

    Args:
        df: Raw price frame. Must contain 'timestamp', 'open', 'high', 'low'
            and 'close'. Volume is read from 'Volume BTC' / 'Volume USD' (or
            'volume_btc' / 'volume_usd'); a missing volume column is treated
            as 0. The input frame is not modified.
        symbol: Value written to the 'symbol' column of the result.

    Returns:
        A DataFrame with a fresh integer index whose columns are the essential
        columns ('timestamp', 'symbol', 'open', 'high', 'low', 'close')
        followed by every name in the module-level SELECTED_FEATURE_NAMES, in
        that order. Requested names this function does not know how to compute
        are returned as all-NaN columns, and +/-inf values are replaced by NaN.
        An empty DataFrame is returned when the input is None, has fewer than
        three rows, lacks a required column, has an unparseable timestamp, or
        has no rows left after dropping NaN OHLC values.
    """
    print("Starting calculation of selected 123 features...")
    start_time = time.time()

    df = _prepare_price_frame(df, symbol)
    if df is None:
        return pd.DataFrame()

    # --- 1. Calculate Prerequisites ---
    print("  Calculating prerequisites...")
    _add_prerequisite_columns(df)

    # --- 2. Calculate Final Interaction and Transformation Features ---
    print("  Calculating final derived features...")
    # A set makes the per-feature "is it requested?" checks O(1).
    final_feature_dict = _collect_requested_features(df, set(SELECTED_FEATURE_NAMES))

    # --- 3. Final Assembly and Cleanup ---
    print("  Assembling final dataframe...")
    df_final_features = pd.DataFrame(final_feature_dict, index=df.index)
    essential_cols = ['timestamp', 'symbol', 'open', 'high', 'low', 'close']
    essential_cols_present = [col for col in essential_cols if col in df.columns]
    df_combined = pd.concat([df[essential_cols_present], df_final_features], axis=1)

    cols_to_keep = essential_cols_present + list(SELECTED_FEATURE_NAMES)
    missing_final_cols = [col for col in cols_to_keep if col not in df_combined.columns]
    if missing_final_cols:
        print(f"  Final Warning: {len(missing_final_cols)} columns from SELECTED_FEATURE_NAMES "
              f"were missing in the combined df and added as NaN: {missing_final_cols}")

    # A single reindex orders the columns and adds the missing ones as NaN.
    # Repeated names are collapsed so each column appears once.
    final_df_structure = df_combined.reindex(columns=list(dict.fromkeys(cols_to_keep)))

    # Final cleanup
    final_df_structure = final_df_structure.reset_index(drop=True)
    final_df_structure = final_df_structure.replace([np.inf, -np.inf], np.nan)

    end_time = time.time()
    actual_feature_count = len([col for col in final_df_structure.columns if col not in essential_cols_present])
    print(f"Selected feature calculation finished. Returning {len(final_df_structure)} rows, "
          f"{len(final_df_structure.columns)} total columns ({actual_feature_count} features). "
          f"Took {end_time - start_time:.2f}s.")

    # Verify final column count against the request
    expected_feature_count = len(SELECTED_FEATURE_NAMES)
    if actual_feature_count != expected_feature_count:
        print(f"  NOTE: Expected {expected_feature_count} features based on SELECTED_FEATURE_NAMES, "
              f"but returning DataFrame with {actual_feature_count} non-essential features.")

    return final_df_structure

# --- Main Execution Block ---
if __name__ == "__main__":

    print("--- 1. Data Loading & Initial Prep ---")
    try:
        print(f"Loading data from: {CSV_FILE_PATH}")
        col_names = ['unix', 'date', 'symbol_csv', 'open', 'high', 'low', 'close', 'Volume BTC', 'Volume USD']
        df_raw = pd.read_csv(CSV_FILE_PATH, header=0, names=col_names)
        print(f"Raw data loaded. Shape: {df_raw.shape}")
        df_raw['timestamp'] = pd.to_datetime(df_raw['date'])
        # Keep original Volume names for the feature function
        df_raw = df_raw.drop(['unix', 'date', 'symbol_csv'], axis=1)
        df_raw = df_raw.sort_values('timestamp').reset_index(drop=True)
        if df_raw.empty: exit("DataFrame empty after loading. Exiting.")
        print(f"Initial data prep done. Shape: {df_raw.shape}")
    except Exception as e:
        print(f"Error loading or processing CSV: {e}"); traceback.print_exc(); exit()

    print("\n--- 2. Feature Engineering (Selected Features) ---")
    feature_calc_start = time.time()
    # Assuming calculate_selected_features function is defined above and uses SELECTED_FEATURE_NAMES
    df_features = calculate_selected_features(df_raw, symbol=SYMBOL_NAME)
    feature_calc_end = time.time()
    if df_features.empty: exit("Feature calculation failed. Exiting.")
    print(f"Feature calculation completed in {feature_calc_end - feature_calc_start:.2f} seconds.")

    # Use the features actually present in the dataframe after calculation
    # Filter SELECTED_FEATURE_NAMES to only include columns that were actually generated
    CURRENT_FEATURE_COLS = [f for f in SELECTED_FEATURE_NAMES if f in df_features.columns]
    if len(CURRENT_FEATURE_COLS) == 0:
        exit("ERROR: No selected features found in the DataFrame after calculation.")
    if len(CURRENT_FEATURE_COLS) < len(SELECTED_FEATURE_NAMES):
         print(f"Warning: Only {len(CURRENT_FEATURE_COLS)} out of {len(SELECTED_FEATURE_NAMES)} requested features were found/generated.")
    print(f"Using {len(CURRENT_FEATURE_COLS)} features found in DataFrame for modeling.")


    # --- 3. Data Cleaning (Post-Features) ---
    # Turns +/-infinity into NaN across the whole frame and reports how many
    # NaNs sit in the modelling feature columns. Nothing is dropped or imputed
    # in this step.
    print("\n--- 3. Data Cleaning (Post-Features) ---")
    infinite_markers = [np.inf, -np.inf]
    df_features = df_features.replace(infinite_markers, np.nan)
    # Per-column NaN counts, restricted to the features used for modelling.
    nan_check = df_features.loc[:, CURRENT_FEATURE_COLS].isna().sum()
    total_nans = nan_check.sum()
    print(f"Total NaNs found in feature columns: {total_nans}.")

    # --- 4. Modeling Target & Final Prep ---
    # Builds the binary label: 1 when the close PREDICTION_WINDOW_ROWS rows
    # ahead is at least TARGET_THRESHOLD_PCT percent above the current close,
    # else 0. Rows whose future return cannot be computed are removed.
    print("\n--- 4. Modeling Target & Final Prep ---")
    TARGET_COLUMN = 'target'
    df = df_features.copy()
    df = df.sort_values('timestamp')
    if 'close' not in df.columns:
        # SystemExit instead of the interactive-only exit() helper.
        raise SystemExit("ERROR: 'close' column missing before target creation.")

    print(f"Creating binary target based on {PREDICTION_WINDOW_HOURS}-hour future return >= {TARGET_THRESHOLD_PCT}%...")
    future_price = df['close'].shift(-PREDICTION_WINDOW_ROWS)
    # A zero close is mapped to NaN so the division cannot produce +/-inf.
    price_return_future = (future_price - df['close']) / df['close'].replace(0, np.nan) * 100

    # Comparing NaN with the threshold yields False, so casting the comparison
    # straight to int silently labelled the last PREDICTION_WINDOW_ROWS rows
    # (which have no future price) as class 0 and the dropna below removed
    # nothing. Keep the label as NaN wherever the return is undefined.
    df[TARGET_COLUMN] = (
        (price_return_future >= TARGET_THRESHOLD_PCT)
        .astype(float)
        .where(price_return_future.notna())
    )

    # Only drop rows where target or 'close' (needed for target calc) is NaN,
    # then restore the integer dtype the classifiers expect.
    initial_rows = len(df)
    df = df.dropna(subset=[TARGET_COLUMN, 'close']).astype({TARGET_COLUMN: int})
    print(f"Rows after removing NaN targets/close: {len(df)} (Removed {initial_rows - len(df)})")

    if df.empty:
        raise SystemExit("DataFrame empty after target creation/NaN drop. Exiting.")
    target_counts = df[TARGET_COLUMN].value_counts(normalize=True) * 100
    print("\nTarget variable distribution:")
    print(f"  0 (< {TARGET_THRESHOLD_PCT}% return): {target_counts.get(0, 0):.2f}%")
    print(f"  1 (>= {TARGET_THRESHOLD_PCT}% return): {target_counts.get(1, 0):.2f}%")
    df = df.sort_values('timestamp').reset_index(drop=True)
    print(f"Final DataFrame shape for backtesting: {df.shape}")


    # --- 5. Walk-Forward Validation with Stacking ---
    # Initialises the accumulators filled by the walk-forward loop and prints
    # the run configuration.
    print("\n--- 5. Starting Walk-Forward Validation (Stacking Ensemble) ---")
    # One list per metric; each iteration appends one value.
    all_metrics = {metric_name: [] for metric_name in ('accuracy', 'precision', 'recall', 'f1')}
    all_best_thresholds = []
    # Column names of the level-1 (meta) feature matrix.
    meta_feature_names = ['xgb_pred', 'lgbm_pred', 'svm_pred']
    meta_feature_importances = dict((meta_feat, []) for meta_feat in meta_feature_names)
    iteration_count = 0

    current_train_start_idx = 0
    n_rows_total = len(df)
    # Rows left over once the first train + evaluation window has been placed;
    # every STEP_ROWS of them allows one more iteration.
    rows_after_first_window = n_rows_total - TRAIN_WINDOW_ROWS - TEST_WINDOW_ROWS
    total_iterations_estimate = max(0, rows_after_first_window // STEP_ROWS + 1)

    print(f"Total rows: {n_rows_total}, Train Window: {TRAIN_WINDOW_HOURS}h, Prediction Horizon: {PREDICTION_WINDOW_HOURS}h, Evaluation (Test) Window: {TEST_WINDOW_HOURS}h, Step: {STEP_HOURS}h")

    print(f"Estimated iterations: {total_iterations_estimate}")
    print(f"Stacking Folds (K): {N_STACKING_FOLDS}")
    print(f"Meta Learner Grid: {META_XGB_PARAM_GRID}")
    print(f"Threshold Search Range: {THRESHOLD_SEARCH_RANGE}")
    print("-" * 30)
    start_loop_time = time.time()

    while True:
        # --- Define Window Boundaries using TEST_WINDOW_ROWS for evaluation slice ---
        # Nominal layout: [train_start, train_end) followed immediately by the
        # evaluation slice [test_start, test_end).
        train_end_idx = current_train_start_idx + TRAIN_WINDOW_ROWS
        test_start_idx = train_end_idx
        test_end_idx = test_start_idx + TEST_WINDOW_ROWS  # Size of the evaluation slice
        # Single stop condition: with positive window sizes this also covers the
        # "train start ran past the data" case, which could never be reached
        # independently of it.
        if test_end_idx > n_rows_total:
            print(f"\nStopping: Evaluation window end ({test_end_idx}) exceeds total rows ({n_rows_total}). Last start index: {current_train_start_idx}")
            break

        # --- Data Slicing ---
        # The target of row i is computed from the close at row
        # i + PREDICTION_WINDOW_ROWS, so the final PREDICTION_WINDOW_ROWS rows
        # of the nominal training window carry labels built from closes at or
        # after the first evaluation row. Training on them leaks evaluation-
        # period prices into the model, so they are purged from the training
        # slice. The evaluation slice itself is unchanged.
        purge_rows = max(0, PREDICTION_WINDOW_ROWS)
        train_label_end_idx = max(current_train_start_idx, train_end_idx - purge_rows)
        train_df = df.iloc[current_train_start_idx:train_label_end_idx].copy()
        test_df = df.iloc[test_start_idx:test_end_idx].copy()  # This slice is for evaluation

        # --- Basic Validity Checks ---
        min_train_samples = max(50, int(0.1 * TRAIN_WINDOW_ROWS), N_STACKING_FOLDS * 2)
        min_test_samples = 5  # Minimum needed in the evaluation slice
        if len(train_df) < min_train_samples or len(test_df) < min_test_samples:
            print(f"Skipping iter {iteration_count + 1}: Insufficient data train ({len(train_df)}/{min_train_samples}) or test ({len(test_df)}/{min_test_samples}).")
            current_train_start_idx += STEP_ROWS
            continue

        # Use CURRENT_FEATURE_COLS determined after feature generation
        X_train_full = train_df[CURRENT_FEATURE_COLS]
        y_train_full = train_df[TARGET_COLUMN]
        X_test = test_df[CURRENT_FEATURE_COLS]  # Features for evaluation slice
        y_test = test_df[TARGET_COLUMN]         # Ground truth for evaluation slice

        # A classifier cannot be fit on a single class; a single-class
        # evaluation slice is still scored, but flagged.
        if y_train_full.nunique() < 2:
            print(f"Skipping iter {iteration_count + 1}: Training data has only one class.")
            current_train_start_idx += STEP_ROWS
            continue
        if y_test.nunique() < 2:
            print(f"Warning iter {iteration_count + 1}: Evaluation test data (size {len(test_df)}) has only one class. Metrics will be affected.")

        # --- Calculate scale_pos_weight ---
        # Negative/positive ratio of the training labels, passed to the
        # boosted-tree models to counter class imbalance.
        class_counts = y_train_full.value_counts()
        neg_count = class_counts.get(0, 0)
        pos_count = class_counts.get(1, 0)
        scale_pos_weight_val = neg_count / pos_count if pos_count > 0 else 1.0

        iter_start_time = time.time()
        print(f"\n--- Iter {iteration_count + 1}/{total_iterations_estimate} ---")
        # Reports the rows actually trained on (after the purge).
        print(f"  Train Indices: [{current_train_start_idx}:{train_label_end_idx-1}], Evaluation Indices: [{test_start_idx}:{test_end_idx-1}]")

        # --- Level 0: Generate Out-of-Fold (OOF) Predictions ---
        print(f"  Level 0: Generating OOF predictions using {N_STACKING_FOLDS}-Fold CV...")
        skf = StratifiedKFold(n_splits=N_STACKING_FOLDS, shuffle=True, random_state=42 + iteration_count)
        oof_xgb = np.full(len(train_df), np.nan)
        oof_lgbm = np.full(len(train_df), np.nan)
        oof_svm = np.full(len(train_df), np.nan)

        # Define Base Models (re-init each iteration)
        model_xgb_base = XGBClassifier(**XGB_BASE_PARAMS, scale_pos_weight=scale_pos_weight_val)
        model_lgbm_base = LGBMClassifier(**LGBM_BASE_PARAMS, scale_pos_weight=scale_pos_weight_val)
        pipeline_svm_base = Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
            ('svm', SVC(**SVM_BASE_PARAMS))
        ])
        models_oof = {'xgb': model_xgb_base, 'lgbm': model_lgbm_base, 'svm': pipeline_svm_base}
        oof_arrays = {'xgb': oof_xgb, 'lgbm': oof_lgbm, 'svm': oof_svm}

        # K-Fold Loop (Using Attempt 3 logic)
        for fold, (train_idx_k, val_idx_k) in enumerate(skf.split(X_train_full, y_train_full)):
            X_train_k, y_train_k = X_train_full.iloc[train_idx_k], y_train_full.iloc[train_idx_k]
            X_val_k, y_val_k = X_train_full.iloc[val_idx_k], y_train_full.iloc[val_idx_k]

            if len(np.unique(y_train_k)) < 2 or len(np.unique(y_val_k)) < 2:
                print(f"    Warning: Fold {fold+1} has single class in train/val. Assigning prior.")
                prior = y_train_full.mean()
                for key in oof_arrays: oof_arrays[key][val_idx_k] = prior
                continue

            for name, model in models_oof.items():
                try:
                    fit_params_k = {}
                    lgbm_eval_set = None
                    if name == 'lgbm':
                        fit_params_k['callbacks'] = [early_stopping(10, verbose=False), log_evaluation(0)]
                        lgbm_eval_set = [(X_val_k, y_val_k)]
                        fit_params_k['eval_metric'] = 'logloss' # Explicitly adding metric
                    elif name == 'xgb':
                        fit_params_k['eval_set'] = [(X_val_k, y_val_k)]
                        fit_params_k['early_stopping_rounds'] = 10
                        fit_params_k['verbose'] = False

                    if name == 'lgbm' and lgbm_eval_set:
                        model.fit(X_train_k, y_train_k, eval_set=lgbm_eval_set, **fit_params_k)
                    else:
                        model.fit(X_train_k, y_train_k, **fit_params_k)

                    oof_arrays[name][val_idx_k] = model.predict_proba(X_val_k)[:, 1]
                except Exception as e_kfold:
                    print(f"    Error during K-Fold {fold+1} training for {name}: {e_kfold}")
                    prior = y_train_full.mean()
                    if val_idx_k is not None and len(val_idx_k) > 0: oof_arrays[name][val_idx_k] = prior

        # Check OOF NaNs
        if np.isnan(oof_xgb).all() or np.isnan(oof_lgbm).all() or np.isnan(oof_svm).all():
             print("  ERROR: At least one base model failed in all K-Folds. Skipping iteration.")
             current_train_start_idx += STEP_ROWS
             continue

        # Create Meta Train Features
        # One column per base model, aligned to the training rows. Any NaN left
        # in an OOF vector is replaced by that vector's own NaN-ignoring mean.
        oof_by_column = {'xgb_pred': oof_xgb, 'lgbm_pred': oof_lgbm, 'svm_pred': oof_svm}
        X_meta_train = pd.DataFrame(
            {
                column: np.nan_to_num(oof_values, nan=np.nanmean(oof_values))
                for column, oof_values in oof_by_column.items()
            },
            index=X_train_full.index,
        )
        # The meta-learner is trained against the same labels as the base models.
        y_meta_train = y_train_full

        # --- Level 0: Train Base Models on Full Training Data ---
        print(f"  Level 0: Training base models on full training data ({len(train_df)} rows)...")
        models_full = {}
        all_base_trained = True
        for name, model in models_oof.items():
             try:
                  full_fit_params = {}
                  if name == 'xgb': full_fit_params['verbose'] = False
                  # LGBM uses verbose=-1 from init, SVM pipeline is simple
                  model.fit(X_train_full, y_train_full, **full_fit_params)
                  models_full[name] = model
             except Exception as e_full_fit:
                  print(f"  ERROR: Failed to train base model '{name}' on full data: {e_full_fit}")
                  all_base_trained = False; break
        if not all_base_trained:
             print("  Skipping iteration due to base model training failure.")
             current_train_start_idx += STEP_ROWS; continue
        print("  Level 0 Full Training Done.")

        # --- Level 1: Meta Learner Tuning & Threshold Tuning ---
        # Picks the meta-learner hyper-parameters and the probability cut-off for
        # this iteration. The tail of the meta-training rows is held out as a
        # validation set; each grid combination is scored by F1 on it, and the
        # winning model's validation probabilities are then swept over
        # THRESHOLD_SEARCH_RANGE. Whenever tuning is impossible or fails, the
        # first grid combination and a 0.5 threshold are used.
        print("  Level 1: Tuning Meta-Learner (XGBoost) and Probability Threshold...")
        best_meta_params = None
        best_meta_score = -np.inf
        best_meta_model_for_thresh = None
        best_threshold_iter = 0.5
        best_thresh_f1_score = -np.inf

        # Number of trailing meta rows reserved for validation.
        meta_val_size = int(len(X_meta_train) * META_VALIDATION_PCT)
        # Both the validation part and the remaining training part must hold at
        # least N_STACKING_FOLDS rows, otherwise tuning is skipped.
        if meta_val_size < N_STACKING_FOLDS or (len(X_meta_train) - meta_val_size) < N_STACKING_FOLDS:
             print("  Warning: Meta dataset too small for validation split. Using defaults.")
             best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0]
             best_threshold_iter = 0.5
        else:
            # Split by position: everything except the last meta_val_size rows
            # trains, the last meta_val_size rows validate.
            X_meta_train_sub = X_meta_train[:-meta_val_size]
            y_meta_train_sub = y_meta_train[:-meta_val_size]
            X_meta_val = X_meta_train[-meta_val_size:]
            y_meta_val = y_meta_train[-meta_val_size:]

            # F1 on a one-class split is not informative, so fall back to defaults.
            if len(y_meta_val.unique()) < 2 or len(y_meta_train_sub.unique()) < 2:
                 print("  Warning: Meta train/val split has single class. Using defaults.")
                 best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0]
                 best_threshold_iter = 0.5
            else:
                # Meta Grid Search
                for params_meta_cv in ParameterGrid(META_XGB_PARAM_GRID):
                    try:
                        # Grid values override the fixed parameters on key clashes.
                        current_meta_params = {**META_XGB_FIXED_PARAMS, **params_meta_cv}
                        model_meta_cv = XGBClassifier(**current_meta_params, scale_pos_weight=scale_pos_weight_val)
                        # NOTE(review): early_stopping_rounds as a fit() keyword depends on
                        # the installed XGBoost version - confirm. If it raises, the handler
                        # below logs it and this combination is skipped.
                        model_meta_cv.fit(X_meta_train_sub, y_meta_train_sub, eval_set=[(X_meta_val, y_meta_val)], early_stopping_rounds=10, verbose=False)
                        y_pred_meta_val_cv = model_meta_cv.predict(X_meta_val)
                        meta_score = f1_score(y_meta_val, y_pred_meta_val_cv, average='binary', pos_label=1, zero_division=0)
                        # Strict '>' keeps the earliest combination on ties.
                        if meta_score > best_meta_score:
                            best_meta_score = meta_score; best_meta_params = params_meta_cv; best_meta_model_for_thresh = model_meta_cv
                    except Exception as e_meta_cv:
                        print(f"    Error during Meta CV with params {params_meta_cv}: {e_meta_cv}")
                        if best_meta_params is None: best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0]
                # Still None only if the grid produced no combination at all.
                if best_meta_params is None: best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0]
                # best_meta_score is still -inf here when every combination failed.
                print(f"    Best Meta Params: {best_meta_params} (Val F1: {best_meta_score:.4f})")

                # Threshold Tuning
                # Uses the validation-fitted model (not the final one) so the threshold
                # is chosen on rows that model did not train on.
                if best_meta_model_for_thresh is not None:
                    try:
                        y_meta_proba_val = best_meta_model_for_thresh.predict_proba(X_meta_val)[:, 1]
                        for t in THRESHOLD_SEARCH_RANGE:
                            y_pred_meta_val_t = (y_meta_proba_val >= t).astype(int)
                            current_f1 = f1_score(y_meta_val, y_pred_meta_val_t, average='binary', pos_label=1, zero_division=0)
                            # '>=' means ties go to the threshold visited last.
                            if current_f1 >= best_thresh_f1_score: best_thresh_f1_score = current_f1; best_threshold_iter = t
                        print(f"    Best Threshold: {best_threshold_iter:.2f} (Val F1: {best_thresh_f1_score:.4f})")
                    except Exception as e_thresh: print(f"    Error during threshold tuning: {e_thresh}. Using default 0.5."); best_threshold_iter = 0.5
                else: print("    Skipping threshold tuning (no best meta model found). Using default 0.5."); best_threshold_iter = 0.5

        # --- Level 1: Train Final Meta Learner ---
        print("  Level 1: Training final Meta-Learner...")
        try:
             final_meta_params = {**META_XGB_FIXED_PARAMS, **(best_meta_params or list(ParameterGrid(META_XGB_PARAM_GRID))[0])}
             meta_model_final = XGBClassifier(**final_meta_params, scale_pos_weight=scale_pos_weight_val)
             meta_model_final.fit(X_meta_train, y_meta_train, verbose=False)
             print("  Level 1 Final Training Done.")
        except Exception as e_meta_final:
             print(f"  ERROR: Failed to train final meta-learner: {e_meta_final}")
             current_train_start_idx += STEP_ROWS; continue

        # --- Prediction Phase ---
        print("  Prediction: Generating final predictions...")
        try:
            # Predict with Level 0 models on the EVALUATION test data slice
            pred_xgb_test = models_full['xgb'].predict_proba(X_test)[:, 1]
            pred_lgbm_test = models_full['lgbm'].predict_proba(X_test)[:, 1]
            pred_svm_test = models_full['svm'].predict_proba(X_test)[:, 1]
            # Create Meta Features for Test Data
            X_meta_test = pd.DataFrame({'xgb_pred': pred_xgb_test, 'lgbm_pred': pred_lgbm_test, 'svm_pred': pred_svm_test})
            # Predict Probabilities with Final Meta Learner
            y_proba_test = meta_model_final.predict_proba(X_meta_test)[:, 1]
            # Apply Tuned Threshold
            y_pred = (y_proba_test >= best_threshold_iter).astype(int)
            print("  Prediction Done.")
        except Exception as e_pred:
             print(f"  ERROR during prediction phase: {e_pred}")
             for key in all_metrics: all_metrics[key].append(np.nan)
             for meta_feat in meta_feature_names: meta_feature_importances[meta_feat].append(np.nan)
             all_best_thresholds.append(np.nan)
             current_train_start_idx += STEP_ROWS; continue

        # --- Evaluation (on the EVALUATION test slice) ---
        # Accuracy is always computed. Precision/recall/F1 come from sklearn when
        # both classes are present; for a single-class slice they are 1.0 only
        # if that class is 1 and every prediction is 1, otherwise 0.0.
        accuracy = accuracy_score(y_test, y_pred)
        observed_classes = np.unique(y_test)
        if len(observed_classes) >= 2:
            binary_options = dict(average='binary', pos_label=1, zero_division=0)
            precision = precision_score(y_test, y_pred, **binary_options)
            recall = recall_score(y_test, y_pred, **binary_options)
            f1 = f1_score(y_test, y_pred, **binary_options)
        elif observed_classes[0] == 1 and np.all(y_pred == 1):
            precision, recall, f1 = 1.0, 1.0, 1.0
        else:
            precision, recall, f1 = 0.0, 0.0, 0.0

        # Record this iteration's scores and the threshold that produced them.
        iteration_scores = (('accuracy', accuracy), ('precision', precision), ('recall', recall), ('f1', f1))
        for metric_key, metric_value in iteration_scores:
            all_metrics[metric_key].append(metric_value)
        all_best_thresholds.append(best_threshold_iter)
        print(f"  Evaluation Metrics (Test Window Size: {TEST_WINDOW_HOURS}h): Acc={accuracy:.4f}, Prc={precision:.4f}, Rec={recall:.4f}, F1={f1:.4f}")

        # --- Store Meta-Learner Feature Importances ---
        # Appends one gain value per meta-feature for this iteration (0.0 when
        # the booster reports no gain for a feature, NaN when the importances
        # cannot be read at all).
        try:
            if not isinstance(meta_model_final, XGBClassifier):
                print(" Meta-learner is not XGBoost, cannot get gain importance easily.")
                for meta_feat in meta_feature_names:
                    meta_feature_importances[meta_feat].append(np.nan)
            else:
                booster = meta_model_final.get_booster()
                fold_importances = booster.get_score(importance_type='gain')
                # Pair the booster's feature names with the meta-frame columns
                # by position.
                name_map = dict(zip(booster.feature_names, X_meta_train.columns))
                iter_importances = dict.fromkeys(meta_feature_names, 0.0)
                for internal_name, imp_value in fold_importances.items():
                    original_name = name_map.get(internal_name)
                    if original_name in iter_importances:
                        iter_importances[original_name] = imp_value
                for key in meta_feature_names:
                    meta_feature_importances[key].append(iter_importances[key])
        except Exception as e_imp:
            print(f"  Warning: Could not get meta-feature importance: {e_imp}")
            for meta_feat in meta_feature_names:
                meta_feature_importances[meta_feat].append(np.nan)

        # Only iterations that reach this point count as completed.
        iteration_count += 1
        iter_end_time = time.time()
        print(f"  Iteration {iteration_count} finished in {iter_end_time - iter_start_time:.2f} seconds.")

        # --- Move to Next Window ---
        current_train_start_idx += STEP_ROWS


    # Wall-clock time of the whole walk-forward loop.
    end_loop_time = time.time()
    print("-" * 30)
    loop_duration_minutes = (end_loop_time - start_loop_time) / 60
    print(f"Walk-Forward Validation (Stacking) finished in {end_loop_time - start_loop_time:.2f} seconds ({loop_duration_minutes:.2f} minutes).")

    # --- 6. Aggregate and Display Results ---
    # Prints NaN-ignoring means and standard deviations of the per-iteration
    # metrics, the tuned thresholds, and the meta-feature gains normalised to
    # percentages.
    print("\n--- 6. Final Results (Stacking Ensemble) ---")
    if not (iteration_count > 0 and len(all_metrics['f1']) > 0):
        print("\nNo iterations were successfully completed.")
    else:
        valid_f1 = [m for m in all_metrics['f1'] if not pd.isna(m)]
        if not valid_f1:
            print("\nNo valid metrics recorded (all folds might have failed evaluation).")
        else:
            # F1 uses the pre-filtered list; the other metrics rely on the
            # nan-aware reducers to skip failed iterations.
            metric_samples = (
                all_metrics['accuracy'],
                all_metrics['precision'],
                all_metrics['recall'],
                valid_f1,
            )
            avg_accuracy, avg_precision, avg_recall, avg_f1 = (np.nanmean(s) for s in metric_samples)
            std_accuracy, std_precision, std_recall, std_f1 = (np.nanstd(s) for s in metric_samples)

            print("\n--- Average Walk-Forward Validation Results ---")
            print(f"Total Folds / Successful Iterations Evaluated: {iteration_count}")
            print(f"Target Threshold: {TARGET_THRESHOLD_PCT}% increase over {PREDICTION_WINDOW_HOURS} hours (Prediction Horizon)")
            print(f"Train Window: {TRAIN_WINDOW_HOURS} hours, Evaluation Window: {TEST_WINDOW_HOURS} hours, Step: {STEP_HOURS} hours")
            print(f"Stacking Folds: {N_STACKING_FOLDS}")
            print(f"Average Accuracy:  {avg_accuracy:.4f}")
            print(f"Average Precision: {avg_precision:.4f}")
            print(f"Average Recall:    {avg_recall:.4f}")
            print(f"Average F1-Score:  {avg_f1:.4f}")

            print("\n--- Standard Deviation of Metrics Across Folds ---")
            print(f"Std Dev Accuracy:  {std_accuracy:.4f}")
            print(f"Std Dev Precision: {std_precision:.4f}")
            print(f"Std Dev Recall:    {std_recall:.4f}")
            print(f"Std Dev F1-Score:  {std_f1:.4f}")

            avg_threshold = np.nanmean(all_best_thresholds)
            std_threshold = np.nanstd(all_best_thresholds)
            print(f"\nAverage Best Threshold Found: {avg_threshold:.3f} (StdDev: {std_threshold:.3f})")

            print("\n--- Average Meta-Feature Importances (Gain) ---")
            # Mean gain per meta-feature over the iterations that recorded a
            # non-NaN value; 0.0 when none did.
            avg_meta_importances = {}
            for feature_name, recorded_gains in meta_feature_importances.items():
                usable_gains = [gain for gain in recorded_gains if not pd.isna(gain)]
                avg_meta_importances[feature_name] = np.mean(usable_gains) if usable_gains else 0.0
            total_importance = sum(avg_meta_importances.values())
            if not total_importance > 1e-9:
                print("  Meta-feature importance data could not be calculated or was always zero.")
            else:
                # Express each mean gain as a share of the total, largest first.
                normalized_importances = {
                    feature_name: (mean_gain / total_importance) * 100
                    for feature_name, mean_gain in avg_meta_importances.items()
                }
                sorted_meta_importances = sorted(
                    normalized_importances.items(),
                    key=lambda name_and_share: name_and_share[1],
                    reverse=True,
                )
                for i, (feature, importance) in enumerate(sorted_meta_importances):
                    print(f"  {i+1}. {feature}: {importance:.2f}%")

    print("\nScript finished.")

--- 1. Data Loading & Initial Prep ---
Loading data from: C:\Users\mason\AVP\BTCUSD.csv
Raw data loaded. Shape: (60403, 9)
Initial data prep done. Shape: (60403, 7)

--- 2. Feature Engineering (Selected Features) ---
Starting calculation of selected 58 features...
Selected feature calculation finished. Returning 60403 rows with 64 columns.
Feature calculation completed in 0.37 seconds.
Using 58 features found in DataFrame for modeling.

--- 3. Data Cleaning (Post-Features) ---
Total NaNs found in feature columns: 599.

--- 4. Modeling Target & Final Prep ---
Rows after removing NaN targets/close: 60403 (Removed 0)

Target variable distribution:
  0 (< 0.1% return): 48.51%
  1 (>= 0.1% return): 51.49%
Final DataFrame shape for backtesting: (60403, 65)

--- 5. Starting Walk-Forward Validation (Stacking Ensemble) ---
Total rows: 60403, Train Window: 1080h, Test Window: 72h, Step: 24h
Estimated iterations: 2469
Stacking Folds (K): 5
Meta Learner Grid: {'max_depth': [2, 3], 'n_estimators': 

Imports: Added LGBMClassifier, SVC, StandardScaler, StratifiedKFold, Pipeline.

Configuration:

Defined N_STACKING_FOLDS.

Defined static hyperparameters for base models (XGB_BASE_PARAMS, LGBM_BASE_PARAMS, SVM_BASE_PARAMS). Note the probability=True and class_weight='balanced' for SVC.

Defined meta-learner grid (META_XGB_PARAM_GRID) and fixed params (META_XGB_FIXED_PARAMS).

Defined META_VALIDATION_PCT.

Main Loop Structure: The core logic now happens inside the while True loop:

Level 0 OOF: A StratifiedKFold loop iterates through the train_df. In each fold, base models are trained on K-1 parts and predict probabilities on the held-out part. These predictions populate oof_xgb, oof_lgbm, oof_svm. Error handling for single-class folds is included.

SVM Handling: SVM requires scaling and imputation. A Pipeline chains SimpleImputer(strategy='median'), StandardScaler, and SVC, so the imputer and scaler are re-fit on the training portion of each K-fold and again on the full training window, which avoids data leakage. The medians learned from the training data are then used to impute the validation/test sets.

Level 0 Full Training: After the K-Fold loop, the same base model objects are re-fit on the entire train_df and stored in the models_full dictionary (keys 'xgb', 'lgbm', 'svm').

Meta-Feature Creation: X_meta_train is created from the OOF arrays.

Level 1 Tuning: X_meta_train is split into sub-train/validation sets. A grid search tunes the meta-XGBoost model (best_meta_params, best_meta_model_for_thresh).

Threshold Tuning: Uses best_meta_model_for_thresh and the meta-validation set to find best_threshold_iter.

Level 1 Final Training: The final meta-learner (meta_model_final) is trained on all of X_meta_train using best_meta_params.

Prediction Pipeline:

Base models (the entries of models_full) predict on X_test.

These predictions form X_meta_test.

meta_model_final predicts probabilities on X_meta_test.

best_threshold_iter is applied to get final y_pred.

Evaluation: Metrics are calculated as before.

Importance: Feature importance is now tracked for the meta-learner (features are 'xgb_pred', 'lgbm_pred', 'svm_pred').

Important Notes:

Computational Cost: This script will be significantly slower than the previous ones due to fitting multiple models multiple times in each iteration.

NaN Handling: XGBoost and LightGBM handle NaNs natively. SVM requires explicit imputation, which is handled here by median imputation inside the Pipeline. Ensure your base models' parameters are set appropriately if you expect NaNs.

Hyperparameters: The static hyperparameters for base models are crucial. If they are poorly chosen, the ensemble might not perform well. Consider tuning them separately beforehand if possible.

Memory: Storing multiple models and intermediate predictions might increase memory usage per iteration.

Complexity: The code is inherently more complex due to the nested nature of stacking.

This implementation provides the full stacking workflow with meta-learner tuning and probability threshold optimization within your walk-forward structure. Be prepared for the longer run times!