In [None]:
# Ensemble_Method_B.py
# Combined Script: Load CSV -> Feature Engineering -> Walk-Forward Stacking Ensemble
# (XGBoost + LightGBM + SVM base models, XGBoost meta-learner)
# Uses the feature set from Simple_Predictor_B

import pandas as pd
import numpy as np
import time
import os
import warnings
import traceback
from datetime import datetime

# Feature Engineering Imports
import pandas_ta as ta  # Technical indicators

# Modeling Imports
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier, early_stopping, log_evaluation  # <--- IMPORT CALLBACKS
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler  # Needed for SVM
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import ParameterGrid, StratifiedKFold  # StratifiedKFold for stacking
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.pipeline import Pipeline  # Optional: useful for SVM with scaling
from sklearn.impute import SimpleImputer  # Better imputation strategy for pipeline

# --- Suppress Warnings ---
# Silence library warnings for the whole run so the per-iteration progress
# output stays readable. The last, category-less call ignores every warning
# (filterwarnings matches the base Warning class when no category is given),
# so it already covers the four category-specific filters listed before it.
warnings.filterwarnings('ignore', category=UndefinedMetricWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)
warnings.filterwarnings('ignore')  # General suppression

# --- Configuration ---

# Data Loading
CSV_FILE_PATH = r'C:\Users\mason\AVP\BTCUSDrec.csv'  # Use raw string for Windows paths
SYMBOL_NAME = 'BTCUSD'  # Define the symbol represented in the CSV

# Feature Selection (From Simple_Predictor_B)
# 49 column names in total: the 6 base OHLCV columns on the first row plus
# 43 engineered features. calculate_selected_features() returns whichever of
# these names it could produce, in this order.
SELECTED_FEATURE_NAMES = [
    'open', 'high', 'low', 'close', 'Volume BTC', 'Volume USD',  # Base columns (will be selected later if present)
    'price_range_pct', 'oc_change_pct', 'garman_klass_12h', 'parkinson_3h',
    'ma_3h', 'rolling_std_3h', 'lag_3h_price_return', 'lag_6h_price_return',
    'lag_12h_price_return', 'lag_24h_price_return', 'lag_48h_price_return',
    'lag_72h_price_return', 'lag_168h_price_return', 'volume_return_1h',
    'lag_3h_volume_return', 'lag_6h_volume_return', 'lag_12h_volume_return',
    'lag_24h_volume_return', 'ma_6h', 'ma_12h', 'ma_24h', 'ma_48h',
    'ma_72h', 'ma_168h', 'rolling_std_6h', 'rolling_std_12h',
    'rolling_std_24h', 'rolling_std_48h', 'rolling_std_72h',
    'rolling_std_168h', 'atr_14h', 'atr_24h', 'atr_48h', 'close_div_ma_24h',
    'close_div_ma_48h', 'close_div_ma_168h', 'ma12_div_ma48',
    'ma24_div_ma168', 'std12_div_std72', 'volume_btc_x_range',
    'rolling_std_3h_sq', 'price_return_1h_sq', 'rolling_std_12h_sqrt'
]
# Remove the base OHLCV columns from the list for modeling: the models are fed
# only the 43 engineered features, never the raw price/volume levels.
MODEL_FEATURE_COLS = [f for f in SELECTED_FEATURE_NAMES if f not in ['open', 'high', 'low', 'close', 'Volume BTC', 'Volume USD']]

# Modeling & Walk-Forward
# Target is 1 when the future return, expressed in percent, is >= this value
# (i.e. a rise of at least 0.25% over the prediction window), else 0.
TARGET_THRESHOLD_PCT = 0.25

# Define separate prediction horizon for target
PREDICTION_WINDOW_HOURS = 4  # Predict outcome 4 hours ahead
# Hours are used directly as row offsets; assumes one row per hour in the CSV -- TODO confirm.
PREDICTION_WINDOW_ROWS = PREDICTION_WINDOW_HOURS

# Walk-forward params
TRAIN_WINDOW_HOURS = int(24 * 7 * 4)  # Training size: 672 hours (4 weeks)
TEST_WINDOW_HOURS = 24 * 7           # Evaluation window: 168 hours (1 week)
STEP_HOURS = 48                     # Window start advances 48 hours (2 days) per iteration

# Row counts equal hour counts under the same one-row-per-hour assumption.
TRAIN_WINDOW_ROWS = TRAIN_WINDOW_HOURS
TEST_WINDOW_ROWS = TEST_WINDOW_HOURS  # Evaluation window size (in rows)
STEP_ROWS = STEP_HOURS

# Stacking Configuration
N_STACKING_FOLDS = 7  # Number of folds for generating Level 0 (out-of-fold) predictions

# --- New Base Model Parameter Grids (max ~16 combinations each) ---
# Searched per walk-forward iteration by grid_search_base_model(); the winning
# combination is merged over the matching *_BASE_PARAMS dict further down.

# 2 x 2 x 2 x 2 = 16 combinations. 'eta' and 'lambda' are XGBoost's native
# names for the learning rate and the L2 regularisation weight.
BASE_XGB_PARAM_GRID = {
    'max_depth': [2, 3],
    'n_estimators': [35, 65],
    'eta': [0.03, 0.06],
    'lambda': [1.0, 3.0]
}

# 2 x 2 x 2 x 2 = 16 combinations.
# NOTE(review): LightGBM documents that row subsampling only takes effect when
# subsample_freq > 0; nothing visible in this file sets it, so the 'subsample'
# axis may be a no-op that doubles the search cost -- confirm.
BASE_LGBM_PARAM_GRID = {
    'max_depth': [2, 4],
    'n_estimators': [55, 95],
    'learning_rate': [0.04, 0.08],
    'subsample': [0.75, 0.9]
}

# 2 x 2 = 4 combinations for the RBF-kernel SVC.
BASE_SVM_PARAM_GRID = {
    'C': [1.6, 3.2],
    'gamma': ['scale', 'auto']
}

# --- Base Model Static Hyperparameters (default values) ---
# These are the settings that are NOT tuned. Each walk-forward iteration copies
# the dict, overlays the best grid-search result and adds scale_pos_weight.

# eta, max_depth, n_estimators and lambda are intentionally absent here: they
# are supplied by BASE_XGB_PARAM_GRID.
# NOTE(review): 'use_label_encoder' is deprecated in newer XGBoost releases --
# confirm against the installed version.
XGB_BASE_PARAMS = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'subsample': 0.8,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'gamma': 0.1,
    'alpha': 0.1,
    'random_state': 42,
    'n_jobs': -1,
    'tree_method': 'hist',
    'use_label_encoder': False,
}
# learning_rate, n_estimators, max_depth and subsample are intentionally absent
# here: they are supplied by BASE_LGBM_PARAM_GRID.
# NOTE(review): LightGBM documents the binary log-loss metric as
# 'binary_logloss'; confirm that 'logloss' is accepted for the 'metric' key.
LGBM_BASE_PARAMS = {
    'objective': 'binary',
    'metric': 'logloss',
    'num_leaves': 8,
    'colsample_bytree': 0.7,
    'min_child_samples': 5,
    'reg_alpha': 0.1,
    'reg_lambda': 1.5,
    'random_state': 42,
    'n_jobs': -1,
    'boosting_type': 'gbdt',
    'verbose': -1
}
# 'C' (and 'gamma') come from BASE_SVM_PARAM_GRID.
# NOTE(review): the visible code builds its SVC instances with these same
# values written inline rather than reading this dict -- confirm whether the
# dict is used anywhere else before relying on edits made here.
SVM_BASE_PARAMS = {
    'kernel': 'rbf',
    'probability': True,
    'max_iter': 5000,
    'random_state': 42,
    'class_weight': 'balanced'
}

# --- Meta Learner Configuration ---
# The Level 1 model is an XGBClassifier trained on the three base models'
# out-of-fold probabilities ('xgb_pred', 'lgbm_pred', 'svm_pred').
# NOTE(review): META_LEARNER_IS_XGB is not read anywhere in the visible part
# of the file -- confirm whether it is still needed.
META_LEARNER_IS_XGB = True
# 2^6 = 64 combinations, each fitted once per walk-forward iteration.
META_XGB_PARAM_GRID = {
    'max_depth': [2, 3],
    'n_estimators': [40, 70],
    'eta': [0.03, 0.05],
    'lambda': [1.0, 1.5],
    'subsample': [0.75, 0.95],
    'colsample_bytree': [0.80, 1.0]
}
# Settings shared by every meta-learner candidate; grid values are overlaid on top.
META_XGB_FIXED_PARAMS = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'gamma': 0.0,
    'alpha': 0.1,
    'random_state': 42,
    'n_jobs': -1,
    'tree_method': 'hist',
    'use_label_encoder': False,
    'min_child_weight': 3
}

# --- Probability Threshold Tuning Configuration ---
# Candidate decision thresholds from 0.10 upward in steps of 0.05; np.arange
# treats the 0.90 stop value as exclusive.
THRESHOLD_SEARCH_RANGE = np.arange(0.10, 0.90, 0.05)
# Fraction of the meta training rows (taken from the end of the training
# window) held out to score meta-learner candidates and thresholds.
META_VALIDATION_PCT = 0.25

# --- Feature Engineering Function (Revised for Simple_Predictor_B Features) ---
def garman_klass_volatility(open_, high, low, close, window):
    """
    Rolling Garman-Klass volatility estimate.

    For every bar the estimator term
        0.5 * ln(high / low)^2 - (2 * ln(2) - 1) * ln(close / open)^2
    is computed, averaged over the trailing `window` bars, floored at zero and
    square-rooted.

    Args:
        open_, high, low, close: pandas Series of bar prices sharing one index.
        window: number of trailing bars in the rolling mean.

    Returns:
        A Series aligned with the inputs; bars without a full window are NaN.
    """
    range_term = 0.5 * (np.log(high / low) ** 2)
    body_term = (2 * np.log(2) - 1) * (np.log(close / open_) ** 2)
    per_bar = range_term - body_term
    # The per-bar term can be negative when the open-to-close move dominates
    # the high-low range, so floor the rolling mean before taking the root.
    return np.sqrt(per_bar.rolling(window=window).mean().clip(lower=0))

def parkinson_volatility(high, low, window):
    """
    Rolling Parkinson (high-low range) volatility estimate.

    Computes sqrt( sum(ln(high / low)^2 over the trailing `window` bars)
                   / (4 * ln(2) * window) ).

    Args:
        high, low: pandas Series of bar highs and lows sharing one index.
        window: number of trailing bars in the rolling sum.

    Returns:
        A Series aligned with the inputs; bars without a full window are NaN.
    """
    scale = 1 / (4 * np.log(2) * window)
    squared_log_range = np.log(high / low) ** 2
    windowed_total = squared_log_range.rolling(window=window).sum()
    return np.sqrt(scale * windowed_total)

def calculate_selected_features(df, symbol):
    """
    Derive the Simple_Predictor_B feature columns from a raw OHLCV frame.

    The input must carry a 'timestamp' column plus 'open', 'high', 'low' and
    'close'; 'Volume BTC' / 'Volume USD' are created as zeros when absent.
    Every lag and rolling window is counted in rows. The "h" in the column
    names presumes one bar per hour; the function does not check bar spacing.

    Args:
        df: raw price frame (not modified; a copy is worked on).
        symbol: value written to the 'symbol' column of the result.

    Returns:
        A frame sorted by timestamp with a fresh integer index, holding every
        SELECTED_FEATURE_NAMES column that could be produced, followed by
        'timestamp' and 'symbol'. +/-inf values are replaced with NaN. An
        empty DataFrame is returned when the input is None, has fewer than
        three rows, lacks a required column, has an unparseable timestamp, or
        has no rows left after bars with missing OHLC values are discarded.
    """
    print(f"Starting calculation for {len(SELECTED_FEATURE_NAMES)} target columns (incl. base)...")
    started = time.time()
    if df is None or len(df) < 3:
        return pd.DataFrame()

    work = df.copy()
    work['symbol'] = symbol

    # --- Timestamp and Index ---
    if 'timestamp' not in work.columns:
        print("Error: 'timestamp' column not found.")
        return pd.DataFrame()
    try:
        work['timestamp'] = pd.to_datetime(work['timestamp'])
    except Exception as e:
        print(f"Error converting timestamp: {e}")
        return pd.DataFrame()
    work = (
        work.sort_values('timestamp')
            .dropna(subset=['timestamp'])
            .set_index('timestamp', drop=False)
    )

    # --- Volume Columns ---
    # Missing volume columns become zeros; unparseable entries are zeroed too.
    vol_btc_col = 'Volume BTC'
    vol_usd_col = 'Volume USD'
    for vol_col in (vol_btc_col, vol_usd_col):
        if vol_col not in work.columns:
            work[vol_col] = 0
        work[vol_col] = pd.to_numeric(work[vol_col], errors='coerce').fillna(0)

    # --- Basic Checks (OHLC) ---
    required_ohlc = ['open', 'high', 'low', 'close']
    if any(col not in work.columns for col in required_ohlc):
        print(f"Error: Missing required OHLC columns: {required_ohlc}")
        return pd.DataFrame()
    for col in required_ohlc:
        work[col] = pd.to_numeric(work[col], errors='coerce')
    if work[required_ohlc].isnull().any().any():
        print("Warning: NaNs found in OHLC data. Dropping affected rows.")
        work = work.dropna(subset=required_ohlc)
    if work.empty:
        print("DataFrame empty after OHLC checks.")
        return pd.DataFrame()

    print("  Calculating features...")
    # Row filtering is finished, so these series stay aligned with `work`.
    open_, high, low, close = (work[col] for col in required_ohlc)
    volume_btc = work[vol_btc_col]

    with np.errstate(divide='ignore', invalid='ignore'):
        work['price_range_pct'] = (high - low) / close
        work['oc_change_pct'] = (close - open_) / open_
        work['price_return_1h'] = close.pct_change()
        work['volume_return_1h'] = volume_btc.pct_change()
    one_bar_return = work['price_return_1h']

    # --- Lagged Returns ---
    lag_plan = (
        (close, 'price', (3, 6, 12, 24, 48, 72, 168)),
        (volume_btc, 'volume', (3, 6, 12, 24)),
    )
    for source, label, lags in lag_plan:
        for lag in lags:
            work[f'lag_{lag}h_{label}_return'] = source.pct_change(periods=lag)

    # --- Moving Averages and Rolling Standard Deviations ---
    # Both families use the same windows; the std is taken over one-bar
    # returns and scaled by 100.
    for span in (3, 6, 12, 24, 48, 72, 168):
        work[f'ma_{span}h'] = close.rolling(window=span, min_periods=span).mean()
        work[f'rolling_std_{span}h'] = one_bar_return.rolling(window=span, min_periods=span).std() * 100

    # --- ATR Calculation ---
    print("    Calculating ATR, Garman-Klass, and Parkinson volatility features...")
    prev_close = close.shift(1)
    # np.fmax ignores NaN operands, so the first bar (which has no previous
    # close) falls back to its plain high-low range.
    true_range = np.fmax(
        high - low,
        np.fmax((high - prev_close).abs(), (low - prev_close).abs()),
    )
    for span in (14, 24, 48):
        work[f'atr_{span}h'] = true_range.rolling(window=span, min_periods=span).mean()

    # --- Garman-Klass and Parkinson Volatility ---
    work['garman_klass_12h'] = garman_klass_volatility(open_, high, low, close, window=12)
    work['parkinson_3h'] = parkinson_volatility(high, low, window=3)

    # --- Ratio Features ---
    def _safe_ratio(numerator, denominator):
        # A zero denominator yields NaN instead of +/-inf.
        return numerator / denominator.replace(0, np.nan)

    with np.errstate(divide='ignore', invalid='ignore'):
        for span in (24, 48, 168):
            work[f'close_div_ma_{span}h'] = _safe_ratio(close, work[f'ma_{span}h'])
        work['ma12_div_ma48'] = _safe_ratio(work['ma_12h'], work['ma_48h'])
        work['ma24_div_ma168'] = _safe_ratio(work['ma_24h'], work['ma_168h'])
        work['std12_div_std72'] = _safe_ratio(work['rolling_std_12h'], work['rolling_std_72h'])
        work['volume_btc_x_range'] = volume_btc * work['price_range_pct']

    # --- Non-linear Transformations ---
    work['rolling_std_3h_sq'] = work['rolling_std_3h'] ** 2
    work['price_return_1h_sq'] = (work['price_return_1h'] ** 2) * 10000
    # Small offset keeps the square-root argument strictly positive.
    work['rolling_std_12h_sqrt'] = np.sqrt(work['rolling_std_12h'].clip(lower=0) + 1e-9)

    # The one-bar return only feeds other features; it is not an output.
    work = work.drop(columns=['price_return_1h'])

    print("  Assembling final dataframe...")
    available = [name for name in SELECTED_FEATURE_NAMES if name in work.columns]
    result = work[available + ['timestamp', 'symbol']].copy()
    absent = set(SELECTED_FEATURE_NAMES) - set(result.columns)
    if absent:
        print(f"  Final Warning: {len(absent)} target columns missing: {absent}")
    result = result.reset_index(drop=True).replace([np.inf, -np.inf], np.nan)
    finished = time.time()

    non_feature_cols = ('timestamp', 'symbol', 'open', 'high', 'low', 'close', 'Volume BTC', 'Volume USD')
    engineered_count = sum(1 for col in result.columns if col not in non_feature_cols)
    print(f"Selected feature calculation finished. Returning {len(result)} rows, {len(result.columns)} columns "
          f"({engineered_count} calculated features). Took {finished - started:.2f}s.")
    return result

# --- New Helper: Grid Search for Base Models ---
from sklearn.model_selection import StratifiedKFold
def _build_candidate_model(model_type, params, scale_pos_weight_val):
    """Return an unfitted estimator for one grid point, configured like the final base model."""
    if model_type == 'xgb':
        # Same recipe the walk-forward loop uses for the final model: static
        # base params, overlaid with the grid point, plus the class weight.
        return XGBClassifier(**{**XGB_BASE_PARAMS, **params,
                                'scale_pos_weight': scale_pos_weight_val})
    if model_type == 'lgbm':
        return LGBMClassifier(**{**LGBM_BASE_PARAMS, **params,
                                 'scale_pos_weight': scale_pos_weight_val})
    if model_type == 'svm':
        # SVC cannot handle NaN and is scale-sensitive, hence impute + scale.
        return Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
            ('svm', SVC(kernel='rbf',
                        probability=True,
                        random_state=42,
                        class_weight='balanced',
                        max_iter=5000,
                        **params))
        ])
    raise ValueError(f"Unknown model_type {model_type!r}; expected 'xgb', 'lgbm' or 'svm'.")


def grid_search_base_model(model_type, base_param_grid, X, y, scale_pos_weight_val):
    """Performs a simple 3-fold grid search using F1 score as metric.

    Every combination in `base_param_grid` is fitted on each of three
    stratified folds and scored with F1 on the held-out fold. XGBoost and
    LightGBM candidates are built on top of XGB_BASE_PARAMS / LGBM_BASE_PARAMS
    so that they are evaluated under the same static settings the final base
    models are trained with.

    Args:
        model_type: 'xgb', 'lgbm' or 'svm'.
        base_param_grid: dict of parameter name -> list of candidate values.
        X: feature DataFrame (positional row access via .iloc).
        y: binary target Series aligned with X.
        scale_pos_weight_val: class weight passed to the tree models; the SVM
            ignores it and uses class_weight='balanced'.

    Returns:
        (best_params, best_score): the first parameter combination reaching the
        highest mean F1, and that mean F1.

    Raises:
        ValueError: if `model_type` is not one of the supported names.

    NOTE(review): the folds are shuffled, so on time-ordered data a validation
    row can be older than the rows it was trained on -- confirm this is
    acceptable for the walk-forward setup.
    """
    if model_type not in ('xgb', 'lgbm', 'svm'):
        raise ValueError(f"Unknown model_type {model_type!r}; expected 'xgb', 'lgbm' or 'svm'.")

    # The splitter is seeded, so the folds are identical for every grid point:
    # build them once instead of once per combination.
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    folds = list(cv.split(X, y))

    best_score = -np.inf
    best_params = None
    for params in ParameterGrid(base_param_grid):
        scores = []
        for train_idx, val_idx in folds:
            X_train_inner, y_train_inner = X.iloc[train_idx], y.iloc[train_idx]
            X_val_inner, y_val_inner = X.iloc[val_idx], y.iloc[val_idx]
            try:
                model = _build_candidate_model(model_type, params, scale_pos_weight_val)
                model.fit(X_train_inner, y_train_inner)
                y_pred_inner = model.predict(X_val_inner)
                scores.append(f1_score(y_val_inner, y_pred_inner, zero_division=0))
            except Exception as e:
                # A failed fold still counts as F1 = 0 so the combination is
                # penalised, but the reason is reported instead of being lost.
                print(f"    Warning: grid search fold failed for {model_type} with params {params}: {e}")
                scores.append(0)
        mean_score = np.mean(scores) if scores else 0
        # Strict '>' keeps the earliest combination on ties.
        if mean_score > best_score:
            best_score = mean_score
            best_params = params
    return best_params, best_score

# --- Main Execution Block ---
if __name__ == "__main__":
    print("--- Ensemble_Method_B ---")
    print("--- 1. Data Loading & Initial Prep ---")
    try:
        print(f"Loading data from: {CSV_FILE_PATH}")
        col_names = ['unix', 'date', 'symbol_csv', 'open', 'high', 'low', 'close', 'Volume BTC', 'Volume USD']
        df_raw = pd.read_csv(CSV_FILE_PATH, header=0, names=col_names)
        print(f"Raw data loaded. Shape: {df_raw.shape}")
        df_raw['timestamp'] = pd.to_datetime(df_raw['date'])
        df_raw = df_raw.drop(['unix', 'date', 'symbol_csv'], axis=1)
        df_raw = df_raw.sort_values('timestamp').reset_index(drop=True)
        if df_raw.empty: exit("DataFrame empty after loading. Exiting.")
        print(f"Initial data prep done. Shape: {df_raw.shape}")
    except Exception as e:
        print(f"Error loading or processing CSV: {e}")
        traceback.print_exc()
        exit()

    print("\n--- 2. Feature Engineering (Simple_Predictor_B Features) ---")
    feature_calc_start = time.time()
    df_features = calculate_selected_features(df_raw, symbol=SYMBOL_NAME)
    feature_calc_end = time.time()
    if df_features.empty: exit("Feature calculation failed. Exiting.")
    print(f"Feature calculation completed in {feature_calc_end - feature_calc_start:.2f} seconds.")
    CURRENT_FEATURE_COLS = [f for f in MODEL_FEATURE_COLS if f in df_features.columns]
    if len(CURRENT_FEATURE_COLS) == 0:
        exit("ERROR: No modeling features found in the DataFrame after calculation.")
    if len(CURRENT_FEATURE_COLS) < len(MODEL_FEATURE_COLS):
         print(f"Warning: Only {len(CURRENT_FEATURE_COLS)} out of {len(MODEL_FEATURE_COLS)} requested modeling features were found/generated.")
    print(f"Using {len(CURRENT_FEATURE_COLS)} features found in DataFrame for modeling.")

    print("\n--- 3. Data Cleaning (Post-Features) ---")
    numeric_feature_cols = df_features[CURRENT_FEATURE_COLS].select_dtypes(include=np.number).columns.tolist()
    df_features[numeric_feature_cols] = df_features[numeric_feature_cols].replace([np.inf, -np.inf], np.nan)
    nan_check = df_features[numeric_feature_cols].isnull().sum()
    total_nans = nan_check.sum()
    print(f"Total NaNs found in {len(numeric_feature_cols)} numeric feature columns: {total_nans}.")

    print("\n--- 4. Modeling Target & Final Prep ---")
    TARGET_COLUMN = 'target'
    df = df_features.copy()
    df = df.sort_values('timestamp')
    if 'close' not in df.columns: exit("ERROR: 'close' column missing before target creation.")
    print(f"Creating binary target based on {PREDICTION_WINDOW_HOURS}-hour future return >= {TARGET_THRESHOLD_PCT}%...")
    df['future_price'] = df['close'].shift(-PREDICTION_WINDOW_ROWS)
    with np.errstate(divide='ignore', invalid='ignore'):
         df['price_return_future'] = (df['future_price'] - df['close']) / df['close'].replace(0, np.nan) * 100
    df[TARGET_COLUMN] = np.where(df['price_return_future'] >= TARGET_THRESHOLD_PCT, 1, 0)
    df.loc[df['price_return_future'].isnull(), TARGET_COLUMN] = np.nan
    df = df.drop(['future_price', 'price_return_future'], axis=1)
    initial_rows = len(df)
    essential_check_cols = ['close', TARGET_COLUMN]
    df = df.dropna(subset=essential_check_cols)
    print(f"Rows after removing NaN targets/close: {len(df)} (Removed {initial_rows - len(df)})")
    rows_before_feature_nan_check = len(df)
    rows_after_feature_nan_dropna = len(df.dropna(subset=CURRENT_FEATURE_COLS))
    potential_feature_nan_loss = rows_before_feature_nan_check - rows_after_feature_nan_dropna
    if potential_feature_nan_loss > 0:
        print(f"Note: {potential_feature_nan_loss} rows have NaNs in feature columns. Models/Imputer will handle them.")
    if df.empty: exit("DataFrame empty after target creation/NaN drop. Exiting.")
    target_counts = df[TARGET_COLUMN].value_counts(normalize=True) * 100
    print("\nTarget variable distribution:")
    print(f"  0 (< {TARGET_THRESHOLD_PCT}% return): {target_counts.get(0, 0):.2f}%")
    print(f"  1 (>= {TARGET_THRESHOLD_PCT}% return): {target_counts.get(1, 0):.2f}%")
    df = df.sort_values('timestamp').reset_index(drop=True)
    print(f"Final DataFrame shape for backtesting: {df.shape}")

    print("\n--- 5. Starting Walk-Forward Validation (Stacking Ensemble - Ensemble_Method_B) ---")
    all_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    all_best_thresholds = []
    meta_feature_names = ['xgb_pred', 'lgbm_pred', 'svm_pred']
    meta_feature_importances = {meta_feat: [] for meta_feat in meta_feature_names}
    iteration_count = 0
    n_rows_total = len(df)
    current_train_start_idx = 0
    total_iterations_estimate = max(0, (n_rows_total - TRAIN_WINDOW_ROWS - TEST_WINDOW_ROWS) // STEP_ROWS + 1) if STEP_ROWS > 0 else 0
    print(f"Total rows: {n_rows_total}, Train Window: {TRAIN_WINDOW_HOURS}h ({TRAIN_WINDOW_ROWS} rows), Prediction Horizon: {PREDICTION_WINDOW_HOURS}h, Evaluation (Test) Window: {TEST_WINDOW_HOURS}h ({TEST_WINDOW_ROWS} rows), Step: {STEP_HOURS}h ({STEP_ROWS} rows)")
    print(f"Estimated iterations: {total_iterations_estimate}")
    print(f"Using {len(CURRENT_FEATURE_COLS)} features for modeling.")
    print(f"Stacking Folds (K): {N_STACKING_FOLDS}")
    print(f"Meta Learner Grid: {META_XGB_PARAM_GRID}")
    print(f"Threshold Search Range: {THRESHOLD_SEARCH_RANGE}")
    print("-" * 30)
    start_loop_time = time.time()

    while True:
        train_end_idx = current_train_start_idx + TRAIN_WINDOW_ROWS
        test_start_idx = train_end_idx
        test_end_idx = test_start_idx + TEST_WINDOW_ROWS
        if test_end_idx > n_rows_total:
             print(f"\nStopping: Evaluation window end ({test_end_idx}) exceeds total rows ({n_rows_total}). Last start index: {current_train_start_idx}")
             break
        if current_train_start_idx >= n_rows_total:
             print(f"\nStopping: Train start index ({current_train_start_idx}) reached end.")
             break

        train_df = df.iloc[current_train_start_idx : train_end_idx].copy()
        test_df = df.iloc[test_start_idx : test_end_idx].copy()
        min_train_samples = max(50, int(0.1 * TRAIN_WINDOW_ROWS), N_STACKING_FOLDS * 2)
        min_test_samples = 5
        if len(train_df) < min_train_samples or len(test_df) < min_test_samples:
            print(f"Skipping iter {iteration_count + 1}: Insufficient data train ({len(train_df)}/{min_train_samples}) or test ({len(test_df)}/{min_test_samples}). Moving step...")
            current_train_start_idx += STEP_ROWS
            continue

        X_train_full = train_df[CURRENT_FEATURE_COLS]
        y_train_full = train_df[TARGET_COLUMN]
        X_test = test_df[CURRENT_FEATURE_COLS]
        y_test = test_df[TARGET_COLUMN]

        if len(y_train_full.unique()) < 2:
            print(f"Skipping iter {iteration_count + 1}: Training data (size {len(train_df)}) has only one class: {y_train_full.unique()}. Moving step...")
            current_train_start_idx += STEP_ROWS
            continue
        if len(y_test.unique()) < 2:
             print(f"Warning iter {iteration_count + 1}: Evaluation test data (size {len(test_df)}) has only one class: {y_test.unique()}. Metrics will be affected.")

        neg_count = y_train_full.value_counts().get(0, 0)
        pos_count = y_train_full.value_counts().get(1, 0)
        scale_pos_weight_val = neg_count / pos_count if pos_count > 0 else 1.0

        print(f"\n--- Iter {iteration_count + 1}/{total_iterations_estimate} ---")
        print(f"  Train Indices: [{current_train_start_idx}:{train_end_idx-1}], Eval Indices: [{test_start_idx}:{test_end_idx-1}]")
        print(f"  Train Target Dist: {dict(y_train_full.value_counts(normalize=True))}")
        print(f"  Test Target Dist: {dict(y_test.value_counts(normalize=True))}")
        print(f"  Using scale_pos_weight: {scale_pos_weight_val:.4f}")

        # --- Grid Search for Base Models ---
        print("  Grid searching base models on current training data...")
        best_xgb_params, xgb_score = grid_search_base_model('xgb', BASE_XGB_PARAM_GRID, X_train_full, y_train_full, scale_pos_weight_val)
        best_lgbm_params, lgbm_score = grid_search_base_model('lgbm', BASE_LGBM_PARAM_GRID, X_train_full, y_train_full, scale_pos_weight_val)
        best_svm_params, svm_score = grid_search_base_model('svm', BASE_SVM_PARAM_GRID, X_train_full, y_train_full, scale_pos_weight_val)
        print(f"    Best XGB Params: {best_xgb_params} (F1: {xgb_score:.3f})")
        print(f"    Best LGBM Params: {best_lgbm_params} (F1: {lgbm_score:.3f})")
        print(f"    Best SVM Params: {best_svm_params} (F1: {svm_score:.3f})")

        # --- Define Base Models using Best Parameters ---
        xgb_params_iter = XGB_BASE_PARAMS.copy()
        if best_xgb_params is not None:
            xgb_params_iter.update(best_xgb_params)
        xgb_params_iter['scale_pos_weight'] = scale_pos_weight_val
        model_xgb_base = XGBClassifier(**xgb_params_iter)

        lgbm_params_iter = LGBM_BASE_PARAMS.copy()
        if best_lgbm_params is not None:
            lgbm_params_iter.update(best_lgbm_params)
        lgbm_params_iter['scale_pos_weight'] = scale_pos_weight_val
        model_lgbm_base = LGBMClassifier(**lgbm_params_iter)

        svm_params = {}
        if best_svm_params is not None:
            svm_params.update(best_svm_params)
        pipeline_svm_base = Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
            ('svm', SVC(kernel='rbf', probability=True, random_state=42, class_weight='balanced', max_iter=5000, **svm_params))
        ])
        models_oof = {'xgb': model_xgb_base, 'lgbm': model_lgbm_base, 'svm': pipeline_svm_base}
        oof_arrays = {'xgb': np.full(len(train_df), np.nan),
                      'lgbm': np.full(len(train_df), np.nan),
                      'svm': np.full(len(train_df), np.nan)}

        # --- K-Fold OOF Generation ---
        skf = StratifiedKFold(n_splits=N_STACKING_FOLDS, shuffle=True, random_state=42 + iteration_count)
        for fold, (train_idx_k, val_idx_k) in enumerate(skf.split(X_train_full, y_train_full)):
            X_train_k, y_train_k = X_train_full.iloc[train_idx_k], y_train_full.iloc[train_idx_k]
            X_val_k, y_val_k = X_train_full.iloc[val_idx_k], y_train_full.iloc[val_idx_k]
            if len(np.unique(y_train_k)) < 2 or len(np.unique(y_val_k)) < 2:
                print(f"    Warning: Fold {fold+1} has single class in train or val. Assigning prior.")
                prior = y_train_full.mean()
                for key in oof_arrays:
                    oof_arrays[key][val_idx_k] = prior
                continue
            for name, model in models_oof.items():
                try:
                    fit_params_k = {}
                    if name == 'lgbm':
                        fit_params_k['callbacks'] = [early_stopping(10, verbose=False), log_evaluation(0)]
                        fit_params_k['eval_metric'] = 'logloss'
                        fit_params_k['eval_set'] = [(X_val_k, y_val_k)]
                    elif name == 'xgb':
                        fit_params_k['eval_set'] = [(X_val_k, y_val_k)]
                        fit_params_k['early_stopping_rounds'] = 10
                        fit_params_k['verbose'] = False
                    model.fit(X_train_k, y_train_k, **fit_params_k)
                    oof_arrays[name][val_idx_k] = model.predict_proba(X_val_k)[:, 1]
                except Exception as e_kfold:
                    print(f"    Error during K-Fold {fold+1} for {name}: {e_kfold}")
                    prior = y_train_full.mean()
                    oof_arrays[name][val_idx_k] = prior

        # Impute any remaining NaNs in OOF arrays.
        X_meta_train_dict = {}
        for name in models_oof:
            oof_array = oof_arrays[name]
            if np.isnan(oof_array).any():
                mean_oof = np.nanmean(oof_array)
                if pd.isna(mean_oof): mean_oof = 0.5
                oof_array = np.nan_to_num(oof_array, nan=mean_oof)
                print(f"    Imputed NaNs in OOF for {name} with mean {mean_oof:.4f}")
            X_meta_train_dict[f'{name}_pred'] = oof_array
        X_meta_train = pd.DataFrame(X_meta_train_dict, index=X_train_full.index)
        y_meta_train = y_train_full
        print(f"  Level 0 OOF Generation Done. Meta Train Shape: {X_meta_train.shape}")

        # --- Train Base Models on Full Training Data ---
        print(f"  Level 0: Training base models on full training data ({len(train_df)} rows)...")
        models_full = {}
        all_base_trained = True
        for name, model in models_oof.items():
            try:
                params = {}
                if name == 'xgb':
                    params['verbose'] = False
                model.fit(X_train_full, y_train_full, **params)
                models_full[name] = model
            except Exception as e_full_fit:
                print(f"  ERROR: Failed to train base model '{name}': {e_full_fit}")
                all_base_trained = False
                break
        if not all_base_trained:
            print("  Skipping iteration due to base model training failure. Moving step...")
            current_train_start_idx += STEP_ROWS
            continue
        print("  Level 0 Full Training Done.")

        # --- Level 1: Meta Learner Tuning & Threshold Tuning ---
        print("  Level 1: Tuning Meta-Learner (XGBoost) and Probability Threshold...")
        best_meta_params = None
        best_meta_score = -np.inf
        best_meta_model_for_thresh = None
        best_threshold_iter = 0.5
        best_thresh_f1_score = -np.inf
        meta_val_size = int(len(X_meta_train) * META_VALIDATION_PCT)
        if meta_val_size < max(N_STACKING_FOLDS, 10) or (len(X_meta_train) - meta_val_size) < max(N_STACKING_FOLDS, 10):
            print(f"  Warning: Meta dataset too small. Using defaults.")
            best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0] if META_XGB_PARAM_GRID else {}
        else:
            X_meta_train_sub = X_meta_train[:-meta_val_size]
            y_meta_train_sub = y_meta_train[:-meta_val_size]
            X_meta_val = X_meta_train[-meta_val_size:]
            y_meta_val = y_meta_train[-meta_val_size:]
            # --- Level 1 tuning: meta-learner hyper-parameters, then decision threshold ---
            # Both searches are scored with F1 on the held-out meta validation split
            # (X_meta_val / y_meta_val). Results are written to best_meta_params,
            # best_meta_score, best_meta_model_for_thresh, best_threshold_iter and
            # best_thresh_f1_score, all of which must be initialised before this point.
            if len(y_meta_val.unique()) < 2 or len(y_meta_train_sub.unique()) < 2:
                # Either side of the split holds a single class: skip tuning and
                # take the first grid entry (or no overrides if the grid is empty).
                print("  Warning: Meta split has single class. Using defaults.")
                best_meta_params = list(ParameterGrid(META_XGB_PARAM_GRID))[0] if META_XGB_PARAM_GRID else {}
            else:
                # Materialise the grid once; it is needed for the count, the loop
                # and the fallback used when a candidate fails.
                meta_param_candidates = list(ParameterGrid(META_XGB_PARAM_GRID))
                fallback_meta_params = meta_param_candidates[0] if META_XGB_PARAM_GRID else {}
                print(f"    Tuning meta learner over {len(meta_param_candidates)} param combinations...")

                # Class-balance weight for the tuning subset: negatives / positives.
                sub_class_counts = y_meta_train_sub.value_counts()
                sub_positive_count = sub_class_counts.get(1, 1)
                if sub_positive_count > 0:
                    meta_scale_pos_weight_sub = sub_class_counts.get(0, 0) / sub_positive_count
                else:
                    meta_scale_pos_weight_sub = 1.0

                for params_meta_cv in meta_param_candidates:
                    try:
                        current_meta_params = {**META_XGB_FIXED_PARAMS, **params_meta_cv}
                        # early_stopping_rounds is configured on the estimator, not in
                        # fit(): the fit() keyword has been deprecated since XGBoost 1.6
                        # and is rejected by newer releases. Because the handler below
                        # catches everything, that rejection would silently disable
                        # both parameter and threshold tuning. The explicit key
                        # overrides any value coming from the parameter dicts, just as
                        # the fit() keyword used to.
                        cv_model_params = {**current_meta_params, 'early_stopping_rounds': 10}
                        model_meta_cv = XGBClassifier(**cv_model_params, scale_pos_weight=meta_scale_pos_weight_sub)
                        model_meta_cv.fit(X_meta_train_sub, y_meta_train_sub,
                                          eval_set=[(X_meta_val, y_meta_val)],
                                          verbose=False)
                        y_pred_meta_val_cv = model_meta_cv.predict(X_meta_val)
                        meta_score = f1_score(y_meta_val, y_pred_meta_val_cv, zero_division=0)
                        # ">=" means a later candidate wins ties.
                        if meta_score >= best_meta_score:
                            best_meta_score = meta_score
                            best_meta_params = params_meta_cv
                            best_meta_model_for_thresh = model_meta_cv
                    except Exception as e_meta_cv:
                        # Best-effort search: report the failure and keep going, but make
                        # sure some parameter set is available for the final fit.
                        print(f"    Error during Meta CV with params {params_meta_cv}: {e_meta_cv}")
                        if best_meta_params is None:
                            best_meta_params = fallback_meta_params
                print(f"    Best Meta Params Found: {best_meta_params} (Validation F1: {best_meta_score:.4f})")

                if best_meta_model_for_thresh is not None:
                    print(f"    Tuning threshold over range {THRESHOLD_SEARCH_RANGE}...")
                    try:
                        # Sweep the cut-off applied to the positive-class probability
                        # of the best tuned model on the validation split.
                        y_meta_proba_val = best_meta_model_for_thresh.predict_proba(X_meta_val)[:, 1]
                        f1_scores_thresh = {}
                        for t in THRESHOLD_SEARCH_RANGE:
                            y_pred_meta_val_t = (y_meta_proba_val >= t).astype(int)
                            current_f1 = f1_score(y_meta_val, y_pred_meta_val_t, zero_division=0)
                            f1_scores_thresh[t] = current_f1
                            # ">=" means a later threshold in the range wins ties.
                            if current_f1 >= best_thresh_f1_score:
                                best_thresh_f1_score = current_f1
                                best_threshold_iter = t
                        print(f"    Best Threshold Found: {best_threshold_iter:.2f} (Validation F1: {best_thresh_f1_score:.4f})")
                    except Exception as e_thresh:
                        print(f"    Error during threshold tuning: {e_thresh}. Using default threshold {best_threshold_iter:.2f}.")
                else:
                    # No candidate trained successfully, so there is no model to sweep.
                    print(f"    Skipping threshold tuning. Using default threshold {best_threshold_iter:.2f}.")

        print("  Level 1: Training final Meta-Learner on full OOF data...")
        try:
            final_meta_params = {**META_XGB_FIXED_PARAMS, **best_meta_params}
            final_meta_scale_pos_weight = y_meta_train.value_counts().get(0, 0) / y_meta_train.value_counts().get(1, 1) if y_meta_train.value_counts().get(1, 1) > 0 else 1.0
            meta_model_final = XGBClassifier(**final_meta_params, scale_pos_weight=final_meta_scale_pos_weight)
            meta_model_final.fit(X_meta_train, y_meta_train, verbose=False)
            print("  Level 1 Final Meta Training Done.")
        except Exception as e_meta_final:
            print(f"  ERROR: Failed to train final meta-learner: {e_meta_final}")
            current_train_start_idx += STEP_ROWS
            continue

        print("  Prediction: Generating final predictions on evaluation data...")
        try:
            pred_xgb_test = models_full['xgb'].predict_proba(X_test)[:, 1]
            pred_lgbm_test = models_full['lgbm'].predict_proba(X_test)[:, 1]
            pred_svm_test = models_full['svm'].predict_proba(X_test)[:, 1]
            X_meta_test = pd.DataFrame({
                'xgb_pred': pred_xgb_test,
                'lgbm_pred': pred_lgbm_test,
                'svm_pred': pred_svm_test
            }, index=X_test.index)
            y_proba_test = meta_model_final.predict_proba(X_meta_test)[:, 1]
            y_pred = (y_proba_test >= best_threshold_iter).astype(int)
            print("  Prediction Done.")
        except Exception as e_pred:
            print(f"  ERROR during prediction phase: {e_pred}")
            for key in all_metrics: all_metrics[key].append(np.nan)
            all_best_thresholds.append(np.nan)
            current_train_start_idx += STEP_ROWS
            continue

        # Evaluate this window. The metrics are computed the same way whether
        # or not the window contains both classes; only the log line differs.
        single_class_window = len(np.unique(y_test)) < 2
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        if single_class_window:
            print(f"  Evaluation Metrics (Test Window Size: {TEST_WINDOW_HOURS}h, SINGLE CLASS {y_test.unique()[0]}): Acc={accuracy:.4f}, Prc={precision:.4f}, Rec={recall:.4f}, F1={f1:.4f}")
        else:
            print(f"  Evaluation Metrics (Test Window Size: {TEST_WINDOW_HOURS}h): Acc={accuracy:.4f}, Prc={precision:.4f}, Rec={recall:.4f}, F1={f1:.4f}")

        # Append this window's scores and threshold to the running histories.
        window_scores = (
            ('accuracy', accuracy),
            ('precision', precision),
            ('recall', recall),
            ('f1', f1),
        )
        for metric_name, metric_value in window_scores:
            all_metrics[metric_name].append(metric_value)
        all_best_thresholds.append(best_threshold_iter)

        # (Meta-feature importance code remains unchanged)

        # Iteration bookkeeping: count the completed window, report its
        # duration and slide the training window forward by one step.
        iteration_count += 1
        iter_end_time = time.time()
        iter_elapsed_seconds = iter_end_time - iter_start_time
        print(f"  Iteration {iteration_count} finished in {iter_elapsed_seconds:.2f} seconds.")
        print("-" * 20)
        current_train_start_idx += STEP_ROWS

    # Wall-clock duration of the whole walk-forward loop.
    end_loop_time = time.time()
    loop_duration_seconds = end_loop_time - start_loop_time
    loop_duration_minutes = loop_duration_seconds / 60
    print("-" * 30)
    print(f"Walk-Forward Validation (Stacking - Ensemble_Method_B) finished in {loop_duration_seconds:.2f} seconds ({loop_duration_minutes:.2f} minutes).")

    # --- Aggregate and Display Results ---
    # Windows whose F1 entry is NaN (failed predictions) are excluded from
    # every statistic below.
    print("\n--- Final Results (Ensemble_Method_B) ---")
    has_recorded_metrics = iteration_count > 0 and len(all_metrics['f1']) > 0
    if has_recorded_metrics:
        valid_indices = [
            idx for idx, f1_value in enumerate(all_metrics['f1']) if not pd.isna(f1_value)
        ]
    else:
        valid_indices = []

    if not has_recorded_metrics:
        print("\nNo iterations were successfully completed or no metrics were generated.")
    elif not valid_indices:
        print("\nNo valid metrics recorded.")
    else:
        # Per-metric score lists restricted to the valid windows.
        valid_scores = {
            metric_name: [all_metrics[metric_name][idx] for idx in valid_indices]
            for metric_name in ('accuracy', 'precision', 'recall', 'f1')
        }
        valid_thresholds = [
            all_best_thresholds[idx]
            for idx in valid_indices
            if not pd.isna(all_best_thresholds[idx])
        ]

        mean_of = {metric_name: np.mean(scores) for metric_name, scores in valid_scores.items()}
        print("\n--- Average Walk-Forward Validation Results ---")
        print(f"Total Iterations Run: {iteration_count}, Successful Evaluations: {len(valid_indices)}")
        print(f"Target: >= {TARGET_THRESHOLD_PCT}% increase over {PREDICTION_WINDOW_HOURS} hours (Prediction Horizon)")
        print(f"Train Window: {TRAIN_WINDOW_HOURS} hours, Evaluation Window: {TEST_WINDOW_HOURS} hours, Step: {STEP_HOURS} hours")
        print(f"Stacking Folds: {N_STACKING_FOLDS}")
        print(f"Average Accuracy:  {mean_of['accuracy']:.4f}")
        print(f"Average Precision: {mean_of['precision']:.4f}")
        print(f"Average Recall:    {mean_of['recall']:.4f}")
        print(f"Average F1-Score:  {mean_of['f1']:.4f}")

        # np.std with default arguments (population standard deviation).
        std_of = {metric_name: np.std(scores) for metric_name, scores in valid_scores.items()}
        print("\n--- Standard Deviation of Metrics ---")
        print(f"Std Dev Accuracy:  {std_of['accuracy']:.4f}")
        print(f"Std Dev Precision: {std_of['precision']:.4f}")
        print(f"Std Dev Recall:    {std_of['recall']:.4f}")
        print(f"Std Dev F1-Score:  {std_of['f1']:.4f}")

        if valid_thresholds:
            avg_threshold = np.mean(valid_thresholds)
            std_threshold = np.std(valid_thresholds)
            print(f"\nAverage Best Threshold: {avg_threshold:.3f} (StdDev: {std_threshold:.3f}) over {len(valid_thresholds)} folds")
        else:
            print("\nCould not determine average threshold.")

    print("\nScript Ensemble_Method_B finished.")


--- Ensemble_Method_B ---
--- 1. Data Loading & Initial Prep ---
Loading data from: C:\Users\mason\AVP\BTCUSDrec.csv
Raw data loaded. Shape: (15177, 9)
Initial data prep done. Shape: (15177, 7)

--- 2. Feature Engineering (Simple_Predictor_B Features) ---
Starting calculation for 49 target columns (incl. base)...
  Calculating features...
    Calculating ATR, Garman-Klass, and Parkinson volatility features...
  Assembling final dataframe...
Selected feature calculation finished. Returning 15177 rows, 51 columns (43 calculated features). Took 0.04s.
Feature calculation completed in 0.04 seconds.
Using 43 features found in DataFrame for modeling.

--- 3. Data Cleaning (Post-Features) ---
Total NaNs found in 43 numeric feature columns: 1688.

--- 4. Modeling Target & Final Prep ---
Creating binary target based on 4-hour future return >= 0.25%...
Rows after removing NaN targets/close: 15173 (Removed 4)
Note: 183 rows have NaNs in feature columns. Models/Imputer will handle them.

Target va

Exception ignored in: <function DMatrix.__del__ at 0x00000264E0294C20>
Traceback (most recent call last):
  File "C:\Users\mason\AppData\Roaming\Python\Python312\site-packages\xgboost\core.py", line 797, in __del__
    _check_call(_LIB.XGDMatrixFree(self.handle))
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt: 


    Best XGB Params: {'eta': 0.06, 'lambda': 3.0, 'max_depth': 2, 'n_estimators': 65} (F1: 0.487)
    Best LGBM Params: {'learning_rate': 0.08, 'max_depth': 4, 'n_estimators': 95, 'subsample': 0.75} (F1: 0.554)
    Best SVM Params: {'C': 3.2, 'gamma': 'scale'} (F1: 0.463)
  Level 0 OOF Generation Done. Meta Train Shape: (672, 3)
  Level 0: Training base models on full training data (672 rows)...
  Level 0 Full Training Done.
  Level 1: Tuning Meta-Learner (XGBoost) and Probability Threshold...
    Tuning meta learner over 64 param combinations...
    Best Meta Params Found: {'colsample_bytree': 0.8, 'eta': 0.05, 'lambda': 1.0, 'max_depth': 3, 'n_estimators': 40, 'subsample': 0.95} (Validation F1: 0.3902)
    Tuning threshold over range [0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65 0.7  0.75
 0.8  0.85]...
    Best Threshold Found: 0.60 (Validation F1: 0.4127)
  Level 1: Training final Meta-Learner on full OOF data...
  Level 1 Final Meta Training Done.
  Prediction: Gene