In [1]:
# Cell 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.momentum import RSIIndicator
from ta.trend import MACD
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold # Added StratifiedKFold
from sklearn.metrics import classification_report, roc_auc_score, make_scorer
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder # Added LabelEncoder for XGBoost
import seaborn as sns
from sklearn.linear_model import LogisticRegression # Added for meta-learner

from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.techindicators import TechIndicators # Often used for news
from alpha_vantage.foreignexchange import ForeignExchange # No, news is not here

# For Alpha Vantage News & Sentiment API
# While TimeSeries is also from alpha_vantage, the News and Sentiment endpoint is a bit distinct
# You usually use the main TimeSeries object or a separate AlphaVantage object if available for news.
# The simplest way is usually via ts.get_news_sentiment() if available or a direct API call if not.

from dotenv import load_dotenv
import os
import requests # For direct API call if alpha_vantage library doesn't expose news directly

# Load environment variables from .env
load_dotenv()

# Get the API key
API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

# Define target labels for multi-class classification
LABELS = [-1, 0, 1]

# Create a custom multi-class ROC AUC scorer for GridSearchCV
multi_class_roc_auc_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class='ovr', labels=LABELS)

print("✅ Libraries imported and global variables defined.")


✅ Libraries imported and global variables defined.


In [2]:
# Cell 2: Fetch Financial Data and News Data from Alpha Vantage

# --- Financial Data ---
ts = TimeSeries(key=API_KEY, output_format='pandas')
SYMBOL = 'AAPL' # Define the ticker symbol
data, meta_data = ts.get_daily(symbol=SYMBOL, outputsize='full')

# Clean and rename columns
data.columns = ['open', 'high', 'low', 'close', 'volume']
data.index = pd.to_datetime(data.index)
data.sort_index(inplace=True)

print("✅ Financial Data fetched and prepared.")
print(data.head())


# --- News Data ---
# Alpha Vantage News & Sentiment API:
# This endpoint gives you news articles and their sentiment scores.
# You might need to adjust the time range or make multiple calls for a long history.
# For simplicity, we'll try to fetch recent news.
# Example URL for Alpha Vantage News:
# https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&time_from=20230101T0000&time_to=20231231T2359&apikey=YOUR_API_KEY

news_data = []
# Define a reasonable time range for fetching news, align with your financial data if possible
# Note: Free Alpha Vantage API has limits on historical news data and rate limits.
# You might need to iterate through smaller time windows or rely on mock data for extensive testing.
# For demonstration, we'll fetch news for the last ~6 months from current date.
# Adjust 'time_from' and 'time_to' as needed for your backtesting period.

# To get news from the start of your financial data:
# start_date_for_news = data.index.min().strftime('%Y%m%dT%H%M')
# end_date_for_news = data.index.max().strftime('%Y%m%dT%H%M')

# For this example, let's target a specific recent range or use a short period for demonstration
# Due to API limits, fetching a "full" news history can be challenging on a free tier.
# Let's set a realistic range for a quick run.
# Current date: June 2025. Let's get news from Jan 2024 to May 2025 for a decent period.
time_from = '20240101T0000'
time_to = '20250531T2359' # Adjust to relevant date range for your data

news_url = f"https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={SYMBOL}&time_from={time_from}&time_to={time_to}&apikey={API_KEY}"
response = requests.get(news_url)
json_data = response.json()

if 'feed' in json_data:
    # Extract relevant sentiment information
    for article in json_data['feed']:
        # Each article has 'overall_sentiment_score' and 'overall_sentiment_label'
        # We also need the date. 'time_published' is usually in YYYYMMDDTHHMMSS format
        pub_time = pd.to_datetime(article['time_published'], format='%Y%m%dT%H%M%S', errors='coerce')
        if pd.isna(pub_time): # If format fails, try without seconds
            pub_time = pd.to_datetime(article['time_published'], format='%Y%m%dT%H%M', errors='coerce')

        # Ensure the article is about the target ticker
        # Alpha Vantage tags relevant tickers in 'ticker_sentiment'
        # This part requires careful parsing as 'ticker_sentiment' is a list of dicts.
        target_ticker_sentiment = None
        if 'ticker_sentiment' in article:
            for ts_entry in article['ticker_sentiment']:
                if ts_entry.get('ticker') == SYMBOL:
                    target_ticker_sentiment = float(ts_entry.get('ticker_sentiment_score', 0.0))
                    break
        if target_ticker_sentiment is not None:
             news_data.append({
                 'date': pub_time.date(), # Get only the date part
                 'sentiment': target_ticker_sentiment
             })

if news_data:
    news_df = pd.DataFrame(news_data)
    news_df['date'] = pd.to_datetime(news_df['date'])
    news_df.set_index('date', inplace=True)
    news_df.sort_index(inplace=True)

    # Aggregate daily average sentiment
    daily_avg_sentiment = news_df.groupby(news_df.index).mean().rename(columns={'sentiment': 'daily_sentiment'})

    print("\n✅ News Data fetched and daily sentiment aggregated.")
    print(daily_avg_sentiment.head())
else:
    print("\n⚠️ Could not fetch news data or no relevant news found. Using mock data for sentiment.")
    # Fallback to mock data if API call fails or no data for the ticker.
    # This is from your 05.5_newsarticles_signals.ipynb (simplified for a single ticker)
    mock_data = {
        'date': pd.to_datetime([
            '2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05',
            '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12',
            '2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19',
        ]),
        'sentiment': [
            0.8, 0.6, 0.1, 0.7, 0.9,
            0.9, -0.3, -0.9, -0.7, 0.8,
            -0.6, 0.7, 0.8, 0.3, -0.4,
        ]
    }
    daily_avg_sentiment = pd.DataFrame(mock_data)
    daily_avg_sentiment = daily_avg_sentiment.groupby('date')['sentiment'].mean().rename('daily_sentiment').to_frame()
    daily_avg_sentiment.index = pd.to_datetime(daily_avg_sentiment.index)
    daily_avg_sentiment.sort_index(inplace=True)
    print("Using mock news sentiment data.")
    print(daily_avg_sentiment.head())


✅ Financial Data fetched and prepared.
             open   high    low  close     volume
date                                             
1999-11-01  80.00  80.69  77.37  77.62  2487300.0
1999-11-02  78.00  81.69  77.31  80.25  3564600.0
1999-11-03  81.62  83.25  81.00  81.50  2932700.0
1999-11-04  82.06  85.37  80.62  83.62  3384700.0
1999-11-05  84.62  88.37  84.00  88.31  3721500.0

✅ News Data fetched and daily sentiment aggregated.
            daily_sentiment
date                       
2025-05-27         0.134346
2025-05-28         0.091177
2025-05-29         0.047185
2025-05-30         0.112797
2025-05-31         0.254246


In [3]:
# Cell 3: Feature Engineering (Updated to include sentiment features)
# Create a copy for feature engineering
feature_data = data.copy()

# Calculate Daily Returns for feature
feature_data['daily_return'] = feature_data['close'].pct_change()

# Calculate RSI
rsi_indicator = RSIIndicator(close=feature_data['close'], window=14)
feature_data['rsi'] = rsi_indicator.rsi()

# Calculate MACD
macd_indicator = MACD(close=feature_data['close'])
feature_data['macd'] = macd_indicator.macd()
feature_data['macd_signal'] = macd_indicator.macd_signal()

# --- Integrate News Sentiment Features ---
# Join financial data with daily sentiment data
# Use a left join to keep all financial dates and fill missing sentiment with 0 or NaN,
# then handle NaNs as appropriate.
feature_data = feature_data.join(daily_avg_sentiment, how='left')

# Fill NaN sentiment values (e.g., no news on that day) with 0 or a reasonable neutral value
# Or forward-fill/backward-fill if you assume sentiment persists.
# For simplicity, let's fill with 0 (neutral) if no news.
feature_data['daily_sentiment'].fillna(0, inplace=True)

# Calculate rolling average and rolling standard deviation for sentiment
# These help us understand the typical sentiment and its variability over time.
# Use min_periods to ensure enough data points are available.
feature_data['rolling_avg_sentiment_7d'] = feature_data['daily_sentiment'].rolling(window=7, min_periods=5).mean()
feature_data['rolling_std_sentiment_30d'] = feature_data['daily_sentiment'].rolling(window=30, min_periods=10).std()

# Define a threshold for detecting a "shift"
# A shift is detected if the current daily sentiment deviates from the rolling average
# by more than a certain number of standard deviations.
STD_DEV_THRESHOLD = 1.5

# Calculate the deviation from the rolling average
feature_data['sentiment_deviation'] = abs(feature_data['daily_sentiment'] - feature_data['rolling_avg_sentiment_7d'])

# Flag a sentiment shift if deviation is above the threshold * rolling_std_sentiment_30d
# Ensure rolling std is not NaN when checking the condition
feature_data['sentiment_shift_flag'] = np.where(
    (feature_data['sentiment_deviation'] > (STD_DEV_THRESHOLD * feature_data['rolling_std_sentiment_30d'])) &
    (feature_data['rolling_std_sentiment_30d'].notna()), # Ensure rolling std is not NaN
    1, # Indicates a shift
    0  # No shift
)

# Create a more nuanced sentiment signal
# This can be a new feature for your ML models.
# Example: 1 for significant positive sentiment, -1 for significant negative, 0 for neutral
# You can refine these thresholds.
feature_data['sentiment_signal'] = 0 # Default to neutral

# Positive sentiment signal: if sentiment is high AND it's not a large negative shift
feature_data.loc[(feature_data['daily_sentiment'] > 0.5) & (feature_data['sentiment_shift_flag'] == 0), 'sentiment_signal'] = 1
# Negative sentiment signal: if sentiment is low AND it's not a large positive shift
feature_data.loc[(feature_data['daily_sentiment'] < -0.5) & (feature_data['sentiment_shift_flag'] == 0), 'sentiment_signal'] = -1
# Consider sentiment shifts (either positive or negative large shifts) as potentially influential
feature_data.loc[(feature_data['sentiment_shift_flag'] == 1) & (feature_data['daily_sentiment'] > 0), 'sentiment_signal'] = 1
feature_data.loc[(feature_data['sentiment_shift_flag'] == 1) & (feature_data['daily_sentiment'] < 0), 'sentiment_signal'] = -1

# Drop rows with NaN values resulting from indicator calculations (financial and sentiment)
feature_data.dropna(inplace=True)

print("✅ Features engineered, including news sentiment features.")
print(feature_data.head())


✅ Features engineered, including news sentiment features.
              open    high     low   close     volume  daily_return  \
date                                                                  
1999-12-17  100.87  102.00   98.50  100.00  4419700.0      0.017191   
1999-12-20   99.56   99.62   96.62   98.00  2535600.0     -0.020000   
1999-12-21   98.19  103.06   97.94  102.50  2746400.0      0.045918   
1999-12-22  102.87  104.56   98.75   99.94  2920300.0     -0.024976   
1999-12-23  101.81  104.25  101.06  103.50  2049400.0      0.035621   

                  rsi      macd  macd_signal  daily_sentiment  \
date                                                            
1999-12-17  52.729509  2.724819     4.846229              0.0   
1999-12-20  49.896383  2.279161     4.332816              0.0   
1999-12-21  55.667989  2.263000     3.918852              0.0   
1999-12-22  51.998303  2.020332     3.539148              0.0   
1999-12-23  56.311373  2.091173     3.249553          

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feature_data['daily_sentiment'].fillna(0, inplace=True)


In [4]:
# Cell 4: Define Multi-Class Target Variable (-1 for Sell, 0 for Hold, 1 for Buy) (No Change)

# Define return for the next period (shifted by -1 to predict the next day)
feature_data['future_return'] = feature_data['close'].pct_change().shift(-1)

# Define your thresholds for Buy/Sell
BUY_THRESHOLD = 0.001
SELL_THRESHOLD = -0.001

# Initialize target column
feature_data['target'] = 0

# Assign Buy signals
feature_data.loc[feature_data['future_return'] > BUY_THRESHOLD, 'target'] = 1

# Assign Sell signals
feature_data.loc[feature_data['future_return'] < SELL_THRESHOLD, 'target'] = -1

# Drop the 'future_return' column after target definition to avoid look-ahead bias
feature_data.drop(columns=['future_return'], inplace=True)

# Drop any remaining rows with NaN values (e.g., last row after target shift)
feature_data.dropna(inplace=True)

print("✅ Multi-class target variable defined.")
print(feature_data['target'].value_counts())
print(feature_data.head())


✅ Multi-class target variable defined.
target
 1    3186
-1    2887
 0     346
Name: count, dtype: int64
              open    high     low   close     volume  daily_return  \
date                                                                  
1999-12-17  100.87  102.00   98.50  100.00  4419700.0      0.017191   
1999-12-20   99.56   99.62   96.62   98.00  2535600.0     -0.020000   
1999-12-21   98.19  103.06   97.94  102.50  2746400.0      0.045918   
1999-12-22  102.87  104.56   98.75   99.94  2920300.0     -0.024976   
1999-12-23  101.81  104.25  101.06  103.50  2049400.0      0.035621   

                  rsi      macd  macd_signal  daily_sentiment  \
date                                                            
1999-12-17  52.729509  2.724819     4.846229              0.0   
1999-12-20  49.896383  2.279161     4.332816              0.0   
1999-12-21  55.667989  2.263000     3.918852              0.0   
1999-12-22  51.998303  2.020332     3.539148              0.0   
1999-12

In [5]:
# Cell 5: Prepare Data for Modeling (Updated features list)

# Define features (X) and target (y)
# Now include the new sentiment features!
features = ['daily_return', 'rsi', 'macd', 'macd_signal',
            'daily_sentiment', 'rolling_avg_sentiment_7d', 'sentiment_deviation', 'sentiment_shift_flag', 'sentiment_signal'] # Added sentiment features
X = feature_data[features]
y = feature_data['target']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)

# Initialize StandardScaler for SVM.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert scaled arrays back to DataFrames with original indices and columns
X_train_scaled_df = pd.DataFrame(X_train_scaled, index=X_train.index, columns=X_train.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, index=X_test.index, columns=X_test.columns)

print("✅ Data prepared for modeling, including sentiment features.")
print(f"Train set shape: {X_train.shape}, Test set shape: {X_test.shape}")
print("Features used:", X.columns.tolist())


✅ Data prepared for modeling, including sentiment features.
Train set shape: (5135, 9), Test set shape: (1284, 9)
Features used: ['daily_return', 'rsi', 'macd', 'macd_signal', 'daily_sentiment', 'rolling_avg_sentiment_7d', 'sentiment_deviation', 'sentiment_shift_flag', 'sentiment_signal']


In [6]:
# Cell 6: Train and Evaluate RandomForestClassifier (Updated for OOF predictions)

print("--- RandomForestClassifier ---")
rf_model = RandomForestClassifier(random_state=42, class_weight='balanced')
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)
y_proba_rf = rf_model.predict_proba(X_test)

print("\n📈 Classification Report (RandomForest):")
print(classification_report(y_test, y_pred_rf, labels=LABELS, zero_division=0))

print("ROC AUC Score (RandomForest):", roc_auc_score(y_test, y_proba_rf, multi_class='ovr', labels=LABELS))

# --- NEW: Generate Out-Of-Fold (OOF) predictions for meta-learner training ---
from sklearn.model_selection import cross_val_predict

# Define a StratifiedKFold cross-validation strategy
# This ensures that each fold has approximately the same proportion of target classes as the full dataset.
# The number of splits (e.g., 5) is typical.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Generate out-of-fold probabilities for RandomForest
# The model is re-trained on (n_splits-1) folds and predicts on the remaining fold.
# This ensures no data leakage from the base model to the meta-learner.
rf_oof_proba = cross_val_predict(rf_model, X_train, y_train, cv=kf, method='predict_proba', n_jobs=-1)
print("✅ RandomForest OOF predictions generated.")


--- RandomForestClassifier ---

📈 Classification Report (RandomForest):
              precision    recall  f1-score   support

          -1       0.47      0.47      0.47       562
           0       0.33      0.01      0.03        76
           1       0.51      0.57      0.54       646

    accuracy                           0.50      1284
   macro avg       0.44      0.35      0.35      1284
weighted avg       0.49      0.50      0.48      1284

ROC AUC Score (RandomForest): 0.5208747922326545
✅ RandomForest OOF predictions generated.


In [7]:
# Cell 7: Hyperparameter Tuning for RandomForestClassifier (GridSearchCV) (No logical change, just rerun)

print("--- Hyperparameter Tuning (RandomForest with GridSearchCV) ---")

param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [5, 10],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [2, 4],
    'class_weight': ['balanced']
}

rf_base = RandomForestClassifier(random_state=42)

grid_search_rf = GridSearchCV(estimator=rf_base,
                              param_grid=param_grid,
                              cv=3,
                              scoring=multi_class_roc_auc_scorer,
                              verbose=2,
                              n_jobs=-1)

grid_search_rf.fit(X_train, y_train)

print("🧠 Best Parameters (RandomForest):")
print(grid_search_rf.best_params_)

best_rf_model = grid_search_rf.best_estimator_

y_pred_best_rf = best_rf_model.predict(X_test)
y_proba_best_rf = best_rf_model.predict_proba(X_test)

print("\n📈 Classification Report (Best RandomForest Model):")
print(classification_report(y_test, y_pred_best_rf, labels=LABELS, zero_division=0))

print(f"ROC AUC (Best RandomForest Model): {roc_auc_score(y_test, y_proba_best_rf, multi_class='ovr', labels=LABELS):.4f}")


--- Hyperparameter Tuning (RandomForest with GridSearchCV) ---
Fitting 3 folds for each of 16 candidates, totalling 48 fits


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.5s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.5s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.5s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.3s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.6s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.6s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.6s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.7s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.6s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.3s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.5s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.3s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.3s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.7s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.6s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.5s
[CV] END class_weight=balanced, max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.5s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.4s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.8s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.8s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.4s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   0.8s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.8s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.8s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.4s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   0.9s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.4s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=50; total time=   0.5s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.8s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.8s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.9s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=50; total time=   0.4s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.7s
[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.8s


Traceback (most recent call last):
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspaces/backtesting/my_env/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 196, in wrapper
    params = func_sig.bind(*

[CV] END class_weight=balanced, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=100; total time=   0.7s
🧠 Best Parameters (RandomForest):
{'class_weight': 'balanced', 'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 50}

📈 Classification Report (Best RandomForest Model):
              precision    recall  f1-score   support

          -1       0.46      0.41      0.43       562
           0       0.08      0.41      0.13        76
           1       0.50      0.29      0.37       646

    accuracy                           0.35      1284
   macro avg       0.35      0.37      0.31      1284
weighted avg       0.46      0.35      0.38      1284

ROC AUC (Best RandomForest Model): 0.5203


In [8]:
# Cell 8: Train and Evaluate XGBClassifier (Updated for OOF predictions)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_predict # Already imported in Cell 1 now.

print("\n--- XGBClassifier ---")

le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

xgb_model = XGBClassifier(objective='multi:softmax',
                          num_class=len(le.classes_),
                          use_label_encoder=False,
                          eval_metric='mlogloss',
                          random_state=42)

xgb_model.fit(X_train, y_train_encoded)

y_pred_xgb_encoded = xgb_model.predict(X_test)
y_pred_xgb = le.inverse_transform(y_pred_xgb_encoded)

y_proba_xgb = xgb_model.predict_proba(X_test)

print("\n📈 Classification Report (XGBoost):")
print(classification_report(y_test, y_pred_xgb, labels=LABELS, zero_division=0))

print("ROC AUC Score (XGBoost):", roc_auc_score(y_test, y_proba_xgb, multi_class='ovr', labels=LABELS))

# --- NEW: Generate Out-Of-Fold (OOF) predictions for meta-learner training ---
# Use the same kf (StratifiedKFold) object defined in Cell 6.
xgb_oof_proba = cross_val_predict(xgb_model, X_train, y_train_encoded, cv=kf, method='predict_proba', n_jobs=-1)
print("✅ XGBoost OOF predictions generated.")



--- XGBClassifier ---


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



📈 Classification Report (XGBoost):
              precision    recall  f1-score   support

          -1       0.46      0.44      0.45       562
           0       0.00      0.00      0.00        76
           1       0.51      0.58      0.54       646

    accuracy                           0.49      1284
   macro avg       0.32      0.34      0.33      1284
weighted avg       0.46      0.49      0.47      1284

ROC AUC Score (XGBoost): 0.5054368748744007


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ XGBoost OOF predictions generated.


In [9]:
# Cell 9: Train and Evaluate SVM (SVC) (Updated for OOF predictions)

from sklearn.model_selection import cross_val_predict # Already imported in Cell 1 now.

print("\n--- SVM (SVC) ---")
svm_model = SVC(probability=True, random_state=42, class_weight='balanced')
svm_model.fit(X_train_scaled_df, y_train)

y_pred_svm = svm_model.predict(X_test_scaled_df)
y_proba_svm = svm_model.predict_proba(X_test_scaled_df)

print("\n📈 Classification Report (SVM):")
print(classification_report(y_test, y_pred_svm, labels=LABELS, zero_division=0))

print("ROC AUC Score (SVM):", roc_auc_score(y_test, y_proba_svm, multi_class='ovr', labels=LABELS))

# --- NEW: Generate Out-Of-Fold (OOF) predictions for meta-learner training ---
# Use the same kf (StratifiedKFold) object defined in Cell 6.
# Note: For SVM, ensure you are using the scaled training data for OOF predictions
svm_oof_proba = cross_val_predict(svm_model, X_train_scaled_df, y_train, cv=kf, method='predict_proba', n_jobs=-1)
print("✅ SVM OOF predictions generated.")



--- SVM (SVC) ---

📈 Classification Report (SVM):
              precision    recall  f1-score   support

          -1       0.45      0.35      0.39       562
           0       0.07      0.55      0.12        76
           1       0.50      0.17      0.26       646

    accuracy                           0.27      1284
   macro avg       0.34      0.36      0.26      1284
weighted avg       0.45      0.27      0.31      1284

ROC AUC Score (SVM): 0.5326894644990571
✅ SVM OOF predictions generated.


In [10]:
# Cell 9.7 (New Cell): Implement Stacking Ensemble

print("\n--- Stacking Ensemble ---")

# Prepare the meta-features for training the meta-learner:
# Concatenate the out-of-fold probabilities from each base model on the training data.
X_meta_train = np.hstack((rf_oof_proba, xgb_oof_proba, svm_oof_proba))

# The target for the meta-learner's training is the original training labels.
y_meta_train = y_train

# Prepare the meta-features for making predictions on the test set:
# Concatenate the predicted probabilities from each base model on the test data.
X_meta_test = np.hstack((y_proba_rf, y_proba_xgb, y_proba_svm))

# Define the meta-learner (e.g., Logistic Regression)
meta_learner = LogisticRegression(solver='lbfgs', multi_class='ovr', random_state=42, class_weight='balanced', max_iter=1000)

# Train the meta-learner on the out-of-fold predictions of the base models
# and the original training labels.
meta_learner.fit(X_meta_train, y_meta_train)

# Make final predictions with the stacking model on the test set.
y_pred_stack = meta_learner.predict(X_meta_test)
y_proba_stack = meta_learner.predict_proba(X_meta_test)

print("\n📈 Classification Report (Stacking Ensemble):")
# Evaluate the stacking model's performance on the actual test set labels.
print(classification_report(y_test, y_pred_stack, labels=LABELS, zero_division=0))

print("ROC AUC Score (Stacking Ensemble):", roc_auc_score(y_test, y_proba_stack, multi_class='ovr', labels=LABELS))
print("✅ Stacking Ensemble model trained and evaluated.")



--- Stacking Ensemble ---

📈 Classification Report (Stacking Ensemble):
              precision    recall  f1-score   support

          -1       0.46      0.48      0.47       562
           0       0.07      0.49      0.12        76
           1       0.57      0.16      0.25       646

    accuracy                           0.32      1284
   macro avg       0.37      0.38      0.28      1284
weighted avg       0.49      0.32      0.34      1284

ROC AUC Score (Stacking Ensemble): 0.5376176104584328
✅ Stacking Ensemble model trained and evaluated.




In [11]:
# Cell 10: Classical Signal Generation (SMA Crossover & RSI) (No Change, just rerun)

feature_data['SMA_10'] = feature_data['close'].rolling(window=10).mean()
feature_data['SMA_50'] = feature_data['close'].rolling(window=50).mean()

feature_data['y_pred_crossover'] = np.where(feature_data['SMA_10'] > feature_data['SMA_50'], 1, 0)

feature_data['y_pred_rsi'] = np.where(feature_data['rsi'] < 30, 1, 0)
feature_data['y_pred_rsi'] = np.where(feature_data['rsi'] > 70, 0, feature_data['y_pred_rsi'])

feature_data.dropna(inplace=True)

print("✅ Classical signals generated.")
print(feature_data[['SMA_10', 'SMA_50', 'y_pred_crossover', 'rsi', 'y_pred_rsi']].head())


✅ Classical signals generated.
             SMA_10    SMA_50  y_pred_crossover        rsi  y_pred_rsi
date                                                                  
2000-02-29  114.274  106.1032                 1  54.716535           0
2000-03-01  115.405  106.7094                 1  66.965397           0
2000-03-02  116.193  107.1894                 1  58.014699           0
2000-03-03  117.506  107.6994                 1  61.967428           0
2000-03-06  118.950  108.2144                 1  59.639451           0


In [12]:
# Cell 11: Prepare and Save Signal Data for Backtesting (Updated to save all predictions)

# Assign predictions from models back to feature_data based on their respective test set indices
feature_data['y_pred'] = 0
feature_data.loc[X_test.index, 'y_pred'] = y_pred_rf

feature_data['y_pred_best'] = 0
feature_data.loc[X_test.index, 'y_pred_best'] = y_pred_best_rf

feature_data['y_pred_xgb'] = 0
feature_data.loc[X_test.index, 'y_pred_xgb'] = y_pred_xgb

feature_data['y_pred_svm'] = 0
feature_data.loc[X_test.index, 'y_pred_svm'] = y_pred_svm

# --- NEW ADDITION ---
# Add stacking model predictions
feature_data['y_pred_stack'] = 0
feature_data.loc[X_test.index, 'y_pred_stack'] = y_pred_stack
# --- END NEW ADDITION ---


# Select relevant columns for signal_data.csv
signal_data = feature_data[[
    'close',
    'y_pred',
    'y_pred_best',
    'y_pred_xgb',
    'y_pred_svm',
    'y_pred_stack', # Added y_pred_stack to the list
    'y_pred_crossover',
    'y_pred_rsi'
]].copy()

signal_data.index.name = 'date'

output_path = '/workspaces/backtesting/investment-portfolio-project/data/signal_data.csv'
signal_data.to_csv(output_path)

print(f"✅ Señales añadidas correctamente. Archivo actualizado: {output_path}")
print(signal_data.tail())


✅ Señales añadidas correctamente. Archivo actualizado: /workspaces/backtesting/investment-portfolio-project/data/signal_data.csv
             close  y_pred  y_pred_best  y_pred_xgb  y_pred_svm  y_pred_stack  \
date                                                                            
2025-06-20  201.00       1           -1           1          -1             1   
2025-06-23  201.50       1           -1           1           0             1   
2025-06-24  200.30      -1           -1           1          -1            -1   
2025-06-25  201.56       1           -1           1           0             0   
2025-06-26  201.00      -1           -1          -1          -1             0   

            y_pred_crossover  y_pred_rsi  
date                                      
2025-06-20                 0           0  
2025-06-23                 0           0  
2025-06-24                 0           0  
2025-06-25                 0           0  
2025-06-26                 0           0  


In [13]:
# Cell 12: Save Feature Data (Optional, but good practice) (No Change)

feature_data_output_path = '/workspaces/backtesting/investment-portfolio-project/data/feature_data.csv'
feature_data.to_csv(feature_data_output_path)
print(f"✅ Feature data saved to: {feature_data_output_path}")


✅ Feature data saved to: /workspaces/backtesting/investment-portfolio-project/data/feature_data.csv
