In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import logging
from new_strategy import Asset, BetSizingMethod, get_bet_sizing
import nbimporter
from backtest import Backtest
from meta_strategy import MetaLabelingStrategy
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
%load_ext autoreload
%autoreload 2

In [2]:
# Configure the root logger once for the notebook: emit INFO and above,
# with each record prefixed by its timestamp and severity level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# ---------------------- MetaModelHandler ---------------------- #
class MetaModelHandler:
    """Direction-specific meta-model that filters candidate trades.

    A separate scaler/classifier pair is fitted for ``'long'`` trades and
    for ``'short'`` trades. Once trained, :meth:`is_trade_approved` scores a
    single candidate trade and approves it when the predicted probability of
    the classifier's second class (label 1 when ``meta_label`` is 0/1)
    reaches a threshold.

    Args:
        model_type: Classifier fitted for each direction. One of
            ``"lightgbm"`` (default), ``"xgboost"`` or ``"logistic"``.

    Raises:
        ValueError: If ``model_type`` is not one of ``SUPPORTED_MODELS``.
    """

    # Names accepted for ``model_type``.
    SUPPORTED_MODELS = ("lightgbm", "xgboost", "logistic")

    def __init__(self, model_type: str = "lightgbm"):
        if model_type not in self.SUPPORTED_MODELS:
            raise ValueError(
                f"Unsupported model_type {model_type!r}; "
                f"expected one of {self.SUPPORTED_MODELS}"
            )
        self.model_type = model_type
        # Fitted estimators/scalers; they stay None until train() succeeds
        # for the corresponding direction.
        self.long_model = None
        self.short_model = None
        self.long_scaler = None
        self.short_scaler = None
        self.feature_cols = []

    def _build_model(self):
        """Return a fresh, unfitted classifier for ``self.model_type``."""
        if self.model_type == "lightgbm":
            return LGBMClassifier(random_state=42)
        if self.model_type == "xgboost":
            # NOTE(review): `use_label_encoder` is carried over from the
            # previous configuration; newer xgboost releases may warn that it
            # is unused -- confirm against the installed version.
            return XGBClassifier(use_label_encoder=False, eval_metric='logloss')
        if self.model_type == "logistic":
            return CalibratedClassifierCV(LogisticRegression(), method='sigmoid')
        # Only reachable if model_type was reassigned after construction.
        raise ValueError(f"Unsupported model_type {self.model_type!r}")

    def _fit_direction(self, trades: pd.DataFrame, label: str):
        """Fit a scaler and classifier on the trades of one direction.

        Args:
            trades: Rows of a single direction, already free of NaNs in the
                feature columns and in ``meta_label``.
            label: Direction name, used only in the log message.

        Returns:
            ``(model, scaler)``, or ``(None, None)`` when ``trades`` is empty.
        """
        if trades.empty:
            # Fitting a scaler on zero rows fails with an opaque error; skip
            # this direction so one-sided strategies can still be trained.
            logging.warning(
                "[MetaModel] No %s trades to train on; %s model left untrained.",
                label, label,
            )
            return None, None

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(trades[self.feature_cols])
        model = self._build_model()
        model.fit(X_scaled, trades['meta_label'])
        return model, scaler

    def train(self, trades_df: pd.DataFrame, feature_cols: list):
        """Fit the long and short meta-models.

        Rows with a NaN in any feature column or in ``meta_label`` are
        dropped. The remaining rows are split on the ``direction`` column
        (``'long'`` / ``'short'``; other values are ignored) and each subset
        gets its own scaler and classifier. A direction with no rows is
        skipped with a warning and its model/scaler are reset to ``None``.
        Feature importances are plotted for every fitted model that exposes
        ``feature_importances_``.

        Args:
            trades_df: Trades with the feature columns plus ``direction`` and
                ``meta_label``. The caller's frame is not modified.
            feature_cols: Names of the feature columns, in model input order.
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # the fitted models from the column order used at inference time.
        self.feature_cols = list(feature_cols)
        trades_df = trades_df.dropna(subset=self.feature_cols + ['meta_label'])

        long_trades = trades_df[trades_df['direction'] == 'long']
        short_trades = trades_df[trades_df['direction'] == 'short']

        self.long_model, self.long_scaler = self._fit_direction(long_trades, "long")
        self.short_model, self.short_scaler = self._fit_direction(short_trades, "short")

        fitted = (
            (self.long_model, "Long Trades"),
            (self.short_model, "Short Trades"),
        )
        for model, title in fitted:
            # Skipped directions have no model, and not every supported
            # classifier exposes feature importances.
            if model is not None and hasattr(model, "feature_importances_"):
                self.plot_feature_importance(model, self.feature_cols, title)

    def plot_feature_importance(self, model, feature_names, title):
        """Show a horizontal bar chart of ``model.feature_importances_``.

        Args:
            model: Fitted estimator exposing ``feature_importances_``.
            feature_names: Feature names aligned with the importance vector.
            title: Suffix appended to the chart title.
        """
        importance = model.feature_importances_
        sorted_idx = importance.argsort()[::-1]  # most important first
        sorted_names = [feature_names[i] for i in sorted_idx]
        sorted_importance = importance[sorted_idx]

        plt.figure(figsize=(10, 6))
        plt.barh(sorted_names, sorted_importance)
        plt.title(f"🔍 Feature Importance — {title}")
        # barh draws the first entry at the bottom; invert so the most
        # important feature is on top.
        plt.gca().invert_yaxis()
        plt.tight_layout()
        plt.show()

    def _sanitize_features(self, features: dict) -> dict:
        """Return the model's feature values with invalid entries zeroed.

        Missing keys, NaN/None and +/-inf are replaced with 0. Keys that are
        not in ``self.feature_cols`` are ignored.
        """
        cleaned = {}
        for name in self.feature_cols:
            val = features.get(name, 0)
            if pd.isna(val) or val in (np.inf, -np.inf):
                # NOTE(review): 0 is substituted before scaling, so it is
                # neutral only for features whose training mean is about 0.
                cleaned[name] = 0
            else:
                cleaned[name] = val
        return cleaned

    def is_trade_approved(self, features: dict, direction: str, threshold: float = 0.6) -> bool:
        """Decide whether a candidate trade passes the meta-model filter.

        Args:
            features: Mapping of feature name to value for one trade.
                Missing or non-finite values are replaced with 0.
            direction: ``'long'`` selects the long model; any other value
                selects the short model.
            threshold: Minimum predicted probability required for approval.

        Returns:
            ``True`` when the predicted probability is >= ``threshold``.

        Raises:
            RuntimeError: If the model for the selected direction has not
                been trained.
        """
        if direction == 'long':
            scaler, model = self.long_scaler, self.long_model
        else:
            scaler, model = self.short_scaler, self.short_model

        if scaler is None or model is None:
            raise RuntimeError(
                f"No trained meta-model available for direction {direction!r}; "
                "call train() with trades of that direction first."
            )

        # Single-row frame with columns in the order used during training.
        df = pd.DataFrame([self._sanitize_features(features)])[self.feature_cols]
        prob = float(model.predict_proba(scaler.transform(df))[0, 1])
        # Convert to a built-in bool so callers never receive numpy.bool_.
        approved = bool(prob >= threshold)

        logging.info(
            "[MetaModel] Direction: %s, Prob: %.3f, Threshold: %s, Approved: %s",
            direction, prob, threshold, approved,
        )

        return approved

def train_meta_model(train_df: pd.DataFrame, feature_cols: list) -> MetaModelHandler:
    """Lag the rolling metrics by one row per session, then fit a meta-model.

    Each rolling-metric column present in ``train_df`` is shifted down by one
    row within its ``session`` group, so a trade only sees metrics computed
    from earlier trades (avoids lookahead bias). The first row of every
    session becomes NaN in those columns.

    The shift is applied to a copy: the caller's DataFrame is left untouched,
    so calling this function repeatedly on the same frame (e.g. re-running a
    notebook cell) does not shift the columns again.

    Args:
        train_df: Training trades. Must contain a ``session`` column if any
            of the rolling-metric columns is present.
        feature_cols: Feature column names passed to ``MetaModelHandler.train``.

    Returns:
        The ``MetaModelHandler`` on which ``train`` was called.

    Raises:
        KeyError: If a rolling-metric column is present but ``session`` is not.
    """
    rolling_cols = [
        'rolling_f1', 'rolling_accuracy', 'rolling_precision', 'rolling_recall',
        'n_total_seen', 'n_window_obs'
    ]
    present = [col for col in rolling_cols if col in train_df.columns]
    if present:
        # Work on a copy so the caller's frame is not shifted in place.
        train_df = train_df.copy()
        for col in present:
            train_df[col] = train_df.groupby('session')[col].shift(1)

    meta_model = MetaModelHandler()
    meta_model.train(train_df, feature_cols)
    return meta_model