In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

In [4]:
def generate_meta_labeled_data(trade_df: pd.DataFrame, window: int = 5) -> pd.DataFrame:
    """Label every trade as a win (1) or a loss (0) and append rolling PnL stats.

    Labels are derived from the ``status`` column:

    * ``'tp_hit'``        -> 1
    * ``'sl_hit'``        -> 0
    * ``'session_close'`` -> 1 if the exit is strictly on the profitable side
      of the entry for the trade's ``direction`` (``exit_price > entry_price``
      for ``'long'``, ``exit_price < entry_price`` for ``'short'``), else 0
    * anything else, including a ``'session_close'`` row whose direction is
      neither ``'long'`` nor ``'short'`` -> NaN

    Parameters
    ----------
    trade_df : pd.DataFrame
        Trade log. ``status`` is always read. ``direction`` is read only when
        at least one row is a ``'session_close'``; ``entry_price`` and
        ``exit_price`` only when such a row is long or short. The columns
        required by ``add_rolling_stats`` must be present as well.
    window : int, default 5
        Passed through to ``add_rolling_stats``.

    Returns
    -------
    pd.DataFrame
        Whatever ``add_rolling_stats`` returns for a copy of ``trade_df`` that
        carries the extra ``meta_label`` column. ``trade_df`` itself is not
        modified.
    """
    df = trade_df.copy()

    def _equals(column: str, value: str) -> np.ndarray:
        # Positional boolean mask; missing values never match.
        return (df[column] == value).fillna(False).to_numpy(dtype=bool)

    # Vectorized labeling: start with "unknown" everywhere and fill in the
    # rows whose outcome can be determined.
    labels = np.full(len(df), np.nan)
    labels[_equals('status', 'tp_hit')] = 1.0
    labels[_equals('status', 'sl_hit')] = 0.0

    session_closed = _equals('status', 'session_close')
    if session_closed.any():
        long_close = session_closed & _equals('direction', 'long')
        short_close = session_closed & _equals('direction', 'short')
        # Prices are only needed (and only looked up) for directional closes.
        if long_close.any() or short_close.any():
            entry = df['entry_price'].to_numpy()
            exit_ = df['exit_price'].to_numpy()
            # Comparisons involving NaN are False, i.e. labeled 0.
            labels[long_close] = exit_[long_close] > entry[long_close]
            labels[short_close] = exit_[short_close] < entry[short_close]

    # Keep the dtype callers have seen so far: integer labels when every
    # trade is labeled, float once NaN is needed for unlabeled rows.
    if len(labels) and not np.isnan(labels).any():
        labels = labels.astype(np.int64)

    df['meta_label'] = labels

    # Add rolling evaluation stats
    return add_rolling_stats(df, window=window)

def add_rolling_stats(df: pd.DataFrame, window: int = 5) -> pd.DataFrame:
    """Return ``df`` ordered by ``entry_time`` with rolling PnL statistics added.

    Every statistic is computed over the current row plus up to ``window - 1``
    preceding rows; ``min_periods=1`` lets the first rows use a shorter window.

    Added columns (``{window}`` is the numeric window size):

    * ``rolling_pnl_{window}``        - mean of ``pnl``.
    * ``rolling_winrate_{window}``    - share of trades with ``pnl > 0``. Rows
      with a missing ``pnl`` are left out of the window instead of being
      counted as losses, in line with how the mean and std skip NaN.
    * ``rolling_volatility_{window}`` - standard deviation of ``pnl`` (pandas
      default ``ddof=1``, hence NaN until the window holds two PnL values).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``entry_time`` (sort key) and ``pnl``.
    window : int, default 5
        Maximum number of trades per window.

    Returns
    -------
    pd.DataFrame
        A new, re-indexed (0..n-1) frame; the input frame is not modified.
    """
    # Stable sort: trades sharing an entry_time keep their input order, so
    # the order-dependent rolling values are reproducible.
    ordered = df.sort_values("entry_time", kind="mergesort").reset_index(drop=True)

    # NOTE(review): each window includes the row's own pnl. If these columns
    # are used as model features for that same row's outcome they leak the
    # target - confirm the intended use (a shift(1) would exclude it).
    pnl = ordered['pnl']
    pnl_window = pnl.rolling(window=window, min_periods=1)

    # Rolling average PnL
    ordered[f'rolling_pnl_{window}'] = pnl_window.mean()

    # Rolling win rate; unknown PnL stays NaN so it is skipped, not a loss.
    wins = pnl.gt(0).astype(float).where(pnl.notna())
    ordered[f'rolling_winrate_{window}'] = (
        wins.rolling(window=window, min_periods=1).mean()
    )

    # Rolling standard deviation of PnL (volatility)
    ordered[f'rolling_volatility_{window}'] = pnl_window.std()

    return ordered

