# Batch DQN Trading - Improved Version

In [1]:
import pandas as pd
import numpy as np
import json
import os

import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

import sys
sys.path.append('../')

# IMPORTANT: Use the IMPROVED environment
from src.trading_env_improved import ImprovedTradingEnv

from sklearn.preprocessing import MinMaxScaler

import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket "ignore" filter also hides the `warnings.warn(...)`
# fallbacks emitted by the feature-filter helpers defined later in this file
# (e.g. "All features filtered! Returning original list.") — consider narrowing
# it to specific categories so those messages stay visible.
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Timeframe tag; it is interpolated into the scenario-config file name that is
# loaded in the next cell (`scenarios_config_<timeframe>_baseline_v2.csv`).
timeframe = '1d'
# timeframe = '1h'

In [4]:
## Load scenario_config
scenario_config = pd.read_csv(f'../config_files/scenarios_config_{timeframe}_baseline_v2.csv')

# Parse the four train/validation boundary columns (stored as day/month/year
# strings) into pandas datetimes, one column at a time.
for _boundary_column in (
    'start_train_date',
    'end_train_date',
    'start_val_date',
    'end_val_date',
):
    scenario_config[_boundary_column] = pd.to_datetime(
        scenario_config[_boundary_column], format='%d/%m/%Y'
    )

In [5]:
# Load `feature_family_v1.json`, the catalogue that `get_family_members` reads
# to resolve a family name into its member feature names.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# default locale encoding.
with open('../config_files/feature_family_v1.json', encoding='utf-8') as f:
    feature_family = json.load(f)

In [6]:
def get_family_members(family_name, data):
    """
    Return the member names of the feature family called `family_name`.

    Parameters:
    -----------
    family_name : str
        Value to match against each family's 'Name' entry.
    data : dict or str
        Catalogue with a 'Families' list; a JSON string is decoded first.

    Returns:
    --------
    list or None
        The 'Name' of every entry in the first matching family's 'Members',
        or None when no family carries that name.
    """
    # Accept either an already-parsed catalogue or its JSON text
    catalogue = json.loads(data) if isinstance(data, str) else data

    # First family whose name matches; None when there is no such family
    match = next(
        (entry for entry in catalogue['Families'] if entry['Name'] == family_name),
        None,
    )
    if match is None:
        return None

    return [item['Name'] for item in match['Members']]

In [7]:
# For a given scenario from scenario_config, get each column into a new variable

def get_scenario_data(scenario_name, scenario_config):
    """
    Extract the settings of one scenario as a plain dictionary.

    Parameters:
    -----------
    scenario_name :
        Value looked up in the 'scenario' column of `scenario_config`.
    scenario_config : pd.DataFrame
        Scenario table; the first row whose 'scenario' equals
        `scenario_name` is used.

    Returns:
    --------
    dict
        Asset, feature family, train/validation date bounds and the
        feature/raw file locations of that row.

    Raises:
    -------
    IndexError
        If no row matches `scenario_name`.
    """
    # Columns copied verbatim from the matching row, in output order
    wanted_fields = (
        'asset',
        'feature_family',
        'start_train_date',
        'end_train_date',
        'start_val_date',
        'end_val_date',
        'feature_path',
        'feature_file',
        'raw_path',
        'raw_file',
    )

    matching_rows = scenario_config[scenario_config['scenario'] == scenario_name]
    row = matching_rows.iloc[0]

    return {field: row[field] for field in wanted_fields}

## Configure Transaction Costs

Set realistic per-trade transaction costs based on the asset type: 0.02% for crypto (USDT pairs) and 0.01% for traditional assets.

In [8]:
def get_transaction_cost(asset_name):
    """
    Return the per-trade transaction cost (as a fraction) for an asset.

    Symbols containing 'USDT' are treated as crypto pairs and charged
    0.02% (0.0002); per the original author's note this mirrors the Binance
    maker fee. Every other symbol is charged 0.01% (0.0001).
    """
    # The substring test is case-sensitive
    is_crypto_pair = 'USDT' in asset_name
    return 0.0002 if is_crypto_pair else 0.0001

# Test
print(f"BTC/USDT transaction cost: {get_transaction_cost('BTCUSDT')*100:.3f}%")
print(f"SPY transaction cost: {get_transaction_cost('SPY')*100:.3f}%")

BTC/USDT transaction cost: 0.020%
SPY transaction cost: 0.010%


In [9]:
import pandas as pd
import numpy as np
from typing import List, Tuple, Dict, Optional
import warnings


def calculate_feature_variability(series: pd.Series) -> Dict[str, float]:
    """
    Calculate multiple variability metrics for a single feature.

    Infinite values are treated as missing and dropped together with NaNs
    before any metric is computed.

    Parameters:
    -----------
    series : pd.Series
        Raw feature values.

    Returns:
    --------
    dict with:
    - std: Standard deviation
    - cv: Coefficient of variation (std/|mean|), 0 when |mean| <= 1e-10
    - iqr: Interquartile range
    - range_ratio: (max-min)/|mean|, or the plain range when |mean| <= 1e-10
    - unique_ratio: Number of unique values / total values
    - mad: Mean absolute deviation
    - entropy: Shannon entropy (base 2) of the values binned into 20 bins

    All metrics are 0 when fewer than 10 usable values remain.
    """
    # Handle NaN/inf
    clean_series = series.replace([np.inf, -np.inf], np.nan).dropna()

    if len(clean_series) < 10:
        return {
            'std': 0, 'cv': 0, 'iqr': 0, 'range_ratio': 0,
            'unique_ratio': 0, 'mad': 0, 'entropy': 0
        }

    mean_val = clean_series.mean()
    std_val = clean_series.std()
    mean_is_usable = abs(mean_val) > 1e-10

    # Coefficient of Variation (handle zero mean)
    cv = std_val / abs(mean_val) if mean_is_usable else 0

    # Interquartile Range
    q75, q25 = np.percentile(clean_series, [75, 25])
    iqr = q75 - q25

    # Range ratio (falls back to the absolute range for a near-zero mean)
    range_val = clean_series.max() - clean_series.min()
    range_ratio = range_val / abs(mean_val) if mean_is_usable else range_val

    # Unique ratio
    unique_ratio = clean_series.nunique() / len(clean_series)

    # Mean Absolute Deviation
    mad = (clean_series - mean_val).abs().mean()

    # Entropy (binned)
    try:
        hist, _ = np.histogram(clean_series, bins=20)
        hist = hist / hist.sum()
        hist = hist[hist > 0]  # Remove zeros for log
        entropy = -np.sum(hist * np.log2(hist))
    except Exception:
        # Best effort: a failed histogram counts as "no information".
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate.
        entropy = 0

    return {
        'std': std_val,
        'cv': cv,
        'iqr': iqr,
        'range_ratio': range_ratio,
        'unique_ratio': unique_ratio,
        'mad': mad,
        'entropy': entropy
    }


def is_feature_flat(
    series: pd.Series,
    cv_threshold: float = 0.01,
    unique_threshold: float = 0.01,
    entropy_threshold: float = 1.0,
    method: str = 'combined'
) -> Tuple[bool, Dict[str, float]]:
    """
    Decide whether a feature carries too little variation to be useful.

    Parameters:
    -----------
    series : pd.Series
        The feature values
    cv_threshold : float
        A coefficient of variation below this counts as flat
        (used by 'cv', 'any' and 'all').
    unique_threshold : float
        A unique-value ratio below this counts as flat
        (used by 'any' and 'all').
    entropy_threshold : float
        A binned entropy below this counts as flat
        (used by 'entropy', 'any' and 'all').
    method : str
        Decision rule:
        - 'cv': coefficient of variation only
        - 'entropy': entropy only
        - 'any': flat if at least one criterion says flat
        - 'all': flat only if every criterion says flat
        - 'combined': weighted score of the three metrics with fixed
          scaling (cv/0.5, entropy/4.0, unique_ratio/0.5, each capped at 1)
          and a fixed cut-off of 0.1. The three threshold arguments are
          NOT consulted in this mode.

    Returns:
    --------
    is_flat : bool
        True if feature is considered flat
    metrics : dict
        Output of `calculate_feature_variability` for `series`

    Raises:
    -------
    ValueError
        If `method` is not one of the rules listed above.
    """
    metrics = calculate_feature_variability(series)

    # Per-criterion verdicts against the caller-supplied thresholds
    below_cv = metrics['cv'] < cv_threshold
    below_unique = metrics['unique_ratio'] < unique_threshold
    below_entropy = metrics['entropy'] < entropy_threshold

    if method == 'combined':
        # Each metric is squashed into [0, 1]; a higher score means more varied
        cv_part = min(metrics['cv'] / 0.5, 1.0)
        entropy_part = min(metrics['entropy'] / 4.0, 1.0)
        unique_part = min(metrics['unique_ratio'] / 0.5, 1.0)

        weighted = 0.4 * cv_part + 0.4 * entropy_part + 0.2 * unique_part
        return weighted < 0.1, metrics

    if method == 'cv':
        return below_cv, metrics

    if method == 'entropy':
        return below_entropy, metrics

    if method == 'any':
        return below_cv or below_unique or below_entropy, metrics

    if method == 'all':
        return below_cv and below_unique and below_entropy, metrics

    raise ValueError(f"Unknown method: {method}")


def filter_flat_features(
    df: pd.DataFrame,
    feature_columns: Optional[List[str]] = None,
    cv_threshold: float = 0.01,
    unique_threshold: float = 0.01,
    entropy_threshold: float = 1.0,
    method: str = 'combined',
    verbose: bool = True
) -> Tuple[List[str], List[str], pd.DataFrame]:
    """
    Filter out flat/near-flat features from a dataframe.

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe containing features
    feature_columns : List[str], optional
        List of feature column names to check.
        If None, uses all numeric columns whose lower-cased name is not one
        of the price/date columns ('close', 'open', 'high', 'low', 'volume',
        'date', 'end_date', 'start_date', 'timestamp').
    cv_threshold : float
        Coefficient of variation threshold (forwarded to `is_feature_flat`)
    unique_threshold : float
        Unique ratio threshold (forwarded to `is_feature_flat`)
    entropy_threshold : float
        Entropy threshold (forwarded to `is_feature_flat`)
    method : str
        Method to determine flatness (forwarded to `is_feature_flat`)
    verbose : bool
        Print details about filtered features

    Returns:
    --------
    selected_features : List[str]
        Features that passed the filter (not flat)
    removed_features : List[str]
        Features that were removed (flat)
    metrics_df : pd.DataFrame
        Variability metrics for all features. It always has the columns
        feature, is_flat, cv, entropy, std, iqr, range_ratio, unique_ratio
        and mad, even when no feature was analysed.
    """
    # Determine feature columns
    if feature_columns is None:
        exclude_cols = {'close', 'open', 'high', 'low', 'volume', 'date',
                        'end_date', 'start_date', 'timestamp'}
        # str(...) so that non-string column labels do not crash on .lower()
        feature_columns = [col for col in df.select_dtypes(include=[np.number]).columns
                           if str(col).lower() not in exclude_cols]

    selected_features = []
    removed_features = []
    all_metrics = []

    for col in feature_columns:
        is_flat, metrics = is_feature_flat(
            df[col],
            cv_threshold=cv_threshold,
            unique_threshold=unique_threshold,
            entropy_threshold=entropy_threshold,
            method=method
        )

        metrics['feature'] = col
        metrics['is_flat'] = is_flat
        all_metrics.append(metrics)

        if is_flat:
            removed_features.append(col)
        else:
            selected_features.append(col)

    # Passing `columns=` fixes the layout even for an empty `all_metrics`;
    # selecting the columns afterwards raised KeyError when nothing was analysed.
    metric_columns = ['feature', 'is_flat', 'cv', 'entropy', 'std',
                      'iqr', 'range_ratio', 'unique_ratio', 'mad']
    metrics_df = pd.DataFrame(all_metrics, columns=metric_columns)

    if verbose:
        print("="*70)
        print("FEATURE VARIABILITY ANALYSIS")
        print("="*70)
        print(f"\nTotal features analyzed: {len(feature_columns)}")
        print(f"Features KEPT (varied): {len(selected_features)}")
        print(f"Features REMOVED (flat): {len(removed_features)}")

        # Same per-feature report for both groups, removed features first
        report_sections = (
            (f"\n⚠️  Removed flat features:", removed_features),
            (f"\n✓ Selected features with variation:", selected_features),
        )
        for heading, names in report_sections:
            if not names:
                continue
            print(heading)
            for feat in names:
                m = metrics_df[metrics_df['feature'] == feat].iloc[0]
                print(f"   - {feat}: CV={m['cv']:.4f}, Entropy={m['entropy']:.2f}")

        print("="*70)

    return selected_features, removed_features, metrics_df


def select_varied_features(
    df: pd.DataFrame,
    feature_columns: List[str],
    min_cv: float = 0.01,
    min_entropy: float = 1.0,
    top_n: Optional[int] = None,
    verbose: bool = True
) -> List[str]:
    """
    Pick the features of `df` that are not flat, optionally keeping the top N.

    The flat/varied decision is delegated to `filter_flat_features` with
    method='combined'.

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe with features
    feature_columns : List[str]
        Feature column names to evaluate
    min_cv : float
        Forwarded as `cv_threshold`
    min_entropy : float
        Forwarded as `entropy_threshold`
    top_n : int, optional
        When given and more than `top_n` features survive, keep only the
        `top_n` with the highest rank-based variability score
        (0.4 * cv rank + 0.4 * entropy rank + 0.2 * unique-ratio rank).
    verbose : bool
        Print summary

    Returns:
    --------
    List[str] : Selected feature names
    """
    kept, dropped, stats = filter_flat_features(
        df,
        feature_columns=feature_columns,
        cv_threshold=min_cv,
        entropy_threshold=min_entropy,
        method='combined',
        verbose=False
    )

    must_trim = top_n is not None and len(kept) > top_n
    if must_trim:
        # Score only the non-flat features by their percentile ranks
        candidates = stats[~stats['is_flat']].copy()
        cv_rank = candidates['cv'].rank(pct=True)
        entropy_rank = candidates['entropy'].rank(pct=True)
        unique_rank = candidates['unique_ratio'].rank(pct=True)
        candidates['variability_score'] = (
            cv_rank * 0.4 + entropy_rank * 0.4 + unique_rank * 0.2
        )
        ordered = candidates.sort_values('variability_score', ascending=False)
        kept = ordered.head(top_n)['feature'].tolist()

    if verbose:
        print(f"Selected {len(kept)}/{len(feature_columns)} features with sufficient variation")
        if dropped:
            print(f"Removed {len(dropped)} flat features: {dropped}")

    return kept


# =============================================================================
# CONVENIENCE FUNCTION FOR YOUR WORKFLOW
# =============================================================================

def filter_tda_features(
    df_features: pd.DataFrame,
    list_features: List[str],
    cv_threshold: float = 0.05,
    entropy_threshold: float = 2.0,
    verbose: bool = True
) -> List[str]:
    """
    Drop flat/constant TDA features, falling back to the full list if none survive.

    Thin wrapper around `filter_flat_features` using method='combined'.

    Parameters:
    -----------
    df_features : pd.DataFrame
        Feature dataframe containing every column named in `list_features`
    list_features : List[str]
        List of feature column names
    cv_threshold : float
        Forwarded to `filter_flat_features` (default 0.05)
    entropy_threshold : float
        Forwarded to `filter_flat_features` (default 2.0)
    verbose : bool
        Print analysis details

    Returns:
    --------
    List[str] : The features judged non-flat, or `list_features` unchanged
    (after a warning) when every feature was judged flat.

    Example:
    --------
    >>> # Before training
    >>> list_features = get_family_members('TDA_TD_168_SS_720', feature_family)
    >>> list_features = filter_tda_features(df_features, list_features)
    >>> df_features = df_features[list_features]
    """
    kept, _dropped, _stats = filter_flat_features(
        df_features,
        feature_columns=list_features,
        cv_threshold=cv_threshold,
        entropy_threshold=entropy_threshold,
        method='combined',
        verbose=verbose
    )

    if kept:
        return kept

    # Nothing survived: hand back the caller's list rather than an empty one
    warnings.warn("All features were filtered out! Using original features.")
    return list_features


def filter_tda_features_strict(
    df_features: pd.DataFrame,
    list_features: List[str],
    min_cv: float = 0.1,
    min_range_pct: float = 0.2,
    verbose: bool = True
) -> List[str]:
    """
    Keep only features whose spread is large relative to their mean.

    For each column, infinities and NaNs are dropped first. A feature is kept
    when both hold:
    - CV (std/|mean|) >= min_cv  (default 10%)
    - Range (max-min)/|mean| >= min_range_pct (default 20%)

    Columns with fewer than 10 usable values are removed, and both ratios are
    taken as 0 when |mean| <= 1e-10 (so such features are removed for any
    positive threshold).

    Parameters:
    -----------
    df_features : pd.DataFrame
        Feature dataframe
    list_features : List[str]
        Feature column names
    min_cv : float
        Minimum coefficient of variation (0.1 = 10%)
    min_range_pct : float
        Minimum range as a fraction of |mean| (0.2 = 20%)
    verbose : bool
        Print a line per kept feature and a final summary

    Returns:
    --------
    List[str] : Selected feature names, or `list_features` itself (after a
    warning) when nothing passed.
    """
    kept = []
    rejected = []  # (feature name, human-readable reason)

    for name in list_features:
        values = df_features[name].replace([np.inf, -np.inf], np.nan).dropna()

        if len(values) < 10:
            rejected.append((name, 'insufficient data'))
            continue

        abs_mean = abs(values.mean())
        spread = values.max() - values.min()

        # Both ratios share the same near-zero-mean guard
        if abs_mean > 1e-10:
            cv = values.std() / abs_mean
            range_pct = spread / abs_mean
        else:
            cv = 0
            range_pct = 0

        passes = cv >= min_cv and range_pct >= min_range_pct
        if not passes:
            rejected.append((name, f'CV={cv:.3f}, Range%={range_pct:.3f}'))
            continue

        kept.append(name)
        if verbose:
            print(f"✓ {name}: CV={cv:.3f}, Range%={range_pct:.3f}")

    if verbose:
        print(f"\n{'='*60}")
        print(f"Selected: {len(kept)}/{len(list_features)} features")
        if rejected:
            print(f"\nRemoved {len(rejected)} flat features:")
            for feat, reason in rejected:
                print(f"  ❌ {feat}: {reason}")
        print(f"{'='*60}")

    if not kept:
        warnings.warn("All features filtered! Returning original list.")
        return list_features

    return kept

## Select Scenarios to Process

In [10]:
# Scenario ids to process: the integers 1 through 23 (range's upper bound is exclusive).
# Each id is matched against the 'scenario' column of scenario_config in the main loop.
list_scenario_id = list(range(1, 24))

# list_scenario_id = [21]  # For testing

print(f"Processing {len(list_scenario_id)} scenarios")

Processing 23 scenarios


## Main Processing Loop

Process each scenario with the improved environment and proper cost tracking.

In [11]:
# Create results directories (existing ones are left untouched)
_OUTPUT_DIRECTORIES = (
    '../RL_outputs/results/df',
    '../RL_outputs/results/json',
    '../RL_outputs/results/plot',
    '../RL_outputs/tensorboard',
    '../RL_outputs/models',
    '../RL_outputs/logs',
    '../RL_outputs/checkpoints',
)
for _output_directory in _OUTPUT_DIRECTORIES:
    os.makedirs(_output_directory, exist_ok=True)

print("Results directories created")

Results directories created


In [12]:
# Track batch results
batch_summary = []

for idx, scenario_id in enumerate(list_scenario_id):
    print("\n" + "="*80)
    print(f"Processing Scenario {scenario_id} ({idx+1}/{len(list_scenario_id)})")
    print("="*80)
    
    try:
        # Get the scenario data
        scenario_data = get_scenario_data(scenario_id, scenario_config)
        print(f"Asset: {scenario_data['asset']}, Feature Family: {scenario_data['feature_family']}")

        # Determine transaction cost
        transaction_cost = get_transaction_cost(scenario_data['asset'])
        print(f"Transaction cost: {transaction_cost*100:.3f}% per trade")

        # Import the features and raw data
        features_full_path = f"../{scenario_data['feature_path']}/{scenario_data['feature_file']}"
        df_features = pd.read_csv(features_full_path)

        # Format date based on feature family
        if scenario_data['feature_family'].startswith('TDA'):
            df_features['date'] = pd.to_datetime(df_features['end_date'])
            df_features['date'] = df_features['date'].dt.floor('d')        
        else:
            df_features['date'] = pd.to_datetime(df_features['date'])

        # Set date as index
        df_features.set_index('date', inplace=True)

        # Get the features for the scenario
        list_features = get_family_members(scenario_data['feature_family'], feature_family)

        # # Filter TDA features
        # if scenario_data['feature_family'].startswith('TDA'):
        #     list_features = [f for f in list_features if f.endswith('_entropy') or f.endswith('_amplitude')]

        # Filter TDA features to remove flat ones
        if scenario_data['feature_family'].startswith('TDA'):
            # print("\n1. Standard filtering (filter_tda_features):")
            # print("-" * 50)
            # selected = filter_tda_features(df_features, list_features, verbose=True)
            # print(f"Result: {selected}")
            
            print("\n2. Strict filtering (filter_tda_features_strict):")
            print("-" * 50)
            list_features = filter_tda_features_strict(df_features, list_features, min_cv=0.1, min_range_pct=0.2, verbose=True)
            print(f"Result: {list_features}")        

        df_features = df_features[list_features]
        print(f"Using {len(list_features)} features")

        # Load raw price data
        raw_full_path = f"../{scenario_data['raw_path']}/{scenario_data['raw_file']}"
        df_raw = pd.read_csv(raw_full_path)
        df_raw['date'] = pd.to_datetime(df_raw['date'])
        df_raw.set_index('date', inplace=True)

        # Align features with raw data timestamps
        df_features_modified = pd.DataFrame()
        for date in df_features.index:
            if date in df_raw.index:
                df_features_modified = pd.concat([df_features_modified, df_features.loc[[date]]])
            else:
                date_only = date.date()
                df_raw_date = df_raw[df_raw.index.date == date_only]
                if not df_raw_date.empty:
                    last_time = df_raw_date.index.max()
                    df_features_modified = pd.concat([df_features_modified, df_features.loc[[date]].rename(index={date: last_time})])

            df_features_modified = df_features_modified[~df_features_modified.index.duplicated(keep='last')]

        # Join features with raw data
        df = df_features_modified.join(df_raw, how='inner')
        df = df[list_features + ['close']]

        # Split train/validation
        df_train = df.loc[(df.index >= scenario_data['start_train_date']) & (df.index <= scenario_data['end_train_date'])]
        df_val = df.loc[(df.index >= scenario_data['start_val_date']) & (df.index <= scenario_data['end_val_date'])]    
        print(f"Train: {len(df_train)} samples, Val: {len(df_val)} samples")

        # Scale features
        for feature in list_features:
            df_train[feature].replace([np.inf, -np.inf], np.nan, inplace=True)
            df_val[feature].replace([np.inf, -np.inf], np.nan, inplace=True)
            
            df_train[feature].fillna(method='ffill', inplace=True)
            df_val[feature].fillna(method='ffill', inplace=True)
            
            scaler = MinMaxScaler(feature_range=(0, 1))
            df_train[feature] = scaler.fit_transform(df_train[feature].values.reshape(-1, 1))
            df_val[feature] = scaler.transform(df_val[feature].values.reshape(-1, 1)) 

        # Create environments with IMPROVED environment
        def make_env(df, rank):
            def _init():
                env = ImprovedTradingEnv(df, transaction_cost=transaction_cost)
                return env
            return _init
                
        n_envs = 4
        train_env = SubprocVecEnv([make_env(df_train, i) for i in range(n_envs)])
        val_env = DummyVecEnv([lambda: ImprovedTradingEnv(df_val, transaction_cost=transaction_cost)])

        # Create DQN model
        model = DQN(
            "MlpPolicy",
            train_env,
            learning_rate=1e-4,
            learning_starts=1000,
            buffer_size=50000,
            batch_size=64,
            gamma=0.99,
            target_update_interval=500,
            exploration_fraction=0.3,
            exploration_initial_eps=1.0,
            exploration_final_eps=0.05,
            train_freq=4,
            gradient_steps=1,
            tensorboard_log=f"../RL_outputs/tensorboard/{scenario_id}",
            verbose=0,
            seed=42
        )

        # Training configuration
        total_timesteps = max(100000, len(df_train) * 50)

        # Callbacks
        eval_callback = EvalCallback(
            val_env,
            best_model_save_path=f'../RL_outputs/models/{scenario_id}/',
            log_path=f'../RL_outputs/logs/{scenario_id}/',
            eval_freq=len(df_train) * n_envs,
            n_eval_episodes=5,
            deterministic=True,
            render=False
        )

        checkpoint_callback = CheckpointCallback(
            save_freq=len(df_train) * n_envs * 5,
            save_path=f'../RL_outputs/checkpoints/{scenario_id}/',
            name_prefix='dqn_model'
        )

        # Train
        print(f"Training for {total_timesteps:,} timesteps...")
        model.learn(
            total_timesteps=total_timesteps,
            callback=[eval_callback, checkpoint_callback]
        )     
        print("Training complete!")

        # Evaluate
        mean_reward, std_reward = evaluate_policy(model, val_env, n_eval_episodes=10, deterministic=False)

        # Run full episode with CORRECTED cost tracking
        obs = val_env.reset()
        done = False
        
        episode_diagnostics = {
            'dates': [],
            'actions': [],
            'rewards': [],
            'positions': [],
            'prices': [],
            'net_worth': [],
            'net_worth_before_costs': [],
            'costs': [],
            'cumulative_costs': [],
        }
        
        prev_total_costs = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)
            obs, rewards, done, info = val_env.step(action)
            
            episode_diagnostics['dates'].append(info[0]['current_date'])
            episode_diagnostics['actions'].append(action[0])
            episode_diagnostics['rewards'].append(rewards[0])
            episode_diagnostics['positions'].append(info[0]['position'])
            episode_diagnostics['prices'].append(info[0]['close_price'])
            episode_diagnostics['net_worth'].append(info[0]['net_worth'])
            episode_diagnostics['net_worth_before_costs'].append(info[0]['net_worth_before_costs'])
            
            # FIX: Get costs directly from environment
            current_total_costs = info[0]['total_transaction_costs']
            cost_this_step = current_total_costs - prev_total_costs
            prev_total_costs = current_total_costs
            
            episode_diagnostics['costs'].append(cost_this_step)
            episode_diagnostics['cumulative_costs'].append(current_total_costs)

        # Create results dataframe
        df_results = pd.DataFrame(episode_diagnostics)
        df_results['date'] = pd.to_datetime(df_results['dates'])

        # Verify data quality
        negative_costs = df_results[df_results['costs'] < -0.0001]
        if len(negative_costs) > 0:
            print(f"⚠️  WARNING: {len(negative_costs)} steps with negative costs!")
        
        # Print summary
        print("\n" + "-"*60)
        print("RESULTS SUMMARY")
        print("-"*60)
        print(f"Total Return (after costs):  {info[0]['total_return']*100:>8.2f}%")
        print(f"Total Return (before costs): {info[0]['total_return_before_costs']*100:>8.2f}%")
        print(f"Sharpe Ratio (after costs):  {info[0]['sharpe_ratio']:>8.3f}")
        print(f"Sharpe Ratio (before costs): {info[0]['sharpe_ratio_before_costs']:>8.3f}")
        print(f"Win Rate:                    {info[0]['win_rate']*100:>8.2f}%")
        print(f"Total Trades:                {info[0]['trade_count']:>8.0f}")
        print(f"Total Transaction Costs:     ${info[0]['total_transaction_costs']:>8.2f}")
        
        # Interpretation
        if info[0]['total_return_before_costs'] < 0:
            print("\n⚠️  No edge before costs - features not predictive")
        elif info[0]['total_return'] < 0:
            print("\n⚠️  Profitable before costs, loses after - overtrading")
        else:
            print("\n✓ Profitable after costs!")
        print("-"*60)

        # Save detailed results
        df_results.to_csv(f'../RL_outputs/results/df/{scenario_id}_DQN_results.csv', index=False)        

        # Save metrics JSON
        results = {
            'scenario_id': scenario_id,
            'asset': scenario_data['asset'],
            'feature_family': scenario_data['feature_family'],
            'transaction_cost_pct': transaction_cost * 100,
            'mean_reward': float(df_results['rewards'].mean()),
            'std_reward': float(df_results['rewards'].std()),
            'final_net_worth': float(df_results['net_worth'].iloc[-1]),
            'final_net_worth_before_costs': float(df_results['net_worth_before_costs'].iloc[-1]),
            'total_return': float(info[0]['total_return']),
            'total_return_before_costs': float(info[0]['total_return_before_costs']),
            'sharpe_ratio': float(info[0]['sharpe_ratio']),
            'sharpe_ratio_before_costs': float(info[0]['sharpe_ratio_before_costs']),
            'sortino_ratio': float(info[0]['sortino_ratio']),
            'max_drawdown': float(info[0]['max_drawdown']),
            'win_rate': float(info[0]['win_rate']),
            'win_rate_before_costs': float(info[0]['win_rate_before_costs']),
            'policy_mean_reward': float(mean_reward),
            'policy_std_reward': float(std_reward),
            'total_transaction_costs': float(info[0]['total_transaction_costs']),
            'trade_count': int(info[0]['trade_count']),
            'trade_frequency': float(info[0]['trade_frequency']),
        }

        with open(f"../RL_outputs/results/json/{scenario_id}_DQN_results.json", 'w') as f:
            json.dump(results, f, indent=2)

        # Track for batch summary
        batch_summary.append(results)

        # Create visualization
        fig = make_subplots(
            rows=6, cols=1, 
            shared_xaxes=True, 
            vertical_spacing=0.05,
            subplot_titles=(
                "Price and Positions", 
                "Rewards", 
                "Net Worth (Before vs After Costs)",
                "Features", 
                "Transaction Costs", 
                "Cumulative Costs"
            )
        )

        # Plot 1: Price and positions
        fig.add_trace(
            go.Scatter(x=df_results['date'], y=df_results['prices'], 
                       name='Price', line=dict(color='gray', width=1)),
            row=1, col=1
        )

        # Add position markers (0=Long, 1=Short)
        long_mask = df_results['actions'] == 0
        short_mask = df_results['actions'] == 1

        fig.add_trace(
            go.Scatter(x=df_results[long_mask]['date'], y=df_results[long_mask]['prices'],
                       mode='markers', name='Long', 
                       marker=dict(color='green', size=4, symbol='triangle-up')),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=df_results[short_mask]['date'], y=df_results[short_mask]['prices'],
                       mode='markers', name='Short', 
                       marker=dict(color='red', size=4, symbol='triangle-down')),
            row=1, col=1
        )

        # Plot 2: Rewards
        colors = ['green' if r >= 0 else 'red' for r in df_results['rewards']]
        fig.add_trace(
            go.Bar(x=df_results['date'], y=df_results['rewards'],
                   name='Rewards', marker_color=colors, showlegend=False),
            row=2, col=1
        )

        # Plot 3: Net worth comparison
        fig.add_trace(
            go.Scatter(x=df_results['date'], y=df_results['net_worth'],
                       name='After Costs', line=dict(color='blue', width=2)),
            row=3, col=1
        )
        fig.add_trace(
            go.Scatter(x=df_results['date'], y=df_results['net_worth_before_costs'],
                       name='Before Costs', line=dict(color='lightblue', width=2, dash='dash')),
            row=3, col=1
        )

        # Plot 4: Features (sample)
        for i, feature in enumerate(list_features[:5]):  # Show only first 5
            fig.add_trace(
                go.Scatter(x=df_val.index, y=df_val[feature], 
                           name=feature, line=dict(width=1), showlegend=False),
                row=4, col=1
            )

        # Plot 5: Transaction costs per step
        fig.add_trace(
            go.Bar(x=df_results['date'], y=df_results['costs'],
                   name='Costs', marker_color='orange', showlegend=False),
            row=5, col=1
        )

        # Plot 6: Cumulative costs
        fig.add_trace(
            go.Scatter(x=df_results['date'], y=df_results['cumulative_costs'],
                       name='Cumulative', line=dict(color='red', width=2), showlegend=False),
            row=6, col=1
        )

        # Update layout
        fig.update_layout(
            height=1400,
            title_text=f"Scenario {scenario_id} - {scenario_data['asset']} - {scenario_data['feature_family']}<br>" +
                       f"Return: {info[0]['total_return']*100:.2f}% (Before: {info[0]['total_return_before_costs']*100:.2f}%)",
            showlegend=True
        )

        fig.update_xaxes(title_text="Date", row=6, col=1)
        fig.update_yaxes(title_text="Price", row=1, col=1)
        fig.update_yaxes(title_text="Reward", row=2, col=1)
        fig.update_yaxes(title_text="Net Worth ($)", row=3, col=1)
        fig.update_yaxes(title_text="Feature Value", row=4, col=1)
        fig.update_yaxes(title_text="Cost ($)", row=5, col=1)
        fig.update_yaxes(title_text="Cumulative ($)", row=6, col=1)

        # Save plot
        fig.write_html(f"../RL_outputs/results/plot/scenario_{scenario_id}_DQN_trading_analysis.html")
        fig.write_image(f"../RL_outputs/results/plot/scenario_{scenario_id}_DQN_trading_analysis.png")        
        
        print(f"✓ Scenario {scenario_id} complete!\n")
        
    except Exception as e:
        print(f"\n❌ ERROR in scenario {scenario_id}: {str(e)}")
        print(f"Continuing to next scenario...\n")
        continue

# Closing banner for the batch run: a blank line, a rule, the headline, a
# second rule, and the number of entries collected in `batch_summary`.
print(
    "\n" + "="*80,
    "BATCH PROCESSING COMPLETE!",
    "="*80,
    f"Processed {len(batch_summary)} scenarios successfully",
    sep="\n",
)


Processing Scenario 1 (1/23)
Asset: ADAUSDT, Feature Family: SMA
Transaction cost: 0.020% per trade
Using 3 features
Train: 853 samples, Val: 395 samples
Improved Environment initialized:
  - Features in df: 4
  - Lookback window: 20
  - Expected observation size: 90
  - Action space: 0=Long, 1=Short (ALWAYS IN MARKET)
Improved Environment initialized:
  - Features in df: 4
  - Lookback window: 20
  - Expected observation size: 90
  - Action space: 0=Long, 1=Short (ALWAYS IN MARKET)  - Transaction cost: 0.0200% per trade

  - Cost per position flip: 0.0400% (2 trades)
  - Initial position: LONG  - Transaction cost: 0.0200% per trade

  - Cost per position flip: 0.0400% (2 trades)
  - Initial position: LONG
Improved Environment initialized:
  - Features in df: 4
  - Lookback window: 20
  - Expected observation size: 90
  - Action space: 0=Long, 1=Short (ALWAYS IN MARKET)
  - Transaction cost: 0.0200% per trade
  - Cost per position flip: 0.0400% (2 trades)
  - Initial position: LONG
Im

## Batch Results Analysis

In [13]:
# Build the per-scenario summary table from the records collected in
# `batch_summary` (presumably one dict per scenario that completed without
# raising inside the batch loop - confirm against the loop body).
df_batch_summary = pd.DataFrame(batch_summary)

# Persist the summary so the results can be inspected without re-running
# the batch.
df_batch_summary.to_csv('../RL_outputs/results/DQL_batch_summary.csv', index=False)

print("Batch Summary Statistics:")
print("="*60)
print(f"Total scenarios processed: {len(df_batch_summary)}")

if df_batch_summary.empty:
    # With no records the frame has no columns at all, so every column
    # lookup below would raise KeyError. Report the situation instead.
    print("\nNo scenarios completed successfully - no statistics to report.")
else:
    # Summing a boolean mask counts the True entries; this avoids building
    # a filtered copy of the frame just to take its length.
    print(f"\nProfitable (after costs): {(df_batch_summary['total_return'] > 0).sum()}")
    print(f"Profitable (before costs): {(df_batch_summary['total_return_before_costs'] > 0).sum()}")
    # Returns are multiplied by 100 so they are printed as percentages.
    print(f"\nMean return (after costs): {df_batch_summary['total_return'].mean()*100:.2f}%")
    print(f"Mean return (before costs): {df_batch_summary['total_return_before_costs'].mean()*100:.2f}%")
    print(f"\nMean Sharpe (after costs): {df_batch_summary['sharpe_ratio'].mean():.3f}")
    print(f"Mean Sharpe (before costs): {df_batch_summary['sharpe_ratio_before_costs'].mean():.3f}")
    print(f"\nMean trade count: {df_batch_summary['trade_count'].mean():.0f}")
    print(f"Mean total costs: ${df_batch_summary['total_transaction_costs'].mean():.2f}")

# Last expression of the cell: the notebook renders the first 10 rows.
df_batch_summary.head(10)

Batch Summary Statistics:
Total scenarios processed: 22

Profitable (after costs): 7
Profitable (before costs): 8

Mean return (after costs): 44.24%
Mean return (before costs): 51.17%

Mean Sharpe (after costs): -0.002
Mean Sharpe (before costs): 0.030

Mean trade count: 116
Mean total costs: $56.25


Unnamed: 0,scenario_id,asset,feature_family,transaction_cost_pct,mean_reward,std_reward,final_net_worth,final_net_worth_before_costs,total_return,total_return_before_costs,...,sharpe_ratio_before_costs,sortino_ratio,max_drawdown,win_rate,win_rate_before_costs,policy_mean_reward,policy_std_reward,total_transaction_costs,trade_count,trade_frequency
0,1,ADAUSDT,SMA,0.02,0.002611,0.06095,1002.639355,1016.371161,0.002639,0.016371,...,0.692669,0.709139,-0.834801,0.529412,0.529412,1.1809,0.330731,16.67177,34,0.090909
1,2,ADAUSDT,EMA,0.02,0.003446,0.060918,1970.79496,2009.007533,0.970795,1.009008,...,0.916043,1.591367,-0.570212,0.513369,0.513369,0.914872,0.190236,40.678113,48,0.128342
2,3,ADAUSDT,RSI,0.02,0.001442,0.060997,929.684944,975.017365,-0.070315,-0.024983,...,0.417373,0.609382,-0.686823,0.540107,0.540107,0.570384,0.241808,72.990997,119,0.318182
3,4,ADAUSDT,MACD,0.02,-5e-06,0.061009,371.427114,376.363447,-0.628573,-0.623637,...,0.010245,0.00102,-0.796279,0.524064,0.524064,0.079665,0.384906,9.268659,33,0.088235
4,5,ADAUSDT,BB,0.02,-0.000742,0.061023,411.036402,423.556866,-0.588964,-0.576443,...,-0.167119,-0.321488,-0.739375,0.497326,0.497326,-0.364021,0.300112,19.64952,75,0.200535
5,6,ADAUSDT,SO,0.02,0.000212,0.061032,408.879038,433.647124,-0.591121,-0.566353,...,0.106345,0.066026,-0.857402,0.52139,0.524064,0.132973,0.161343,48.801576,147,0.393048
6,7,ADAUSDT,ATR,0.02,0.0005,0.061014,454.685266,459.255848,-0.545315,-0.540744,...,0.138967,0.140774,-0.86708,0.5,0.5,0.091328,0.236492,9.488002,25,0.066845
7,8,ADAUSDT,lagged,0.02,0.004508,0.060814,2997.751586,3272.257328,1.997752,2.272257,...,1.254839,2.275295,-0.584037,0.502674,0.502674,1.576298,0.29677,192.381158,219,0.585561
8,9,ADAUSDT,datetime,0.02,-0.003259,0.060949,157.69012,163.995614,-0.84231,-0.836004,...,-0.815963,-1.243326,-0.905909,0.475936,0.475936,-0.911853,0.616489,12.559392,98,0.262032
9,10,ADAUSDT,difference_and_change,0.02,-0.000544,0.061002,312.18827,337.519079,-0.687812,-0.662481,...,-0.073824,-0.132421,-0.91432,0.524064,0.524064,-0.313643,0.295605,65.496356,195,0.52139


In [14]:
# Rank scenarios by after-cost return and display the ten best, restricted
# to the identifying columns plus return, Sharpe ratio and trade count.
print("\nTop 10 Scenarios by Return (After Costs):")
df_batch_summary.loc[
    :,
    ['scenario_id', 'asset', 'feature_family',
     'total_return', 'sharpe_ratio', 'trade_count'],
].nlargest(n=10, columns='total_return')


Top 10 Scenarios by Return (After Costs):


Unnamed: 0,scenario_id,asset,feature_family,total_return,sharpe_ratio,trade_count
18,20,ADAUSDT,TDA_TD_168_SS_1080,15.288732,2.391688,107
7,8,ADAUSDT,lagged,1.997752,1.193718,219
1,2,ADAUSDT,EMA,0.970795,0.90264,48
11,13,ADAUSDT,TDA_TD_72_SS_336,0.886412,0.873525,163
21,23,ADAUSDT,baseline,0.322491,0.625021,186
16,18,ADAUSDT,TDA_TD_168_SS_504,0.26936,0.591972,110
0,1,ADAUSDT,SMA,0.002639,0.683158,34
14,16,ADAUSDT,TDA_TD_72_SS_1080,-0.030893,0.664294,79
2,3,ADAUSDT,RSI,-0.070315,0.384236,119
6,7,ADAUSDT,ATR,-0.545315,0.132064,25


In [15]:
# Collect the scenarios whose return is already negative before any
# transaction costs are charged, then show up to ten of them.
no_edge = df_batch_summary.loc[df_batch_summary['total_return_before_costs'].lt(0)]
print(f"\nScenarios with NO edge (negative before costs): {no_edge.shape[0]}")
print("These feature families may not work for these assets/timeframes")
no_edge.head(10)[['scenario_id', 'asset', 'feature_family', 'total_return_before_costs']]


Scenarios with NO edge (negative before costs): 14
These feature families may not work for these assets/timeframes


Unnamed: 0,scenario_id,asset,feature_family,total_return_before_costs
2,3,ADAUSDT,RSI,-0.024983
3,4,ADAUSDT,MACD,-0.623637
4,5,ADAUSDT,BB,-0.576443
5,6,ADAUSDT,SO,-0.566353
6,7,ADAUSDT,ATR,-0.540744
8,9,ADAUSDT,datetime,-0.836004
9,10,ADAUSDT,difference_and_change,-0.662481
10,11,ADAUSDT,temporal_decomposition,-0.98295
12,14,ADAUSDT,TDA_TD_72_SS_504,-0.585262
13,15,ADAUSDT,TDA_TD_72_SS_720,-0.74847
