# Results Aggregation & Analysis - Improved Version

This notebook aggregates results from all scenarios and provides comprehensive analysis including:
- Before/After cost comparison
- Top performers by various metrics
- Edge detection (scenarios with no predictive power)
- Cost impact analysis

In [1]:
import os
import pandas as pd
import json
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
timeframe = '1d'
# timeframe = '1h'

## Load and Aggregate Results

In [5]:
def get_json_files(path):
    files = os.listdir(path)
    json_files = [f for f in files if f.endswith('.json')]
    return json_files

def get_dataframes(path):
    dfs = []
    for f in get_json_files(path):
        with open(f'{path}/{f}', 'r') as file:
            print(f"Loading {f}")
            data = json.load(file)
            if all(isinstance(value, (int, float, str, bool, type(None))) for value in data.values()):
                df = pd.DataFrame([data])  # Wrap data in a list to create a DataFrame with a single row
            else:
                df = pd.DataFrame(data)
            dfs.append(df)
    return dfs

def main():
    path = '../RL_outputs/results/json'
    dfs = get_dataframes(path)
    df = pd.concat(dfs, ignore_index=True)
    
    ## Load scenario_config
    scenario_config = pd.read_csv(f'../config_files/scenarios_config_{timeframe}_baseline_v2.csv')
    
    # Rename scenario to scenario_id
    scenario_config.rename(columns={'scenario': 'scenario_id'}, inplace=True)

    # Select columns (keep asset and feature_family from config)
    scenario_config_cols = scenario_config[['scenario_id', 'asset', 'feature_family']]

    # Sort by scenario_id
    df = df.sort_values(by='scenario_id')

    # Merge dataframes - use df columns if available, otherwise use scenario_config
    if 'asset' not in df.columns or 'feature_family' not in df.columns:
        df = df.merge(scenario_config_cols, on='scenario_id', how='left')

    # Create scenario_name
    df['scenario_name'] = df['asset'] + '_' + df['feature_family']
    
    # Calculate additional metrics
    if 'total_return_before_costs' in df.columns and 'total_return' in df.columns:
        df['cost_impact_pct'] = ((df['total_return_before_costs'] - df['total_return']) / 
                                 df['total_return_before_costs'].abs()) * 100
        df['cost_impact_pct'] = df['cost_impact_pct'].replace([np.inf, -np.inf], np.nan)
    
    # Flag scenarios with no edge
    if 'total_return_before_costs' in df.columns:
        df['has_edge'] = df['total_return_before_costs'] > 0
        df['profitable_after_costs'] = df['total_return'] > 0

    # Save to csv
    output_file = f'../RL_outputs/results/RL_scenarios_results_with_baseline_{timeframe}.csv'
    df.to_csv(output_file, index=False)
    print(f"\n‚úì Results saved to {output_file}")

    return df

In [6]:
if __name__ == '__main__':
    df = main()
    
print(f"\nLoaded {len(df)} scenarios")
print(f"Columns: {df.columns.tolist()}")

Loading 5_DQN_results.json
Loading 11_DQN_results.json
Loading 20_DQN_results.json
Loading 1_DQN_results.json
Loading 15_DQN_results.json
Loading 10_DQN_results.json
Loading 4_DQN_results.json
Loading 21_DQN_results.json
Loading 14_DQN_results.json
Loading 18_DQN_results.json
Loading 13_DQN_results.json
Loading 7_DQN_results.json
Loading 17_DQN_results.json
Loading 3_DQN_results.json
Loading 8_DQN_results.json
Loading 22_DQN_results.json
Loading 19_DQN_results.json
Loading 6_DQN_results.json
Loading 2_DQN_results.json
Loading 16_DQN_results.json
Loading 23_DQN_results.json
Loading 9_DQN_results.json

‚úì Results saved to ../RL_outputs/results/RL_scenarios_results_with_baseline_1d.csv

Loaded 22 scenarios
Columns: ['scenario_id', 'asset', 'feature_family', 'transaction_cost_pct', 'mean_reward', 'std_reward', 'final_net_worth', 'final_net_worth_before_costs', 'total_return', 'total_return_before_costs', 'sharpe_ratio', 'sharpe_ratio_before_costs', 'sortino_ratio', 'max_drawdown', 'win_ra

In [7]:
df.head()

Unnamed: 0,scenario_id,asset,feature_family,transaction_cost_pct,mean_reward,std_reward,final_net_worth,final_net_worth_before_costs,total_return,total_return_before_costs,...,win_rate_before_costs,policy_mean_reward,policy_std_reward,total_transaction_costs,trade_count,trade_frequency,scenario_name,cost_impact_pct,has_edge,profitable_after_costs
3,1,ADAUSDT,SMA,0.02,0.002611,0.06095,1002.639355,1016.371161,0.002639,0.016371,...,0.529412,1.1809,0.330731,16.67177,34,0.090909,ADAUSDT_SMA,83.878023,True,True
18,2,ADAUSDT,EMA,0.02,0.003446,0.060918,1970.79496,2009.007533,0.970795,1.009008,...,0.513369,0.914872,0.190236,40.678113,48,0.128342,ADAUSDT_EMA,3.787144,True,True
13,3,ADAUSDT,RSI,0.02,0.001442,0.060997,929.684944,975.017365,-0.070315,-0.024983,...,0.540107,0.570384,0.241808,72.990997,119,0.318182,ADAUSDT_RSI,181.455725,False,False
6,4,ADAUSDT,MACD,0.02,-5e-06,0.061009,371.427114,376.363447,-0.628573,-0.623637,...,0.524064,0.079665,0.384906,9.268659,33,0.088235,ADAUSDT_MACD,0.79154,False,False
0,5,ADAUSDT,BB,0.02,-0.000742,0.061023,411.036402,423.556866,-0.588964,-0.576443,...,0.497326,-0.364021,0.300112,19.64952,75,0.200535,ADAUSDT_BB,2.172021,False,False


## Overall Statistics

In [8]:
print("="*80)
print("OVERALL STATISTICS")
print("="*80)

print(f"\nTotal scenarios: {len(df)}")
print(f"Unique assets: {df['asset'].nunique()}")
print(f"Unique feature families: {df['feature_family'].nunique()}")

if 'has_edge' in df.columns:
    print(f"\nüìä Edge Analysis:")
    print(f"  Scenarios with edge (before costs): {df['has_edge'].sum()} ({df['has_edge'].sum()/len(df)*100:.1f}%)")
    print(f"  Scenarios profitable after costs: {df['profitable_after_costs'].sum()} ({df['profitable_after_costs'].sum()/len(df)*100:.1f}%)")
    
    no_edge = len(df[~df['has_edge']])
    edge_but_unprofitable = len(df[df['has_edge'] & ~df['profitable_after_costs']])
    
    print(f"\n  ‚ö†Ô∏è  No edge (negative before costs): {no_edge} ({no_edge/len(df)*100:.1f}%)")
    print(f"  ‚ö†Ô∏è  Edge killed by costs: {edge_but_unprofitable} ({edge_but_unprofitable/len(df)*100:.1f}%)")

print(f"\nüí∞ Returns:")
if 'total_return' in df.columns:
    print(f"  Mean return (after costs): {df['total_return'].mean()*100:.2f}%")
if 'total_return_before_costs' in df.columns:
    print(f"  Mean return (before costs): {df['total_return_before_costs'].mean()*100:.2f}%")
if 'final_net_worth' in df.columns:
    print(f"  Mean final net worth: ${df['final_net_worth'].mean():.2f}")

print(f"\nüìà Risk Metrics:")
if 'sharpe_ratio' in df.columns:
    print(f"  Mean Sharpe (after costs): {df['sharpe_ratio'].mean():.3f}")
if 'sharpe_ratio_before_costs' in df.columns:
    print(f"  Mean Sharpe (before costs): {df['sharpe_ratio_before_costs'].mean():.3f}")
if 'win_rate' in df.columns:
    print(f"  Mean win rate: {df['win_rate'].mean()*100:.2f}%")

print(f"\nüí∏ Trading Costs:")
if 'total_transaction_costs' in df.columns:
    print(f"  Mean total costs: ${df['total_transaction_costs'].mean():.2f}")
if 'trade_count' in df.columns:
    print(f"  Mean trade count: {df['trade_count'].mean():.0f}")
if 'cost_impact_pct' in df.columns:
    print(f"  Mean cost impact: {df['cost_impact_pct'].mean():.1f}% of gross returns")

print("="*80)

OVERALL STATISTICS

Total scenarios: 22
Unique assets: 1
Unique feature families: 22

üìä Edge Analysis:
  Scenarios with edge (before costs): 8 (36.4%)
  Scenarios profitable after costs: 7 (31.8%)

  ‚ö†Ô∏è  No edge (negative before costs): 14 (63.6%)
  ‚ö†Ô∏è  Edge killed by costs: 1 (4.5%)

üí∞ Returns:
  Mean return (after costs): 44.24%
  Mean return (before costs): 51.17%
  Mean final net worth: $1442.41

üìà Risk Metrics:
  Mean Sharpe (after costs): -0.002
  Mean Sharpe (before costs): 0.030
  Mean win rate: 50.85%

üí∏ Trading Costs:
  Mean total costs: $56.25
  Mean trade count: 116
  Mean cost impact: 642.6% of gross returns


## Visualizations

In [9]:
# 1. Final Net Worth by Scenario
fig = px.bar(df.sort_values('final_net_worth', ascending=False), 
             x='scenario_name', y='final_net_worth', 
             title='Final Net Worth by Scenario',
             color='final_net_worth',
             color_continuous_scale='RdYlGn')
fig.add_hline(y=1000, line_dash="dash", line_color="red", 
              annotation_text="Initial Investment", annotation_position="top left")
fig.update_xaxes(tickangle=45)
fig.update_layout(height=600)
fig.show()

In [10]:
# 2. Before vs After Costs Comparison
if 'total_return_before_costs' in df.columns:
    fig = go.Figure()
    
    df_sorted = df.sort_values('total_return_before_costs', ascending=False)
    
    fig.add_trace(go.Bar(
        x=df_sorted['scenario_name'],
        y=df_sorted['total_return_before_costs'] * 100,
        name='Before Costs',
        marker_color='lightblue'
    ))
    
    fig.add_trace(go.Bar(
        x=df_sorted['scenario_name'],
        y=df_sorted['total_return'] * 100,
        name='After Costs',
        marker_color='darkblue'
    ))
    
    fig.update_layout(
        title='Returns: Before vs After Transaction Costs',
        xaxis_title='Scenario',
        yaxis_title='Return (%)',
        barmode='group',
        height=600,
        xaxis_tickangle=45
    )
    fig.add_hline(y=0, line_dash="dash", line_color="red")
    fig.show()

In [11]:
# 3. Cost Impact Analysis
if 'cost_impact_pct' in df.columns:
    fig = px.bar(df.sort_values('cost_impact_pct', ascending=False), 
                 x='scenario_name', y='cost_impact_pct',
                 title='Cost Impact (% of Gross Returns)',
                 color='cost_impact_pct',
                 color_continuous_scale='Reds')
    fig.add_hline(y=30, line_dash="dash", line_color="orange",
                  annotation_text="30% threshold", annotation_position="top left")
    fig.update_xaxes(tickangle=45)
    fig.update_layout(height=600)
    fig.show()

In [12]:
# 4. Sharpe Ratio Comparison
if 'sharpe_ratio_before_costs' in df.columns:
    fig = go.Figure()
    
    df_sorted = df.sort_values('sharpe_ratio', ascending=False)
    
    fig.add_trace(go.Scatter(
        x=df_sorted['scenario_name'],
        y=df_sorted['sharpe_ratio_before_costs'],
        mode='markers',
        name='Before Costs',
        marker=dict(size=8, color='lightblue')
    ))
    
    fig.add_trace(go.Scatter(
        x=df_sorted['scenario_name'],
        y=df_sorted['sharpe_ratio'],
        mode='markers',
        name='After Costs',
        marker=dict(size=8, color='darkblue')
    ))
    
    fig.update_layout(
        title='Sharpe Ratio: Before vs After Costs',
        xaxis_title='Scenario',
        yaxis_title='Sharpe Ratio',
        height=600,
        xaxis_tickangle=45
    )
    fig.add_hline(y=1.0, line_dash="dash", line_color="green",
                  annotation_text="Sharpe > 1.0", annotation_position="top left")
    fig.show()

In [13]:
# 5. Win Rate Distribution
if 'win_rate' in df.columns:
    fig = px.bar(df.sort_values('win_rate', ascending=False), 
                 x='scenario_name', y='win_rate',
                 title='Win Rate by Scenario',
                 color='win_rate',
                 color_continuous_scale='RdYlGn')
    fig.add_hline(y=0.5, line_dash="dash", line_color="red", 
                  annotation_text="50% Win Rate", annotation_position="top left")
    fig.update_xaxes(tickangle=45)
    fig.update_layout(height=600)
    fig.show()

In [14]:
# 6. Trade Frequency vs Performance
if 'trade_frequency' in df.columns and 'total_return' in df.columns:
    fig = px.scatter(df, x='trade_frequency', y='total_return',
                     title='Trade Frequency vs Returns',
                     hover_data=['scenario_name', 'asset', 'feature_family'],
                     color='total_return',
                     color_continuous_scale='RdYlGn')
    fig.update_xaxes(title='Trade Frequency (% of days)')
    fig.update_yaxes(title='Total Return')
    fig.add_hline(y=0, line_dash="dash", line_color="red")
    fig.show()

## Edge Detection Analysis

In [15]:
if 'has_edge' in df.columns:
    print("="*80)
    print("EDGE DETECTION ANALYSIS")
    print("="*80)
    
    # Scenarios with NO edge
    no_edge = df[~df['has_edge']].sort_values('total_return_before_costs')
    print(f"\n‚ö†Ô∏è  Scenarios with NO predictive edge: {len(no_edge)}")
    print("\nWorst 10:")
    print(no_edge[['scenario_id', 'asset', 'feature_family', 'total_return_before_costs']].head(10))
    
    # Scenarios with edge but killed by costs
    edge_killed = df[df['has_edge'] & ~df['profitable_after_costs']].sort_values('cost_impact_pct', ascending=False)
    print(f"\n\n‚ö†Ô∏è  Scenarios with edge KILLED by costs: {len(edge_killed)}")
    if len(edge_killed) > 0:
        print("\nTop 10 by cost impact:")
        print(edge_killed[['scenario_id', 'asset', 'feature_family', 
                          'total_return_before_costs', 'total_return', 'cost_impact_pct']].head(10))
    
    # Profitable scenarios
    profitable = df[df['profitable_after_costs']].sort_values('total_return', ascending=False)
    print(f"\n\n‚úì Profitable scenarios (after costs): {len(profitable)}")
    print("\nTop 10:")
    print(profitable[['scenario_id', 'asset', 'feature_family', 
                      'total_return', 'sharpe_ratio', 'win_rate']].head(10))
    
    print("\n" + "="*80)

EDGE DETECTION ANALYSIS

‚ö†Ô∏è  Scenarios with NO predictive edge: 14

Worst 10:
    scenario_id    asset          feature_family  total_return_before_costs
1            11  ADAUSDT  temporal_decomposition                  -0.982950
7            21  ADAUSDT                  FRUITS                  -0.955709
12           17  ADAUSDT       TDA_TD_168_SS_336                  -0.953996
16           19  ADAUSDT       TDA_TD_168_SS_720                  -0.919785
21            9  ADAUSDT                datetime                  -0.836004
15           22  ADAUSDT                    NetF                  -0.828790
4            15  ADAUSDT        TDA_TD_72_SS_720                  -0.748470
5            10  ADAUSDT   difference_and_change                  -0.662481
6             4  ADAUSDT                    MACD                  -0.623637
8            14  ADAUSDT        TDA_TD_72_SS_504                  -0.585262


‚ö†Ô∏è  Scenarios with edge KILLED by costs: 1

Top 10 by cost impact:
    scena

## Top Performers Analysis

In [16]:
def get_top3(df):
    """
    For each asset, return the top 3 feature_family based on various metrics
    """
    list_assets = df['asset'].unique()

    results = {
        'final_net_worth': pd.DataFrame(),
        'total_return': pd.DataFrame(),
        'sharpe_ratio': pd.DataFrame(),
        'sortino_ratio': pd.DataFrame(),
        'max_drawdown': pd.DataFrame(),
        'win_rate': pd.DataFrame()
    }
    
    # Add before_costs metrics if available
    if 'total_return_before_costs' in df.columns:
        results['total_return_before_costs'] = pd.DataFrame()
    if 'sharpe_ratio_before_costs' in df.columns:
        results['sharpe_ratio_before_costs'] = pd.DataFrame()

    for asset in list_assets:
        df_asset = df[df['asset'] == asset]
        
        # For each metric, get top 3
        for metric, ascending in [
            ('final_net_worth', False),
            ('total_return', False),
            ('sharpe_ratio', False),
            ('sortino_ratio', False),
            ('max_drawdown', True),  # Lower is better
            ('win_rate', False)
        ]:
            if metric in df_asset.columns:
                top3 = df_asset.sort_values(by=metric, ascending=ascending).head(3)
                results[metric] = pd.concat([results[metric], 
                                            top3[['scenario_id', 'asset', 'feature_family', metric]]])
        
        # Before costs metrics
        if 'total_return_before_costs' in df_asset.columns:
            top3 = df_asset.sort_values(by='total_return_before_costs', ascending=False).head(3)
            results['total_return_before_costs'] = pd.concat([
                results['total_return_before_costs'],
                top3[['scenario_id', 'asset', 'feature_family', 'total_return_before_costs']]
            ])
        
        if 'sharpe_ratio_before_costs' in df_asset.columns:
            top3 = df_asset.sort_values(by='sharpe_ratio_before_costs', ascending=False).head(3)
            results['sharpe_ratio_before_costs'] = pd.concat([
                results['sharpe_ratio_before_costs'],
                top3[['scenario_id', 'asset', 'feature_family', 'sharpe_ratio_before_costs']]
            ])

    return results

In [18]:
top3_results = get_top3(df)

# Save top 3 results
for metric, df_top3 in top3_results.items():
    if len(df_top3) > 0:
        filename = f'../RL_outputs/results/top3_{metric}_with_baseline_{timeframe}.csv'
        df_top3.to_csv(filename, index=False)
        print(f"Saved: {filename}")

Saved: ../RL_outputs/results/top3_final_net_worth_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_total_return_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_sharpe_ratio_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_sortino_ratio_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_max_drawdown_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_win_rate_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_total_return_before_costs_with_baseline_1d.csv
Saved: ../RL_outputs/results/top3_sharpe_ratio_before_costs_with_baseline_1d.csv


## Display Top 3 by Each Metric

In [19]:
# Final Net Worth
print("Top 3 by Final Net Worth (per asset):")
top3_results['final_net_worth'].sort_values(by='final_net_worth', ascending=False).head(15)

Top 3 by Final Net Worth (per asset):


Unnamed: 0,scenario_id,asset,feature_family,final_net_worth
2,20,ADAUSDT,TDA_TD_168_SS_1080,16288.731561
14,8,ADAUSDT,lagged,2997.751586
18,2,ADAUSDT,EMA,1970.79496


In [20]:
# Total Return (After Costs)
print("Top 3 by Total Return - After Costs (per asset):")
top3_results['total_return'].sort_values(by='total_return', ascending=False).head(15)

Top 3 by Total Return - After Costs (per asset):


Unnamed: 0,scenario_id,asset,feature_family,total_return
2,20,ADAUSDT,TDA_TD_168_SS_1080,15.288732
14,8,ADAUSDT,lagged,1.997752
18,2,ADAUSDT,EMA,0.970795


In [21]:
# Total Return (Before Costs)
if 'total_return_before_costs' in top3_results:
    print("Top 3 by Total Return - Before Costs (per asset):")
    top3_results['total_return_before_costs'].sort_values(by='total_return_before_costs', ascending=False).head(15)

Top 3 by Total Return - Before Costs (per asset):


In [22]:
# Sharpe Ratio (After Costs)
print("Top 3 by Sharpe Ratio - After Costs (per asset):")
top3_results['sharpe_ratio'].sort_values(by='sharpe_ratio', ascending=False).head(15)

Top 3 by Sharpe Ratio - After Costs (per asset):


Unnamed: 0,scenario_id,asset,feature_family,sharpe_ratio
2,20,ADAUSDT,TDA_TD_168_SS_1080,2.391688
14,8,ADAUSDT,lagged,1.193718
18,2,ADAUSDT,EMA,0.90264


In [23]:
# Sharpe Ratio (After Costs)
print("Top 3 by Sharpe Ratio - After Costs (per asset):")
top3_results['sharpe_ratio'].sort_values(by='sharpe_ratio', ascending=False).tail(15)

Top 3 by Sharpe Ratio - After Costs (per asset):


Unnamed: 0,scenario_id,asset,feature_family,sharpe_ratio
2,20,ADAUSDT,TDA_TD_168_SS_1080,2.391688
14,8,ADAUSDT,lagged,1.193718
18,2,ADAUSDT,EMA,0.90264


In [24]:
# Sharpe Ratio (Before Costs)
if 'sharpe_ratio_before_costs' in top3_results:
    print("Top 3 by Sharpe Ratio - Before Costs (per asset):")
    top3_results['sharpe_ratio_before_costs'].sort_values(by='sharpe_ratio_before_costs', ascending=False).head(15)

Top 3 by Sharpe Ratio - Before Costs (per asset):


In [25]:
# Win Rate
print("Top 3 by Win Rate (per asset):")
top3_results['win_rate'].sort_values(by='win_rate', ascending=False).head(15)

Top 3 by Win Rate (per asset):


Unnamed: 0,scenario_id,asset,feature_family,win_rate
2,20,ADAUSDT,TDA_TD_168_SS_1080,0.574866
13,3,ADAUSDT,RSI,0.540107
3,1,ADAUSDT,SMA,0.529412


In [26]:
# Max Drawdown (Lower is better)
print("Top 3 by Max Drawdown - Lowest (per asset):")
top3_results['max_drawdown'].sort_values(by='max_drawdown', ascending=True).head(15)

Top 3 by Max Drawdown - Lowest (per asset):


Unnamed: 0,scenario_id,asset,feature_family,max_drawdown
1,11,ADAUSDT,temporal_decomposition,-0.990222
7,21,ADAUSDT,FRUITS,-0.971173
12,17,ADAUSDT,TDA_TD_168_SS_336,-0.969549


## Feature Family Performance

In [27]:
# Aggregate by feature family
print("="*80)
print("PERFORMANCE BY FEATURE FAMILY")
print("="*80)

feature_family_stats = df.groupby('feature_family').agg({
    'scenario_id': 'count',
    'total_return': 'mean',
    'total_return_before_costs': 'mean' if 'total_return_before_costs' in df.columns else lambda x: np.nan,
    'sharpe_ratio': 'mean',
    'win_rate': 'mean',
    'total_transaction_costs': 'mean',
    'has_edge': 'sum' if 'has_edge' in df.columns else lambda x: np.nan,
    'profitable_after_costs': 'sum' if 'profitable_after_costs' in df.columns else lambda x: np.nan
}).round(4)

feature_family_stats.columns = ['count', 'mean_return_after', 'mean_return_before', 
                                'mean_sharpe', 'mean_win_rate', 'mean_costs',
                                'scenarios_with_edge', 'profitable_scenarios']

feature_family_stats = feature_family_stats.sort_values('mean_return_after', ascending=False)
feature_family_stats

PERFORMANCE BY FEATURE FAMILY


Unnamed: 0_level_0,count,mean_return_after,mean_return_before,mean_sharpe,mean_win_rate,mean_costs,scenarios_with_edge,profitable_scenarios
feature_family,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TDA_TD_168_SS_1080,1,15.2887,16.0012,2.3917,0.5749,258.8666,1,1
lagged,1,1.9978,2.2723,1.1937,0.5027,192.3812,1,1
EMA,1,0.9708,1.009,0.9026,0.5134,40.6781,1,1
TDA_TD_72_SS_336,1,0.8864,1.0135,0.8735,0.5267,120.692,1,1
baseline,1,0.3225,0.4247,0.625,0.5107,75.9755,1,1
TDA_TD_168_SS_504,1,0.2694,0.3265,0.592,0.5053,86.733,1,1
SMA,1,0.0026,0.0164,0.6832,0.5294,16.6718,1,1
TDA_TD_72_SS_1080,1,-0.0309,0.0002,0.6643,0.5214,25.6112,1,0
RSI,1,-0.0703,-0.025,0.3842,0.5401,72.991,0,0
ATR,1,-0.5453,-0.5407,0.1321,0.5,9.488,0,0


In [28]:
# Visualize feature family performance
fig = px.bar(feature_family_stats.reset_index(), 
             x='feature_family', y='mean_return_after',
             title='Mean Return by Feature Family (After Costs)',
             color='mean_return_after',
             color_continuous_scale='RdYlGn')
fig.add_hline(y=0, line_dash="dash", line_color="red")
fig.update_xaxes(tickangle=45)
fig.show()

## Summary Report

In [29]:
print("\n" + "="*80)
print("FINAL SUMMARY")
print("="*80)

print(f"\nüìä Total Scenarios Analyzed: {len(df)}")

if 'has_edge' in df.columns:
    edge_count = df['has_edge'].sum()
    profitable_count = df['profitable_after_costs'].sum()
    
    print(f"\n‚úì Scenarios with edge: {edge_count}/{len(df)} ({edge_count/len(df)*100:.1f}%)")
    print(f"‚úì Profitable after costs: {profitable_count}/{len(df)} ({profitable_count/len(df)*100:.1f}%)")
    
    if edge_count > 0:
        survival_rate = (profitable_count / edge_count) * 100
        print(f"\nüìà Cost Survival Rate: {survival_rate:.1f}%")
        print(f"   (% of strategies with edge that remain profitable after costs)")

best_scenario = df.loc[df['total_return'].idxmax()]
print(f"\nüèÜ Best Scenario:")
print(f"   ID: {best_scenario['scenario_id']}")
print(f"   Asset: {best_scenario['asset']}")
print(f"   Feature Family: {best_scenario['feature_family']}")
print(f"   Return (after costs): {best_scenario['total_return']*100:.2f}%")
if 'total_return_before_costs' in df.columns:
    print(f"   Return (before costs): {best_scenario['total_return_before_costs']*100:.2f}%")
print(f"   Sharpe Ratio: {best_scenario['sharpe_ratio']:.3f}")

print("\n" + "="*80)
print("Analysis complete! Check the CSV files and visualizations above.")
print("="*80)


FINAL SUMMARY

üìä Total Scenarios Analyzed: 22

‚úì Scenarios with edge: 8/22 (36.4%)
‚úì Profitable after costs: 7/22 (31.8%)

üìà Cost Survival Rate: 87.5%
   (% of strategies with edge that remain profitable after costs)

üèÜ Best Scenario:
   ID: 20
   Asset: ADAUSDT
   Feature Family: TDA_TD_168_SS_1080
   Return (after costs): 1528.87%
   Return (before costs): 1600.12%
   Sharpe Ratio: 2.392

Analysis complete! Check the CSV files and visualizations above.
