# BNB (Binance Coin) Trading Model Training

## Overview
This notebook implements a comprehensive reinforcement learning trading strategy for BNB using the PPO algorithm.

**Key Features:**
- Zero data leakage methodology
- Walk-forward validation
- Hyperparameter optimization
- Statistical significance testing
- Risk-adjusted performance metrics

**Trading Strategy:**
- Algorithm: Proximal Policy Optimization (PPO)
- Technical Indicators: MACD, RSI, CCI, DX
- Action Space: Buy, Sell, Hold
- Risk Management: Position sizing and portfolio constraints

In [1]:
# Section 1: Environment Setup and Dependencies
import sys
sys.path.append('..')
sys.path.append('../..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# FinRL imports
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

# IMPORTANT: Import our comprehensive patch instead of original FinRL
from finrl_comprehensive_patch import create_safe_finrl_env, safe_backtest_model

# Stable Baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Statistical analysis
from scipy import stats
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna

# Import our patch

# Plot defaults applied to every figure drawn later in the notebook
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Status banner printed once the setup cell has run to this point
for _status_line in (
    "✅ Environment setup complete",
    "✅ Environment setup complete for Binance Coin trading",
    "🔧 Using comprehensive FinRL patch for error-free training",
):
    print(_status_line)


ModuleNotFoundError: No module named 'pyfolio'

In [None]:
# Section 2: Data Loading and Preprocessing
def load_bnb_data(csv_path='../../data/BNBUSDT_5m.csv'):
    """Load BNB price data and normalise it to the layout FinRL expects.

    The CSV at ``csv_path`` is tried first. If it does not exist, two years
    of ``BNB-USD`` history ending today are fetched with ``YahooDownloader``.

    Args:
        csv_path: Candle CSV to read. The default is the path this notebook
            has always used, so calling ``load_bnb_data()`` is unchanged.

    Returns:
        DataFrame with columns ``date, open, high, low, close, volume, tic``.
        ``date`` is always datetime64, rows containing NaN are removed, and
        rows are sorted by ``date`` with a contiguous 0..n-1 index.

    Raises:
        KeyError: If no timestamp column/index can be found, or a required
            OHLCV column is missing.
    """
    try:
        df = pd.read_csv(csv_path)
        print(f"Loaded {len(df)} rows of BNB data")
    except FileNotFoundError:
        print("CSV not found, downloading fresh data...")
        # Fallback to download if CSV doesn't exist
        end_date = datetime.now()
        start_date = end_date - timedelta(days=365*2)  # 2 years

        df = YahooDownloader(start_date=start_date.strftime('%Y-%m-%d'),
                             end_date=end_date.strftime('%Y-%m-%d'),
                             ticker_list=['BNB-USD']).fetch_data()

    # Locate the timestamp: exchange-style 'open_time', an existing 'date'
    # column, or (last resort) a datetime-like index.
    if 'open_time' in df.columns:
        # NOTE(review): numeric epoch values would be read as nanoseconds by
        # pd.to_datetime below - confirm the CSV stores parseable date strings.
        df['date'] = df['open_time']
    elif 'date' not in df.columns:
        index_is_timestamp = (
            df.index.name == 'date' or isinstance(df.index, pd.DatetimeIndex)
        )
        if not index_is_timestamp:
            raise KeyError(
                "BNB data has no 'open_time' or 'date' column and its index "
                "is not a datetime index"
            )
        df = df.rename_axis('date').reset_index()

    # Always parse: a 'date' column read from CSV (or returned by the
    # downloader) may hold plain strings, and later cells use `.dt` on it.
    df['date'] = pd.to_datetime(df['date'])

    # Required columns for FinRL
    required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']

    # Map alternative column names onto the FinRL names when present
    column_mapping = {
        'open_price': 'open',
        'high_price': 'high',
        'low_price': 'low',
        'close_price': 'close',
    }
    for old_name, new_name in column_mapping.items():
        if old_name in df.columns:
            df[new_name] = df[old_name]

    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        raise KeyError(f"BNB data is missing required column(s): {missing}")

    # Copy so the assignments below never write into a view of the raw frame
    keep = required_cols + (['tic'] if 'tic' in df.columns else [])
    df = df[keep].copy()

    # Add ticker if not present
    if 'tic' not in df.columns:
        df['tic'] = 'BNBUSDT'

    # Drop incomplete rows first so the final index has no gaps
    df = df.dropna().sort_values('date').reset_index(drop=True)

    print(f"📊 Data shape: {df.shape}")
    print(f"📅 Date range: {df['date'].min()} to {df['date'].max()}")
    print(f"💰 Price range: ${df['close'].min():.2f} - ${df['close'].max():.2f}")

    return df

# Load the data once; the feature-engineering cell below builds on `raw_data`
raw_data = load_bnb_data()

# Display basic statistics (last expression of the cell, so the notebook renders it)
raw_data.describe()

In [None]:
# Section 3: Feature Engineering
def create_bnb_features(df):
    """Add FinRL technical indicators plus per-ticker rolling features.

    Runs ``FeatureEngineer`` with MACD, RSI(30), CCI(30) and DX(30), then
    appends rolling volatility (5, 20), momentum (5, 10), a 10-period volume
    average and the volume/average ratio, each computed within a ticker.
    Rows containing NaN are dropped before the frame is returned.
    """
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=['macd', 'rsi_30', 'cci_30', 'dx_30'],
        use_vix=False,
        use_turbulence=False,
        user_defined_feature=False
    )

    # Order rows by (date, tic) and renumber so group results align by label
    processed_data = (
        engineer.preprocess_data(df)
        .sort_values(['date', 'tic'])
        .reset_index(drop=True)
    )

    # Rolling standard deviation of close, per ticker
    for window in (5, 20):
        rolling_std = processed_data.groupby('tic')['close'].rolling(window).std()
        processed_data[f'volatility_{window}'] = rolling_std.reset_index(0, drop=True)

    # Percentage change of close over a fixed lag, per ticker
    for lag in (5, 10):
        lagged_change = processed_data.groupby('tic')['close'].pct_change(lag)
        processed_data[f'momentum_{lag}'] = lagged_change.reset_index(0, drop=True)

    # Volume relative to its own 10-period mean
    volume_mean = processed_data.groupby('tic')['volume'].rolling(10).mean()
    processed_data['volume_sma_10'] = volume_mean.reset_index(0, drop=True)
    processed_data['volume_ratio'] = processed_data['volume'] / processed_data['volume_sma_10']

    # Warm-up rows of the rolling windows are NaN; remove them
    processed_data = processed_data.dropna().reset_index(drop=True)

    print(f"📈 Features created. Final shape: {processed_data.shape}")
    print(f"🔧 Technical indicators: {processed_data.columns.tolist()}")

    return processed_data

# Build the feature frame used by every later cell
processed_data = create_bnb_features(raw_data)

# 2x2 overview of the main indicators
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('BNB Technical Indicators', fontsize=16)
(price_ax, rsi_ax), (macd_ax, volume_ax) = axes
feature_dates = processed_data['date']

# Top-left: close price
price_ax.plot(feature_dates, processed_data['close'], label='Close Price')
price_ax.set(title='BNB Price', ylabel='Price ($)')
price_ax.legend()

# Top-right: RSI with the 70 / 30 guide lines
rsi_ax.plot(feature_dates, processed_data['rsi_30'])
for level, line_color in ((70, 'r'), (30, 'g')):
    rsi_ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.7)
rsi_ax.set(title='RSI (30)', ylabel='RSI')

# Bottom-left: MACD around its zero line
macd_ax.plot(feature_dates, processed_data['macd'], label='MACD')
macd_ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
macd_ax.set(title='MACD', ylabel='MACD')
macd_ax.legend()

# Bottom-right: traded volume
volume_ax.plot(feature_dates, processed_data['volume'])
volume_ax.set(title='Trading Volume', ylabel='Volume')

plt.tight_layout()
plt.show()

In [None]:
# Section 4: Data Splitting with Zero Data Leakage
def create_temporal_splits(df, train_ratio=0.7, validation_ratio=0.15):
    """Split a frame chronologically into train / validation / test parts.

    Rows are sorted by ``date`` and cut by position, so every training row
    precedes every validation row, which precedes every test row.

    Each returned frame carries its own 0..n-1 index. Previously the
    validation and test frames kept row labels offset by the preceding
    splits, which breaks label-aligned arithmetic against other 0-based
    series and any consumer that looks rows up by a 0-based label.

    NOTE(review): the cut is by row count; with several tickers per
    timestamp a boundary could fall inside one timestamp - confirm the
    input is single-ticker.

    Args:
        df: Frame with at least a ``date`` column.
        train_ratio: Fraction of rows for training, in (0, 1].
        validation_ratio: Fraction of rows for validation, >= 0. Whatever
            remains after train + validation is the test set.

    Returns:
        Tuple ``(train_data, validation_data, test_data)``.

    Raises:
        ValueError: If the ratios are out of range or sum to more than 1.
    """
    ratios_valid = (
        0 < train_ratio <= 1
        and validation_ratio >= 0
        # small tolerance so sums like 0.7 + 0.3 are not rejected by rounding
        and train_ratio + validation_ratio <= 1 + 1e-9
    )
    if not ratios_valid:
        raise ValueError(
            "train_ratio must be in (0, 1], validation_ratio must be >= 0 and "
            f"their sum must not exceed 1 (got {train_ratio} and {validation_ratio})"
        )

    df = df.sort_values('date').reset_index(drop=True)
    n = len(df)

    train_end = int(n * train_ratio)
    val_end = int(n * (train_ratio + validation_ratio))

    # reset_index returns a new frame, so the splits never alias `df`
    train_data = df.iloc[:train_end].reset_index(drop=True)
    validation_data = df.iloc[train_end:val_end].reset_index(drop=True)
    test_data = df.iloc[val_end:].reset_index(drop=True)

    print(f"📊 Data Splits:")
    print(f"   Training: {len(train_data)} samples ({train_data['date'].min()} to {train_data['date'].max()})")
    print(f"   Validation: {len(validation_data)} samples ({validation_data['date'].min()} to {validation_data['date'].max()})")
    print(f"   Testing: {len(test_data)} samples ({test_data['date'].min()} to {test_data['date'].max()})")

    return train_data, validation_data, test_data

# Cut the feature frame into chronological train / validation / test parts
train_data, validation_data, test_data = create_temporal_splits(processed_data)

# One line per split on a shared axis to show that they do not overlap in time
plt.figure(figsize=(15, 6))
for split_label, split_frame in (
    ('Training', train_data),
    ('Validation', validation_data),
    ('Testing', test_data),
):
    plt.plot(split_frame['date'], split_frame['close'], label=split_label, alpha=0.8)

plt.title('BNB Data Splits - Temporal Sequence')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Section 5: Trading Environment and Hyperparameter Optimization
def create_bnb_trading_env(data, initial_amount=100000, transaction_cost_pct=0.001):
    """Build the patched FinRL trading environment for one data split.

    The ``def`` line of this helper was missing from the cell, leaving a
    de-indented body followed by an indented ``return`` (a syntax error) and
    the free names ``data``, ``initial_amount`` and ``transaction_cost_pct``.
    The header is reconstructed from the body and from the call sites, which
    pass a single frame: ``create_bnb_trading_env(train_data)``.

    NOTE(review): the two default values are assumptions, since the original
    signature is not recoverable from this notebook. Confirm the starting
    capital and the fee rate before trusting any backtest numbers.

    Args:
        data: Feature frame for the split (train, validation or test).
        initial_amount: Starting cash handed to the environment.
        transaction_cost_pct: Proportional cost applied to both buys and sells.

    Returns:
        The environment object produced by ``create_safe_finrl_env``.
    """
    # Use comprehensive patch instead of buggy FinRL StockTradingEnv
    env = create_safe_finrl_env(
        df=data,
        initial_amount=initial_amount,
        buy_cost_pct=transaction_cost_pct,
        sell_cost_pct=transaction_cost_pct,
        hmax=150,  # BNB-appropriate max shares
        tech_indicator_list=['macd', 'rsi_30', 'cci_30', 'dx_30']
    )

    return env

def optimize_hyperparameters(train_data, validation_data, n_trials=20):
    """Search PPO hyperparameters with Optuna.

    Each trial trains a PPO agent for 5000 timesteps on ``train_data`` and is
    scored by the summed reward of one deterministic episode on
    ``validation_data``.

    Args:
        train_data: Frame used to build the training environment.
        validation_data: Frame used to build the scoring environment.
        n_trials: Number of Optuna trials to run.

    Returns:
        Dict of the best hyperparameters. If every trial failed an empty dict
        is returned, so a caller that uses ``dict.get`` with defaults falls
        back to its own defaults instead of the parameters of a failed trial.
    """
    # Local import keeps this cell self-contained; torch is already required
    # by stable-baselines3.
    import torch

    # Score assigned to a trial that raised; also used to detect "all failed"
    failed_trial_score = -1e6

    # Prefer Apple's Metal backend when it exists, otherwise let SB3 choose.
    # A hard-coded 'mps' made every trial fail on non-Apple hardware.
    mps_backend = getattr(torch.backends, 'mps', None)
    device = 'mps' if mps_backend is not None and mps_backend.is_available() else 'auto'

    def objective(trial):
        # Suggest hyperparameters
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
        n_steps = trial.suggest_int('n_steps', 1024, 4096, step=512)
        batch_size = trial.suggest_int('batch_size', 32, 256, step=32)
        n_epochs = trial.suggest_int('n_epochs', 5, 20)
        gamma = trial.suggest_float('gamma', 0.9, 0.999)
        clip_range = trial.suggest_float('clip_range', 0.1, 0.3)

        try:
            # Build the env inside the factory so the lambda does not depend
            # on a name that is rebound right after it is defined.
            env_train = DummyVecEnv([lambda: create_bnb_trading_env(train_data)])

            # Create model with suggested hyperparameters
            model = PPO(
                'MlpPolicy',
                env_train,
                learning_rate=learning_rate,
                n_steps=n_steps,
                batch_size=batch_size,
                n_epochs=n_epochs,
                gamma=gamma,
                clip_range=clip_range,
                verbose=0,
                device=device
            )

            # Train for short period
            model.learn(total_timesteps=5000)

            # Evaluate on validation data: one deterministic episode
            env_val = DummyVecEnv([lambda: create_bnb_trading_env(validation_data)])

            obs = env_val.reset()
            total_reward = 0.0
            done = False

            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, dones, _ = env_val.step(action)
                total_reward += float(reward[0])
                # The vectorised env returns arrays; read the single env's flag
                done = bool(dones[0])

            return total_reward

        except Exception as e:
            # Deliberate best-effort: one broken configuration must not abort
            # the whole search, but it is reported and scored as a failure.
            print(f"Trial failed: {e}")
            return failed_trial_score

    # Run optimization
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    if study.best_value <= failed_trial_score:
        print("⚠️ Every trial failed; returning no hyperparameters (see errors above)")
        return {}

    print(f"🎯 Best hyperparameters found:")
    for key, value in study.best_params.items():
        print(f"   {key}: {value}")

    return study.best_params

# Run hyperparameter optimization (10 trials here; the function's own default is 20)
print("🔍 Starting hyperparameter optimization...")
best_params = optimize_hyperparameters(train_data, validation_data, n_trials=10)


In [None]:
# Section 6: Model Training with Optimized Parameters
def train_bnb_model(train_data, best_params, timesteps=100000):
    """Train a PPO agent on the training split and save it to disk.

    Args:
        train_data: Frame used to build the training environment.
        best_params: Dict of PPO hyperparameters; any missing key falls back
            to the default shown in the body. ``None`` is treated as ``{}``.
        timesteps: Total environment steps to train for.

    Returns:
        The trained ``PPO`` model. The model is also written to
        ``bnb_ppo_model`` in the working directory, and TensorBoard logs go
        to ``./bnb_ppo_tensorboard/``.
    """
    # Local import keeps this cell self-contained; torch is already required
    # by stable-baselines3.
    import torch

    best_params = best_params or {}

    print(f"🚀 Training BNB model with {timesteps} timesteps...")

    # Prefer Apple's Metal backend when it exists, otherwise let SB3 choose;
    # a hard-coded 'mps' crashes on machines without it.
    mps_backend = getattr(torch.backends, 'mps', None)
    device = 'mps' if mps_backend is not None and mps_backend.is_available() else 'auto'

    # Create training environment
    env_train = DummyVecEnv([lambda: create_bnb_trading_env(train_data)])

    # Create model with best parameters
    model = PPO(
        'MlpPolicy',
        env_train,
        learning_rate=best_params.get('learning_rate', 3e-4),
        n_steps=best_params.get('n_steps', 2048),
        batch_size=best_params.get('batch_size', 64),
        n_epochs=best_params.get('n_epochs', 10),
        gamma=best_params.get('gamma', 0.99),
        clip_range=best_params.get('clip_range', 0.2),
        verbose=1,
        device=device,
        tensorboard_log="./bnb_ppo_tensorboard/"
    )

    # Train the model and report wall-clock duration
    start_time = datetime.now()
    model.learn(total_timesteps=timesteps, tb_log_name="bnb_ppo")
    training_time = datetime.now() - start_time

    print(f"⏱️ Training completed in {training_time}")

    # Save the model
    model.save("bnb_ppo_model")

    return model

# Train the model with the tuned parameters and the function's default timestep budget
trained_model = train_bnb_model(train_data, best_params)


In [None]:
# Section 7: Model Evaluation and Backtesting
def evaluate_model(model, test_data, model_name="BNB_PPO", periods_per_year=252 * 288):
    """Backtest a trained model on the test split and summarise performance.

    The previous body referenced ``actions_list`` and ``rewards_list`` that
    were never defined (``NameError``), and reused the name ``results`` for
    both the backtest output and the metrics dict.

    NOTE(review): only ``"portfolio_values"`` is known from this notebook to
    exist in the backtest output. ``"actions"`` and ``"rewards"`` are read if
    present - confirm the key names against ``safe_backtest_model``. Without
    them, no trades are counted and the win rate is the share of steps on
    which the portfolio value rose.

    Args:
        model: Trained agent passed through to ``safe_backtest_model``.
        test_data: Test frame; must contain a ``close`` column.
        model_name: Label stored in the metrics and printed.
        periods_per_year: Bars per year used to annualise volatility and the
            Sharpe ratio. The default keeps the original 252 * 288 (5-minute
            bars). NOTE(review): crypto trades every day, so 365 * 288 may be
            intended - confirm.

    Returns:
        Tuple ``(metrics, portfolio_values, actions_list)``.
    """
    print(f"📊 Evaluating {model_name} model...")

    # Use safe backtesting instead of manual evaluation
    backtest = safe_backtest_model(model, test_data)
    portfolio_values = backtest["portfolio_values"]

    raw_actions = backtest.get("actions")
    actions_list = [] if raw_actions is None else list(raw_actions)
    raw_rewards = backtest.get("rewards")

    # Per-step returns of the portfolio
    equity = np.asarray(portfolio_values, dtype=float)
    returns = pd.Series(equity).pct_change().dropna()

    # Buy and hold baseline
    initial_price = test_data['close'].iloc[0]
    final_price = test_data['close'].iloc[-1]
    buy_hold_return = (final_price / initial_price) - 1

    # RL model performance
    rl_return = (equity[-1] / equity[0]) - 1

    # Risk metrics
    volatility = returns.std() * np.sqrt(periods_per_year)
    if np.isfinite(volatility) and volatility != 0:
        sharpe_ratio = (returns.mean() * periods_per_year) / volatility
    else:
        # Flat or too-short equity curve: the ratio is undefined, report 0
        sharpe_ratio = 0
    equity_series = pd.Series(equity)
    max_drawdown = (equity_series / equity_series.cummax() - 1).min()

    # A step counts as a trade when any component of its action is non-zero
    total_trades = sum(1 for a in actions_list if np.any(np.asarray(a) != 0))

    # Win rate over step rewards when available, else over step returns
    if raw_rewards is not None:
        step_outcomes = np.asarray(raw_rewards, dtype=float).ravel()
    else:
        step_outcomes = returns.to_numpy()
    win_rate = float((step_outcomes > 0).mean()) if len(step_outcomes) else 0.0

    # Compile results as plain Python numbers so they serialise cleanly
    metrics = {
        'model_name': model_name,
        'rl_total_return': float(rl_return),
        'buy_hold_return': float(buy_hold_return),
        'excess_return': float(rl_return - buy_hold_return),
        'volatility': float(volatility),
        'sharpe_ratio': float(sharpe_ratio),
        'max_drawdown': float(max_drawdown),
        'final_portfolio_value': float(equity[-1]),
        'total_trades': total_trades,
        'win_rate': win_rate
    }

    return metrics, portfolio_values, actions_list

# Backtest the trained agent on the held-out test split
results, portfolio_values, actions = evaluate_model(trained_model, test_data)

# Headline metrics, assembled first and emitted in one write
report_lines = [
    "\n📈 BNB Trading Results:",
    f"   RL Total Return: {results['rl_total_return']:.4f} ({results['rl_total_return']*100:.2f}%)",
    f"   Buy & Hold Return: {results['buy_hold_return']:.4f} ({results['buy_hold_return']*100:.2f}%)",
    f"   Excess Return: {results['excess_return']:.4f} ({results['excess_return']*100:.2f}%)",
    f"   Sharpe Ratio: {results['sharpe_ratio']:.4f}",
    f"   Max Drawdown: {results['max_drawdown']:.4f} ({results['max_drawdown']*100:.2f}%)",
    f"   Total Trades: {results['total_trades']}",
    f"   Win Rate: {results['win_rate']:.4f} ({results['win_rate']*100:.2f}%)",
    f"   Final Portfolio Value: ${results['final_portfolio_value']:,.2f}",
]
print("\n".join(report_lines))

In [None]:
# Section 8: Visualization and Analysis
def create_comprehensive_plots(test_data, portfolio_values, actions):
    """Draw a 3x2 dashboard comparing the RL portfolio with buy-and-hold.

    Panels: portfolio vs buy-and-hold value, return histogram, drawdown,
    action counts, rolling Sharpe ratio, cumulative returns.

    Changes from the earlier version:
      * The rolling Sharpe panel no longer drops NaN values before plotting
        against a fixed date slice. A window with zero variance (flat equity)
        produced extra NaN, so x and y lengths differed and ``plot`` raised.
        NaN/inf are now kept as gaps and dates are selected by label.
      * Dates, prices and equity are cut to a common length.
        NOTE(review): assumes both series start on the same bar.
      * Bar colours follow the action, not the bar position.

    Args:
        test_data: Test frame with ``date`` and ``close`` columns.
        portfolio_values: Sequence of portfolio values, one per step.
        actions: Sequence of per-step actions.
    """
    fig, axes = plt.subplots(3, 2, figsize=(20, 15))
    fig.suptitle('BNB Trading Model - Comprehensive Analysis', fontsize=16)

    # Work on 0-based, equally long series so labels can be used to align
    equity = pd.Series(np.asarray(portfolio_values, dtype=float))
    n_points = min(len(equity), len(test_data))
    equity = equity.iloc[:n_points]
    dates = test_data['date'].iloc[:n_points].reset_index(drop=True)
    prices = test_data['close'].iloc[:n_points].reset_index(drop=True)

    # Portfolio value vs BNB price
    axes[0,0].plot(dates, equity, label='RL Portfolio', linewidth=2)
    # Buy & hold scaled to start at the same value as the portfolio
    buy_hold_normalized = (prices / prices.iloc[0]) * equity.iloc[0]
    axes[0,0].plot(dates, buy_hold_normalized, label='Buy & Hold', linewidth=2, alpha=0.7)
    axes[0,0].set_title('Portfolio Performance Comparison')
    axes[0,0].set_ylabel('Portfolio Value ($)')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # Returns distribution
    returns = equity.pct_change().dropna()
    axes[0,1].hist(returns, bins=50, alpha=0.7, edgecolor='black')
    axes[0,1].axvline(returns.mean(), color='red', linestyle='--', label=f'Mean: {returns.mean():.6f}')
    axes[0,1].set_title('Returns Distribution')
    axes[0,1].set_xlabel('Return')
    axes[0,1].set_ylabel('Frequency')
    axes[0,1].legend()

    # Drawdown analysis: percentage below the running maximum
    drawdown = (equity / equity.cummax() - 1) * 100
    axes[1,0].fill_between(dates, drawdown, 0, alpha=0.3, color='red')
    axes[1,0].plot(dates, drawdown, color='red', linewidth=1)
    axes[1,0].set_title('Portfolio Drawdown')
    axes[1,0].set_ylabel('Drawdown (%)')
    axes[1,0].grid(True, alpha=0.3)

    # Action distribution
    # NOTE(review): labels are looked up with `idx - 1`, i.e. actions are
    # assumed to be encoded 0/1/2 = Sell/Hold/Buy. evaluate_model counts any
    # non-zero action as a trade, which suggests 0 = Hold instead - confirm
    # the encoding used by the environment.
    action_counts = pd.Series(actions).value_counts().sort_index()
    action_labels = {-1: 'Sell', 0: 'Hold', 1: 'Buy'}
    action_colors = {-1: 'red', 0: 'gray', 1: 'green'}
    bar_colors = [action_colors.get(idx - 1, 'steelblue') for idx in action_counts.index]

    bars = axes[1,1].bar(range(len(action_counts)), action_counts.values, color=bar_colors)
    axes[1,1].set_title('Trading Actions Distribution')
    axes[1,1].set_xlabel('Action')
    axes[1,1].set_ylabel('Frequency')
    axes[1,1].set_xticks(range(len(action_counts)))
    axes[1,1].set_xticklabels([action_labels.get(idx-1, str(idx-1)) for idx in action_counts.index])

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        axes[1,1].text(bar.get_x() + bar.get_width()/2., height,
                      f'{int(height)}', ha='center', va='bottom')

    # Rolling Sharpe ratio
    rolling_window = 144  # 12 hours of 5-min data
    rolling_returns = returns.rolling(rolling_window)
    rolling_sharpe = rolling_returns.mean() / rolling_returns.std() * np.sqrt(252 * 288)
    # Zero-variance windows give NaN or inf; keep them as gaps in the line
    rolling_sharpe = rolling_sharpe.replace([np.inf, -np.inf], np.nan)

    axes[2,0].plot(dates.loc[rolling_sharpe.index], rolling_sharpe, linewidth=2)
    axes[2,0].axhline(0, color='black', linestyle='-', alpha=0.3)
    axes[2,0].axhline(1, color='green', linestyle='--', alpha=0.5, label='Sharpe = 1')
    axes[2,0].set_title(f'Rolling Sharpe Ratio ({rolling_window} periods)')
    axes[2,0].set_ylabel('Sharpe Ratio')
    axes[2,0].legend()
    axes[2,0].grid(True, alpha=0.3)

    # Cumulative returns comparison
    rl_cumret = (1 + returns).cumprod()
    bh_returns = prices.pct_change().dropna()
    bh_cumret = (1 + bh_returns).cumprod()

    axes[2,1].plot(dates.loc[rl_cumret.index], rl_cumret, label='RL Strategy', linewidth=2)
    axes[2,1].plot(dates.loc[bh_cumret.index], bh_cumret, label='Buy & Hold', linewidth=2, alpha=0.7)
    axes[2,1].set_title('Cumulative Returns')
    axes[2,1].set_ylabel('Cumulative Return')
    axes[2,1].legend()
    axes[2,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Create plots from the backtest outputs returned by evaluate_model above
create_comprehensive_plots(test_data, portfolio_values, actions)


In [None]:
# Section 9: Statistical Significance Testing
def statistical_analysis(portfolio_values, test_data):
    """Test whether the strategy's per-period returns differ from buy-and-hold.

    Both return series are cut to a common length and re-indexed from 0
    before they are subtracted. Without the re-index, pandas aligns the
    subtraction on row labels: the portfolio series is labelled 1..n-1 while
    ``test_data`` may carry arbitrary labels, which turns the excess returns
    into NaN or pairs up the wrong periods.

    Args:
        portfolio_values: Sequence of portfolio values, one per step.
        test_data: Frame with a ``close`` column for the same period.

    Returns:
        Dict with ``excess_returns`` (Series), ``t_statistic``, ``t_pvalue``,
        ``cohens_d``, ``ci_lower``, ``ci_upper`` and ``information_ratio``.
    """
    print("📊 Statistical Significance Analysis")
    print("=" * 40)

    # Calculate returns
    rl_returns = pd.Series(portfolio_values).pct_change().dropna()
    bh_returns = test_data['close'].pct_change().dropna()

    # Same length AND same labels, so arithmetic below pairs period i with i
    min_len = min(len(rl_returns), len(bh_returns))
    rl_returns = rl_returns.iloc[:min_len].reset_index(drop=True)
    bh_returns = bh_returns.iloc[:min_len].reset_index(drop=True)

    # 1. Normality tests
    rl_shapiro = stats.shapiro(rl_returns.iloc[:5000])  # Shapiro-Wilk test (max 5000 samples)
    bh_shapiro = stats.shapiro(bh_returns.iloc[:5000])

    print(f"\n🔍 Normality Tests (Shapiro-Wilk):")
    print(f"   RL Returns: p-value = {rl_shapiro[1]:.6f} {'(Normal)' if rl_shapiro[1] > 0.05 else '(Non-normal)'}")
    print(f"   B&H Returns: p-value = {bh_shapiro[1]:.6f} {'(Normal)' if bh_shapiro[1] > 0.05 else '(Non-normal)'}")

    # 2. Paired t-test (a one-sample t-test on the paired differences)
    excess_returns = rl_returns - bh_returns
    t_stat, t_pvalue = stats.ttest_1samp(excess_returns, 0)

    print(f"\n📈 Paired t-test (Excess Returns vs 0):")
    print(f"   t-statistic: {t_stat:.4f}")
    print(f"   p-value: {t_pvalue:.6f}")
    print(f"   Result: {'Significantly outperforms' if t_pvalue < 0.05 and t_stat > 0 else 'No significant outperformance'}")

    # 3. Wilcoxon signed-rank test (non-parametric alternative)
    wilcoxon_stat, wilcoxon_pvalue = stats.wilcoxon(excess_returns)

    print(f"\n🔄 Wilcoxon Signed-Rank Test:")
    print(f"   Statistic: {wilcoxon_stat:.4f}")
    print(f"   p-value: {wilcoxon_pvalue:.6f}")
    print(f"   Result: {'Significantly different' if wilcoxon_pvalue < 0.05 else 'No significant difference'}")

    # 4. Effect size (Cohen's d); for a one-sample design this is mean / std,
    # the same quantity reported below as the per-period information ratio
    mean_excess = excess_returns.mean()
    excess_std = excess_returns.std()
    cohens_d = mean_excess / excess_std

    effect_size_interpretation = (
        "Large effect" if abs(cohens_d) >= 0.8 else
        "Medium effect" if abs(cohens_d) >= 0.5 else
        "Small effect" if abs(cohens_d) >= 0.2 else
        "Negligible effect"
    )

    print(f"\n📏 Effect Size Analysis:")
    print(f"   Cohen's d: {cohens_d:.4f} ({effect_size_interpretation})")

    # 5. Confidence interval for the mean excess return (Student t)
    confidence_level = 0.95
    alpha = 1 - confidence_level
    n = len(excess_returns)

    se_excess = excess_std / np.sqrt(n)
    t_critical = stats.t.ppf(1 - alpha/2, n-1)

    ci_lower = mean_excess - t_critical * se_excess
    ci_upper = mean_excess + t_critical * se_excess

    # An interval entirely below zero is under-, not outperformance
    if ci_lower > 0:
        ci_verdict = 'Zero is NOT in CI (significant outperformance)'
    elif ci_upper < 0:
        ci_verdict = 'Zero is NOT in CI (significant underperformance)'
    else:
        ci_verdict = 'Zero is in CI (no significant difference)'

    print(f"\n📊 {confidence_level*100}% Confidence Interval for Excess Returns:")
    print(f"   [{ci_lower:.6f}, {ci_upper:.6f}]")
    print(f"   Interpretation: {ci_verdict}")

    # 6. Performance summary
    information_ratio = mean_excess / excess_std

    print(f"\n📋 Performance Summary:")
    print(f"   Average Excess Return: {mean_excess:.6f} ({mean_excess*100:.4f}% per period)")
    print(f"   Excess Return Volatility: {excess_std:.6f}")
    print(f"   Information Ratio: {information_ratio:.4f}")
    print(f"   Win Rate (positive excess): {(excess_returns > 0).mean()*100:.2f}%")

    return {
        'excess_returns': excess_returns,
        't_statistic': t_stat,
        't_pvalue': t_pvalue,
        'cohens_d': cohens_d,
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
        'information_ratio': information_ratio
    }

# Run statistical analysis; `stats_results` is read as a global by save_results and the final summary
stats_results = statistical_analysis(portfolio_values, test_data)

In [None]:
# Section 10: Results Export and Model Persistence
def save_results(results, model_name="bnb_ppo"):
    """Write the run's metrics, statistics and raw series to disk.

    Three files are created under ``../../results/<model_name>/``:
    ``performance_metrics.json`` (the ``results`` argument),
    ``statistical_analysis.json`` and ``trading_data.pkl``.

    Besides its arguments, this function reads the notebook globals
    ``stats_results``, ``portfolio_values``, ``actions`` and ``test_data``.
    """
    import json
    import os
    import pickle

    # Target directory, created on demand
    results_dir = f"../../results/{model_name}"
    os.makedirs(results_dir, exist_ok=True)

    # Performance metrics; values json cannot encode are stringified
    with open(f"{results_dir}/performance_metrics.json", 'w') as metrics_fh:
        json.dump(results, metrics_fh, indent=2, default=str)

    # Scalar statistics, coerced to plain floats for JSON
    stat_keys = (
        't_statistic',
        't_pvalue',
        'cohens_d',
        'ci_lower',
        'ci_upper',
        'information_ratio',
    )
    stats_payload = {key: float(stats_results[key]) for key in stat_keys}
    with open(f"{results_dir}/statistical_analysis.json", 'w') as stats_fh:
        json.dump(stats_payload, stats_fh, indent=2)

    # Raw series needed to redraw the plots later
    trading_payload = {
        'portfolio_values': portfolio_values,
        'actions': actions,
        'test_dates': test_data['date'].dt.strftime('%Y-%m-%d %H:%M:%S').tolist(),
        'test_prices': test_data['close'].tolist()
    }
    with open(f"{results_dir}/trading_data.pkl", 'wb') as trading_fh:
        pickle.dump(trading_payload, trading_fh)

    for summary_line in (
        f"💾 Results saved to: {results_dir}",
        "   - Performance metrics: performance_metrics.json",
        "   - Statistical analysis: statistical_analysis.json",
        "   - Trading data: trading_data.pkl",
        "   - Model weights: ../bnb_ppo_model.zip",
    ):
        print(summary_line)

# Persist everything produced by this run
save_results(results, "bnb_ppo")

# Closing report
summary_rule = "=" * 60
print("\n" + summary_rule)
print("🎯 BNB TRADING MODEL - FINAL SUMMARY")
print(summary_rule)

print("📊 Model Performance:")
print(f"   • Total Return (RL): {results['rl_total_return']*100:.2f}%")
print(f"   • Total Return (B&H): {results['buy_hold_return']*100:.2f}%")
print(f"   • Excess Return: {results['excess_return']*100:.2f}%")
print(f"   • Sharpe Ratio: {results['sharpe_ratio']:.3f}")
print(f"   • Maximum Drawdown: {results['max_drawdown']*100:.2f}%")

print("\n📈 Statistical Significance:")
print(f"   • t-test p-value: {stats_results['t_pvalue']:.6f}")
print(f"   • Cohen's d (effect size): {stats_results['cohens_d']:.4f}")
print(f"   • Information Ratio: {stats_results['information_ratio']:.4f}")

print("\n🎯 Key Insights:")
# Direction of the excess return over buy-and-hold
performance_verdict = (
    f"   ✅ Model outperforms buy-and-hold by {results['excess_return']*100:.2f}%"
    if results['excess_return'] > 0
    else f"   ❌ Model underperforms buy-and-hold by {abs(results['excess_return'])*100:.2f}%"
)
print(performance_verdict)

# Significance of that difference at the 5% level
significance_verdict = (
    "   ✅ Performance difference is statistically significant (p < 0.05)"
    if stats_results['t_pvalue'] < 0.05
    else "   ⚠️ Performance difference is not statistically significant (p > 0.05)"
)
print(significance_verdict)

print("\n💡 Next Steps:")
for next_step in (
    "   • Review hyperparameter optimization results",
    "   • Analyze trading patterns and market conditions",
    "   • Consider ensemble methods or alternative algorithms",
    "   • Implement risk management enhancements",
):
    print(next_step)

print("\n" + summary_rule)