# DQN Trading System - Main Orchestration Notebook

This notebook orchestrates the complete DQN trading workflow:
1. Data collection and preprocessing (one-time per project)
2. Training for each strategy config
3. Out-of-sample validation comparison across all strategies
4. Final test comparison across all strategies

In [None]:
# Setup and imports
import sys
import os
import json
import glob
import warnings
warnings.filterwarnings('ignore')

# Import modules
from src.utils.config_loader import ConfigLoader
from src.utils.model_manager import ModelManager
from src.utils.progress_logger import ProgressLogger
from src.data.collector import DataCollector
from src.data.splitter import DataSplitter
from src.features.engineer import FeatureEngineer
from src.features.normalizer import RollingNormalizer
from src.trading.environment import TradingEnvironment
from src.training.trainer import DQNTrainer
from src.evaluation.validator import OutOfSampleValidator
from src.evaluation.metrics import PerformanceMetrics

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from IPython.display import display, HTML, clear_output
import time
import tensorflow as tf

print(f"TensorFlow version: {tf.__version__}")
%matplotlib inline

## 1. Configuration

### Project Folder Structure:
- Each project folder contains:
  - `data_config.json` - Data settings, ticker, dates, validation/test splits
  - `trading_*.json` - One or more trading strategy configs

### Available Projects:
- `dry_run` - Quick test (1 episode, 2 years data)
- `default_run` - Full training (100 episodes, 10 years data)

### Test Mode:
- `TEST_MODE = True` - Skip training and load the existing saved model for each strategy; raises an `AssertionError` if no saved model is found
- `TEST_MODE = False` - Always perform training

In [None]:
# ========== CONFIGURATION ==========
PROJECT_FOLDER = 'my_project'  # Options: 'dry_run', 'default_run', or custom folder name
TEST_MODE = False  # If True, skip training if models exist (for testing only)
# ===================================

# Build paths: every project lives under config/<PROJECT_FOLDER>/
project_path = f'config/{PROJECT_FOLDER}'
data_config_path = f'{project_path}/data_config.json'

# Fail fast with a clear message if the project folder or its data config is missing
if not os.path.exists(project_path):
    raise ValueError(f"Project folder not found: {project_path}")

if not os.path.exists(data_config_path):
    raise ValueError(f"Data config not found: {data_config_path}")

# Load data config
data_config = ConfigLoader(data_config_path)

# Find all trading config files
trading_configs_paths = glob.glob(f'{project_path}/trading_*.json')
if not trading_configs_paths:
    raise ValueError(f"No trading configs found in {project_path}/")

# Sort for consistent ordering (glob order is filesystem-dependent)
trading_configs_paths.sort()

# Load all trading configs. Each entry records the source path, the raw config
# dict, a display name and an experiment name (used later to build model paths).
trading_configs = []
for path in trading_configs_paths:
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        tc = json.load(f)
    trading_configs.append({
        'path': path,
        'config': tc,
        'strategy_name': tc.get('strategy_name', tc.get('experiment_name', 'Unnamed')),
        'experiment_name': tc.get('experiment_name', os.path.basename(path).replace('.json', ''))
    })

# Later cells key their result dictionaries by strategy_name, so two configs
# sharing a name (for example both falling back to 'Unnamed') would silently
# overwrite each other. Keep the first occurrence as-is and disambiguate the rest.
seen_strategy_names = set()
for tc_entry in trading_configs:
    unique_name = tc_entry['strategy_name']
    if unique_name in seen_strategy_names:
        unique_name = f"{unique_name} ({tc_entry['experiment_name']})"
    base_name = unique_name
    suffix = 2
    while unique_name in seen_strategy_names:
        unique_name = f"{base_name} #{suffix}"
        suffix += 1
    seen_strategy_names.add(unique_name)
    tc_entry['strategy_name'] = unique_name

# Initialize run: the timestamp makes the run name and results folder unique per execution
ticker = data_config.config['ticker']
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = f"{PROJECT_FOLDER}_{ticker}_{timestamp}"
logger = ProgressLogger(run_name=run_name, log_dir=f"logs/{PROJECT_FOLDER}")

# Create results directory early for saving plots
results_dir = f"results/{PROJECT_FOLDER}/run_{timestamp}"
os.makedirs(results_dir, exist_ok=True)

print(f"üöÄ DQN Trading System")
print(f"{'='*70}")
print(f"üìÅ Project: {PROJECT_FOLDER}")
print(f"üìä Ticker: {ticker}")
print(f"üìÖ Date range: {data_config.config['start_date']} to {data_config.config['end_date']}")
print(f"üéØ Strategies: {len(trading_configs)}")
for i, tc in enumerate(trading_configs, 1):
    print(f"   {i}. {tc['strategy_name']} ({tc['experiment_name']})")
print(f"üß™ Test mode: {'ON' if TEST_MODE else 'OFF'}")
print(f"üìù Run name: {run_name}")
print(f"üìÇ Results will be saved to: {results_dir}")
print(f"{'='*70}")
print(f"\nMonitor this run: python monitor_training.py --run {run_name}")

## 2. Data Collection (One-Time)

In [None]:
# Fetch the ticker and VIX series a single time; every strategy trained
# below works from this same raw data.
# NOTE(review): force_download=False presumably allows previously downloaded
# data to be reused - confirm in DataCollector.
collection_stage = "Data Collection"
logger.start_stage(collection_stage, f"Collecting {ticker} and VIX data")

collector = DataCollector(data_config.config)
spy_data, vix_data = collector.collect_data(force_download=False)

# Summary returned by the collector; it doubles as the stage's logged metrics
info = collector.get_data_info(spy_data, vix_data)
logger.complete_stage(collection_stage, info)

summary_lines = [f"  {field}: {detail}" for field, detail in info.items()]
print("‚úÖ Data Collection Summary:", *summary_lines, sep="\n")

## 3. Feature Engineering

In [None]:
# Merge the two raw series into one frame, then derive the model features from it
feature_stage = "Feature Engineering"
logger.start_stage(feature_stage, f"Creating technical indicators and features")

combined_data = collector.combine_data(spy_data, vix_data)
print(f"Combined data shape: {combined_data.shape}")

engineer = FeatureEngineer(data_config.config)
featured_data = engineer.create_features(combined_data)

# Names of the feature columns plus a per-category breakdown of them
feature_columns = engineer.get_feature_names(featured_data)
feature_info = engineer.get_feature_info(featured_data)

# Categories reported both to the progress logger and in the printed summary
feature_categories = (
    'price_features',
    'technical_indicators',
    'volume_features',
    'vix_features',
)
category_counts = {name: len(feature_info[name]) for name in feature_categories}

metrics = {'total_features': len(feature_columns), **category_counts}
logger.complete_stage(feature_stage, metrics)

print(f"\n‚úÖ Total features: {len(feature_columns)}")
print(f"Feature categories:")
for category, count in category_counts.items():
    print(f"  ‚Ä¢ {category}: {count} features")

## 4. Data Splitting & Normalization

In [None]:
# Split data FIRST (before normalization to avoid lookahead bias)
logger.start_stage("Data Splitting", "Creating train/validation/test splits")

splitter = DataSplitter(data_config.config)
splits = splitter.split_data(featured_data, verbose=True)

# 'train' and 'test' are single frames; 'validation' is a sequence of frames
# (it is unpacked with * and iterated below).
train_data_raw = splits['train']
validation_periods_raw = splits['validation']
test_data_raw = splits['test']

# Close the splitting stage as soon as splitting is done, so it does not overlap
# with (or absorb the duration of) the normalization stage that follows.
# The normalized splits are selected by the same index labels as the raw ones,
# so the raw sizes are the sizes that get reported.
logger.complete_stage("Data Splitting", {
    'training_samples': len(train_data_raw),
    'validation_periods': len(validation_periods_raw),
    'test_samples': len(test_data_raw)
})

print(f"\n‚úÖ Data split complete (unnormalized):")
print(f"  ‚Ä¢ Training samples: {len(train_data_raw)}")
print(f"  ‚Ä¢ Validation periods: {len(validation_periods_raw)}")
print(f"  ‚Ä¢ Test samples: {len(test_data_raw)}")

# Continuous timeline normalization
print(f"\nüìä Normalizing data (continuous timeline, stateful rolling)...")
logger.start_stage("Data Normalization", "Applying continuous rolling Z-score normalization")

# Re-assemble all splits into one chronologically sorted frame so the
# normalizer sees an unbroken timeline across split boundaries.
# NOTE(review): this is only free of lookahead if RollingNormalizer uses
# trailing windows exclusively - confirm in its implementation.
all_data_chronological = pd.concat([
    train_data_raw,
    *validation_periods_raw,
    test_data_raw
]).sort_index()

print(f"  ‚Ä¢ Chronological timeline: {len(all_data_chronological)} total samples")
print(f"  ‚Ä¢ Date range: {all_data_chronological.index.min().date()} to {all_data_chronological.index.max().date()}")

normalizer = RollingNormalizer(data_config.config)
all_data_normalized = normalizer.fit_transform(
    all_data_chronological,
    feature_columns,
    preserve_original=True
)

print(f"  ‚úì Applied rolling normalization (window={data_config.config['data']['normalization_window']} days)")

# Extract normalized splits by looking up each raw split's index labels
train_data = all_data_normalized.loc[train_data_raw.index]
validation_periods = [
    all_data_normalized.loc[val_period.index]
    for val_period in validation_periods_raw
]
test_data = all_data_normalized.loc[test_data_raw.index]

logger.complete_stage("Data Normalization", {
    'train_shape': train_data.shape,
    'validation_periods': len(validation_periods),
    'test_shape': test_data.shape
})

print(f"\n‚úÖ Normalized data shapes:")
print(f"  ‚Ä¢ Train: {train_data.shape}")
print(f"  ‚Ä¢ Validation: {len(validation_periods)} periods")
print(f"  ‚Ä¢ Test: {test_data.shape}")

## 5. Training Loop - All Strategies

Train a model for each trading config in the project folder.

In [None]:
# Storage for all results, keyed by strategy name. Each value holds the trainer,
# its training history, the merged config and the model identifier.
all_results = {}
model_manager = ModelManager(base_dir="models")

for idx, tc_info in enumerate(trading_configs, 1):
    strategy_name = tc_info['strategy_name']
    experiment_name = tc_info['experiment_name']

    print(f"\n{'='*70}")
    print(f"üìä Strategy {idx}/{len(trading_configs)}: {strategy_name}")
    print(f"{'='*70}")

    # Merge data config with trading config (trading config wins on top-level key clashes)
    full_config = {**data_config.config, **tc_info['config']}

    # Create training environment
    train_env = TradingEnvironment(
        train_data,
        feature_columns,
        full_config,
        mode='train'
    )

    # Build model identifier (mirroring config structure)
    model_identifier = f"{PROJECT_FOLDER}/{experiment_name}"

    # Initialize trainer
    trainer = DQNTrainer(full_config, model_manager, progress_logger=logger)

    model_dir = f"models/{model_identifier}"

    # Training logic
    if TEST_MODE:
        # glob returns an empty list when the directory does not exist
        existing_models = glob.glob(f"{model_dir}/*.h5")
        if not existing_models:
            raise AssertionError(f"Test mode enabled but no models found in {model_dir}")

        # Pick the most recently written checkpoint. Sorting file names is not
        # reliable here: lexicographic order puts e.g. '..._20.h5' after '..._100.h5'.
        latest_model = max(existing_models, key=os.path.getmtime)
        print(f"‚úÖ Test mode: Loading existing model from {latest_model}")
        trainer.agent.model.load_weights(latest_model)
        # No training happened, so downstream cells get an empty history
        training_history = {'training_history': []}
        print(f"   Model loaded successfully")
    else:
        # Train model
        logger.start_stage(f"Training: {strategy_name}",
                          f"Training for {full_config['training']['episodes']} episodes")

        print(f"Starting training...")
        print(f"üìÅ Models will be saved to: {model_dir}/")
        print(f"üíæ Save frequency: Every {full_config['training']['save_frequency']} episodes")
        print(f"Monitor progress: python monitor_training.py --run {run_name}\n")

        # The trainer will automatically save models based on save_frequency
        training_history = trainer.train(train_env, validation_env=None, verbose=True)

        episode_history = training_history['training_history']
        logger.complete_stage(f"Training: {strategy_name}", {
            'episodes_completed': len(episode_history),
            'final_return': episode_history[-1]['return'] if episode_history else 0
        })

        print(f"\n‚úÖ Training complete for {strategy_name}")

    # Store results for the validation, test and reporting cells
    all_results[strategy_name] = {
        'experiment_name': experiment_name,
        'trainer': trainer,
        'training_history': training_history,
        'config': full_config,
        'model_identifier': model_identifier
    }

print(f"\n{'='*70}")
print(f"‚úÖ ALL TRAINING COMPLETE - {len(trading_configs)} strategies")
print(f"{'='*70}")

In [None]:
# Plot training progress for all strategies: one 2x3 grid, one line per strategy
# in each panel. Skipped in TEST_MODE because no training history is produced.
if not TEST_MODE:
    fig, axes = plt.subplots(2, 3, figsize=(15, 8))

    # (axis, history column, scale factor, panel title, y-axis label)
    panels = [
        (axes[0, 0], 'reward', 1, 'Episode Rewards', 'Total Reward'),
        (axes[0, 1], 'return', 100, 'Total Returns', 'Return (%)'),
        (axes[0, 2], 'avg_loss', 1, 'Training Loss', 'Average Loss'),
        (axes[1, 0], 'win_rate', 100, 'Win Rate', 'Win Rate (%)'),
        (axes[1, 1], 'trades', 1, 'Number of Trades', 'Trades'),
        (axes[1, 2], 'sharpe', 1, 'Sharpe Ratio', 'Sharpe'),
    ]

    for strategy_name, results in all_results.items():
        history = results['training_history']['training_history']
        if not history:
            continue
        history_df = pd.DataFrame(history)

        for panel_ax, column, scale, _title, _ylabel in panels:
            if column == 'avg_loss' and column not in history_df.columns:
                # Loss may be absent from the history. Plot a flat zero line of
                # matching length: a scalar fallback would make plot() fail
                # because x and y lengths differ.
                series = pd.Series(0.0, index=history_df.index)
            else:
                series = history_df[column]
            if scale != 1:
                series = series * scale  # fractions -> percent
            panel_ax.plot(history_df['episode'], series,
                          label=strategy_name, linewidth=2)

    # Decorate each panel once, instead of once per strategy
    for panel_ax, _column, _scale, title, ylabel in panels:
        panel_ax.set_title(title)
        panel_ax.set_xlabel('Episode')
        panel_ax.set_ylabel(ylabel)
        panel_ax.grid(True, alpha=0.3)
        if panel_ax.lines:
            # Only add a legend when something was plotted (avoids the
            # "no artists with labels" warning on empty axes)
            panel_ax.legend()

    plt.suptitle(f'Training Progress - {PROJECT_FOLDER}', fontsize=14, y=1.02)
    plt.tight_layout()

    # Save the plot
    plot_path = f"{results_dir}/training_progress.png"
    plt.savefig(plot_path, dpi=100, bbox_inches='tight')
    print(f"‚úÖ Training progress plot saved to: {plot_path}")

    plt.show()
else:
    print("üìä Skipping training plots (TEST_MODE enabled)")

## 6. Out-of-Sample Validation Comparison

Compare all strategies across all validation periods.

In [None]:
# Evaluate every trained strategy on each out-of-sample validation window and
# collect one summary row per (strategy, period).
banner = '=' * 70
print(f"\n{banner}")
print(f"üìä OUT-OF-SAMPLE VALIDATION COMPARISON")
print(f"{banner}")
print(f"Validation periods: {len(validation_periods)}\n")

# Summary field -> key looked up in the dict returned by trainer.evaluate();
# a missing key is recorded as 0.
summary_fields = {
    'return': 'total_return',
    'sharpe': 'sharpe_ratio',
    'max_drawdown': 'max_drawdown',
    'win_rate': 'win_rate',
    'trades': 'num_trades',
}

validation_results = {}

for strategy_name, results in all_results.items():
    print(f"\nValidating: {strategy_name}")
    strategy_trainer = results['trainer']
    rows = []

    for period_no, window in enumerate(validation_periods, start=1):
        first_day = window.index.min().date()
        last_day = window.index.max().date()
        print(f"  Period {period_no}: {first_day} to {last_day}", end=" ")

        # A fresh environment per window, built with the strategy's own config
        window_env = TradingEnvironment(window, feature_columns, results['config'], mode='test')
        outcome = strategy_trainer.evaluate(window_env, verbose=False)

        row = {'period': period_no}
        for field, source_key in summary_fields.items():
            row[field] = outcome.get(source_key, 0)
        rows.append(row)

        print(f"‚Üí Return: {row['return']:.2%}")

    validation_results[strategy_name] = rows

print(f"\n‚úÖ Validation complete for all strategies")

In [None]:
# Grouped bar charts: one panel per metric, one bar group per validation
# period, one bar per strategy within each group.
if not validation_periods or not all_results:
    print("üìä No validation periods to plot")
else:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # (metric key, panel title, scale factor), listed in row-major panel order
    panel_specs = [
        ('return', 'Returns by Period (%)', 100),
        ('sharpe', 'Sharpe Ratio by Period', 1),
        ('win_rate', 'Win Rate by Period (%)', 100),
        ('trades', 'Trades by Period', 1),
    ]

    n_strategies = len(all_results)
    period_positions = np.arange(1, len(validation_periods) + 1)
    # Each group of bars occupies 80% of the slot between neighbouring periods
    bar_width = 0.8 / max(n_strategies, 1)

    for panel, (metric, title, multiplier) in zip(axes.flat, panel_specs):
        for slot, (strategy_name, rows) in enumerate(validation_results.items()):
            heights = [row[metric] * multiplier for row in rows]
            # Centre the group of bars on the period's tick position
            shift = (slot - n_strategies/2 + 0.5) * bar_width
            panel.bar(period_positions + shift, heights, bar_width, label=strategy_name)

        panel.set_title(title)
        panel.set_xlabel('Validation Period')
        panel.set_ylabel(metric.replace('_', ' ').title())
        panel.set_xticks(period_positions)
        panel.legend()
        panel.grid(True, alpha=0.3)

    plt.suptitle(f'Out-of-Sample Validation Comparison - {PROJECT_FOLDER}', fontsize=14, y=1.00)
    plt.tight_layout()

    # Persist the figure alongside the other run artefacts
    plot_path = f"{results_dir}/validation_comparison.png"
    plt.savefig(plot_path, dpi=100, bbox_inches='tight')
    print(f"‚úÖ Validation comparison plot saved to: {plot_path}")

    plt.show()

## 7. Test Set Comparison

Compare all strategies on the held-out test period.

In [None]:
# Final evaluation: run each strategy once over the held-out test split and
# keep its metrics, portfolio curve and trade list for the reporting cells.
banner = '=' * 70
print(f"\n{banner}")
print(f"üß™ FINAL TEST COMPARISON")
print(f"{banner}")
print(f"Test period: {test_data.index.min().date()} to {test_data.index.max().date()}")
print(f"Test samples: {len(test_data)}\n")

test_results = {}
metrics_calc = PerformanceMetrics()

for strategy_name, results in all_results.items():
    print(f"Testing: {strategy_name}")

    test_env = TradingEnvironment(test_data, feature_columns, results['config'], mode='test')

    # evaluate() drives the environment; its return value is not used here -
    # the metrics are recomputed from the state the environment recorded.
    results['trainer'].evaluate(test_env, verbose=False)
    strategy_metrics = metrics_calc.calculate_metrics(
        test_env.portfolio_values,
        test_env.trades,
        test_env.starting_balance
    )

    # Copies, so the stored curves do not alias the environment's own lists
    test_results[strategy_name] = {
        'metrics': strategy_metrics,
        'portfolio_values': test_env.portfolio_values.copy(),
        'trades': test_env.trades.copy()
    }

    print(f"  Return: {strategy_metrics['total_return']:.2%}")
    print(f"  Sharpe: {strategy_metrics['sharpe_ratio']:.2f}")
    print(f"  Win Rate: {strategy_metrics['win_rate']:.2%}\n")

print(f"‚úÖ Test evaluation complete for all strategies")

In [None]:
# Build a printable summary table: one row per strategy, with percentages and
# ratios pre-formatted as strings.
comparison_data = [
    {
        'Strategy': name,
        'Return (%)': f"{outcome['metrics']['total_return']*100:.2f}",
        'Sharpe': f"{outcome['metrics']['sharpe_ratio']:.2f}",
        'Max DD (%)': f"{outcome['metrics']['max_drawdown']*100:.2f}",
        'Win Rate (%)': f"{outcome['metrics']['win_rate']*100:.2f}",
        'Trades': outcome['metrics']['num_trades'],
    }
    for name, outcome in test_results.items()
]

comparison_df = pd.DataFrame(comparison_data)

rule = "=" * 70
print(f"\n{rule}")
print("üìä TEST RESULTS SUMMARY")
print(rule)
print(comparison_df.to_string(index=False))
print(rule)

In [None]:
# Plot test comparison - Portfolio values over time
fig, ax = plt.subplots(figsize=(14, 7))

for strategy_name, result in test_results.items():
    ax.plot(result['portfolio_values'], label=strategy_name, linewidth=2)

# Add buy & hold benchmark.
# The benchmark follows the actual (un-normalized) close price, starting at row
# `window_size` of the test split, instead of a straight line between the
# endpoints. The previous linear ramp hid the real price path and, because it
# was scaled by len(test_data) over a shorter range, never reached the
# buy & hold return reported elsewhere.
# NOTE(review): assumes the environment's first portfolio value corresponds to
# row `window_size` of test_data - confirm against TradingEnvironment.
price_col = f"{ticker}_Close_orig"
window_size = data_config.config['data']['window_size']
benchmark_prices = test_data[price_col].iloc[window_size:]
initial_price = benchmark_prices.iloc[0]
final_price = benchmark_prices.iloc[-1]
buy_hold_return = (final_price - initial_price) / initial_price
# All strategies are compared from the first strategy's starting balance
starting_balance = list(all_results.values())[0]['config']['trading']['starting_balance']

# Value of a position bought at initial_price with the whole starting balance
buy_hold_values = (starting_balance * benchmark_prices / initial_price).tolist()
ax.plot(buy_hold_values, label='Buy & Hold', linewidth=2, linestyle='--', color='black')

ax.set_title(f'Test Period Performance Comparison - {PROJECT_FOLDER}', fontsize=14)
ax.set_xlabel('Time Step')
ax.set_ylabel('Portfolio Value ($)')
ax.legend(loc='best')
ax.grid(True, alpha=0.3)

plt.tight_layout()

# Save the plot
plot_path = f"{results_dir}/test_portfolio_values.png"
plt.savefig(plot_path, dpi=100, bbox_inches='tight')
print(f"‚úÖ Test portfolio comparison saved to: {plot_path}")

plt.show()

In [None]:
# Side-by-side bar charts of the headline test metrics. Only the return panel
# includes the Buy & Hold benchmark.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

strategies = list(test_results)
strategy_metrics = [test_results[name]['metrics'] for name in strategies]
returns = [m['total_return'] * 100 for m in strategy_metrics]
sharpes = [m['sharpe_ratio'] for m in strategy_metrics]
win_rates = [m['win_rate'] * 100 for m in strategy_metrics]

# Benchmark is appended as one extra (gray) bar on the return panel
strategies_with_bh = [*strategies, 'Buy & Hold']
returns_with_bh = [*returns, buy_hold_return * 100]

x = np.arange(len(strategies))
x_with_bh = np.arange(len(strategies_with_bh))

# (bar positions, bar heights, tick labels, colour(s), panel title)
bar_specs = [
    (x_with_bh, returns_with_bh, strategies_with_bh,
     ['skyblue'] * len(strategies) + ['gray'], 'Total Return (%)'),
    (x, sharpes, strategies, 'orange', 'Sharpe Ratio'),
    (x, win_rates, strategies, 'green', 'Win Rate (%)'),
]

for panel, (positions, heights, labels, colour, title) in zip(axes, bar_specs):
    panel.bar(positions, heights, color=colour)
    panel.set_title(title)
    panel.set_xticks(positions)
    panel.set_xticklabels(labels, rotation=45, ha='right')
    panel.grid(True, alpha=0.3, axis='y')

plt.suptitle(f'Final Test Metrics Comparison - {PROJECT_FOLDER}', fontsize=14, y=1.02)
plt.tight_layout()

# Persist the figure alongside the other run artefacts
plot_path = f"{results_dir}/test_metrics_comparison.png"
plt.savefig(plot_path, dpi=100, bbox_inches='tight')
print(f"‚úÖ Test metrics comparison saved to: {plot_path}")

plt.show()

## 8. Save Final Results

In [None]:
# Write the tabular and JSON result files (the plots were saved by earlier
# cells), then print a run summary. results_dir was created in the
# configuration cell.

# Test comparison table as CSV
comparison_df.to_csv(f"{results_dir}/test_comparison.csv", index=False)

# Everything needed to reproduce or inspect the run, in one JSON document
final_results = {
    'timestamp': datetime.now().isoformat(),
    'project': PROJECT_FOLDER,
    'test_mode': TEST_MODE,
    'run_name': run_name,
    'data_config': data_config.config,
    'strategies': {
        name: {
            'experiment_name': entry['experiment_name'],
            'config': entry['config'],
            'training_history': entry['training_history']['training_history'],
        }
        for name, entry in all_results.items()
    },
    'validation_results': validation_results,
    'test_results': {name: outcome['metrics'] for name, outcome in test_results.items()},
    'buy_hold_return': float(buy_hold_return)
}

# default=str: values json cannot encode natively are written as their str() form
results_path = f"{results_dir}/final_results.json"
with open(results_path, 'w') as f:
    json.dump(final_results, f, indent=2, default=str)

rule = '=' * 70
print(f"\n{rule}")
print(f"‚úÖ COMPLETE PIPELINE FINISHED!")
print(f"{rule}")
print(f"üìÅ Project: {PROJECT_FOLDER}")
print(f"üìù Run name: {run_name}")
print(f"üìä Strategies trained: {len(trading_configs)}")
print(f"üìà Results saved to: {results_dir}/")
print(f"üìÑ Final results: {results_path}")

# Inventory of everything in the results folder, labelled by file extension
saved_files = sorted(os.listdir(results_dir))
print(f"\nüìÅ Saved files:")
for filename in saved_files:
    size_kb = os.path.getsize(f"{results_dir}/{filename}") / 1024
    if filename.endswith('.png'):
        file_type = "üìä Plot"
    elif filename.endswith('.csv'):
        file_type = "üìÑ Data"
    else:
        file_type = "üìã Results"
    print(f"   {file_type}: {filename} ({size_kb:.1f} KB)")

# Count plots saved
plot_files = [name for name in saved_files if name.endswith('.png')]
print(f"\nüìä Plots saved: {len(plot_files)}")

# Highlight the strategy with the highest total return on the test split
if test_results:
    print(f"\nüèÜ Best performing strategy:")
    best_name = max(test_results, key=lambda name: test_results[name]['metrics']['total_return'])
    best_metrics = test_results[best_name]['metrics']
    print(f"   {best_name}")
    print(f"   ‚Ä¢ Return: {best_metrics['total_return']:.2%}")
    print(f"   ‚Ä¢ Sharpe: {best_metrics['sharpe_ratio']:.2f}")
    print(f"   ‚Ä¢ vs Buy & Hold: {(best_metrics['total_return'] - buy_hold_return):.2%}")

print(f"{rule}")

# Print final summary
logger.print_summary()