In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2_buyhold import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


# Load the project settings once; the pipeline function below reads the
# 'reinforcement_learning' section (and its 'environment' sub-section) from this object.
# NOTE(review): the path is relative — presumably resolved against the current
# working directory; confirm in load_config.
config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2 PPO Agent

### Pipeline function: train, predict, and backtest

In [None]:
def _load_processed_data(stock_symbol, year):
    """Read the processed CSV for one ticker, indexed by date when a 'Date' column exists.

    Raises FileNotFoundError (with the attempted path in the message) when the
    CSV is missing, instead of terminating the interpreter.
    """
    csv_path = f'data/processed/{stock_symbol}_processed_{year}.csv'
    try:
        data = pd.read_csv(csv_path)
    except FileNotFoundError as exc:
        raise FileNotFoundError(
            f"Data file not found for {stock_symbol}: {csv_path}. Check path."
        ) from exc

    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data = data.set_index('Date')
    print(f"Data loaded successfully for {stock_symbol}.")
    return data


def _make_trading_env(price_df, env_params):
    """Build a TradingEnvRL over ``price_df`` with one shared set of fallback settings.

    Train and test environments are both created here so that their fallback
    values (notably ``initial_balance``) cannot drift apart.
    """
    return TradingEnvRL(
        price_df,
        initial_balance=env_params.get('initial_balance', 100000),
        commission=env_params.get('commission', 0.001),
        lookback_window=env_params.get('lookback_window', 30),
        reward_func='profit'
    )


def _print_trade_statistics(portfolio, stock_symbol):
    """Print trade counts, direction counts and win/loss outcomes for a backtested portfolio."""
    trades = portfolio.trades.records_readable
    print(f"\n--- Trade Statistics for {stock_symbol} ---")
    print(f"Total number of trades: {len(trades)}")
    print("\nTrade Direction Counts:")
    print(trades['Direction'].value_counts())

    if 'PnL' not in trades.columns:
        return

    profitable_trades = trades[trades['PnL'] > 0]
    loss_trades = trades[trades['PnL'] < 0]

    print(f"\n--- Trade Outcomes ---")
    print(f"Number of profitable exits: {len(profitable_trades)}")
    print(f"Number of loss exits (cut loss): {len(loss_trades)}")
    # An agent that never trades would otherwise trigger a ZeroDivisionError here.
    if len(trades) > 0:
        print(f"Win rate: {len(profitable_trades) / len(trades) * 100:.2f}%")
    else:
        print("Win rate: n/a (no trades)")
    print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
    print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")


def run_ppo_trading_pipeline(stock_symbol, config, year='2022', save_path="models/", show_plot=True, train_ratio=0.7):
    """
    Complete PPO trading pipeline: load data, train model, generate predictions, and backtest.

    Parameters:
    -----------
    stock_symbol : str
        Stock ticker symbol (e.g., 'CNP', 'MDU')
    config : dict
        Configuration dictionary loaded from config.yaml; the
        'reinforcement_learning' section and its 'environment' sub-section are read.
    year : str, optional
        Year for data file selection (default: '2022')
    save_path : str, optional
        Directory to save/load models (default: 'models/')
    show_plot : bool, optional
        Whether to display the portfolio plot and print the per-trade
        statistics (default: True)
    train_ratio : float, optional
        Fraction of rows (taken from the start of the data) used for training;
        the remaining rows form the test set (default: 0.7)

    Returns:
    --------
    tuple : (portfolio, metrics, actions)
        - portfolio: portfolio object returned by PortfolioBacktesterRL.run_backtest
        - metrics: Dictionary of performance metrics
        - actions: Flattened array of predicted actions

    Raises:
    -------
    ValueError
        If train_ratio is not strictly between 0 and 1.
    FileNotFoundError
        If the processed CSV for the ticker/year does not exist.
    """
    if not 0 < train_ratio < 1:
        raise ValueError(f"train_ratio must be between 0 and 1 (exclusive), got {train_ratio}")

    # 1. Load Data
    data = _load_processed_data(stock_symbol, year)

    # 2. Split Train/Test (chronological: earliest rows train, latest rows test)
    split_idx = int(len(data) * train_ratio)
    train_df = data.iloc[:split_idx]
    test_df = data.iloc[split_idx:]

    # 3. Training Phase
    print(f"Training PPO Agent for {stock_symbol}...")
    rl_config = config['reinforcement_learning']
    trainer = ModelTrainerRL(rl_config)
    env_params = rl_config['environment']
    lookback_window = env_params.get('lookback_window', 30)

    env_train = _make_trading_env(train_df, env_params)
    trainer.train_ppo(env_train)
    # NOTE(review): every call appears to save to (and reload from) the same file
    # names under save_path, so a later ticker overwrites an earlier ticker's
    # saved model. Pass a per-ticker save_path if the models must be kept.
    trainer.save_models(save_path)
    print("Training Complete. Models saved.")

    # 4. Inference Phase
    print("Generating Agent Predictions on Test Data...")
    model = PPO.load(os.path.join(save_path, "ppo_model"))

    env_test = _make_trading_env(test_df, env_params)
    vec_env_test = DummyVecEnv([lambda: env_test])

    norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
    if os.path.exists(norm_path):
        vec_env_test = VecNormalize.load(norm_path, vec_env_test)
        # Freeze the running statistics and leave rewards unnormalized for evaluation.
        vec_env_test.training = False
        vec_env_test.norm_reward = False
    else:
        print("WARNING: Normalization stats not found. Model predictions may be garbage.")

    obs = vec_env_test.reset()
    done = [False]
    actions = []

    # Roll the single wrapped environment forward until it reports the episode is done.
    while not done[0]:
        action, _ = model.predict(obs, deterministic=True)
        actions.append(action[0])
        obs, _, done, _ = vec_env_test.step(action)

    print(f"Generated {len(actions)} actions.")
    flat_actions = np.array(actions).flatten()

    # 5. Backtesting Phase
    print("Running Backtest...")
    backtester = PortfolioBacktesterRL(env_params)

    portfolio = backtester.run_backtest(
        price_data=test_df['close'],
        predicted_weights=flat_actions,
        lookback_window=lookback_window
    )

    # Return value is not used here; the call is kept because the original pipeline invokes it.
    backtester.compare_with_buy_and_hold_rl()
    metrics = backtester.get_performance_metrics()

    print(f"\n--- Strategy Performance for {stock_symbol} ---")
    for k, v in metrics.items():
        print(f"{k}: {v:.4f}")

    if show_plot:
        portfolio.plot().show()
        _print_trade_statistics(portfolio, stock_symbol)

    return portfolio, metrics, flat_actions

In [3]:
# ## Single stock
# stock_symbol = "CNP"
# portfolio, metrics, actions = run_ppo_trading_pipeline(stock_symbol, config)

In [4]:
# # portfolio.trades.records_readable
# # Print trade statistics
# trades = portfolio.trades.records_readable
# print(f"\n--- Trade Statistics for abc ---")
# print(f"Total number of trades: {len(trades)}")
# print("\nTrade Direction Counts:")
# print(trades['Direction'].value_counts())

# # Analyze trade outcomes
# if 'PnL' in trades.columns:
#         profitable_trades = trades[trades['PnL'] > 0]
#         loss_trades = trades[trades['PnL'] < 0]
        
#         print(f"\n--- Trade Outcomes ---")
#         print(f"Number of profitable exits: {len(profitable_trades)}")
#         print(f"Number of loss exits (cut loss): {len(loss_trades)}")
#         print(f"Win rate: {len(profitable_trades) / len(trades) * 100:.2f}%")
#         print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
#         print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")
    


In [6]:
# Run the full PPO pipeline once per ticker and keep each run's outputs keyed by symbol
stocks = ["AAPL", "AMZN", "TSLA", "BAC", "MDU", "CWCO", "NEE", "DUK"]
results = {}

for stock in stocks:
    print(f"\n{'='*60}")
    print(f"Processing {stock}")
    print(f"{'='*60}")
    # show_plot=True renders the portfolio figure and prints trade statistics for every ticker
    portfolio, metrics, actions = run_ppo_trading_pipeline(stock, config, show_plot=True)
    results[stock] = dict(portfolio=portfolio, metrics=metrics, actions=actions)



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



Processing AAPL
Data loaded successfully for AAPL.
Training PPO Agent for AAPL...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2381 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2037         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0040051187 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.00198      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0664       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00313     |
| 

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Strategy Return: 93.71%
INFO:BacktesterRL:Buy & Hold Return: 12.81%
INFO:BacktesterRL:Outperformance: 80.91%



--- Strategy Performance for AAPL ---
Total Return (%): 93.7100
Annual Return (%): 202.5500
Sharpe Ratio: 3.5365
Sortino Ratio: 7.8942
Max Drawdown (%): -8.3400
Calmar Ratio: 24.2825
Win Rate (%): 100.0000
Total Trades: 11.0000
Final Value ($): 193714.4200




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for AAPL ---
Total number of trades: 11

--- Trade Outcomes ---
Number of profitable exits: 11
Number of loss exits (cut loss): 0
Win rate: 100.00%
Average profit per winning trade: $8519.49
No losing trades

Processing AMZN
Data loaded successfully for AMZN.
Training PPO Agent for AMZN...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1741 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1293         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0024744412 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 29.03%
INFO:BacktesterRL:Buy & Hold Return: -0.92%
INFO:BacktesterRL:Outperformance: 29.95%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for AMZN ---
Total Return (%): 29.0300
Annual Return (%): 53.2200
Sharpe Ratio: 1.2652
Sortino Ratio: 2.0402
Max Drawdown (%): -16.4600
Calmar Ratio: 3.2342
Win Rate (%): 64.7100
Total Trades: 17.0000
Final Value ($): 129027.8700




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for AMZN ---
Total number of trades: 17

--- Trade Outcomes ---
Number of profitable exits: 11
Number of loss exits (cut loss): 6
Win rate: 64.71%
Average profit per winning trade: $5004.71
Average loss per losing trade: $-4337.33

Processing TSLA
Data loaded successfully for TSLA.
Training PPO Agent for TSLA...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1718 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1304         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0030523378 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    en

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 265.43%
INFO:BacktesterRL:Buy & Hold Return: 0.02%
INFO:BacktesterRL:Outperformance: 265.40%



--- Strategy Performance for TSLA ---
Total Return (%): 265.4300
Annual Return (%): 775.5900
Sharpe Ratio: 4.0730
Sortino Ratio: 9.5877
Max Drawdown (%): -13.3200
Calmar Ratio: 58.2441
Win Rate (%): 60.5300
Total Trades: 38.0000
Final Value ($): 365426.2200




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for TSLA ---
Total number of trades: 38

--- Trade Outcomes ---
Number of profitable exits: 23
Number of loss exits (cut loss): 15
Win rate: 60.53%
Average profit per winning trade: $13942.75
Average loss per losing trade: $-3683.81

Processing BAC
Data loaded successfully for BAC.
Training PPO Agent for BAC...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1712 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1356         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0051751514 |
|    clip_fraction        | 0.034        |
|    clip_range           | 0.2          |
|    ent

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 72.99%
INFO:BacktesterRL:Buy & Hold Return: 14.96%
INFO:BacktesterRL:Outperformance: 58.03%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for BAC ---
Total Return (%): 72.9900
Annual Return (%): 150.3300
Sharpe Ratio: 4.9938
Sortino Ratio: 11.9589
Max Drawdown (%): -3.5800
Calmar Ratio: 41.9369
Win Rate (%): 100.0000
Total Trades: 23.0000
Final Value ($): 172988.6000




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for BAC ---
Total number of trades: 23

--- Trade Outcomes ---
Number of profitable exits: 23
Number of loss exits (cut loss): 0
Win rate: 100.00%
Average profit per winning trade: $3173.42
No losing trades

Processing MDU
Data loaded successfully for MDU.
Training PPO Agent for MDU...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1800 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1288         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0048560854 |
|    clip_fraction        | 0.0417       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43 

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 39.65%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Buy & Hold Return: 18.21%
INFO:BacktesterRL:Outperformance: 21.44%



--- Strategy Performance for MDU ---
Total Return (%): 39.6500
Annual Return (%): 74.9200
Sharpe Ratio: 2.2901
Sortino Ratio: 3.5213
Max Drawdown (%): -9.6800
Calmar Ratio: 7.7381
Win Rate (%): 88.8900
Total Trades: 9.0000
Final Value ($): 139648.9700




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for MDU ---
Total number of trades: 9

--- Trade Outcomes ---
Number of profitable exits: 8
Number of loss exits (cut loss): 1
Win rate: 88.89%
Average profit per winning trade: $5074.18
Average loss per losing trade: $-944.44

Processing CWCO
Data loaded successfully for CWCO.
Training PPO Agent for CWCO...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1816 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1381        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005786637 |
|    clip_fraction        | 0.0321      |
|    clip_range           | 0.2         |
|    entropy_loss    

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 118.67%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 85.99%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for CWCO ---
Total Return (%): 118.6700
Annual Return (%): 270.6000
Sharpe Ratio: 5.1256
Sortino Ratio: 13.2960
Max Drawdown (%): -3.3500
Calmar Ratio: 80.7608
Win Rate (%): 90.0000
Total Trades: 20.0000
Final Value ($): 218668.5000




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for CWCO ---
Total number of trades: 20

--- Trade Outcomes ---
Number of profitable exits: 18
Number of loss exits (cut loss): 2
Win rate: 90.00%
Average profit per winning trade: $6935.75
Average loss per losing trade: $-3087.46

Processing NEE
Data loaded successfully for NEE.
Training PPO Agent for NEE...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1821 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1275         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0031473818 |
|    clip_fraction        | 0.0102       |
|    clip_range           | 0.2          |
|    entro

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 117.45%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 89.43%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for NEE ---
Total Return (%): 117.4500
Annual Return (%): 267.1500
Sharpe Ratio: 5.9963
Sortino Ratio: 17.5785
Max Drawdown (%): -3.0600
Calmar Ratio: 87.4099
Win Rate (%): 77.4200
Total Trades: 31.0000
Final Value ($): 217450.9200




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



--- Trade Statistics for NEE ---
Total number of trades: 31

--- Trade Outcomes ---
Number of profitable exits: 24
Number of loss exits (cut loss): 7
Win rate: 77.42%
Average profit per winning trade: $5203.63
Average loss per losing trade: $-1062.30

Processing DUK
Data loaded successfully for DUK.
Training PPO Agent for DUK...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1839 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1344         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0048644273 |
|    clip_fraction        | 0.0242       |
|    clip_range           | 0.2          |
|    entrop

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Strategy Return: 37.71%
INFO:BacktesterRL:Buy & Hold Return: 19.81%
INFO:BacktesterRL:Outperformance: 17.90%



--- Strategy Performance for DUK ---
Total Return (%): 37.7100
Annual Return (%): 70.8700
Sharpe Ratio: 3.1346
Sortino Ratio: 5.1131
Max Drawdown (%): -6.6900
Calmar Ratio: 10.5851
Win Rate (%): 68.7500
Total Trades: 32.0000
Final Value ($): 137707.5300



--- Trade Statistics for DUK ---
Total number of trades: 32

--- Trade Outcomes ---
Number of profitable exits: 22
Number of loss exits (cut loss): 10
Win rate: 68.75%
Average profit per winning trade: $2164.10
Average loss per losing trade: $-990.27
