In [None]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2_buyhold import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2 PPO Agent

### Function

In [None]:
def run_sac_trading_pipeline(stock_symbol, config, year='2022', save_path="models/", show_plot=True):
    """
    Complete SAC trading pipeline: load data, train model, generate predictions, and backtest.
    
    Parameters:
    -----------
    stock_symbol : str
        Stock ticker symbol (e.g., 'CNP', 'MDU')
    config : dict
        Configuration dictionary loaded from config.yaml
    year : str, optional
        Year for data file selection (default: '2022')
    save_path : str, optional
        Directory to save/load models (default: 'models/')
    show_plot : bool, optional
        Whether to display the portfolio plot (default: True)
    
    Returns:
    --------
    tuple : (portfolio, metrics, actions)
        - portfolio: VectorBT portfolio object
        - metrics: Dictionary of performance metrics
        - actions: Array of predicted actions
    """
    # 1. Load Data
    try:
        data = pd.read_csv(f'data/processed/{stock_symbol}_processed_{year}.csv')
        if 'Date' in data.columns:
            data['Date'] = pd.to_datetime(data['Date'])
            data.set_index('Date', inplace=True)
        print(f"Data loaded successfully for {stock_symbol}.")
    except FileNotFoundError:
        print(f"Error: Data file not found for {stock_symbol}. Check path.")
        sys.exit()
    
    # 2. Split Train/Test
    split_idx = int(len(data) * 0.7)
    train_df = data.iloc[:split_idx]
    test_df = data.iloc[split_idx:]
    
    # 3. Training Phase
    print(f"Training sac Agent for {stock_symbol}...")
    trainer = ModelTrainerRL(config['reinforcement_learning'])
    env_params = config['reinforcement_learning']['environment']
    
    env_train = TradingEnvRL(
        train_df, 
        initial_balance=env_params.get('initial_balance', 10000),
        commission=env_params.get('commission', 0.001),
        lookback_window=env_params.get('lookback_window', 30),
        reward_func='profit'
    )
    
    result = trainer.train_sac(env_train)
    trainer.save_models(save_path)
    print("Training Complete. Models saved.")
    
    # 4. Inference Phase
    print("Generating Agent Predictions on Test Data...")
    model = SAC.load(os.path.join(save_path, "sac  _model"))
    
    env_test = TradingEnvRL(
        test_df, 
        initial_balance=env_params.get('initial_balance', 100000),
        commission=env_params.get('commission', 0.001),
        lookback_window=env_params.get('lookback_window', 30),
        reward_func='profit' 
    )
    
    vec_env_test = DummyVecEnv([lambda: env_test])
    
    norm_path = os.path.join(save_path, "sac_vecnormalize.pkl")
    if os.path.exists(norm_path):
        vec_env_test = VecNormalize.load(norm_path, vec_env_test)
        vec_env_test.training = False
        vec_env_test.norm_reward = False
    else:
        print("WARNING: Normalization stats not found. Model predictions may be garbage.")
    
    obs = vec_env_test.reset()
    done = [False]
    actions = []
    
    while not done[0]:
        action, _ = model.predict(obs, deterministic=True)
        actions.append(action[0])
        obs, _, done, _ = vec_env_test.step(action)
    
    print(f"Generated {len(actions)} actions.")
    
    # 5. Backtesting Phase
    print("Running Backtest...")
    backtester = PortfolioBacktesterRL(env_params)
    
    portfolio = backtester.run_backtest(
        price_data=test_df['close'], 
        predicted_weights=np.array(actions).flatten(),
        lookback_window=env_params.get('lookback_window', 30)
    )
    
    comparison = backtester.compare_with_buy_and_hold_rl()
    metrics = backtester.get_performance_metrics()
    
    print(f"\n--- Strategy Performance for {stock_symbol} ---")
    for k, v in metrics.items():
        print(f"{k}: {v:.4f}")
    
    if show_plot:
        portfolio.plot().show()
        # Print trade statistics
        trades = portfolio.trades.records_readable
        print(f"\n--- Trade Statistics for {stock_symbol} ---")
        print(f"Total number of trades: {len(trades)}")
        #print("\nTrade Direction Counts:")
        #print(trades['Direction'].value_counts())

        # Analyze trade outcomes
        if 'PnL' in trades.columns:
                profitable_trades = trades[trades['PnL'] > 0]
                loss_trades = trades[trades['PnL'] < 0]
                
                print(f"\n--- Trade Outcomes ---")
                print(f"Number of profitable exits: {len(profitable_trades)}")
                print(f"Number of loss exits (cut loss): {len(loss_trades)}")
                print(f"Win rate: {len(profitable_trades) / len(trades) * 100:.2f}%")
                print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
                print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")
            


    
    return portfolio, metrics, np.array(actions).flatten()

In [None]:
## Single stock
stock_symbol = "CNP"
portfolio, metrics, actions = run_ppo_trading_pipeline(stock_symbol, config)



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...


Data loaded successfully for CNP.
Training PPO Agent for CNP...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2198 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1855         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0032407343 |
|    clip_fraction        | 0.00952      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.441       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.143        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00354     |
|    std             

In [11]:
# portfolio.trades.records_readable
# Print trade statistics
trades = portfolio.trades.records_readable
print(f"\n--- Trade Statistics for abc ---")
print(f"Total number of trades: {len(trades)}")
print("\nTrade Direction Counts:")
print(trades['Direction'].value_counts())

# Analyze trade outcomes
if 'PnL' in trades.columns:
        profitable_trades = trades[trades['PnL'] > 0]
        loss_trades = trades[trades['PnL'] < 0]
        
        print(f"\n--- Trade Outcomes ---")
        print(f"Number of profitable exits: {len(profitable_trades)}")
        print(f"Number of loss exits (cut loss): {len(loss_trades)}")
        print(f"Win rate: {len(profitable_trades) / len(trades) * 100:.2f}%")
        print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
        print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")
    



--- Trade Statistics for abc ---
Total number of trades: 30

Trade Direction Counts:
Direction
Long    30
Name: count, dtype: int64

--- Trade Outcomes ---
Number of profitable exits: 24
Number of loss exits (cut loss): 6
Win rate: 80.00%
Average profit per winning trade: $1691.00
Average loss per losing trade: $-171.27


In [4]:
# Multiple stocks in a loop
stocks = ["CNP", "MDU", "CWCO", "NEE", "DUK"]
results = {}

for stock in stocks:
    print(f"\n{'='*60}")
    print(f"Processing {stock}")
    print(f"{'='*60}")
    portfolio, metrics, actions = run_ppo_trading_pipeline(
        stock_symbol=stock, 
        config=config,
        show_plot=True  # Don't show plots in loop
    )
    results[stock] = {'portfolio': portfolio, 'metrics': metrics, 'actions': actions}



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



Processing CNP
Data loaded successfully for CNP.
Training PPO Agent for CNP...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2322 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2013         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0050826063 |
|    clip_fraction        | 0.0315       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0.107        |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0123       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00813     |
|    

INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 16.65%
INFO:BacktesterRL:Buy & Hold Return: 29.90%
INFO:BacktesterRL:Outperformance: -13.25%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for CNP ---
Total Return (%): 16.6500
Annual Return (%): 29.4200
Sharpe Ratio: 3.3401
Sortino Ratio: 7.8293
Max Drawdown (%): -2.8000
Calmar Ratio: 10.4953
Win Rate (%): 80.0000
Total Trades: 20.0000
Final Value ($): 116653.0800




INFO:src.models.model_trainer_rl_v2_2_buyhold:Training PPO...



Processing MDU
Data loaded successfully for MDU.
Training PPO Agent for MDU...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2205 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1918        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003931541 |
|    clip_fraction        | 0.0203      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | -0.0143     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0962      |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00512    |
|    std             

KeyboardInterrupt: 