In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2_buyhold import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


# Load the project configuration once for the whole notebook. The
# 'reinforcement_learning' section (and its 'environment' sub-section) is what
# run_sac_trading_pipeline reads further down.
# NOTE(review): the path is relative — presumably resolved against the current
# working directory by load_config; confirm where the notebook is launched from.
config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2 SAC Agent

### Function

In [8]:
def run_sac_trading_pipeline(stock_symbol, config, year='2022', save_path="models/", show_plot=True):
    """
    Complete SAC trading pipeline: load data, train model, generate predictions, and backtest.

    The processed CSV for the symbol is split chronologically (first 70% for
    training, remaining 30% for testing). A SAC agent is trained on the first
    part, saved under ``save_path``, reloaded, and then rolled out
    deterministically on the second part. The resulting actions are handed to
    ``PortfolioBacktesterRL`` as portfolio weights.

    Parameters:
    -----------
    stock_symbol : str
        Stock ticker symbol (e.g., 'CNP', 'MDU')
    config : dict
        Configuration dictionary loaded from config.yaml. Must contain a
        'reinforcement_learning' section with an 'environment' sub-section.
    year : str, optional
        Year for data file selection (default: '2022')
    save_path : str, optional
        Directory to save/load models (default: 'models/')
    show_plot : bool, optional
        Whether to display the portfolio plot (default: True). When True the
        per-trade statistics are printed as well.

    Returns:
    --------
    tuple : (portfolio, metrics, actions)
        - portfolio: VectorBT portfolio object
        - metrics: Dictionary of performance metrics
        - actions: Array of predicted actions

    Raises:
    -------
    FileNotFoundError
        If ``data/processed/{stock_symbol}_processed_{year}.csv`` does not exist.
    """
    # 1. Load Data
    data_path = f'data/processed/{stock_symbol}_processed_{year}.csv'
    try:
        data = pd.read_csv(data_path)
    except FileNotFoundError as exc:
        # Raise instead of sys.exit(): inside a notebook sys.exit() surfaces as
        # a SystemExit that aborts the calling loop without a useful traceback.
        raise FileNotFoundError(
            f"Data file not found for {stock_symbol}: {data_path}. Check path."
        ) from exc

    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data = data.set_index('Date')
    print(f"Data loaded successfully for {stock_symbol}.")

    # 2. Split Train/Test (chronological: no shuffling, test data follows train data)
    split_idx = int(len(data) * 0.7)
    train_df = data.iloc[:split_idx]
    test_df = data.iloc[split_idx:]

    # Environment settings are resolved once so that the training and the test
    # environment are guaranteed to agree.
    # NOTE(review): the previous code fell back to 10000 for training but
    # 100000 for testing when 'initial_balance' was missing from the config;
    # 100000 is used for both here — confirm this is the intended default.
    env_params = config['reinforcement_learning']['environment']
    initial_balance = env_params.get('initial_balance', 100000)
    commission = env_params.get('commission', 0.001)
    lookback_window = env_params.get('lookback_window', 30)

    # 3. Training Phase
    print(f"Training sac Agent for {stock_symbol}...")
    trainer = ModelTrainerRL(config['reinforcement_learning'])

    env_train = TradingEnvRL(
        train_df,
        initial_balance=initial_balance,
        commission=commission,
        lookback_window=lookback_window,
        reward_func='profit'
    )

    trainer.train_sac(env_train)
    trainer.save_models(save_path)
    print("Training Complete. Models saved.")

    # 4. Inference Phase
    print("Generating Agent Predictions on Test Data...")
    model = SAC.load(os.path.join(save_path, "sac_model"))

    env_test = TradingEnvRL(
        test_df,
        initial_balance=initial_balance,
        commission=commission,
        lookback_window=lookback_window,
        reward_func='profit'
    )

    vec_env_test = DummyVecEnv([lambda: env_test])

    # Reuse the observation-normalisation statistics saved during training, but
    # freeze them (training=False) and leave rewards un-normalised for evaluation.
    norm_path = os.path.join(save_path, "sac_vecnormalize.pkl")
    if os.path.exists(norm_path):
        vec_env_test = VecNormalize.load(norm_path, vec_env_test)
        vec_env_test.training = False
        vec_env_test.norm_reward = False
    else:
        print("WARNING: Normalization stats not found. Model predictions may be garbage.")

    obs = vec_env_test.reset()
    done = [False]
    actions = []

    # Roll the policy through a single episode of the (single-env) vectorised
    # test environment, recording the action taken at every step.
    while not done[0]:
        action, _ = model.predict(obs, deterministic=True)
        actions.append(action[0])
        obs, _, done, _ = vec_env_test.step(action)

    print(f"Generated {len(actions)} actions.")
    action_weights = np.array(actions).flatten()

    # 5. Backtesting Phase
    print("Running Backtest...")
    backtester = PortfolioBacktesterRL(env_params)

    portfolio = backtester.run_backtest(
        price_data=test_df['close'],
        predicted_weights=action_weights,
        lookback_window=lookback_window
    )

    # The return value is not used here; the call is kept because the original
    # pipeline made it between run_backtest and get_performance_metrics.
    # NOTE(review): presumably it logs the buy-and-hold comparison — confirm.
    backtester.compare_with_buy_and_hold_rl()
    metrics = backtester.get_performance_metrics()

    print(f"\n--- Strategy Performance for {stock_symbol} ---")
    for k, v in metrics.items():
        try:
            print(f"{k}: {v:.4f}")
        except (TypeError, ValueError):
            # Non-numeric metric (e.g. None or a string): print it unformatted
            # rather than aborting the whole run.
            print(f"{k}: {v}")

    if show_plot:
        portfolio.plot().show()

        # Print trade statistics
        trades = portfolio.trades.records_readable
        total_trades = len(trades)
        print(f"\n--- Trade Statistics for {stock_symbol} ---")
        print(f"Total number of trades: {total_trades}")

        # Analyze trade outcomes
        if 'PnL' in trades.columns:
            profitable_trades = trades[trades['PnL'] > 0]
            loss_trades = trades[trades['PnL'] < 0]

            print(f"\n--- Trade Outcomes ---")
            print(f"Number of profitable exits: {len(profitable_trades)}")
            print(f"Number of loss exits (cut loss): {len(loss_trades)}")
            if total_trades > 0:
                print(f"Win rate: {len(profitable_trades) / total_trades * 100:.2f}%")
            else:
                # Avoid ZeroDivisionError when the strategy never traded.
                print("Win rate: N/A (no trades)")
            print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
            print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")

    return portfolio, metrics, action_weights

In [3]:
# ## Single stock
# stock_symbol = "CNP"
# portfolio, metrics, actions = run_sac_trading_pipeline(stock_symbol, config)

In [4]:
# # portfolio.trades.records_readable
# # Print trade statistics
# trades = portfolio.trades.records_readable
# print(f"\n--- Trade Statistics for abc ---")
# print(f"Total number of trades: {len(trades)}")
# print("\nTrade Direction Counts:")
# print(trades['Direction'].value_counts())

# # Analyze trade outcomes
# if 'PnL' in trades.columns:
#         profitable_trades = trades[trades['PnL'] > 0]
#         loss_trades = trades[trades['PnL'] < 0]
        
#         print(f"\n--- Trade Outcomes ---")
#         print(f"Number of profitable exits: {len(profitable_trades)}")
#         print(f"Number of loss exits (cut loss): {len(loss_trades)}")
#         print(f"Win rate: {len(profitable_trades) / len(trades) * 100:.2f}%")
#         print(f"Average profit per winning trade: ${profitable_trades['PnL'].mean():.2f}" if len(profitable_trades) > 0 else "No profitable trades")
#         print(f"Average loss per losing trade: ${loss_trades['PnL'].mean():.2f}" if len(loss_trades) > 0 else "No losing trades")
    


In [9]:
# Train, evaluate and backtest a separate SAC agent for every ticker, keeping
# each run's outputs in `results` keyed by symbol.
stocks = ["AAPL", "AMZN", "TSLA", "BAC","MDU", "CWCO", "NEE", "DUK"]
results = {}

for stock in stocks:
    # Banner separating the (long) training logs of consecutive tickers.
    print("\n" + "=" * 60)
    print("Processing " + stock)
    print("=" * 60)

    # show_plot=True: the portfolio plot is rendered and the trade statistics
    # are printed for every ticker in the loop.
    portfolio, metrics, actions = run_sac_trading_pipeline(stock, config, show_plot=True)
    results[stock] = dict(portfolio=portfolio, metrics=metrics, actions=actions)



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



Processing AAPL
Data loaded successfully for AAPL.
Training sac Agent for AAPL...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 117      |
|    time_elapsed    | 20       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.24    |
|    critic_loss     | 0.0143   |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.13    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 116      |
|    time_elapsed    | 40       |
|    total_timesteps | 4696     |
| train/             |          |
|    actor_loss      | -6.86    |
|    critic_loss     | 0.00808  |
|    ent_coef        | 0.253    |
|    ent_coef_loss   | -2.32    |
|    learning_rate   | 0.0003   |
|    n_updates  

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 24.69%
INFO:BacktesterRL:Buy & Hold Return: 12.81%
INFO:BacktesterRL:Outperformance: 11.88%



--- Strategy Performance for AAPL ---
Total Return (%): 24.6900
Annual Return (%): 44.6900
Sharpe Ratio: 1.5328
Sortino Ratio: 2.7664
Max Drawdown (%): -9.2500
Calmar Ratio: 4.8298
Win Rate (%): 64.1500
Total Trades: 106.0000
Final Value ($): 124688.0800



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for AAPL ---
Total number of trades: 106

--- Trade Outcomes ---
Number of profitable exits: 68
Number of loss exits (cut loss): 38
Win rate: 64.15%
Average profit per winning trade: $620.43
Average loss per losing trade: $-460.55

Processing AMZN
Data loaded successfully for AMZN.
Training sac Agent for AMZN...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 110      |
|    time_elapsed    | 21       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.3     |
|    critic_loss     | 0.008    |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.13    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 105      |
|    time_elapsed    | 44       |
|

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 49.00%
INFO:BacktesterRL:Buy & Hold Return: -0.92%
INFO:BacktesterRL:Outperformance: 49.92%


Generated 218 actions.
Running Backtest...

--- Strategy Performance for AMZN ---
Total Return (%): 49.0000
Annual Return (%): 94.9700
Sharpe Ratio: 2.1953
Sortino Ratio: 4.2582
Max Drawdown (%): -16.3700
Calmar Ratio: 5.8016
Win Rate (%): 56.8800
Total Trades: 109.0000
Final Value ($): 148999.1000



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for AMZN ---
Total number of trades: 109

--- Trade Outcomes ---
Number of profitable exits: 62
Number of loss exits (cut loss): 47
Win rate: 56.88%
Average profit per winning trade: $1166.32
Average loss per losing trade: $-496.02

Processing TSLA
Data loaded successfully for TSLA.
Training sac Agent for TSLA...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 90       |
|    time_elapsed    | 25       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.22    |
|    critic_loss     | 0.0107   |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.14    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 92       |
|    time_elapsed    | 50       |


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 53.26%
INFO:BacktesterRL:Buy & Hold Return: 0.02%
INFO:BacktesterRL:Outperformance: 53.23%


Generated 218 actions.
Running Backtest...

--- Strategy Performance for TSLA ---
Total Return (%): 53.2600
Annual Return (%): 104.3900
Sharpe Ratio: 2.0671
Sortino Ratio: 5.3577
Max Drawdown (%): -10.9000
Calmar Ratio: 9.5727
Win Rate (%): 52.4300
Total Trades: 103.0000
Final Value ($): 153258.8500



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for TSLA ---
Total number of trades: 103

--- Trade Outcomes ---
Number of profitable exits: 54
Number of loss exits (cut loss): 49
Win rate: 52.43%
Average profit per winning trade: $1239.03
Average loss per losing trade: $-278.55

Processing BAC
Data loaded successfully for BAC.
Training sac Agent for BAC...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 94       |
|    time_elapsed    | 24       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.25    |
|    critic_loss     | 0.0109   |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.15    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 95       |
|    time_elapsed    | 48       |
|  

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 34.20%
INFO:BacktesterRL:Buy & Hold Return: 14.96%
INFO:BacktesterRL:Outperformance: 19.23%


Generated 218 actions.
Running Backtest...

--- Strategy Performance for BAC ---
Total Return (%): 34.2000
Annual Return (%): 63.6400
Sharpe Ratio: 2.9827
Sortino Ratio: 5.4737
Max Drawdown (%): -5.6600
Calmar Ratio: 11.2511
Win Rate (%): 77.1900
Total Trades: 114.0000
Final Value ($): 134197.4700



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for BAC ---
Total number of trades: 114

--- Trade Outcomes ---
Number of profitable exits: 88
Number of loss exits (cut loss): 26
Win rate: 77.19%
Average profit per winning trade: $503.40
Average loss per losing trade: $-388.53

Processing MDU
Data loaded successfully for MDU.
Training sac Agent for MDU...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 74       |
|    time_elapsed    | 31       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.24    |
|    critic_loss     | 0.00692  |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.13    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 57       |
|    time_elapsed    | 81       |
|    

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 42.89%
INFO:BacktesterRL:Buy & Hold Return: 18.21%
INFO:BacktesterRL:Outperformance: 24.68%


Generated 218 actions.
Running Backtest...

--- Strategy Performance for MDU ---
Total Return (%): 42.8900
Annual Return (%): 81.7700
Sharpe Ratio: 4.5696
Sortino Ratio: 14.8169
Max Drawdown (%): -2.9800
Calmar Ratio: 27.4167
Win Rate (%): 61.3200
Total Trades: 106.0000
Final Value ($): 142892.2000



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for MDU ---
Total number of trades: 106

--- Trade Outcomes ---
Number of profitable exits: 65
Number of loss exits (cut loss): 41
Win rate: 61.32%
Average profit per winning trade: $731.21
Average loss per losing trade: $-113.08

Processing CWCO
Data loaded successfully for CWCO.
Training sac Agent for CWCO...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 104      |
|    time_elapsed    | 22       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.2     |
|    critic_loss     | 0.00982  |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.14    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 103      |
|    time_elapsed    | 45       |
| 

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218


Saved model and normalization stats for models/sac_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 56.80%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 24.12%



--- Strategy Performance for CWCO ---
Total Return (%): 56.8000
Annual Return (%): 112.3600
Sharpe Ratio: 2.9625
Sortino Ratio: 5.8380
Max Drawdown (%): -7.8100
Calmar Ratio: 14.3897
Win Rate (%): 67.6200
Total Trades: 105.0000
Final Value ($): 156802.4200



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for CWCO ---
Total number of trades: 105

--- Trade Outcomes ---
Number of profitable exits: 71
Number of loss exits (cut loss): 34
Win rate: 67.62%
Average profit per winning trade: $967.04
Average loss per losing trade: $-348.76

Processing NEE
Data loaded successfully for NEE.
Training sac Agent for NEE...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 108      |
|    time_elapsed    | 21       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.24    |
|    critic_loss     | 0.00951  |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.14    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 102      |
|    time_elapsed    | 45       |
|   

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 42.26%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 14.24%


Generated 218 actions.
Running Backtest...

--- Strategy Performance for NEE ---
Total Return (%): 42.2600
Annual Return (%): 80.4300
Sharpe Ratio: 3.1972
Sortino Ratio: 6.2045
Max Drawdown (%): -7.1100
Calmar Ratio: 11.3048
Win Rate (%): 57.1400
Total Trades: 98.0000
Final Value ($): 142258.6300



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.



INFO:src.models.model_trainer_rl_v2_2_buyhold:Training SAC for 150000 timesteps



--- Trade Statistics for NEE ---
Total number of trades: 98

--- Trade Outcomes ---
Number of profitable exits: 56
Number of loss exits (cut loss): 42
Win rate: 57.14%
Average profit per winning trade: $1100.97
Average loss per losing trade: $-461.81

Processing DUK
Data loaded successfully for DUK.
Training sac Agent for DUK...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 103      |
|    time_elapsed    | 22       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.1     |
|    critic_loss     | 0.00966  |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.13    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 103      |
|    time_elapsed    | 45       |
|    

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 32.13%
INFO:BacktesterRL:Buy & Hold Return: 19.81%
INFO:BacktesterRL:Outperformance: 12.32%


Saved model and normalization stats for models/sac_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance for DUK ---
Total Return (%): 32.1300
Annual Return (%): 59.4400
Sharpe Ratio: 3.2595
Sortino Ratio: 5.4251
Max Drawdown (%): -4.2100
Calmar Ratio: 14.1173
Win Rate (%): 69.6100
Total Trades: 102.0000
Final Value ($): 132128.5000



--- Trade Statistics for DUK ---
Total number of trades: 102

--- Trade Outcomes ---
Number of profitable exits: 71
Number of loss exits (cut loss): 31
Win rate: 69.61%
Average profit per winning trade: $588.10
Average loss per losing trade: $-310.52



Setting an Index with object dtype into a DataFrame will stop inferring another dtype in a future version. Cast the Index explicitly before setting it into the DataFrame.

