In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2 import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2 PPO Agent

#### Template

#### MDU - YES

In [2]:

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/MDU_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()






Data loaded successfully.
Training PPO Agent...
Using cpu device


INFO:src.models.model_trainer_rl_v2_2:Training PPO...


-----------------------------
| time/              |      |
|    fps             | 2356 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1428        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.004436191 |
|    clip_fraction        | 0.022       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | -0.111      |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0437      |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00517    |
|    std                  | 0.997       |
|    value_loss           | 0.493       |
----------------------------------

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 34.67%
INFO:BacktesterRL:Buy & Hold Return: 18.21%
INFO:BacktesterRL:Outperformance: 16.45%



--- Strategy Performance ---
Total Return (%): 34.6700
Annual Return (%): 64.5900
Sharpe Ratio: 1.9285
Sortino Ratio: 3.2204
Max Drawdown (%): -9.9900
Calmar Ratio: 6.4648
Win Rate (%): 46.5300
Total Trades: 101.0000
Final Value ($): 134665.2100


In [3]:
portfolio.trades.records_readable['Direction'].value_counts()

Direction
Short    91
Long     10
Name: count, dtype: int64

#### CWCO - YES

In [None]:

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()
portfolio.trades.records_readable['Direction'].value_counts()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2666 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2180         |
|    iterations           | 2            |
|    time_elapsed         | 1            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0051948735 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.164       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0984       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00543     |
|    std                  | 1        

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 243.29%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 210.61%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 243.2900
Annual Return (%): 688.6100
Sharpe Ratio: 6.2360
Sortino Ratio: 15.5453
Max Drawdown (%): -3.4400
Calmar Ratio: 199.9611
Win Rate (%): 81.6700
Total Trades: 60.0000
Final Value ($): 343290.0800


In [6]:
portfolio.trades.records_readable['Direction'].value_counts()

Direction
Short    42
Long     18
Name: count, dtype: int64

In [None]:
portfolio.trades.stats()

Start                          2025-01-10 00:00:00-05:00
End                            2025-11-20 00:00:00-05:00
Period                                 218 days 00:00:00
First Trade Start              2025-01-10 00:00:00-05:00
Last Trade End                 2025-11-20 00:00:00-05:00
Coverage                               218 days 00:00:00
Overlap Coverage                       132 days 00:00:00
Total Records                                         60
Total Long Trades                                     18
Total Short Trades                                    42
Total Closed Trades                                   59
Total Open Trades                                      1
Open Trade PnL                               -6301.33963
Win Rate [%]                                   83.050847
Max Win Streak                                        11
Max Loss Streak                                        2
Best Trade [%]                                 21.336127
Worst Trade [%]                

: 

#### NEE - YES

In [6]:



# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/NEE_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1584 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1202         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0041711084 |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.14         |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0718       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00575     |
|    std                  | 0.998    

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 188.10%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 160.08%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 188.1000
Annual Return (%): 488.0400
Sharpe Ratio: 5.7212
Sortino Ratio: 11.6267
Max Drawdown (%): -6.5800
Calmar Ratio: 74.1300
Win Rate (%): 67.1200
Total Trades: 73.0000
Final Value ($): 288095.6900


#### AGX 2022 - NO

In [7]:


# 1. Load Configuration
config = load_config("config/config.yaml")

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/AGX_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1456 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1179         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0076814424 |
|    clip_fraction        | 0.0423       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.00769     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.125        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00843     |
|    std                  | 1        

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 43.36%
INFO:BacktesterRL:Buy & Hold Return: 123.31%
INFO:BacktesterRL:Outperformance: -79.96%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 43.3600
Annual Return (%): 82.7700
Sharpe Ratio: 1.1629
Sortino Ratio: 1.7377
Max Drawdown (%): -35.8200
Calmar Ratio: 2.3107
Win Rate (%): 52.2400
Total Trades: 67.0000
Final Value ($): 143357.3400


#### AGX FULL - YES

In [None]:


# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/AGX_processed.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2388 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2043         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0073090754 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0.0821       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0398       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00844     |
|    std                  | 1.01     

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 726, Predictions: 665
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 749.78%
INFO:BacktesterRL:Buy & Hold Return: 635.13%
INFO:BacktesterRL:Outperformance: 114.65%


Generated 665 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 749.7800
Annual Return (%): 223.6500
Sharpe Ratio: 4.1127
Sortino Ratio: 13.0749
Max Drawdown (%): -9.1900
Calmar Ratio: 24.3298
Win Rate (%): 73.1600
Total Trades: 380.0000
Final Value ($): 849781.1200


#### BKH - NO

In [2]:
stock_symbol = "BKH"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize_{stock_symbol}.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()






Data loaded successfully.
Training PPO Agent...
Using cpu device


INFO:src.models.model_trainer_rl_v2_2:Training PPO...


-----------------------------
| time/              |      |
|    fps             | 1657 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1331         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0030640177 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -0.366       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0151       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0046      |
|    std                  | 1.02         |
|    value_loss           | 0.278        |
----------------

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -26.69%
INFO:BacktesterRL:Buy & Hold Return: 31.05%
INFO:BacktesterRL:Outperformance: -57.74%



--- Strategy Performance ---
Total Return (%): -26.6900
Annual Return (%): -40.5400
Sharpe Ratio: -2.0651
Sortino Ratio: -2.7834
Max Drawdown (%): -28.0700
Calmar Ratio: -1.4445
Win Rate (%): 2.3100
Total Trades: 130.0000
Final Value ($): 73308.9300


#### DUK - YES

In [4]:
stock_symbol = "DUK"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize_{stock_symbol}.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2308 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1916         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0038990625 |
|    clip_fraction        | 0.0197       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.00413      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0696       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0051      |
|    std                  | 1.01     

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 16.99%
INFO:BacktesterRL:Buy & Hold Return: 19.81%
INFO:BacktesterRL:Outperformance: -2.82%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 16.9900
Annual Return (%): 30.0500
Sharpe Ratio: 1.4535
Sortino Ratio: 2.1369
Max Drawdown (%): -8.5400
Calmar Ratio: 3.5209
Win Rate (%): 100.0000
Total Trades: 58.0000
Final Value ($): 116992.6500


#### YORW - YES

In [11]:
stock_symbol = "YORW"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize_{stock_symbol}.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1894 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1417         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0050285477 |
|    clip_fraction        | 0.02         |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.202        |
|    learning_rate        | 0.0002       |
|    loss                 | 0.039        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0053      |
|    std                  | 1.01     

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 1.31%
INFO:BacktesterRL:Buy & Hold Return: 4.25%
INFO:BacktesterRL:Outperformance: -2.94%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 1.3100
Annual Return (%): 2.2100
Sharpe Ratio: 0.2088
Sortino Ratio: 0.2985
Max Drawdown (%): -15.6400
Calmar Ratio: 0.1411
Win Rate (%): 69.2300
Total Trades: 117.0000
Final Value ($): 101311.7700


#### PCG

In [15]:


stock_symbol = "PCG"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize_{stock_symbol}.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1833 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1452         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0046446966 |
|    clip_fraction        | 0.0198       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.233       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0807       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00547     |
|    std                  | 1.01     

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 1.67%
INFO:BacktesterRL:Buy & Hold Return: -7.38%
INFO:BacktesterRL:Outperformance: 9.06%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 1.6700
Annual Return (%): 2.8200
Sharpe Ratio: 0.2358
Sortino Ratio: 0.3283
Max Drawdown (%): -20.8800
Calmar Ratio: 0.1350
Win Rate (%): 75.6100
Total Trades: 123.0000
Final Value ($): 101674.5600


#### SRE

In [6]:


stock_symbol = "SRE"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1789 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1200         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0058072563 |
|    clip_fraction        | 0.0301       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.136       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0189       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00768     |
|    std                  | 0.999    

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 148.06%
INFO:BacktesterRL:Buy & Hold Return: 18.01%
INFO:BacktesterRL:Outperformance: 130.05%


Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 148.0600
Annual Return (%): 357.7200
Sharpe Ratio: 4.4616
Sortino Ratio: 13.4872
Max Drawdown (%): -4.1000
Calmar Ratio: 87.2921
Win Rate (%): 64.0000
Total Trades: 50.0000
Final Value ($): 248055.7200


#### CNP

In [5]:
 

stock_symbol = "CNP"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1568 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1172        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005840274 |
|    clip_fraction        | 0.0351      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | 0.00921     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.052       |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0076     |
|    std                  | 1.01        |
|    value_

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -7.59%
INFO:BacktesterRL:Buy & Hold Return: 29.90%
INFO:BacktesterRL:Outperformance: -37.50%


Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): -7.5900
Annual Return (%): -12.3900
Sharpe Ratio: -0.5636
Sortino Ratio: -0.8129
Max Drawdown (%): -18.3300
Calmar Ratio: -0.6758
Win Rate (%): 15.1800
Total Trades: 112.0000
Final Value ($): 92406.0400
