In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2 import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2

In [3]:
# Load Data
try:
    data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Exit if no data
    sys.exit()

# Split Data
split_idx = int(len(data) * 0.8)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")


# ... Load Data ...
# Ensure you fit your scaler ONLY on train_df, then transform test_df
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING ---
trainer = ModelTrainerRL(config={'ppo': {'learning_rate': 0.0003}, 'total_timesteps': 20000})
env_train = TradingEnvRL(train_df, initial_balance=10000)

# This trains AND stores the VecNormalize stats internally
result = trainer.train_ppo(env_train) 

# Save properly
trainer.save_models("models/")

# --- INFERENCE / BACKTEST ---
print("Running Inference...")

# 1. Load Model
model = PPO.load("models/ppo_model")

# 2. Create Test Environment
env_test = TradingEnvRL(test_df, initial_balance=100000)
vec_env_test = DummyVecEnv([lambda: env_test])

# 3. CRITICAL FIX: Load Normalization Stats
# We must apply the same scaling (mean/var) learned during training to the test data.
# We set training=False so we don't update the stats based on test data.
norm_path = "models/ppo_vecnormalize.pkl"
vec_env_test = VecNormalize.load(norm_path, vec_env_test)
vec_env_test.training = False 
vec_env_test.norm_reward = False # Don't normalize rewards during test

obs = vec_env_test.reset()
done = False
actions = []

while not done:
    # Now 'obs' is properly scaled (e.g., near 0.0 instead of 150.0)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    
    # Step the vectorized environment
    obs, rewards, done, info = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")





INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Data loaded successfully.
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1814 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1109        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008379249 |
|    clip_fraction        | 0.0652      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.44       |
|    explained_variance   | -0.252      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0358      |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0102     |
|    std                  | 1.04        |
|    va

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 10000 timesteps


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Running Inference...
Generated 248 actions.


In [4]:

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/MDU_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1638 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1288         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0049444935 |
|    clip_fraction        | 0.0224       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.479       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.094        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00507     |
|    std                  | 1.01     

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 88.28%
INFO:BacktesterRL:Buy & Hold Return: 18.21%
INFO:BacktesterRL:Outperformance: 70.06%


Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 88.2800
Annual Return (%): 188.4700
Sharpe Ratio: 4.2726
Sortino Ratio: 7.8894
Max Drawdown (%): -9.1600
Calmar Ratio: 20.5729
Win Rate (%): 66.6700
Total Trades: 45.0000
Final Value ($): 188277.5000


In [5]:

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1797 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1321         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0072425175 |
|    clip_fraction        | 0.0415       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.0323      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.115        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0109      |
|    std                  | 0.999    

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 143.30%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 110.62%


Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 143.3000
Annual Return (%): 343.1200
Sharpe Ratio: 4.5309
Sortino Ratio: 9.6004
Max Drawdown (%): -6.5500
Calmar Ratio: 52.3516
Win Rate (%): 62.2800
Total Trades: 114.0000
Final Value ($): 243298.4700


## PPO Agent

#### NEE

In [6]:



# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/NEE_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1584 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1202         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0041711084 |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.14         |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0718       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00575     |
|    std                  | 0.998    

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 188.10%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 160.08%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 188.1000
Annual Return (%): 488.0400
Sharpe Ratio: 5.7212
Sortino Ratio: 11.6267
Max Drawdown (%): -6.5800
Calmar Ratio: 74.1300
Win Rate (%): 67.1200
Total Trades: 73.0000
Final Value ($): 288095.6900


### AGX

In [7]:


# 1. Load Configuration
config = load_config("config/config.yaml")

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/AGX_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1456 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1179         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0076814424 |
|    clip_fraction        | 0.0423       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.00769     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.125        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00843     |
|    std                  | 1        

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 43.36%
INFO:BacktesterRL:Buy & Hold Return: 123.31%
INFO:BacktesterRL:Outperformance: -79.96%


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 43.3600
Annual Return (%): 82.7700
Sharpe Ratio: 1.1629
Sortino Ratio: 1.7377
Max Drawdown (%): -35.8200
Calmar Ratio: 2.3107
Win Rate (%): 52.2400
Total Trades: 67.0000
Final Value ($): 143357.3400


In [8]:


# 1. Load Configuration
config = load_config("config/config.yaml")

# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv('data/processed/AGX_processed.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training PPO...


Data loaded successfully.
Training PPO Agent...
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2388 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2043         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0073090754 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0.0821       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0398       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00844     |
|    std                  | 1.01     

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 726, Predictions: 665
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 749.78%
INFO:BacktesterRL:Buy & Hold Return: 635.13%
INFO:BacktesterRL:Outperformance: 114.65%


Generated 665 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 749.7800
Annual Return (%): 223.6500
Sharpe Ratio: 4.1127
Sortino Ratio: 13.0749
Max Drawdown (%): -9.1900
Calmar Ratio: 24.3298
Win Rate (%): 73.1600
Total Trades: 380.0000
Final Value ($): 849781.1200


### BKH

In [2]:
stock_symbol = "BKH"
# 2. Load Data
try:
    # ensuring your path is correct
    data = pd.read_csv(f'data/processed/{stock_symbol}_processed_2022.csv')
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])
        data.set_index('Date', inplace=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    sys.exit()

# 3. Split Train/Test
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Create Training Environment
env_train = TradingEnvRL(
    train_df, 
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_ppo(env_train) 

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df, 
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func='profit' 
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, f"ppo_vecnormalize_{stock_symbol}.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action (obs is now correctly scaled)
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0]) # Extract single action
    
    # Step the environment
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params) 

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment. 
# Usually, passing the full price_data and the lookback_window allows 
# the backtester to slice the prices correctly to match the actions.
portfolio = backtester.run_backtest(
    price_data=test_df['close'], 
    predicted_weights=np.array(actions).flatten(),
    lookback_window=env_params.get('lookback_window', 30)
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
    
# Plot
portfolio.plot().show()






Data loaded successfully.
Training PPO Agent...
Using cpu device


INFO:src.models.model_trainer_rl_v2_2:Training PPO...


-----------------------------
| time/              |      |
|    fps             | 1657 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1331         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0030640177 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -0.366       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0151       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0046      |
|    std                  | 1.02         |
|    value_loss           | 0.278        |
----------------

INFO:src.models.model_trainer_rl_v2_2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Saved model and normalization stats for models/ppo_vecnormalize.pkl
Training Complete. Models saved.
Generating Agent Predictions on Test Data...
Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -26.69%
INFO:BacktesterRL:Buy & Hold Return: 31.05%
INFO:BacktesterRL:Outperformance: -57.74%



--- Strategy Performance ---
Total Return (%): -26.6900
Annual Return (%): -40.5400
Sharpe Ratio: -2.0651
Sortino Ratio: -2.7834
Max Drawdown (%): -28.0700
Calmar Ratio: -1.4445
Win Rate (%): 2.3100
Total Trades: 130.0000
Final Value ($): 73308.9300
