In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2_2 import ModelTrainerRL, TradingEnvRL
from stable_baselines3 import PPO, A2C, DDPG, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


# Load the project configuration; the training cells below read its
# 'reinforcement_learning' section.
# NOTE(review): this path is resolved against the current working directory, while
# sys.path above is extended with the *parent* directory — confirm which directory
# the notebook is expected to be launched from.
config = load_config("config/config.yaml")

  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after Septembe

## V2.2 - SAC

In [2]:



# 2. Load Data
# Processed CSV for this run; the path is relative to the current working directory.
data_path = 'data/processed/CWCO_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Re-raise instead of calling sys.exit(): the cell then fails with the original
    # FileNotFoundError traceback rather than a bare SystemExit.
    raise

# Use the 'Date' column (when present) as a datetime index.
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Resolve the environment settings once so the training and test environments
# always agree. Previously the fallback for 'initial_balance' was 10000 for
# training but 100000 for testing.
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# 3. Split Train/Test
# Positional 70/30 split: the first 70% of rows train, the remainder tests.
# NOTE(review): this is only a chronological split if the CSV rows are already
# in date order — confirm.
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Each split must be longer than the lookback window, otherwise there is no
# room for even one action after the initial window.
if min(len(train_df), len(test_df)) <= lookback_window:
    raise ValueError(
        f"Not enough rows for lookback_window={lookback_window}: "
        f"train={len(train_df)}, test={len(test_df)}"
    )

# --- TRAINING PHASE ---
print("Training SAC Agent...")

# Create Training Environment
env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_sac(env_train)

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
# NOTE(review): the save location is fixed, so training on another dataset with
# the same path presumably overwrites these artifacts — confirm.
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = SAC.load(os.path.join(save_path, "sac_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "sac_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    # Best-effort: continue with un-normalized observations, but say so loudly.
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
# Roll the deterministic policy through one full episode of the test environment.
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action for the current observation
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])  # Extract the action of the single wrapped env

    # Step the environment; rewards and infos are not needed here
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment.
# Usually, passing the full price_data and the lookback_window allows
# the backtester to slice the prices correctly to match the actions.
# NOTE(review): flatten() assumes one scalar weight per step — confirm the
# environment's action space is 1-D.
portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions).flatten(),
    lookback_window=lookback_window
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()






Data loaded successfully.
Training SAC Agent...
Using cpu device


INFO:src.models.model_trainer_rl_v2_2:Training SAC for 150000 timesteps


---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 96       |
|    time_elapsed    | 24       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.2     |
|    critic_loss     | 0.0139   |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.12    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 94       |
|    time_elapsed    | 49       |
|    total_timesteps | 4696     |
| train/             |          |
|    actor_loss      | -6.51    |
|    critic_loss     | 0.00761  |
|    ent_coef        | 0.253    |
|    ent_coef_loss   | -2.23    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4595     |
---------------------------------
---------------------------------
| time/       

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Generated 218 actions.
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 142.57%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 109.89%



--- Strategy Performance ---
Total Return (%): 142.5700
Annual Return (%): 340.9000
Sharpe Ratio: 5.0000
Sortino Ratio: 12.8690
Max Drawdown (%): -7.1700
Calmar Ratio: 47.5450
Win Rate (%): 70.5900
Total Trades: 119.0000
Final Value ($): 242571.6000


In [3]:



# 2. Load Data
# NOTE(review): this cell repeats the CWCO pipeline with only the CSV path changed;
# consider extracting a single function parameterized by the data path.
# Processed CSV for this run; the path is relative to the current working directory.
data_path = 'data/processed/NEE_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Re-raise instead of calling sys.exit(): the cell then fails with the original
    # FileNotFoundError traceback rather than a bare SystemExit.
    raise

# Use the 'Date' column (when present) as a datetime index.
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# Initialize Trainer (Pass config if needed, or defaults)
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# Resolve the environment settings once so the training and test environments
# always agree. Previously the fallback for 'initial_balance' was 10000 for
# training but 100000 for testing.
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# 3. Split Train/Test
# Positional 70/30 split: the first 70% of rows train, the remainder tests.
# NOTE(review): this is only a chronological split if the CSV rows are already
# in date order — confirm.
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Each split must be longer than the lookback window, otherwise there is no
# room for even one action after the initial window.
if min(len(train_df), len(test_df)) <= lookback_window:
    raise ValueError(
        f"Not enough rows for lookback_window={lookback_window}: "
        f"train={len(train_df)}, test={len(test_df)}"
    )

# --- TRAINING PHASE ---
print("Training SAC Agent...")

# Create Training Environment
env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# Train and Save
# This automatically handles the VecNormalize wrapper creation
result = trainer.train_sac(env_train)

# IMPORTANT: Save the model and the normalization stats to disk
# We will reload them for the test phase to ensure consistency
# NOTE(review): the save location is fixed, so this run presumably overwrites any
# model/stats saved earlier under the same path — confirm.
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE (UPDATED) ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = SAC.load(os.path.join(save_path, "sac_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# 3. Wrap it in DummyVecEnv (Required for SB3)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics
# This applies the Training Mean/Variance to the Test Data (Crucial!)
norm_path = os.path.join(save_path, "sac_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    # Best-effort: continue with un-normalized observations, but say so loudly.
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
# Roll the deterministic policy through one full episode of the test environment.
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict action for the current observation
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])  # Extract the action of the single wrapped env

    # Step the environment; rewards and infos are not needed here
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

# Note: 'actions' corresponds to the period [lookback_window : end]
# Ensure your backtester handles this alignment.
# Usually, passing the full price_data and the lookback_window allows
# the backtester to slice the prices correctly to match the actions.
# NOTE(review): flatten() assumes one scalar weight per step — confirm the
# environment's action space is 1-D.
portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions).flatten(),
    lookback_window=lookback_window
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()




INFO:src.models.model_trainer_rl_v2_2:Training SAC for 150000 timesteps


Data loaded successfully.
Training SAC Agent...
Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 98       |
|    time_elapsed    | 23       |
|    total_timesteps | 2348     |
| train/             |          |
|    actor_loss      | -5.01    |
|    critic_loss     | 0.0172   |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | -1.11    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2247     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 96       |
|    time_elapsed    | 48       |
|    total_timesteps | 4696     |
| train/             |          |
|    actor_loss      | -6.54    |
|    critic_loss     | 0.00835  |
|    ent_coef        | 0.253    |
|    ent_coef_loss   | -2.21    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4595     |
-----------------

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 48.17%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 20.16%


Generated 218 actions.
Running Backtest...

--- Strategy Performance ---
Total Return (%): 48.1700
Annual Return (%): 93.1600
Sharpe Ratio: 2.9269
Sortino Ratio: 5.4960
Max Drawdown (%): -11.0700
Calmar Ratio: 8.4189
Win Rate (%): 53.9500
Total Trades: 152.0000
Final Value ($): 148173.6500
