In [None]:
import os
import sys

# Put the parent of the current working directory at the front of sys.path so
# the `src.*` imports below can resolve. This must run before those imports.
# NOTE(review): the file paths used in this notebook (e.g. "config/config.yaml")
# are resolved against the working directory, not against this sys.path entry —
# confirm the notebook is launched from the directory you expect.
sys.path.insert(0, os.path.abspath('..'))

# Third-party
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# Project-local (needs the sys.path tweak above)
from src.models.model_trainer_rl_v4 import ModelTrainerRL, TradingEnvRL
from src.utils.config_loader import load_config
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL

# Load configuration; later cells read config['reinforcement_learning'].
config = load_config("config/config.yaml")

In [2]:


# PPO experiment for one ticker: load the processed CSV, split it
# chronologically by row position, train PPO on the first part, evaluate on
# the rest, then replay the policy's actions through PortfolioBacktesterRL.

# --- Parameters for this run -------------------------------------------------
TICKER = "CWCO"
DATA_PATH = f"data/processed/{TICKER}_processed_2022.csv"
TRAIN_FRACTION = 0.7   # share of rows (from the start of the file) used for training
EVAL_SEED = 42         # seed reused for evaluation and the inference rollout

# --- Load and prepare data ---------------------------------------------------
data = pd.read_csv(DATA_PATH)
if 'Date' in data.columns:
    # Parse the dates and index by them without mutating the frame in place.
    data = data.assign(Date=pd.to_datetime(data['Date'])).set_index('Date')

# --- Split into train/test (row order is preserved, no shuffling) ------------
split_idx = int(len(data) * TRAIN_FRACTION)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# --- Initialize trainer ------------------------------------------------------
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments. The trainer's log reports `scaled=True`, i.e. feature
# scaling is handled inside prepare_environment.
env_train, env_test = trainer.prepare_environment(
    df_train,
    df_test,
    reward_func="profit"  # alternatives noted by the author: "sortino", "cvar", "max_drawdown" (TODO confirm against ModelTrainerRL)
)

# --- Train PPO only ----------------------------------------------------------
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# --- Aggregate evaluation ----------------------------------------------------
# NOTE(review): only one seed is supplied, so the 95% CI below collapses to a
# single point. Add more seeds here for a meaningful interval.
seeds = [EVAL_SEED]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
# (label, key in metrics_agg, format spec). The CI bounds are printed with the
# same spec as the mean so percentages are not mixed with raw fractions.
summary_rows = [
    ("Total Return", "total_return", ".2%"),
    ("Sharpe Ratio", "sharpe_ratio", ".3f"),
    ("Sortino Ratio", "sortino_ratio", ".3f"),
    ("Max Drawdown", "max_drawdown", ".2%"),
]
for label, key, spec in summary_rows:
    stats = metrics_agg[key]
    ci_low, ci_high = stats['ci95']  # two-element [low, high] interval
    print(f"{label}: {stats['mean']:{spec}} (95% CI: {ci_low:{spec}} to {ci_high:{spec}})")

# --- Single run evaluation (if you only want one) ----------------------------
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=EVAL_SEED)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# --- Save the model and artifacts --------------------------------------------
# NOTE(review): this directory does not depend on TICKER, so every run of this
# workflow writes to the same place and replaces the previous run's artifacts.
trainer.save_models("models/ppo_artifacts")

# --- Inference: roll the trained policy through the test environment ---------
obs, _ = env_test.reset(seed=EVAL_SEED)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))  # keep the first action component as a plain float
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")
# NOTE(review): the rollout produces fewer actions than df_test has rows
# (217 vs 279 in the recorded output). run_backtest receives lookback_window
# for alignment — confirm the resulting alignment is the intended one.

# --- Run backtest ------------------------------------------------------------
print("Running Backtest...")

# The environment sub-config is passed to the backtester as a single dict.
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=df_test['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30)  # important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for name, value in metrics.items():
    try:
        print(f"{name}: {value:.4f}")
    except (TypeError, ValueError):
        # Non-numeric entries are printed as-is instead of aborting the cell.
        print(f"{name}: {value}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v4:Prepared training env with 648 steps (scaled=True)
INFO:src.models.model_trainer_rl_v4:Prepared testing env with 279 steps (scaled=True)


Training PPO...



In the future `np.object` will be defined as the corresponding NumPy scalar.

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:src.models.model_trainer_rl_v4:Training PPO for 200000 timesteps
INFO:src.models.model_trainer_rl_v4:Evaluation - Return: -31.82%, Sharpe: -1.471, Sortino: -1.991, Max DD: -41.95%
INFO:src.models.model_trainer_rl_v4:Multi-seed evaluation (n=1): Sharpe -1.471 ± CI(-1.471, -1.471)
INFO:src.models.model_trainer_rl_v4:Evaluation - Return: -31.82%, Sharpe: -1.471, Sortino: -1.991, Max DD: -41.95%
INFO:src.models.model_trainer_rl_v4:Saved results summary to models/ppo_artifacts\rl_results_summary.json
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 27

Training Complete!

--- Multi-Seed Evaluation Results ---
Total Return: -31.82% ± [-0.3182351808373237, -0.3182351808373237]
Sharpe Ratio: -1.471 ± [-1.471037598026103, -1.471037598026103]
Sortino Ratio: -1.991 ± [-1.9908669133215005, -1.9908669133215005]
Max Drawdown: -41.95% ± [-0.41947278142858635, -0.41947278142858635]

--- Single Run Evaluation ---
Total Return: -31.82%
Sharpe Ratio: -1.471

Generated 217 trading actions
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -30.36%
INFO:BacktesterRL:Buy & Hold Return: 31.49%
INFO:BacktesterRL:Outperformance: -61.85%



--- Strategy Performance ---
Total Return (%): -30.3600
Annual Return (%): -45.5900
Sharpe Ratio: -1.4745
Sortino Ratio: -1.9734
Max Drawdown (%): -42.1600
Calmar Ratio: -1.0815
Win Rate (%): 16.6700
Total Trades: 114.0000
Final Value ($): 69638.8700


In [3]:


# PPO experiment for one ticker: load the processed CSV, split it
# chronologically by row position, train PPO on the first part, evaluate on
# the rest, then replay the policy's actions through PortfolioBacktesterRL.

# --- Parameters for this run -------------------------------------------------
TICKER = "AAPL"
DATA_PATH = f"data/processed/{TICKER}_processed_2022.csv"
TRAIN_FRACTION = 0.7   # share of rows (from the start of the file) used for training
EVAL_SEED = 42         # seed reused for evaluation and the inference rollout

# --- Load and prepare data ---------------------------------------------------
data = pd.read_csv(DATA_PATH)
if 'Date' in data.columns:
    # Parse the dates and index by them without mutating the frame in place.
    data = data.assign(Date=pd.to_datetime(data['Date'])).set_index('Date')

# --- Split into train/test (row order is preserved, no shuffling) ------------
split_idx = int(len(data) * TRAIN_FRACTION)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# --- Initialize trainer ------------------------------------------------------
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments. The trainer's log reports `scaled=True`, i.e. feature
# scaling is handled inside prepare_environment.
env_train, env_test = trainer.prepare_environment(
    df_train,
    df_test,
    reward_func="profit"  # alternatives noted by the author: "sortino", "cvar", "max_drawdown" (TODO confirm against ModelTrainerRL)
)

# --- Train PPO only ----------------------------------------------------------
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# --- Aggregate evaluation ----------------------------------------------------
# NOTE(review): only one seed is supplied, so the 95% CI below collapses to a
# single point. Add more seeds here for a meaningful interval.
seeds = [EVAL_SEED]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
# (label, key in metrics_agg, format spec). The CI bounds are printed with the
# same spec as the mean so percentages are not mixed with raw fractions.
summary_rows = [
    ("Total Return", "total_return", ".2%"),
    ("Sharpe Ratio", "sharpe_ratio", ".3f"),
    ("Sortino Ratio", "sortino_ratio", ".3f"),
    ("Max Drawdown", "max_drawdown", ".2%"),
]
for label, key, spec in summary_rows:
    stats = metrics_agg[key]
    ci_low, ci_high = stats['ci95']  # two-element [low, high] interval
    print(f"{label}: {stats['mean']:{spec}} (95% CI: {ci_low:{spec}} to {ci_high:{spec}})")

# --- Single run evaluation (if you only want one) ----------------------------
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=EVAL_SEED)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# --- Save the model and artifacts --------------------------------------------
# NOTE(review): this directory does not depend on TICKER, so every run of this
# workflow writes to the same place and replaces the previous run's artifacts.
trainer.save_models("models/ppo_artifacts")

# --- Inference: roll the trained policy through the test environment ---------
obs, _ = env_test.reset(seed=EVAL_SEED)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))  # keep the first action component as a plain float
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")
# NOTE(review): the rollout produces fewer actions than df_test has rows
# (217 vs 279 in the recorded output). run_backtest receives lookback_window
# for alignment — confirm the resulting alignment is the intended one. In the
# recorded run the trainer's evaluation return and the backtest return differ
# noticeably, which is worth checking against this alignment.

# --- Run backtest ------------------------------------------------------------
print("Running Backtest...")

# The environment sub-config is passed to the backtester as a single dict.
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=df_test['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30)  # important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for name, value in metrics.items():
    try:
        print(f"{name}: {value:.4f}")
    except (TypeError, ValueError):
        # Non-numeric entries are printed as-is instead of aborting the cell.
        print(f"{name}: {value}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v4:Prepared training env with 648 steps (scaled=True)
INFO:src.models.model_trainer_rl_v4:Prepared testing env with 279 steps (scaled=True)
INFO:src.models.model_trainer_rl_v4:Training PPO for 200000 timesteps


Training PPO...


INFO:src.models.model_trainer_rl_v4:Evaluation - Return: -9.17%, Sharpe: -0.342, Sortino: -0.429, Max DD: -40.91%
INFO:src.models.model_trainer_rl_v4:Multi-seed evaluation (n=1): Sharpe -0.342 ± CI(-0.342, -0.342)
INFO:src.models.model_trainer_rl_v4:Evaluation - Return: -9.17%, Sharpe: -0.342, Sortino: -0.429, Max DD: -40.91%
INFO:src.models.model_trainer_rl_v4:Saved results summary to models/ppo_artifacts\rl_results_summary.json
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 217
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 12.85%
INFO:BacktesterRL:Buy & Hold Return: 13.99%
INFO:BacktesterRL:Outperformance: -1.13%


Training Complete!

--- Multi-Seed Evaluation Results ---
Total Return: -9.17% ± [-0.09167804158599785, -0.09167804158599785]
Sharpe Ratio: -0.342 ± [-0.3420297097068875, -0.3420297097068875]
Sortino Ratio: -0.429 ± [-0.42872269586915585, -0.42872269586915585]
Max Drawdown: -40.91% ± [-0.4090621371236639, -0.4090621371236639]

--- Single Run Evaluation ---
Total Return: -9.17%
Sharpe Ratio: -0.342

Generated 217 trading actions
Running Backtest...

--- Strategy Performance ---
Total Return (%): 12.8500
Annual Return (%): 22.5500
Sharpe Ratio: 0.7377
Sortino Ratio: 1.2182
Max Drawdown (%): -26.8100
Calmar Ratio: 0.8412
Win Rate (%): 46.8100
Total Trades: 47.0000
Final Value ($): 112853.3200


In [None]:


# PPO experiment for one ticker: load the processed CSV, split it
# chronologically by row position, train PPO on the first part, evaluate on
# the rest, then replay the policy's actions through PortfolioBacktesterRL.
# NOTE(review): this cell repeats the CWCO workflow with one extra statement
# (the DummyVecEnv wrapper below); consider turning the workflow into a
# function parameterized by ticker instead of copying the cell.

# --- Parameters for this run -------------------------------------------------
TICKER = "CWCO"
DATA_PATH = f"data/processed/{TICKER}_processed_2022.csv"
TRAIN_FRACTION = 0.7   # share of rows (from the start of the file) used for training
EVAL_SEED = 42         # seed reused for evaluation and the inference rollout

# --- Load and prepare data ---------------------------------------------------
data = pd.read_csv(DATA_PATH)
if 'Date' in data.columns:
    # Parse the dates and index by them without mutating the frame in place.
    data = data.assign(Date=pd.to_datetime(data['Date'])).set_index('Date')

# --- Split into train/test (row order is preserved, no shuffling) ------------
split_idx = int(len(data) * TRAIN_FRACTION)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# --- Initialize trainer ------------------------------------------------------
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments; the author's note says feature scaling is handled
# inside prepare_environment (TODO confirm).
env_train, env_test = trainer.prepare_environment(
    df_train,
    df_test,
    reward_func="profit"  # alternatives noted by the author: "sortino", "cvar", "max_drawdown" (TODO confirm against ModelTrainerRL)
)

# --- Train PPO only ----------------------------------------------------------
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# --- Aggregate evaluation ----------------------------------------------------
# NOTE(review): only one seed is supplied, so the 95% CI below collapses to a
# single point. Add more seeds here for a meaningful interval.
seeds = [EVAL_SEED]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

# Wrap the test env in a DummyVecEnv (the form SB3's VecNormalize expects).
# NOTE(review): env_vec is not used anywhere else in this cell; the rollout
# below still steps env_test directly. Remove it or wire it in.
env_vec = DummyVecEnv([lambda: env_test])

print("\n--- Multi-Seed Evaluation Results ---")
# (label, key in metrics_agg, format spec). The CI bounds are printed with the
# same spec as the mean so percentages are not mixed with raw fractions.
summary_rows = [
    ("Total Return", "total_return", ".2%"),
    ("Sharpe Ratio", "sharpe_ratio", ".3f"),
    ("Sortino Ratio", "sortino_ratio", ".3f"),
    ("Max Drawdown", "max_drawdown", ".2%"),
]
for label, key, spec in summary_rows:
    stats = metrics_agg[key]
    ci_low, ci_high = stats['ci95']  # two-element [low, high] interval
    print(f"{label}: {stats['mean']:{spec}} (95% CI: {ci_low:{spec}} to {ci_high:{spec}})")

# --- Single run evaluation (if you only want one) ----------------------------
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=EVAL_SEED)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# --- Save the model and artifacts --------------------------------------------
# NOTE(review): this directory does not depend on TICKER, so every run of this
# workflow writes to the same place and replaces the previous run's artifacts.
trainer.save_models("models/ppo_artifacts")

# --- Inference: roll the trained policy through the test environment ---------
obs, _ = env_test.reset(seed=EVAL_SEED)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))  # keep the first action component as a plain float
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

# --- Run backtest ------------------------------------------------------------
print("Running Backtest...")

# The environment sub-config is passed to the backtester as a single dict.
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=df_test['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30)  # important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for name, value in metrics.items():
    try:
        print(f"{name}: {value:.4f}")
    except (TypeError, ValueError):
        # Non-numeric entries are printed as-is instead of aborting the cell.
        print(f"{name}: {value}")

# Plot
portfolio.plot().show()