In [5]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
from src.models.model_trainer_rl_v3 import ModelTrainerRL, TradingEnvRL
from src.utils.config_loader import load_config
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL

In [None]:


# Load configuration
config = load_config("config/config.yaml")

# Load and prepare data
data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)

# Split into train/test
split_idx = int(len(data) * 0.7)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# Initialize trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments (handles feature scaling automatically)
env_train, env_test = trainer.prepare_environment(
    df_train, 
    df_test, 
    reward_func="profit"  # or "profit", "sortino", "cvar", "max_drawdown"
)

# Train PPO only
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# Evaluate with multiple seeds (recommended)
seeds = [42, 43, 44]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
print(f"Total Return: {metrics_agg['total_return']['mean']:.2%} ± {metrics_agg['total_return']['ci95']}")
print(f"Sharpe Ratio: {metrics_agg['sharpe_ratio']['mean']:.3f} ± {metrics_agg['sharpe_ratio']['ci95']}")
print(f"Sortino Ratio: {metrics_agg['sortino_ratio']['mean']:.3f} ± {metrics_agg['sortino_ratio']['ci95']}")
print(f"Max Drawdown: {metrics_agg['max_drawdown']['mean']:.2%} ± {metrics_agg['max_drawdown']['ci95']}")

# Single run evaluation (if you only want one)
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=42)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# Save the model and artifacts
trainer.save_models("models/ppo_artifacts")

# Use model for inference (generate actions on new data)
obs, _ = env_test.reset(seed=42)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

  data['Date'] = pd.to_datetime(data['Date'])


Training PPO...


  if not hasattr(np, "object"):
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Training Complete!

--- Multi-Seed Evaluation Results ---
Total Return: 244.36% ± [2.443639553017653, 2.443639553017653]
Sharpe Ratio: 5.222 ± [5.222246433001317, 5.222246433001317]
Sortino Ratio: 11.490 ± [11.490175520955288, 11.490175520955288]
Max Drawdown: -3.83% ± [-0.03825000486683916, -0.03825000486683916]

--- Single Run Evaluation ---
Total Return: 244.36%
Sharpe Ratio: 5.222

Generated 218 trading actions


In [None]:
import pandas as pd
from src.models.model_trainer_rl_v3 import ModelTrainerRL, TradingEnvRL
from src.utils.config_loader import load_config

# Load configuration
config = load_config("config/config.yaml")

# Load and prepare data
data = pd.read_csv('data/processed/CWCO_processed_2022.csv')
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)

# Split into train/test
split_idx = int(len(data) * 0.7)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# Initialize trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments (handles feature scaling automatically)
env_train, env_test = trainer.prepare_environment(
    df_train, 
    df_test, 
    reward_func="profit"  # or "profit", "sortino", "cvar", "max_drawdown"
)

# Train PPO only
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# Evaluate with multiple seeds (recommended)
seeds = [42, 43, 44]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
print(f"Total Return: {metrics_agg['total_return']['mean']:.2%} ± {metrics_agg['total_return']['ci95']}")
print(f"Sharpe Ratio: {metrics_agg['sharpe_ratio']['mean']:.3f} ± {metrics_agg['sharpe_ratio']['ci95']}")
print(f"Sortino Ratio: {metrics_agg['sortino_ratio']['mean']:.3f} ± {metrics_agg['sortino_ratio']['ci95']}")
print(f"Max Drawdown: {metrics_agg['max_drawdown']['mean']:.2%} ± {metrics_agg['max_drawdown']['ci95']}")

# Single run evaluation (if you only want one)
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=42)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# Save the model and artifacts
trainer.save_models("models/ppo_artifacts")

# Use model for inference (generate actions on new data)
obs, _ = env_test.reset(seed=42)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

# --- CELL 5: RUN BACKTEST ---
print("Running Backtest...")

# Unpack the config dictionary using **
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params) 

portfolio = backtester.run_backtest(
    price_data=df_test['close'], 
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30) # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()
# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()

In [6]:
# Unpack the config dictionary using **
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params) 

portfolio = backtester.run_backtest(
    price_data=df_test['close'], 
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30) # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()
# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()

INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 181.86%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 149.17%



--- Strategy Performance ---
Total Return (%): 181.8600
Annual Return (%): 466.8700
Sharpe Ratio: 5.1687
Sortino Ratio: 12.1400
Max Drawdown (%): -4.7400
Calmar Ratio: 98.5591
Win Rate (%): 58.6700
Total Trades: 75.0000
Final Value ($): 281855.7300


In [7]:

# Load and prepare data
data = pd.read_csv('data/processed/AWK_processed_2022.csv')
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)

# Split into train/test
split_idx = int(len(data) * 0.7)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# Initialize trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments (handles feature scaling automatically)
env_train, env_test = trainer.prepare_environment(
    df_train, 
    df_test, 
    reward_func="profit"  # or "profit", "sortino", "cvar", "max_drawdown"
)

# Train PPO only
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# Evaluate with multiple seeds (recommended)
seeds = [42, 43, 44]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
print(f"Total Return: {metrics_agg['total_return']['mean']:.2%} ± {metrics_agg['total_return']['ci95']}")
print(f"Sharpe Ratio: {metrics_agg['sharpe_ratio']['mean']:.3f} ± {metrics_agg['sharpe_ratio']['ci95']}")
print(f"Sortino Ratio: {metrics_agg['sortino_ratio']['mean']:.3f} ± {metrics_agg['sortino_ratio']['ci95']}")
print(f"Max Drawdown: {metrics_agg['max_drawdown']['mean']:.2%} ± {metrics_agg['max_drawdown']['ci95']}")

# Single run evaluation (if you only want one)
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=42)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# Save the model and artifacts
trainer.save_models("models/ppo_artifacts")

# Use model for inference (generate actions on new data)
obs, _ = env_test.reset(seed=42)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

# --- CELL 5: RUN BACKTEST ---
print("Running Backtest...")

# Unpack the config dictionary using **
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params) 

portfolio = backtester.run_backtest(
    price_data=df_test['close'], 
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30) # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()
# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v3:Fitted and applied feature scaling (StandardScaler)
INFO:src.models.model_trainer_rl_v3:Prepared training env with 648 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Prepared testing env with 279 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Training PPO for 200000 timesteps


Training PPO...


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 175.54%, Sharpe: 5.379, Sortino: 9.510, Max DD: -4.22%
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 175.54%, Sharpe: 5.379, Sortino: 9.510, Max DD: -4.22%


Training Complete!


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 175.54%, Sharpe: 5.379, Sortino: 9.510, Max DD: -4.22%
INFO:src.models.model_trainer_rl_v3:Multi-seed evaluation (n=3): Sharpe 5.379 ± CI(5.379, 5.379)
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 175.54%, Sharpe: 5.379, Sortino: 9.510, Max DD: -4.22%
INFO:src.models.model_trainer_rl_v3:Saved feature scaler to models/ppo_artifacts\feature_scaler.joblib
INFO:src.models.model_trainer_rl_v3:Saved results summary to models/ppo_artifacts\rl_results_summary.json
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 135.10%
INFO:BacktesterRL:Buy & Hold Return: 8.28%
INFO:BacktesterRL:Outperformance: 126.82%



--- Multi-Seed Evaluation Results ---
Total Return: 175.54% ± [1.7554005155130772, 1.7554005155130772]
Sharpe Ratio: 5.379 ± [5.37904420309044, 5.37904420309044]
Sortino Ratio: 9.510 ± [9.510054357167595, 9.510054357167595]
Max Drawdown: -4.22% ± [-0.0421935577622325, -0.0421935577622325]

--- Single Run Evaluation ---
Total Return: 175.54%
Sharpe Ratio: 5.379

Generated 218 trading actions
Running Backtest...

--- Strategy Performance ---
Total Return (%): 135.1000
Annual Return (%): 318.4000
Sharpe Ratio: 5.4808
Sortino Ratio: 10.9507
Max Drawdown (%): -6.5300
Calmar Ratio: 48.7576
Win Rate (%): 64.5200
Total Trades: 62.0000
Final Value ($): 235101.0000


In [8]:

# Load and prepare data
data = pd.read_csv('data/processed/AAPL_processed_2022.csv')
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)

# Split into train/test
split_idx = int(len(data) * 0.7)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# Initialize trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments (handles feature scaling automatically)
env_train, env_test = trainer.prepare_environment(
    df_train, 
    df_test, 
    reward_func="profit"  # or "profit", "sortino", "cvar", "max_drawdown"
)

# Train PPO only
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# Evaluate with multiple seeds (recommended)
seeds = [42, 43, 44]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
print(f"Total Return: {metrics_agg['total_return']['mean']:.2%} ± {metrics_agg['total_return']['ci95']}")
print(f"Sharpe Ratio: {metrics_agg['sharpe_ratio']['mean']:.3f} ± {metrics_agg['sharpe_ratio']['ci95']}")
print(f"Sortino Ratio: {metrics_agg['sortino_ratio']['mean']:.3f} ± {metrics_agg['sortino_ratio']['ci95']}")
print(f"Max Drawdown: {metrics_agg['max_drawdown']['mean']:.2%} ± {metrics_agg['max_drawdown']['ci95']}")

# Single run evaluation (if you only want one)
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=42)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# Save the model and artifacts
trainer.save_models("models/ppo_artifacts")

# Use model for inference (generate actions on new data)
obs, _ = env_test.reset(seed=42)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

# --- CELL 5: RUN BACKTEST ---
print("Running Backtest...")

# Unpack the config dictionary using **
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params) 

portfolio = backtester.run_backtest(
    price_data=df_test['close'], 
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30) # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()
# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v3:Fitted and applied feature scaling (StandardScaler)
INFO:src.models.model_trainer_rl_v3:Prepared training env with 648 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Prepared testing env with 279 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Training PPO for 200000 timesteps


Training PPO...


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 302.20%, Sharpe: 5.244, Sortino: 9.991, Max DD: -8.65%
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 302.20%, Sharpe: 5.244, Sortino: 9.991, Max DD: -8.65%


Training Complete!


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 302.20%, Sharpe: 5.244, Sortino: 9.991, Max DD: -8.65%
INFO:src.models.model_trainer_rl_v3:Multi-seed evaluation (n=3): Sharpe 5.244 ± CI(5.244, 5.244)
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 302.20%, Sharpe: 5.244, Sortino: 9.991, Max DD: -8.65%
INFO:src.models.model_trainer_rl_v3:Saved feature scaler to models/ppo_artifacts\feature_scaler.joblib
INFO:src.models.model_trainer_rl_v3:Saved results summary to models/ppo_artifacts\rl_results_summary.json
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 250.75%
INFO:BacktesterRL:Buy & Hold Return: 12.81%
INFO:BacktesterRL:Outperformance: 237.94%



--- Multi-Seed Evaluation Results ---
Total Return: 302.20% ± [3.0219798482917777, 3.0219798482917777]
Sharpe Ratio: 5.244 ± [5.244263009753673, 5.244263009753673]
Sortino Ratio: 9.991 ± [9.991156063671909, 9.991156063671909]
Max Drawdown: -8.65% ± [-0.08648914799215279, -0.08648914799215271]

--- Single Run Evaluation ---
Total Return: 302.20%
Sharpe Ratio: 5.244

Generated 218 trading actions
Running Backtest...

--- Strategy Performance ---
Total Return (%): 250.7500
Annual Return (%): 717.5000
Sharpe Ratio: 5.6958
Sortino Ratio: 14.5169
Max Drawdown (%): -5.5000
Calmar Ratio: 130.4601
Win Rate (%): 70.0000
Total Trades: 60.0000
Final Value ($): 350745.6500


In [9]:

# Load and prepare data
data = pd.read_csv('data/processed/BAC_processed_2022.csv')
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)

# Split into train/test
split_idx = int(len(data) * 0.7)
df_train = data.iloc[:split_idx].copy()
df_test = data.iloc[split_idx:].copy()

# Initialize trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])

# Prepare environments (handles feature scaling automatically)
env_train, env_test = trainer.prepare_environment(
    df_train, 
    df_test, 
    reward_func="profit"  # or "profit", "sortino", "cvar", "max_drawdown"
)

# Train PPO only
print("Training PPO...")
result = trainer.train_ppo(env_train)
model = result["model"]
print("Training Complete!")

# Evaluate with multiple seeds (recommended)
seeds = [42, 43, 44]
metrics_agg = trainer.evaluate_over_seeds(model, env_test, seeds, algorithm="PPO")

print("\n--- Multi-Seed Evaluation Results ---")
print(f"Total Return: {metrics_agg['total_return']['mean']:.2%} ± {metrics_agg['total_return']['ci95']}")
print(f"Sharpe Ratio: {metrics_agg['sharpe_ratio']['mean']:.3f} ± {metrics_agg['sharpe_ratio']['ci95']}")
print(f"Sortino Ratio: {metrics_agg['sortino_ratio']['mean']:.3f} ± {metrics_agg['sortino_ratio']['ci95']}")
print(f"Max Drawdown: {metrics_agg['max_drawdown']['mean']:.2%} ± {metrics_agg['max_drawdown']['ci95']}")

# Single run evaluation (if you only want one)
metrics_single = trainer.evaluate_model(model, env_test, algorithm="PPO", seed=42)
print("\n--- Single Run Evaluation ---")
print(f"Total Return: {metrics_single['total_return']:.2%}")
print(f"Sharpe Ratio: {metrics_single['sharpe_ratio']:.3f}")

# Save the model and artifacts
trainer.save_models("models/ppo_artifacts")

# Use model for inference (generate actions on new data)
obs, _ = env_test.reset(seed=42)
done = False
actions = []
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(float(action[0]))
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

print(f"\nGenerated {len(actions)} trading actions")

# --- CELL 5: RUN BACKTEST ---
print("Running Backtest...")

# Unpack the config dictionary using **
env_params = config['reinforcement_learning']['environment']

backtester = PortfolioBacktesterRL(env_params) 

portfolio = backtester.run_backtest(
    price_data=df_test['close'], 
    predicted_weights=np.array(actions),
    lookback_window=env_params.get('lookback_window', 30) # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()
# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v3:Fitted and applied feature scaling (StandardScaler)
INFO:src.models.model_trainer_rl_v3:Prepared training env with 648 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Prepared testing env with 279 steps (scaled=True)
INFO:src.models.model_trainer_rl_v3:Training PPO for 200000 timesteps


Training PPO...


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 83.15%, Sharpe: 2.654, Sortino: 5.345, Max DD: -11.98%
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 83.15%, Sharpe: 2.654, Sortino: 5.345, Max DD: -11.98%


Training Complete!


INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 83.15%, Sharpe: 2.654, Sortino: 5.345, Max DD: -11.98%
INFO:src.models.model_trainer_rl_v3:Multi-seed evaluation (n=3): Sharpe 2.654 ± CI(2.654, 2.654)
INFO:src.models.model_trainer_rl_v3:Evaluation - Return: 83.15%, Sharpe: 2.654, Sortino: 5.345, Max DD: -11.98%
INFO:src.models.model_trainer_rl_v3:Saved feature scaler to models/ppo_artifacts\feature_scaler.joblib
INFO:src.models.model_trainer_rl_v3:Saved results summary to models/ppo_artifacts\rl_results_summary.json
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 88.40%
INFO:BacktesterRL:Buy & Hold Return: 14.96%
INFO:BacktesterRL:Outperformance: 73.44%



--- Multi-Seed Evaluation Results ---
Total Return: 83.15% ± [0.8315418682795332, 0.8315418682795332]
Sharpe Ratio: 2.654 ± [2.653860712908524, 2.653860712908524]
Sortino Ratio: 5.345 ± [5.345100215598574, 5.345100215598574]
Max Drawdown: -11.98% ± [-0.11982130495069536, -0.11982130495069536]

--- Single Run Evaluation ---
Total Return: 83.15%
Sharpe Ratio: 2.654

Generated 218 trading actions
Running Backtest...

--- Strategy Performance ---
Total Return (%): 88.4000
Annual Return (%): 188.7900
Sharpe Ratio: 3.4324
Sortino Ratio: 6.8280
Max Drawdown (%): -11.7200
Calmar Ratio: 16.1030
Win Rate (%): 50.0000
Total Trades: 36.0000
Final Value ($): 188402.0500
