In [2]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2 import ModelTrainerRL, TradingEnvRL
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


# Load the project configuration once. The cells below read its
# 'reinforcement_learning' section (and the nested 'environment' settings).
# NOTE(review): the path is relative; presumably it resolves against the
# directory the kernel was started in -- confirm against load_config.
config = load_config("config/config.yaml")

## PPO Agent

AAPL

In [None]:
# Experiment: PPO agent on AAPL using the environment's default reward
# (no reward_func is passed to TradingEnvRL in this cell).
DATA_PATH = 'data/processed/AAPL_processed.csv'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 732, Predictions: 671
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 70.81%
INFO:BacktesterRL:Buy & Hold Return: 70.81%
INFO:BacktesterRL:Outperformance: 0.00%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): 70.8100
Annual Return (%): 33.8100
Sharpe Ratio: 1.0837
Sortino Ratio: 1.6422
Max Drawdown (%): -33.3600
Calmar Ratio: 1.0133
Win Rate (%): 100.0000
Total Trades: 1.0000
Final Value ($): 170807.2800


In [3]:
# Experiment: PPO agent on AAPL with the 'profit' reward.
DATA_PATH = 'data/processed/AAPL_processed.csv'
REWARD_FUNC = 'profit'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()





Data loaded successfully.
Training PPO Agent...



In the future `np.object` will be defined as the corresponding NumPy scalar.

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Training Complete.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 732, Predictions: 671
INFO:BacktesterRL:Running vectorbt simulation...


Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 69.20%
INFO:BacktesterRL:Buy & Hold Return: 70.81%
INFO:BacktesterRL:Outperformance: -1.61%



--- Strategy Performance ---
Total Return (%): 69.2000
Annual Return (%): 33.1200
Sharpe Ratio: 1.1479
Sortino Ratio: 1.7972
Max Drawdown (%): -27.7600
Calmar Ratio: 1.1930
Win Rate (%): 68.9900
Total Trades: 129.0000
Final Value ($): 169197.8200


In [4]:
# Experiment: PPO agent on AAPL with the 'sharpe' reward.
DATA_PATH = 'data/processed/AAPL_processed.csv'
REWARD_FUNC = 'sharpe'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...
Training Complete.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 732, Predictions: 671
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 52.36%
INFO:BacktesterRL:Buy & Hold Return: 70.81%
INFO:BacktesterRL:Outperformance: -18.45%


Running Backtest...

--- Strategy Performance ---
Total Return (%): 52.3600
Annual Return (%): 25.7400
Sharpe Ratio: 0.8954
Sortino Ratio: 1.3485
Max Drawdown (%): -34.6700
Calmar Ratio: 0.7424
Win Rate (%): 73.3300
Total Trades: 15.0000
Final Value ($): 152358.5100


## AAPL 2022

In [5]:
# Experiment: PPO agent on the AAPL 2022 dataset with the 'profit' reward.
DATA_PATH = 'data/processed/AAPL_processed_2022.csv'
REWARD_FUNC = 'profit'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -13.00%
INFO:BacktesterRL:Buy & Hold Return: 12.81%
INFO:BacktesterRL:Outperformance: -25.81%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): -13.0000
Annual Return (%): -20.8000
Sharpe Ratio: -0.7093
Sortino Ratio: -0.9662
Max Drawdown (%): -34.3400
Calmar Ratio: -0.6056
Win Rate (%): 33.0600
Total Trades: 121.0000
Final Value ($): 86999.2000


In [6]:
# Experiment: PPO agent on the AAPL 2022 dataset with the 'sharpe' reward.
DATA_PATH = 'data/processed/AAPL_processed_2022.csv'
REWARD_FUNC = 'sharpe'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -5.05%
INFO:BacktesterRL:Buy & Hold Return: 12.81%
INFO:BacktesterRL:Outperformance: -17.86%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): -5.0500
Annual Return (%): -8.3100
Sharpe Ratio: -0.0283
Sortino Ratio: -0.0385
Max Drawdown (%): -32.0100
Calmar Ratio: -0.2597
Win Rate (%): 38.3300
Total Trades: 120.0000
Final Value ($): 94947.6900


## CWCO (`CWCO_processed_2022.csv` and `CWCO_processed.csv`)

In [7]:
# Experiment: PPO agent on the CWCO 2022 dataset with the 'profit' reward.
DATA_PATH = 'data/processed/CWCO_processed_2022.csv'
REWARD_FUNC = 'profit'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 32.68%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 0.00%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): 32.6800
Annual Return (%): 60.5600
Sharpe Ratio: 1.4765
Sortino Ratio: 2.2145
Max Drawdown (%): -21.4800
Calmar Ratio: 2.8195
Win Rate (%): 100.0000
Total Trades: 1.0000
Final Value ($): 132682.1900


In [8]:
# Experiment: PPO agent on CWCO (CWCO_processed.csv) with the 'profit' reward.
DATA_PATH = 'data/processed/CWCO_processed.csv'
REWARD_FUNC = 'profit'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...
Training Complete.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 732, Predictions: 671
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -72.14%
INFO:BacktesterRL:Buy & Hold Return: 144.10%
INFO:BacktesterRL:Outperformance: -216.24%


Running Backtest...

--- Strategy Performance ---
Total Return (%): -72.1400
Annual Return (%): -50.1000
Sharpe Ratio: -1.3459
Sortino Ratio: -1.7300
Max Drawdown (%): -75.5900
Calmar Ratio: -0.6628
Win Rate (%): 25.2900
Total Trades: 348.0000
Final Value ($): 27862.2800


In [10]:
# Experiment: PPO agent on the CWCO 2022 dataset with the 'cvar' reward.
DATA_PATH = 'data/processed/CWCO_processed_2022.csv'
REWARD_FUNC = 'cvar'  # rewards used in this notebook: 'profit', 'sharpe', 'cvar'

# Load Data
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Fail loudly: continuing would either hit a NameError or, worse, silently
    # reuse a `data` frame left in the kernel by a previously executed cell.
    raise FileNotFoundError(
        f"Error: Data file not found: {DATA_PATH}. Check path."
    ) from exc

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

# The backtest below indexes test_df['close']; check before the long training run.
if 'close' not in data.columns:
    raise KeyError(f"'close' column missing from {DATA_PATH}")
print("Data loaded successfully.")

# Split Train/Test: first 70% of rows for training, the remainder for testing
# (assumes the rows are already in chronological order).
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# Initialize Trainer
trainer = ModelTrainerRL(config['reinforcement_learning'])
env_params = config['reinforcement_learning']['environment']

# One set of environment settings shared by the train and test environments.
# Previously the fallback initial_balance differed (10000 for training vs
# 100000 for testing) whenever the config did not define it.
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 100000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=REWARD_FUNC,
)

print("Training PPO Agent...")
env_train = TradingEnvRL(train_df, **env_kwargs)
result = trainer.train_ppo(env_train)
model = result['model']
print("Training Complete.")

print("Generating Agent Predictions on Test Data...")
env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual Inference Loop: step the test environment with the deterministic
# policy and record the first component of each action.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# Run Backtest
print("Running Backtest...")
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=env_kwargs['lookback_window'],  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 32.68%
INFO:BacktesterRL:Buy & Hold Return: 32.68%
INFO:BacktesterRL:Outperformance: 0.00%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): 32.6800
Annual Return (%): 60.5600
Sharpe Ratio: 1.4765
Sortino Ratio: 2.2145
Max Drawdown (%): -21.4800
Calmar Ratio: 2.8195
Win Rate (%): 100.0000
Total Trades: 1.0000
Final Value ($): 132682.1900
