In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2 import ModelTrainerRL, TradingEnvRL
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config

In [2]:
# Load the project configuration; the experiment cells read their
# 'reinforcement_learning' section from this object.
config_path = "config/config.yaml"
config = load_config(config_path)

### AGX - Profit

In [4]:
# Experiment: train a SAC agent on AGX with the 'profit' reward, then backtest
# its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/AGX_processed_2022.csv'
reward_func = 'profit'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training SAC for 150000 timesteps


Data loaded successfully.
Training SAC Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -71.56%
INFO:BacktesterRL:Buy & Hold Return: 123.31%
INFO:BacktesterRL:Outperformance: -194.87%



--- Strategy Performance ---
Total Return (%): -71.5600
Annual Return (%): -87.8200
Sharpe Ratio: -2.0385
Sortino Ratio: -2.6670
Max Drawdown (%): -79.4700
Calmar Ratio: -1.1051
Win Rate (%): 11.7600
Total Trades: 119.0000
Final Value ($): 28439.1300


### NEE - Profit

In [3]:
# Experiment: train a SAC agent on NEE with the 'profit' reward, then backtest
# its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/NEE_processed_2022.csv'
reward_func = 'profit'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()





Data loaded successfully.
Training SAC Agent...



In the future `np.object` will be defined as the corresponding NumPy scalar.

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:src.models.model_trainer_rl_v2:Training SAC for 150000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 75.01%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 46.99%



--- Strategy Performance ---
Total Return (%): 75.0100
Annual Return (%): 155.2400
Sharpe Ratio: 3.6110
Sortino Ratio: 7.5062
Max Drawdown (%): -10.3200
Calmar Ratio: 15.0404
Win Rate (%): 56.9400
Total Trades: 144.0000
Final Value ($): 175006.0100


### CWCO - Sharpe

In [None]:
# Experiment: train a SAC agent on CWCO with the 'sharpe' reward, then backtest
# its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/CWCO_processed_2022.csv'
reward_func = 'sharpe'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()





Data loaded successfully.
Training SAC Agent...



In the future `np.object` will be defined as the corresponding NumPy scalar.

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:src.models.model_trainer_rl_v2:Training SAC for 150000 timesteps


### CWCO - Sortino

In [None]:
# Experiment: train a SAC agent on CWCO with the 'sortino' reward, then backtest
# its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/CWCO_processed_2022.csv'
reward_func = 'sortino'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()

### CWCO - CVaR

In [None]:
# Experiment: train a SAC agent on CWCO with the 'cvar' reward, then backtest
# its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/CWCO_processed_2022.csv'
reward_func = 'cvar'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()

### CWCO - Drawdown

In [None]:
# Experiment: train a SAC agent on CWCO with the 'max_drawdown' reward, then
# backtest its deterministic actions on the held-out tail of the same series.
data_path = 'data/processed/CWCO_processed_2022.csv'
reward_func = 'max_drawdown'  # single source of truth for both train and test envs

# --- Load data ---
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Abort the cell: carrying on would either raise NameError on `data` or,
    # worse, silently reuse a frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Split Train/Test: first 70% of rows for training, the rest for testing ---
# (assumes the CSV rows are already in chronological order — TODO confirm)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Train ---
trainer = ModelTrainerRL(config['reinforcement_learning'])

print("Training SAC Agent...")

# Environment settings are read once and shared by the train and test
# environments so the two cannot drift apart.
env_params = config['reinforcement_learning']['environment']
env_kwargs = dict(
    initial_balance=env_params.get('initial_balance', 10000),
    commission=env_params.get('commission', 0.001),
    lookback_window=env_params.get('lookback_window', 30),
    reward_func=reward_func,
)

env_train = TradingEnvRL(train_df, **env_kwargs)

result = trainer.train_sac(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the test split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(test_df, **env_kwargs)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the test environment with the policy's
# deterministic action until the episode terminates or is truncated.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Keep only the first action component (presumably the single position
    # weight — confirm against TradingEnvRL's action space).
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

# The backtester is configured with the same environment settings as the envs.
backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    # Same lookback as the environment; per the original note this matters for
    # aligning predictions with prices.
    lookback_window=env_kwargs['lookback_window'],
)
# Return value is kept for inspection; it is not used further in this cell.
comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()